|
|
1.1 ! root 1: #include "re.h" ! 2: #include "lre.h" ! 3: #include "hdr.h" ! 4: ! 5: #define DEBUG ! 6: ! 7: static unsigned char *eg_slowmatch(Br *, unsigned char *, unsigned char *, int); ! 8: static unsigned char *wholeb, *wholee; ! 9: static unsigned char *start[10]; ! 10: static int len[10]; ! 11: static void undobr(Br *); /* undo group assignements */ ! 12: ! 13: eg_match(register re_re *r, register unsigned char *b, register unsigned char *e, unsigned char **rb, unsigned char **re) ! 14: { ! 15: int i, ret; ! 16: ! 17: #ifdef DEBUG ! 18: if(TRACE(2)){ ! 19: PR "eg_match(%d->%d, %d, %d)\n", (int)r, (int)r->br, (int)b, (int)e); ! 20: if(r->br) ! 21: eg_brpr(r->br); ! 22: } ! 23: #endif ! 24: if((rb == 0) != (re == 0)){ ! 25: re_error("must supply both or none of group pointers"); ! 26: return(0); ! 27: } ! 28: if(r->backref || r->parens || rb){ ! 29: wholeb = e; ! 30: for(i = 1; i < 10; i++) ! 31: start[i] = 0; ! 32: if(r->br == 0) ! 33: egbr(r); ! 34: ret = (wholee = eg_slowmatch(r->br, b, e, RE_BEG|RE_END)) != 0; ! 35: if(rb && ret){ ! 36: rb[0] = wholeb; ! 37: re[0] = wholee; ! 38: for(i = 1; i < 10; i++){ ! 39: rb[i] = start[i]; ! 40: re[i] = rb[i]+len[i]; ! 41: } ! 42: #ifdef DEBUG ! 43: if(TRACE(1)){ ! 44: PR "eg_match groups:"); ! 45: for(i = 0; i < 10; i++) ! 46: if(rb[i])PR " %d: %d@%d", i, rb[i], re[i]-rb[i]); ! 47: PR "\n"); ! 48: } ! 49: #endif ! 50: } ! 51: #ifdef DEBUG ! 52: else { ! 53: if(TRACE(1)){ ! 54: PR "eg_match groups: [%d - %d]\n", wholeb, wholee); ! 55: for(i = 1; i < 10; i++) ! 56: if(start[i])PR " %d: %d@%d", i, start[i], len[i]); ! 57: PR "\n"); ! 58: } ! 59: } ! 60: #endif ! 61: } else ! 62: ret = eg_quickmatch(r, b, e, RE_BEG|RE_END) != 0; ! 63: return(ret); ! 64: } ! 65: ! 66: static unsigned char * ! 67: eg_slowmatch(Br *br, unsigned char *b, unsigned char *e, int endpts) ! 68: { ! 69: int i; ! 70: unsigned char *me, *end; ! 71: unsigned char *beg, *lbeg, *llbeg, *rbeg, *rend, *lm, *rm; ! 72: #ifdef DEBUG ! 73: char buf[EPRINTSIZE]; ! 74: static id = 1; ! 75: int myid = id++; ! 76: #endif ! 77: ! 78: #define BOFF(x) ((x)? (endpts&~RE_BEG):endpts) ! 79: #define EOFF(x) ((x)? (endpts&~RE_END):endpts) ! 80: ! 81: if(br == 0) /* nothing to match - we won! */ ! 82: return(b); ! 83: #ifdef DEBUG ! 84: if(TRACE(3)) ! 85: PR "slowmatch(br=%d, [b,e]=%d,%d id=%d, endpt=%d)\n", br, b, e, myid, endpts); ! 86: #endif ! 87: switch(br->type) ! 88: { ! 89: case br_br: ! 90: i = br->group; ! 91: #ifdef DEBUG ! 92: if(TRACE(3)) ! 93: PR "br[%d]: %d,%d b=%d,e=%d\n", i, (int)start[i], len[i], b, e); ! 94: #endif ! 95: if(start[i] == 0) ! 96: return(0); ! 97: if((len[i] > e-b) || memcmp((char *)b, (char *)start[i], len[i])) ! 98: return(0); ! 99: if(wholeb > b) wholeb = b; ! 100: #ifdef DEBUG ! 101: if(TRACE(3)) ! 102: PR "br[%d]: matched\n", i); ! 103: #endif ! 104: return(b+len[i]); ! 105: ! 106: case br_re: ! 107: #ifdef DEBUG ! 108: if(TRACE(3)){ ! 109: eg_epr(br->e, buf, 0); ! 110: PR "matching RE(%s)@%d against '", buf, br->r); ! 111: WR((char *)b, e-b); ! 112: PR "' id=%d\n", myid); ! 113: } ! 114: #endif ! 115: if((me = eg_lquickmatch(br->r, b, e, endpts)) == 0) ! 116: return(0); ! 117: #ifdef DEBUG ! 118: if(TRACE(3)){ ! 119: PR "--%s matched '", buf); ! 120: WR((char *)b, me-b); ! 121: PR "'[%d %d] id=%d\n", (int)b, (int)me, myid); ! 122: } ! 123: #endif ! 124: if(wholeb > b) ! 125: wholeb = b; ! 126: return(me); ! 127: ! 128: case br_group: ! 129: #ifdef DEBUG ! 130: if(TRACE(3)){ ! 131: PR "matching GROUP%d against '", br->group); ! 132: WR((char *)b, e-b); ! 133: PR "' id=%d\n", myid); ! 134: } ! 135: #endif ! 136: if((me = eg_slowmatch(br->lb, b, e, endpts)) == 0){ ! 137: undobr(br->lb); ! 138: return(0); ! 139: } ! 140: #ifdef DEBUG ! 141: if(TRACE(3)){ ! 142: PR "--G%d matched '", br->group); ! 143: WR((char *)b, me-b); ! 144: PR "'[%d %d]\n", (int)b, (int)me); ! 145: } ! 146: #endif ! 147: if(wholeb > b) ! 148: wholeb = b; ! 149: start[br->group] = b; ! 150: len[br->group] = me-b; ! 151: return(me); ! 152: ! 153: case br_quest: ! 154: #ifdef DEBUG ! 155: if(TRACE(3)){ ! 156: PR "matching BR? against '", buf); ! 157: WR((char *)b, e-b); ! 158: PR "'\n"); ! 159: } ! 160: #endif ! 161: if(lbeg = eg_slowmatch(br->lb, b, e, endpts)){ ! 162: return(lbeg); ! 163: } ! 164: undobr(br->lb); ! 165: return(b); ! 166: ! 167: case br_plus: ! 168: #ifdef DEBUG ! 169: if(TRACE(3)){ ! 170: PR "matching BR+ against '", buf); ! 171: WR((char *)b, e-b); ! 172: PR "' id=%d\n", myid); ! 173: } ! 174: #endif ! 175: if((lbeg = eg_slowmatch(br->lb, b, e, endpts)) == 0){ ! 176: undobr(br->lb); ! 177: return(0); ! 178: } ! 179: llbeg = b; ! 180: while(beg = eg_slowmatch(br->lb, lbeg, e, BOFF(lbeg != b))){ ! 181: llbeg = lbeg, lbeg = beg; ! 182: } ! 183: #ifdef DEBUG ! 184: if(TRACE(3)){ ! 185: PR "--+ matched [%d %d]'", (int)llbeg, (int)lbeg); ! 186: WR((char *)llbeg, lbeg-llbeg); ! 187: PR "' id=%d\n", myid); ! 188: } ! 189: #endif ! 190: return(eg_slowmatch(br->lb, llbeg, e, BOFF(llbeg != b))); ! 191: ! 192: case br_star: ! 193: #ifdef DEBUG ! 194: if(TRACE(3)){ ! 195: PR "matching BR* against '", buf); ! 196: WR((char *)b, e-b); ! 197: PR "'\n"); ! 198: } ! 199: #endif ! 200: llbeg = lbeg = b; ! 201: while(beg = eg_slowmatch(br->lb, lbeg, e, BOFF(lbeg != b))) ! 202: llbeg = lbeg, lbeg = beg; ! 203: #ifdef DEBUG ! 204: if(TRACE(3)){ ! 205: PR "--* matched '"); ! 206: WR((char *)lbeg, lbeg-llbeg); ! 207: PR "'[%d %d]\n", (int)llbeg, (int)lbeg); ! 208: } ! 209: #endif ! 210: if(beg = eg_slowmatch(br->lb, llbeg, e, BOFF(llbeg != b))) ! 211: return(beg); ! 212: undobr(br->lb); ! 213: return(b); ! 214: ! 215: case br_cat: ! 216: #ifdef DEBUG ! 217: if(TRACE(3)){ ! 218: PR "matching BRcat against '", buf); ! 219: WR((char *)b, e-b); ! 220: PR "' id=%d\n", myid); ! 221: } ! 222: #endif ! 223: /* ! 224: this is not so hard. ! 225: we try all possible matches of the left half, ! 226: and record the match that gave the longest ! 227: valid match on the right half ! 228: */ ! 229: rend = 0; ! 230: for(end = e; b <= e; e = beg-1){ ! 231: if((beg = eg_slowmatch(br->lb, b, e, EOFF(e != end))) == 0){ ! 232: break; ! 233: } ! 234: #ifdef DEBUG ! 235: if(TRACE(3)){ ! 236: PR "--cat matched '"); ! 237: WR((char *)b, beg-b); ! 238: PR "'[%d %d] id=%d\n", (int)b, (int)beg, myid); ! 239: } ! 240: #endif ! 241: if((me = eg_slowmatch(br->rb, beg, end, BOFF(beg != b))) == 0){ ! 242: continue; /* no match of right half */ ! 243: } ! 244: #ifdef DEBUG ! 245: if(TRACE(3)){ ! 246: PR "----cat matched '"); ! 247: WR((char *)b, beg-b); ! 248: PR "'[%d %d] id=%d\n", (int)b, (int)beg, myid); ! 249: } ! 250: #endif ! 251: if(me > rend){ ! 252: rend = me; ! 253: rbeg = beg; ! 254: #ifdef DEBUG ! 255: if(TRACE(3)){ ! 256: PR "--++-- cat new max rb=%d re=%d\n", (int)rbeg, (int)rend); ! 257: } ! 258: #endif ! 259: } ! 260: } ! 261: if(rend == 0){ ! 262: undobr(br->lb); ! 263: undobr(br->rb); ! 264: return(0); ! 265: } ! 266: (void)eg_slowmatch(br->lb, b, rbeg, EOFF(rbeg != end)); ! 267: return(eg_slowmatch(br->rb, rbeg, end, BOFF(rbeg != b))); ! 268: ! 269: case br_alt: ! 270: #ifdef DEBUG ! 271: if(TRACE(3)){ ! 272: PR "matching BR| against '", buf); ! 273: WR((char *)b, e-b); ! 274: PR "'\n"); ! 275: } ! 276: #endif ! 277: if(lm = eg_slowmatch(br->lb, b, e, endpts)){ ! 278: #ifdef DEBUG ! 279: if(TRACE(3)){ ! 280: PR "--|L matched '"); ! 281: WR((char *)b, lm-b); ! 282: PR "'[%d %d]\n", (int)b, (int)lm); ! 283: } ! 284: #endif ! 285: } ! 286: if(rm = eg_slowmatch(br->rb, b, e, endpts)){ ! 287: #ifdef DEBUG ! 288: if(TRACE(3)){ ! 289: PR "--|R matched '"); ! 290: WR((char *)b, rm-b); ! 291: PR "'[%d %d]\n", (int)b, (int)rm); ! 292: } ! 293: #endif ! 294: } ! 295: if(lm > rm){ ! 296: undobr(br->rb); ! 297: return(eg_slowmatch(br->lb, b, e, endpts)); ! 298: } else { ! 299: if(rm == 0){ ! 300: undobr(br->lb); ! 301: undobr(br->rb); ! 302: return(0); ! 303: } else { ! 304: undobr(br->lb); ! 305: return(beg); ! 306: } ! 307: } ! 308: } ! 309: abort(); ! 310: return(0); ! 311: } ! 312: ! 313: static void ! 314: undobr(register Br *br) ! 315: { ! 316: switch(br->type) ! 317: { ! 318: case br_group: ! 319: start[br->group] = 0; ! 320: undobr(br->lb); ! 321: break; ! 322: case br_star: ! 323: case br_plus: ! 324: case br_quest: ! 325: undobr(br->lb); ! 326: break; ! 327: case br_cat: ! 328: case br_alt: ! 329: undobr(br->lb); ! 330: undobr(br->rb); ! 331: break; ! 332: } ! 333: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.