|
|
1.1 ! root 1: #include "re.h" ! 2: #include "lre.h" ! 3: #include "hdr.h" ! 4: ! 5: typedef struct Link ! 6: { ! 7: unsigned char l; ! 8: struct Node *node; ! 9: struct Link *next; ! 10: } Link; ! 11: static Link *froot; ! 12: ! 13: #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = 0, t->node = N, t): newlink(c, 0, N)) ! 14: #define ADD(N) if(qtail) qtail = qtail->next = NEW(N);\ ! 15: else qtail = qhead = NEW(N) ! 16: #define DEL() { Link *tmp = qhead;if((qhead = qhead->next) == 0)\ ! 17: qtail = 0; tmp->next = froot; froot = tmp;} ! 18: ! 19: typedef struct Node ! 20: { ! 21: short out; ! 22: short d; ! 23: short shift1; ! 24: short shift2; ! 25: long id; ! 26: struct Node **tab; ! 27: Link *alts; ! 28: struct Node *fail; ! 29: } Node; ! 30: ! 31: static Link *newlink(re_cw*, int, Node*); ! 32: static Node *newnode(re_cw*, int); ! 33: static void zeroroot(Node*, Node*); ! 34: static void shifttab(Node*); ! 35: static void shiftprop(re_cw*,Node*); ! 36: #ifdef DEBUG ! 37: static void cwpr(register Node *n); ! 38: #endif ! 39: ! 40: re_cw * ! 41: re_cwinit(unsigned char *cmap) ! 42: { ! 43: register i; ! 44: register re_cw *c; ! 45: ! 46: if((c = (re_cw *)egmalloc(sizeof *c, "malloc failed in re_cwinit")) == 0) ! 47: return((re_cw *)0); ! 48: c->nodeid = 0; ! 49: c->maxdepth = 0; ! 50: c->mindepth = 10000; ! 51: c->seenerror = 0; ! 52: c->root = newnode(c, 0); ! 53: if(cmap) ! 54: memmove((char *)c->map, (char *)cmap, sizeof c->map); ! 55: else ! 56: for(i = 0; i < sizeof c->map; i++) ! 57: c->map[i] = i; ! 58: return(c); ! 59: } ! 60: ! 61: void ! 62: re_cwadd(register re_cw *c, register unsigned char *s, register unsigned char *e) ! 63: { ! 64: register Node *p, *state; ! 65: register Link *l; ! 66: int depth; ! 67: ! 68: for(state = c->root; s <= --e;){ ! 69: for(l = state->alts; l; l = l->next) ! 70: if(l->l == c->map[*e]) break; ! 71: if(l == 0) ! 72: break; ! 73: else ! 74: state = l->node; ! 75: } ! 76: if(s <= e){ ! 77: depth = state->d+1; ! 78: l = newlink(c, *e--, p = newnode(c, depth++)); ! 79: if((l == 0) || (p == 0)){ ! 80: c->seenerror = 1; ! 81: return; ! 82: } ! 83: l->next = state->alts; ! 84: state->alts = l; ! 85: state = p; ! 86: while(s <= e){ ! 87: state->alts = newlink(c, *e--, p = newnode(c, depth++)); ! 88: if((state->alts == 0) || (p == 0)){ ! 89: c->seenerror = 1; ! 90: return; ! 91: } ! 92: state = p; ! 93: } ! 94: } ! 95: if(c->mindepth > state->d) ! 96: c->mindepth = state->d; ! 97: state->out = 1; ! 98: } ! 99: ! 100: #ifdef DEBUG ! 101: static void ! 102: cwpr(register Node *n) ! 103: { ! 104: register Link *l; ! 105: ! 106: Fprint(1, "%d[%d,%d]: ", n->id, n->shift1, n->shift2); ! 107: for(l = n->alts; l; l = l->next){ ! 108: Fprint(1, "edge from \"%d\" to \"%d\" label {\"%c\"};", ! 109: n->id, l->node->id, l->l); ! 110: if(l->node->out) Fprint(1, " draw \"%d\" as Doublecircle;", l->node->id); ! 111: if(l->node->fail) ! 112: Fprint(1, " edge from \"%d\" to \"%d\" dashed;", l->node->id, l->node->fail->id); ! 113: Fprint(1, "\n"); ! 114: cwpr(l->node); ! 115: } ! 116: } ! 117: #endif ! 118: ! 119: static void ! 120: fail(re_cw *c, Node *root) ! 121: { ! 122: Link *qhead = 0, *qtail = 0; ! 123: register Link *l, *ll; ! 124: register Link *t; ! 125: register Node *state, *r, *s; ! 126: int a; ! 127: ! 128: for(l = root->alts; l; l = l->next){ ! 129: ADD(l->node); ! 130: l->node->fail = root; ! 131: } ! 132: while(qhead){ ! 133: r = qhead->node; ! 134: DEL(); ! 135: for(l = r->alts; l; l = l->next){ ! 136: s = l->node; ! 137: a = l->l; ! 138: ADD(s); ! 139: for(state = r->fail; state;){ ! 140: for(ll = state->alts; ll; ll = ll->next) ! 141: if(ll->l == a) ! 142: break; ! 143: if(ll || (state == root)){ ! 144: if(ll) ! 145: state = ll->node; ! 146: /* ! 147: do it here as only other exit is ! 148: state 0 ! 149: */ ! 150: if(state->out){ ! 151: s->out = 1; ! 152: } ! 153: break; ! 154: } else ! 155: state = state->fail; ! 156: } ! 157: s->fail = state; ! 158: } ! 159: } ! 160: zeroroot(root, root); ! 161: } ! 162: ! 163: static void ! 164: zeroroot(register Node *root, register Node *n) ! 165: { ! 166: register Link *l; ! 167: ! 168: if(n->fail == root) ! 169: n->fail = 0; ! 170: for(l = n->alts; l; l = l->next) ! 171: zeroroot(root, l->node); ! 172: } ! 173: ! 174: static void ! 175: shift(re_cw *c) ! 176: { ! 177: Link *qhead = 0, *qtail = 0; ! 178: register Link *l; ! 179: register Link *t; ! 180: register Node *state, *r, *s; ! 181: int k; ! 182: ! 183: for(k = 0; k < 256; k++) ! 184: c->step[k] = c->mindepth+1; ! 185: c->root->shift1 = 1; ! 186: c->root->shift2 = c->mindepth; ! 187: for(l = c->root->alts; l; l = l->next){ ! 188: /* l->node->shift2 = c->root->shift2;/**/ ! 189: ADD(l->node); ! 190: l->node->fail = 0; ! 191: } ! 192: while(qhead){ ! 193: r = qhead->node; ! 194: DEL(); ! 195: r->shift1 = c->mindepth; ! 196: r->shift2 = c->mindepth; ! 197: if(state = r->fail){ ! 198: do { ! 199: k = r->d - state->d; ! 200: if(k < state->shift1){ ! 201: state->shift1 = k; ! 202: } ! 203: if(r->out && (k < state->shift2)){ ! 204: state->shift2 = k; ! 205: } ! 206: } while(state = state->fail); ! 207: } ! 208: for(l = r->alts; l; l = l->next){ ! 209: s = l->node; ! 210: ADD(s); ! 211: } ! 212: } ! 213: shiftprop(c, c->root); ! 214: shifttab(c->root); ! 215: c->step[0] = 1; ! 216: } ! 217: ! 218: static void ! 219: shifttab(register Node *n) ! 220: { ! 221: register Link *l; ! 222: register Node **nn; ! 223: ! 224: n->tab = nn = (Node **)calloc(256, sizeof(Node *)); ! 225: for(l = n->alts; l; l = l->next) ! 226: nn[l->l] = l->node; ! 227: } ! 228: ! 229: static void ! 230: shiftprop(register re_cw *c, register Node *n) ! 231: { ! 232: register Link *l; ! 233: register Node *nn; ! 234: ! 235: for(l = n->alts; l; l = l->next){ ! 236: if(c->step[l->l] > l->node->d) ! 237: c->step[l->l] = l->node->d; ! 238: nn = l->node; ! 239: if(n->shift2 < nn->shift2) ! 240: nn->shift2 = n->shift2; ! 241: shiftprop(c, nn); ! 242: } ! 243: } ! 244: ! 245: void ! 246: re_cwcomp(re_cw *c) ! 247: { ! 248: if(c->seenerror) ! 249: return; ! 250: fail(c, c->root); ! 251: shift(c); ! 252: #ifdef DEBUG ! 253: cwpr(c->root); ! 254: #endif ! 255: } ! 256: ! 257: re_cwexec(void *re_c, RDFN rdfn, MATCHFN matchfn) ! 258: { ! 259: re_cw *c = (re_cw*)re_c; ! 260: register Node *state; ! 261: register Link *l; ! 262: register unsigned char *sp; ! 263: register unsigned char *e, *s; ! 264: register Node *ostate; ! 265: register k; ! 266: unsigned char *rs, *re; ! 267: unsigned char fake[1]; ! 268: unsigned char mappedsp; ! 269: ! 270: if(c->seenerror) ! 271: return(-1); ! 272: fake[0] = 0; ! 273: state = c->root; ! 274: for(rs = 0; (k = (*rdfn)((char**)&rs,(char**) &re)) > 0;){ ! 275: doneline: ! 276: s = rs+c->mindepth-1; ! 277: e = re; ! 278: while(s < e){ ! 279: /* scan */ ! 280: for(sp = s; ostate = state;){ ! 281: if(state->tab){ ! 282: if((state = state->tab[c->map[*sp]]) == 0) ! 283: goto nomatch; ! 284: } else { ! 285: mappedsp = c->map[*sp]; ! 286: for(l = state->alts; ; l = l->next){ ! 287: if(l == 0) ! 288: goto nomatch; ! 289: if(l->l == mappedsp){ ! 290: state = l->node; ! 291: break; ! 292: } ! 293: } ! 294: } ! 295: #ifdef DEBUG ! 296: print("state %d -0x%x-> state %d (out=%d)\n", ostate->id, *sp&0xFF, state->id, state->out); ! 297: #endif ! 298: if(state->out){ ! 299: unsigned char *bm = sp, *em = s+1; ! 300: if((k = (*matchfn)((char**)&bm, (char**)&em)) <= 0) ! 301: return(k); ! 302: rs = bm; ! 303: re = em; ! 304: state = c->root; ! 305: goto doneline; ! 306: } ! 307: if(--sp < rs){ ! 308: sp = fake; ! 309: break; ! 310: } ! 311: } ! 312: nomatch: ! 313: k = c->step[c->map[*sp]]-ostate->d-1; ! 314: if(k < ostate->shift1) ! 315: k = ostate->shift1; ! 316: if(k > ostate->shift2) ! 317: k = ostate->shift2; ! 318: s += k; ! 319: state = c->root; ! 320: } ! 321: #ifdef DEBUG ! 322: print("end of s<e loop: s=%d e=%d, rs=%d, re=%d\n", s, e, rs, re); ! 323: #endif ! 324: rs = re; /* we have analysed evrything up to here */ ! 325: } ! 326: return(k? -1:0); ! 327: } ! 328: ! 329: static void ! 330: freenode(Node *n) ! 331: { ! 332: register Link *l, *ll; ! 333: ! 334: if(n->tab) ! 335: free((char *)n->tab); ! 336: for(l = n->alts; l; l = ll){ ! 337: ll = l->next; ! 338: freenode(l->node); ! 339: } ! 340: free((char *)n); ! 341: } ! 342: ! 343: void ! 344: re_cwfree(re_cw *cw) ! 345: { ! 346: register Link *l; ! 347: ! 348: while(froot){ ! 349: l = froot->next; ! 350: free((char *)froot); ! 351: froot = l; ! 352: } ! 353: freenode(cw->root); ! 354: free((char *)cw); ! 355: } ! 356: ! 357: static Node * ! 358: newnode(re_cw *c, int d) ! 359: { ! 360: static Node *next = 0, *lim = 0; ! 361: static incr = 1000; ! 362: ! 363: if(next == lim){ ! 364: next = (Node *)malloc(incr*sizeof(Node)); ! 365: if(next == 0){ ! 366: re_error("node malloc fail"); ! 367: return 0; ! 368: } ! 369: lim = next+incr; ! 370: } ! 371: next->d = d; ! 372: if(d > c->maxdepth) c->maxdepth = d; ! 373: next->id = c->nodeid++; ! 374: next->alts = 0; ! 375: next->tab = 0; ! 376: next->out = 0; ! 377: return(next++); ! 378: } ! 379: ! 380: static Link * ! 381: newlink(re_cw *c, int l, Node *n) ! 382: { ! 383: static Link *next = 0, *lim = 0; ! 384: static incr = 1000; ! 385: ! 386: if(next == lim){ ! 387: next = (Link *)malloc(incr*sizeof(Node)); ! 388: if(next == 0){ ! 389: re_error("link malloc fail"); ! 390: return 0; ! 391: } ! 392: lim = next+incr; ! 393: } ! 394: next->l = c->map[l]; ! 395: next->node = n; ! 396: next->next = 0; ! 397: return(next++); ! 398: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.