|
|
1.1 ! root 1: /* ! 2: * See Aho, Corasick Comm ACM 18:6, and Hoffman and O'Donell JACM 29:1 ! 3: * for detail of the matching algorithm ! 4: */ ! 5: ! 6: /* turn this flag on if your machine has a fast byte string zero operation */ ! 7: /*#define BZERO 1*/ ! 8: int mtDebug = 0; ! 9: int treedebug = 0; ! 10: extern int _machstep(); ! 11: extern short int mtStart; ! 12: extern short int mtTable[], mtAccept[], mtMap[], mtPaths[], mtPathStart[]; ! 13: #ifdef LINE_XREF ! 14: extern short int mtLine[]; ! 15: #endif ! 16: extern NODEPTR mtGetNodes(), mtAction(); ! 17: extern skeleton *_allocskel(); ! 18: extern __match *_allocmatch(); ! 19: extern __partial *_allocpartial(); ! 20: ! 21: #define MPBLOCKSIZ 3000 ! 22: __match *_mpblock[MPBLOCKSIZ], **_mpbtop; ! 23: ! 24: /* ! 25: * See sym.h in the preprocessor for details ! 26: * Basically used to support eh $%n$ construct. ! 27: */ ! 28: __match ** ! 29: _getleaves (mp, skp) ! 30: register __match *mp; register skeleton *skp; ! 31: { ! 32: skeleton *stack[MAXDEPTH]; ! 33: skeleton **stp = stack; ! 34: register short int *sip = &mtPaths[mtPathStart[mp->tree]]; ! 35: register __match **mmp = _mpbtop; ! 36: __match **mmp2 = mmp; ! 37: if((_mpbtop += *sip++ + 1) > &_mpblock[MPBLOCKSIZ]) { ! 38: printf ("ich: %d\n", _mpbtop-_mpblock); ! 39: assert(0); ! 40: } ! 41: ! 42: for(;;) ! 43: switch(*sip++) { ! 44: case ePUSH: ! 45: *stp++ = skp; ! 46: skp = skp->leftchild; ! 47: break; ! 48: ! 49: case eNEXT: ! 50: skp = skp->sibling; ! 51: break; ! 52: ! 53: case eEVAL: ! 54: if ((mp = skp->succ[M_DETAG(*sip++)])==NULL) abort(); ! 55: *mmp++ = mp; ! 56: break; ! 57: ! 58: case ePOP: ! 59: skp = *--stp; ! 60: break; ! 61: ! 62: case eSTOP: ! 63: *mmp = NULL; ! 64: return (mmp2); ! 65: } ! 66: } ! 67: ! 68: _evalleaves (mpp) ! 69: register __match **mpp; ! 70: { ! 71: for (; *mpp!=NULL; mpp++) { ! 72: register __match *mp = *mpp; ! 73: _do (mp->skel, mp, 1); ! 74: } ! 75: } ! 76: ! 77: ! 78: _do(sp, winner, evalall) ! 79: skeleton *sp; register __match *winner; int evalall; ! 80: { ! 81: register __match **mpp; ! 82: register skeleton *skel = winner->skel; ! 83: if(winner==NULL) { ! 84: _prskel(sp, 0); ! 85: puts ("no winner"); ! 86: return; ! 87: } ! 88: if(winner->mode==xDEFER || (evalall && winner->mode!=xTOPDOWN)) ! 89: REORDER (winner->lleaves); ! 90: mtSetNodes (skel->parent, skel->nson, ! 91: skel->root=mtAction (winner->tree, winner->lleaves, sp)); ! 92: } ! 93: ! 94: skeleton * ! 95: _walk(sp, ostate) ! 96: register skeleton *sp; ! 97: int ostate; ! 98: { ! 99: int state, nstate, nson; ! 100: register __partial *pp; ! 101: register __match *mp, *mp2; ! 102: register skeleton *nsp, *lastchild = NULL; ! 103: NODEPTR son, root; ! 104: ! 105: root = sp->root; ! 106: nson = 1; sp->mincost = INFINITY; ! 107: state = _machstep (ostate, root, mtValue(root)); ! 108: ! 109: while((son = mtGetNodes(root, nson))!=NULL) { ! 110: nstate = _machstep (state, NULL, MV_BRANCH(nson)); ! 111: nsp = _allocskel(); ! 112: nsp->root = son; ! 113: nsp->parent = root; ! 114: nsp->nson = nson; ! 115: _walk(nsp, nstate); ! 116: if(COSTLESS(nsp->mincost, INFINITY)) { ! 117: assert (nsp->winner->mode==xREWRITE); ! 118: if (mtDebug || treedebug) { ! 119: printf ("rewrite\n"); ! 120: _prskel (nsp, 0); ! 121: } ! 122: _do(nsp, nsp->winner, 0); ! 123: _freeskel(nsp); ! 124: continue; ! 125: } ! 126: _merge (sp, nsp); ! 127: if (lastchild==NULL) sp->leftchild = nsp; ! 128: else lastchild->sibling = nsp; ! 129: lastchild = nsp; ! 130: nson++; ! 131: } ! 132: ! 133: for (pp = sp->partial; pp < &sp->partial[sp->treecnt]; pp++) ! 134: if (pp->bits&01==1) { ! 135: mp = _allocmatch(); ! 136: mp->tree = pp->treeno; ! 137: _addmatches (ostate, sp, mp); ! 138: } ! 139: if(son==NULL && nson==1) ! 140: _closure (state, ostate, sp); ! 141: ! 142: sp->rightchild = lastchild; ! 143: if (root==NULL) { ! 144: COST c; __match *win; int i; nsp = sp->leftchild; ! 145: c = INFINITY; ! 146: win = NULL; ! 147: for (i = 0; i < MAXLABELS; i++) { ! 148: mp = nsp->succ[i]; ! 149: if (mp!=NULL && COSTLESS (mp->cost, c)) ! 150: { c = mp->cost; win = mp; } ! 151: } ! 152: if (mtDebug || treedebug) ! 153: _prskel (nsp, 0); ! 154: _do (nsp, win, 0); ! 155: } ! 156: if (mtDebug) ! 157: _prskel (sp, 0); ! 158: return(sp); ! 159: } ! 160: ! 161: static short int _nodetab[MAXNDVAL], _labeltab[MAXLABELS]; ! 162: ! 163: /* ! 164: * Convert the start state which has a large branching factor into ! 165: * a index table. This must be called before the matcher is used. ! 166: */ ! 167: _matchinit() ! 168: { ! 169: short int *sp; ! 170: struct pair { short int val, where } *pp; ! 171: for (sp=_nodetab; sp < &_nodetab[MAXNDVAL]; sp++) *sp = HANG; ! 172: for (sp=_labeltab; sp < &_labeltab[MAXLABELS]; sp++) *sp = HANG; ! 173: sp = &mtTable[mtStart]; ! 174: assert (*sp==TABLE); ! 175: for (pp = (struct pair *) ++sp; pp->val!=EOF; pp++) { ! 176: if (MI_NODE(pp->val)) ! 177: _nodetab[M_DETAG(pp->val)] = pp->where; ! 178: else if (MI_LABEL(pp->val)) ! 179: _labeltab[M_DETAG(pp->val)] = pp->where; ! 180: } ! 181: } ! 182: ! 183: int ! 184: _machstep(state, root, input) ! 185: short int state, input; ! 186: NODEPTR root; ! 187: { ! 188: register short int *stp = &mtTable[state]; ! 189: int start = 0; ! 190: if (state==HANG) ! 191: return (input==(MV_BRANCH(1)) ? mtStart : HANG); ! 192: rescan: ! 193: if (stp==&mtTable[mtStart]) { ! 194: if (MI_NODE(input)) return(_nodetab[M_DETAG(input)]); ! 195: if (MI_LABEL(input)) return(_labeltab[M_DETAG(input)]); ! 196: } ! 197: ! 198: for(;;) { ! 199: if(*stp==ACCEPT) stp += 2; ! 200: ! 201: if(*stp==TABLE) { ! 202: stp++; ! 203: while(*stp!=EOT) { ! 204: if(input==*stp) { ! 205: return(*++stp); ! 206: } ! 207: stp += 2; ! 208: } ! 209: stp++; ! 210: } ! 211: if(*stp!=FAIL) { ! 212: if (start) return (HANG); ! 213: else { stp = &mtTable[mtStart]; start = 1; goto rescan;} ! 214: } else { ! 215: stp++; ! 216: stp = &mtTable[*stp]; ! 217: goto rescan; ! 218: } ! 219: } ! 220: } ! 221: ! 222: _addmatches(ostate, sp, np) ! 223: int ostate; ! 224: register skeleton *sp; ! 225: register __match *np; ! 226: { ! 227: int label; ! 228: int state; ! 229: register __match *mp; ! 230: ! 231: label = mtMap[np->tree]; ! 232: ! 233: /* ! 234: * this is a very poor substitute for good design of the DFA. ! 235: * What we need is a special case that allows any label to be accepted ! 236: * by the start state but we don't want the start state to recognize ! 237: * them after a failure. ! 238: */ ! 239: state = _machstep(ostate, NULL, MV_LABEL(label)); ! 240: if (ostate!=mtStart && state==HANG) { ! 241: _freematch(np); ! 242: return; ! 243: } ! 244: ! 245: np->lleaves = _getleaves (np, sp); ! 246: np->skel = sp; ! 247: if((np->mode = mtEvalCost(np, np->lleaves, sp))==xABORT) ! 248: { ! 249: _freematch(np); ! 250: return; ! 251: } ! 252: ! 253: /* ! 254: if(np->mode==xREWRITE && COSTLESS(np->cost, sp->mincost)) { ! 255: sp->mincost = np->cost; ! 256: sp->winner = np; ! 257: } ! 258: */ ! 259: if ((mp = sp->succ[label])!=NULL) { ! 260: if (!COSTLESS (np->cost, mp->cost)) ! 261: { _freematch(np); return; } ! 262: else _freematch(mp); ! 263: } ! 264: if(COSTLESS(np->cost,sp->mincost)) { ! 265: if(np->mode==xREWRITE) { ! 266: sp->mincost = np->cost; sp->winner = np; } ! 267: else { sp->mincost = INFINITY; sp->winner = NULL; } ! 268: } ! 269: sp->succ[label] = np; ! 270: _closure(state, ostate, sp); ! 271: } ! 272: ! 273: _closure(state, ostate, skp) ! 274: int state, ostate; ! 275: skeleton *skp; ! 276: { ! 277: register struct ap { short int tree, depth; } *ap; ! 278: short int *sp = &mtTable[state]; ! 279: register __match *mp; ! 280: ! 281: if(state==HANG || *sp!=ACCEPT) return(NULL); ! 282: ! 283: for(ap = (struct ap *) &mtAccept[*++sp]; ap->tree!=-1; ap++) ! 284: if (ap->depth==0) { ! 285: mp = _allocmatch(); ! 286: mp->tree = ap->tree; ! 287: _addmatches (ostate, skp, mp); ! 288: } else { ! 289: register __partial *pp, *lim = &skp->partial[skp->treecnt]; ! 290: for(pp=skp->partial; pp < lim; pp++) ! 291: if(pp->treeno==ap->tree) ! 292: break; ! 293: if(pp==lim) { ! 294: skp->treecnt++; ! 295: pp->treeno = ap->tree; ! 296: pp->bits = (1<<(ap->depth)); ! 297: } else pp->bits |= (1<<(ap->depth)); ! 298: } ! 299: } ! 300: ! 301: _merge (old, new) ! 302: skeleton *old, *new; ! 303: { ! 304: register __partial *op = old->partial, *np = new->partial; ! 305: int nson = new->nson; ! 306: register __partial *lim = np + new->treecnt; ! 307: if (nson==1) { ! 308: old->treecnt = new->treecnt; ! 309: for(; np < lim; op++, np++) { ! 310: op->treeno = np->treeno; ! 311: op->bits = np->bits/2; ! 312: } ! 313: } else { ! 314: __partial *newer = _allocpartial(); ! 315: register __partial *newerp = newer; ! 316: register int cnt; ! 317: lim = op+old->treecnt; ! 318: for(cnt=new->treecnt; cnt-- ; np++) { ! 319: for(op = old->partial; op < lim; op++) ! 320: if (op->treeno == np->treeno) { ! 321: newerp->treeno = op->treeno; ! 322: newerp++->bits = op->bits & (np->bits/2); ! 323: break; ! 324: } ! 325: } ! 326: _freepartial(old->partial); ! 327: old->partial = newer; ! 328: old->treecnt = newerp-newer; ! 329: } ! 330: } ! 331: ! 332: /* Save and Allocate Matches */ ! 333: #define BLKF 100 ! 334: __match *freep = NULL; ! 335: static int _count = 0; ! 336: static __match *_blockp = NULL; ! 337: #ifdef CHECKMEM ! 338: int x_matches, f_matches; ! 339: #endif ! 340: ! 341: __match * ! 342: _allocmatch() ! 343: { ! 344: __match *mp; ! 345: ! 346: if(freep!=NULL) { ! 347: mp = freep; ! 348: freep = ((__match *)((struct freeb *) freep)->next); ! 349: #ifdef CHECKMEM ! 350: f_matches--; ! 351: #endif ! 352: } else { ! 353: if(_count==0) { ! 354: _blockp = (__match *) malloc (BLKF*sizeof (__match)); ! 355: _count = BLKF; ! 356: #ifdef CHECKMEM ! 357: x_matches += BLKF; ! 358: #endif ! 359: } ! 360: mp = _blockp++; ! 361: _count--; ! 362: } ! 363: return(mp); ! 364: } ! 365: ! 366: _freematch(mp) ! 367: __match *mp; ! 368: { ! 369: ((struct freeb *) mp)->next = (struct freeb *) freep; ! 370: freep = mp; ! 371: #ifdef CHECKMEM ! 372: f_matches++; ! 373: #endif ! 374: } ! 375: ! 376: static __partial *pfreep = NULL; ! 377: static int pcount = 0; ! 378: static __partial *pblock = NULL; ! 379: #ifdef CHECKMEM ! 380: static int x_partials, f_partials; ! 381: #endif ! 382: ! 383: __partial * ! 384: _allocpartial() ! 385: { ! 386: __partial *pp; ! 387: if (pfreep!=NULL) { ! 388: pp = pfreep; ! 389: pfreep = *(__partial **) pp; ! 390: #ifdef CHECKMEM ! 391: f_partials--; ! 392: #endif ! 393: } else { ! 394: if (pcount==0) { ! 395: pblock=(__partial *)malloc(BLKF*MAXTREES*sizeof(__partial)); ! 396: pcount = BLKF; ! 397: #ifdef CHECKMEM ! 398: x_partials += BLKF; ! 399: #endif ! 400: } ! 401: pp = pblock; ! 402: pblock += MAXTREES; ! 403: pcount--; ! 404: } ! 405: return(pp); ! 406: } ! 407: ! 408: _freepartial(pp) ! 409: __partial *pp; ! 410: { ! 411: * (__partial **)pp = pfreep; ! 412: pfreep = pp; ! 413: #ifdef CHECKMEM ! 414: f_partials++; ! 415: #endif ! 416: } ! 417: ! 418: ! 419: /* storage management */ ! 420: static skeleton *sfreep = NULL; ! 421: static int scount = 0; ! 422: static skeleton * sblock = NULL; ! 423: ! 424: skeleton * ! 425: _allocskel() ! 426: { ! 427: skeleton *sp; ! 428: int i; ! 429: if(sfreep!=NULL) { ! 430: sp = sfreep; ! 431: sfreep = sp->sibling; ! 432: } else { ! 433: if(scount==0) { ! 434: sblock = (skeleton *) malloc (BLKF * sizeof (skeleton)); ! 435: scount = BLKF; ! 436: } ! 437: sp = sblock++; ! 438: scount--; ! 439: } ! 440: sp->sibling = NULL; sp->leftchild = NULL; ! 441: for (i=0; i < MAXLABELS; sp->succ[i++] = NULL); ! 442: sp->treecnt = 0; ! 443: sp->partial = _allocpartial(); ! 444: return(sp); ! 445: } ! 446: ! 447: _freeskel(sp) ! 448: skeleton *sp; ! 449: { ! 450: int i; ! 451: __match *mp; ! 452: if(sp==NULL) ! 453: return; ! 454: if(sp->leftchild) ! 455: _freeskel(sp->leftchild); ! 456: if(sp->sibling) ! 457: _freeskel(sp->sibling); ! 458: _freepartial (sp->partial); ! 459: for (i=0; i < MAXLABELS; i++) ! 460: if ((mp = sp->succ[i])!=NULL) _freematch (mp); ! 461: sp->sibling = sfreep; ! 462: sfreep = sp; ! 463: } ! 464: ! 465: _match() ! 466: { ! 467: skeleton *sp; ! 468: sp = _allocskel(); ! 469: sp->root = NULL; ! 470: _mpbtop = _mpblock; ! 471: _freeskel(_walk(sp, HANG)); ! 472: #ifdef CHECKMEM ! 473: if(f_matches+_count!=x_matches) { ! 474: printf(";#m %d %d %d\n", f_matches, _count, x_matches); ! 475: assert(0); ! 476: } ! 477: if(f_partials+pcount!=x_partials) { ! 478: printf(";#p %d %d %d\n", f_partials, pcount, x_partials); ! 479: assert(0); ! 480: } ! 481: #endif ! 482: } ! 483: ! 484: NODEPTR ! 485: _mtG(root,i) ! 486: NODEPTR root; ! 487: int i; ! 488: { ! 489: int *p = &i; ! 490: while(*p!=-1) ! 491: root = mtGetNodes(root, *p++); ! 492: return(root); ! 493: } ! 494: ! 495: /* diagnostic routines */ ! 496: ! 497: _prskel (skp, lvl) ! 498: skeleton *skp; int lvl; ! 499: { ! 500: int i; __match *mp; ! 501: __partial *pp; ! 502: if(skp==NULL) return; ! 503: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); } ! 504: printf ("###\n"); ! 505: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); } ! 506: for (i = 0; i < MAXLABELS; i++) ! 507: if ((mp=skp->succ[i])!=NULL) ! 508: #ifdef LINE_XREF ! 509: printf ("[%d<%d> %d]", mp->tree, ! 510: mtLine[mp->tree], mp->cost); ! 511: #else ! 512: printf ("[%d %d]", mp->tree, mp->cost); ! 513: #endif ! 514: putchar('\n'); ! 515: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); } ! 516: for (i = 0, pp=skp->partial; i < skp->treecnt; i++, pp++) ! 517: #ifdef LINE_XREF ! 518: printf("(%d<%d> %x)", pp->treeno, mtLine[pp->treeno], ! 519: pp->bits); ! 520: #else ! 521: printf ("(%d %x)", pp->treeno, pp->bits); ! 522: #endif ! 523: putchar('\n'); ! 524: fflush(stdout); ! 525: _prskel (skp->leftchild, lvl+2); ! 526: _prskel (skp->sibling, lvl); ! 527: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.