|
|
1.1 ! root 1: /* ! 2: * See Aho, Corasick Comm ACM 18:6, and Hoffman and O'Donell JACM 29:1 ! 3: * for detail of the matching algorithm ! 4: */ ! 5: ! 6: /* turn this flag on if your machine has a fast byte string zero operation */ ! 7: /*#define BZERO 1*/ ! 8: int mtDebug = 0; ! 9: int treedebug = 0; ! 10: extern int _machstep(); ! 11: extern short int mtStart; ! 12: extern short int mtTable[], mtAccept[], mtMap[], mtPaths[], mtPathStart[]; ! 13: #ifdef LINE_XREF ! 14: extern short int mtLine[]; ! 15: #endif ! 16: extern NODEPTR mtGetNodes(), mtAction(); ! 17: extern skeleton *_allocskel(); ! 18: extern __match *_allocmatch(); ! 19: extern __partial *_allocpartial(); ! 20: ! 21: #define MPBLOCKSIZ 3000 ! 22: __match *_mpblock[MPBLOCKSIZ], **_mpbtop; ! 23: ! 24: /* ! 25: * See sym.h in the preprocessor for details ! 26: * Basically used to support eh $%n$ construct. ! 27: */ ! 28: __match ** ! 29: _getleaves (mp, skp) ! 30: register __match *mp; register skeleton *skp; ! 31: { ! 32: skeleton *stack[MAXDEPTH]; ! 33: skeleton **stp = stack; ! 34: register short int *sip = &mtPaths[mtPathStart[mp->tree]]; ! 35: register __match **mmp = _mpbtop; ! 36: __match **mmp2 = mmp; ! 37: if((_mpbtop += *sip++ + 1) > &_mpblock[MPBLOCKSIZ]) { ! 38: printf ("ich: %d\n", _mpbtop-_mpblock); ! 39: assert(0); ! 40: } ! 41: ! 42: for(;;) ! 43: switch(*sip++) { ! 44: case ePUSH: ! 45: *stp++ = skp; ! 46: skp = skp->leftchild; ! 47: break; ! 48: ! 49: case eNEXT: ! 50: skp = skp->sibling; ! 51: break; ! 52: ! 53: case eEVAL: ! 54: if ((mp = skp->succ[M_DETAG(*sip++)])==NULL) abort(); ! 55: *mmp++ = mp; ! 56: break; ! 57: ! 58: case ePOP: ! 59: skp = *--stp; ! 60: break; ! 61: ! 62: case eSTOP: ! 63: *mmp = NULL; ! 64: return (mmp2); ! 65: } ! 66: } ! 67: ! 68: _evalleaves (mpp) ! 69: register __match **mpp; ! 70: { ! 71: for (; *mpp!=NULL; mpp++) { ! 72: register __match *mp = *mpp; ! 73: _do (mp->skel, mp, 1); ! 74: } ! 75: } ! 76: ! 77: ! 78: _do(sp, winner, evalall) ! 79: skeleton *sp; register __match *winner; int evalall; ! 80: { ! 81: register __match **mpp; ! 82: register skeleton *skel = winner->skel; ! 83: if(winner==NULL) { ! 84: _prskel(sp, 0); ! 85: puts ("no winner"); ! 86: return; ! 87: } ! 88: if(winner->mode==xDEFER || (evalall && winner->mode!=xTOPDOWN)) ! 89: REORDER (winner->lleaves); ! 90: mtSetNodes (skel->parent, skel->nson, ! 91: skel->root=mtAction (winner->tree, winner->lleaves, sp)); ! 92: } ! 93: ! 94: skeleton * ! 95: _walk(sp, ostate) ! 96: register skeleton *sp; ! 97: int ostate; ! 98: { ! 99: int state, nstate, nson; ! 100: register __partial *pp; ! 101: register __match *mp, *mp2; ! 102: register skeleton *nsp, *lastchild = NULL; ! 103: NODEPTR son, root; ! 104: ! 105: root = sp->root; ! 106: nson = 1; sp->mincost = INFINITY; ! 107: state = _machstep (ostate, root, mtValue(root)); ! 108: ! 109: while((son = mtGetNodes(root, nson))!=NULL) { ! 110: nstate = _machstep (state, NULL, MV_BRANCH(nson)); ! 111: nsp = _allocskel(); ! 112: nsp->root = son; ! 113: nsp->parent = root; ! 114: nsp->nson = nson; ! 115: _walk(nsp, nstate); ! 116: if(COSTLESS(nsp->mincost, INFINITY)) { ! 117: assert (nsp->winner->mode==xREWRITE); ! 118: if (mtDebug || treedebug) { ! 119: printf ("rewrite\n"); ! 120: _prskel (nsp, 0); ! 121: } ! 122: _do(nsp, nsp->winner, 0); ! 123: _freeskel(nsp); ! 124: continue; ! 125: } ! 126: _merge (sp, nsp); ! 127: if (lastchild==NULL) sp->leftchild = nsp; ! 128: else lastchild->sibling = nsp; ! 129: lastchild = nsp; ! 130: nson++; ! 131: } ! 132: ! 133: for (pp = sp->partial; pp < &sp->partial[sp->treecnt]; pp++) ! 134: if (pp->bits&01==1) { ! 135: mp = _allocmatch(); ! 136: mp->tree = pp->treeno; ! 137: _addmatches (ostate, sp, mp); ! 138: } ! 139: if(son==NULL && nson==1) ! 140: _closure (state, ostate, sp); ! 141: ! 142: sp->rightchild = lastchild; ! 143: if (root==NULL) { ! 144: COST c; __match *win; int i; nsp = sp->leftchild; ! 145: c = INFINITY; ! 146: win = NULL; ! 147: for (i = 0; i < MAXLABELS; i++) { ! 148: mp = nsp->succ[i]; ! 149: if (mp!=NULL && COSTLESS (mp->cost, c)) ! 150: { c = mp->cost; win = mp; } ! 151: } ! 152: if (mtDebug || treedebug) ! 153: _prskel (nsp, 0); ! 154: _do (nsp, win, 0); ! 155: } ! 156: if (mtDebug) ! 157: _prskel (sp, 0); ! 158: return(sp); ! 159: } ! 160: ! 161: static short int _nodetab[MAXNDVAL], _labeltab[MAXLABELS]; ! 162: ! 163: /* ! 164: * Convert the start state which has a large branching factor into ! 165: * a index table. This must be called before the matcher is used. ! 166: */ ! 167: _matchinit() ! 168: { ! 169: short int *sp; ! 170: struct pair { short int val, where } *pp; ! 171: for (sp=_nodetab; sp < &_nodetab[MAXNDVAL]; sp++) *sp = HANG; ! 172: for (sp=_labeltab; sp < &_labeltab[MAXLABELS]; sp++) *sp = HANG; ! 173: sp = &mtTable[mtStart]; ! 174: assert (*sp==TABLE); ! 175: for (pp = (struct pair *) ++sp; pp->val!=EOF; pp++) { ! 176: if (MI_NODE(pp->val)) ! 177: _nodetab[M_DETAG(pp->val)] = pp->where; ! 178: else if (MI_LABEL(pp->val)) ! 179: _labeltab[M_DETAG(pp->val)] = pp->where; ! 180: } ! 181: } ! 182: ! 183: int ! 184: _machstep(state, root, input) ! 185: short int state, input; ! 186: NODEPTR root; ! 187: { ! 188: register short int *stp = &mtTable[state]; ! 189: int start = 0; ! 190: if (state==HANG) ! 191: return (input==(MV_BRANCH(1)) ? mtStart : HANG); ! 192: rescan: ! 193: if (stp==&mtTable[mtStart]) { ! 194: if (MI_NODE(input)) return(_nodetab[M_DETAG(input)]); ! 195: if (MI_LABEL(input)) return(_labeltab[M_DETAG(input)]); ! 196: } ! 197: ! 198: for(;;) { ! 199: if(*stp==ACCEPT) stp += 2; ! 200: ! 201: if(*stp==TABLE) { ! 202: stp++; ! 203: while(*stp!=EOT) { ! 204: if(input==*stp) { ! 205: return(*++stp); ! 206: } ! 207: stp += 2; ! 208: } ! 209: stp++; ! 210: } ! 211: if(*stp!=FAIL) { ! 212: if (start) return (HANG); ! 213: else { stp = &mtTable[mtStart]; start = 1; goto rescan;} ! 214: } else { ! 215: stp++; ! 216: stp = &mtTable[*stp]; ! 217: goto rescan; ! 218: } ! 219: } ! 220: } ! 221: ! 222: _addmatches(ostate, sp, np) ! 223: int ostate; ! 224: register skeleton *sp; ! 225: register __match *np; ! 226: { ! 227: int label; ! 228: int state, qstate; ! 229: register __match *mp; ! 230: static short int quick[128][MAXLABELS]; ! 231: static short int qtag[128][MAXLABELS]; ! 232: ! 233: label = mtMap[np->tree]; ! 234: ! 235: /* ! 236: * The following lines replace the line: ! 237: * ! 238: * state = _machstep(ostate, NULL, MV_LABEL(label)); ! 239: * ! 240: * Statistics show that a large percentage (approx 2/3) of calls to ! 241: * _machstep occur in this function. The lines below attempt ! 242: * to reduce the number of these calls by using an unsophisticated ! 243: * but apparently adequate caching scheme. ! 244: */ ! 245: qstate = ((ostate>>2)+2)&0177; ! 246: if(ostate != qtag[qstate][label]) { ! 247: state = _machstep(ostate, NULL, MV_LABEL(label)); ! 248: quick[qstate][label] = state; ! 249: qtag[qstate][label] = ostate; ! 250: } else state = quick[qstate][label]; ! 251: ! 252: /* ! 253: * this is a very poor substitute for good design of the DFA. ! 254: * What we need is a special case that allows any label to be accepted ! 255: * by the start state but we don't want the start state to recognize ! 256: * them after a failure. ! 257: */ ! 258: if (ostate!=mtStart && state==HANG) { ! 259: _freematch(np); ! 260: return; ! 261: } ! 262: ! 263: np->lleaves = _getleaves (np, sp); ! 264: np->skel = sp; ! 265: if((np->mode = mtEvalCost(np, np->lleaves, sp))==xABORT) ! 266: { ! 267: _freematch(np); ! 268: return; ! 269: } ! 270: ! 271: if ((mp = sp->succ[label])!=NULL) { ! 272: if (!COSTLESS (np->cost, mp->cost)) ! 273: { _freematch(np); return; } ! 274: else _freematch(mp); ! 275: } ! 276: if(COSTLESS(np->cost,sp->mincost)) { ! 277: if(np->mode==xREWRITE) { ! 278: sp->mincost = np->cost; sp->winner = np; } ! 279: else { sp->mincost = INFINITY; sp->winner = NULL; } ! 280: } ! 281: sp->succ[label] = np; ! 282: _closure(state, ostate, sp); ! 283: } ! 284: ! 285: _closure(state, ostate, skp) ! 286: int state, ostate; ! 287: skeleton *skp; ! 288: { ! 289: register struct ap { short int tree, depth; } *ap; ! 290: short int *sp = &mtTable[state]; ! 291: register __match *mp; ! 292: ! 293: if(state==HANG || *sp!=ACCEPT) return(NULL); ! 294: ! 295: for(ap = (struct ap *) &mtAccept[*++sp]; ap->tree!=-1; ap++) ! 296: if (ap->depth==0) { ! 297: mp = _allocmatch(); ! 298: mp->tree = ap->tree; ! 299: _addmatches (ostate, skp, mp); ! 300: } else { ! 301: register __partial *pp, *lim = &skp->partial[skp->treecnt]; ! 302: for(pp=skp->partial; pp < lim; pp++) ! 303: if(pp->treeno==ap->tree) ! 304: break; ! 305: if(pp==lim) { ! 306: skp->treecnt++; ! 307: pp->treeno = ap->tree; ! 308: pp->bits = (1<<(ap->depth)); ! 309: } else pp->bits |= (1<<(ap->depth)); ! 310: } ! 311: } ! 312: ! 313: _merge (old, new) ! 314: skeleton *old, *new; ! 315: { ! 316: register __partial *op = old->partial, *np = new->partial; ! 317: int nson = new->nson; ! 318: register __partial *lim = np + new->treecnt; ! 319: if (nson==1) { ! 320: old->treecnt = new->treecnt; ! 321: for(; np < lim; op++, np++) { ! 322: op->treeno = np->treeno; ! 323: op->bits = np->bits/2; ! 324: } ! 325: } else { ! 326: __partial *newer = _allocpartial(); ! 327: register __partial *newerp = newer; ! 328: register int cnt; ! 329: lim = op+old->treecnt; ! 330: for(cnt=new->treecnt; cnt-- ; np++) { ! 331: for(op = old->partial; op < lim; op++) ! 332: if (op->treeno == np->treeno) { ! 333: newerp->treeno = op->treeno; ! 334: newerp++->bits = op->bits & (np->bits/2); ! 335: break; ! 336: } ! 337: } ! 338: _freepartial(old->partial); ! 339: old->partial = newer; ! 340: old->treecnt = newerp-newer; ! 341: } ! 342: } ! 343: ! 344: /* Save and Allocate Matches */ ! 345: #define BLKF 100 ! 346: __match *freep = NULL; ! 347: static int _count = 0; ! 348: static __match *_blockp = NULL; ! 349: #ifdef CHECKMEM ! 350: int x_matches, f_matches; ! 351: #endif ! 352: ! 353: __match * ! 354: _allocmatch() ! 355: { ! 356: __match *mp; ! 357: ! 358: if(freep!=NULL) { ! 359: mp = freep; ! 360: freep = ((__match *)((struct freeb *) freep)->next); ! 361: #ifdef CHECKMEM ! 362: f_matches--; ! 363: #endif ! 364: } else { ! 365: if(_count==0) { ! 366: _blockp = (__match *) malloc (BLKF*sizeof (__match)); ! 367: _count = BLKF; ! 368: #ifdef CHECKMEM ! 369: x_matches += BLKF; ! 370: #endif ! 371: } ! 372: mp = _blockp++; ! 373: _count--; ! 374: } ! 375: return(mp); ! 376: } ! 377: ! 378: _freematch(mp) ! 379: __match *mp; ! 380: { ! 381: ((struct freeb *) mp)->next = (struct freeb *) freep; ! 382: freep = mp; ! 383: #ifdef CHECKMEM ! 384: f_matches++; ! 385: #endif ! 386: } ! 387: ! 388: static __partial *pfreep = NULL; ! 389: static int pcount = 0; ! 390: static __partial *pblock = NULL; ! 391: #ifdef CHECKMEM ! 392: static int x_partials, f_partials; ! 393: #endif ! 394: ! 395: __partial * ! 396: _allocpartial() ! 397: { ! 398: __partial *pp; ! 399: if (pfreep!=NULL) { ! 400: pp = pfreep; ! 401: pfreep = *(__partial **) pp; ! 402: #ifdef CHECKMEM ! 403: f_partials--; ! 404: #endif ! 405: } else { ! 406: if (pcount==0) { ! 407: pblock=(__partial *)malloc(BLKF*MAXTREES*sizeof(__partial)); ! 408: pcount = BLKF; ! 409: #ifdef CHECKMEM ! 410: x_partials += BLKF; ! 411: #endif ! 412: } ! 413: pp = pblock; ! 414: pblock += MAXTREES; ! 415: pcount--; ! 416: } ! 417: return(pp); ! 418: } ! 419: ! 420: _freepartial(pp) ! 421: __partial *pp; ! 422: { ! 423: * (__partial **)pp = pfreep; ! 424: pfreep = pp; ! 425: #ifdef CHECKMEM ! 426: f_partials++; ! 427: #endif ! 428: } ! 429: ! 430: ! 431: /* storage management */ ! 432: static skeleton *sfreep = NULL; ! 433: static int scount = 0; ! 434: static skeleton * sblock = NULL; ! 435: ! 436: skeleton * ! 437: _allocskel() ! 438: { ! 439: skeleton *sp; ! 440: int i; ! 441: if(sfreep!=NULL) { ! 442: sp = sfreep; ! 443: sfreep = sp->sibling; ! 444: } else { ! 445: if(scount==0) { ! 446: sblock = (skeleton *) malloc (BLKF * sizeof (skeleton)); ! 447: scount = BLKF; ! 448: } ! 449: sp = sblock++; ! 450: scount--; ! 451: } ! 452: sp->sibling = NULL; sp->leftchild = NULL; ! 453: for (i=0; i < MAXLABELS; sp->succ[i++] = NULL); ! 454: sp->treecnt = 0; ! 455: sp->partial = _allocpartial(); ! 456: return(sp); ! 457: } ! 458: ! 459: _freeskel(sp) ! 460: skeleton *sp; ! 461: { ! 462: int i; ! 463: __match *mp; ! 464: if(sp==NULL) ! 465: return; ! 466: if(sp->leftchild) ! 467: _freeskel(sp->leftchild); ! 468: if(sp->sibling) ! 469: _freeskel(sp->sibling); ! 470: _freepartial (sp->partial); ! 471: for (i=0; i < MAXLABELS; i++) ! 472: if ((mp = sp->succ[i])!=NULL) _freematch (mp); ! 473: sp->sibling = sfreep; ! 474: sfreep = sp; ! 475: } ! 476: ! 477: _match() ! 478: { ! 479: skeleton *sp; ! 480: sp = _allocskel(); ! 481: sp->root = NULL; ! 482: _mpbtop = _mpblock; ! 483: _freeskel(_walk(sp, HANG)); ! 484: #ifdef CHECKMEM ! 485: if(f_matches+_count!=x_matches) { ! 486: printf(";#m %d %d %d\n", f_matches, _count, x_matches); ! 487: assert(0); ! 488: } ! 489: if(f_partials+pcount!=x_partials) { ! 490: printf(";#p %d %d %d\n", f_partials, pcount, x_partials); ! 491: assert(0); ! 492: } ! 493: #endif ! 494: } ! 495: ! 496: NODEPTR ! 497: _mtG(root,i) ! 498: NODEPTR root; ! 499: int i; ! 500: { ! 501: int *p = &i; ! 502: while(*p!=-1) ! 503: root = mtGetNodes(root, *p++); ! 504: return(root); ! 505: } ! 506: ! 507: /* diagnostic routines */ ! 508: ! 509: _prskel (skp, lvl) ! 510: skeleton *skp; int lvl; ! 511: { ! 512: int i; __match *mp; ! 513: __partial *pp; ! 514: if(skp==NULL) return; ! 515: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); } ! 516: printf ("###\n"); ! 517: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); } ! 518: for (i = 0; i < MAXLABELS; i++) ! 519: if ((mp=skp->succ[i])!=NULL) ! 520: #ifdef LINE_XREF ! 521: printf ("[%d<%d> %d]", mp->tree, ! 522: mtLine[mp->tree], mp->cost); ! 523: #else ! 524: printf ("[%d %d]", mp->tree, mp->cost); ! 525: #endif ! 526: putchar('\n'); ! 527: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); } ! 528: for (i = 0, pp=skp->partial; i < skp->treecnt; i++, pp++) ! 529: #ifdef LINE_XREF ! 530: printf("(%d<%d> %x)", pp->treeno, mtLine[pp->treeno], ! 531: pp->bits); ! 532: #else ! 533: printf ("(%d %x)", pp->treeno, pp->bits); ! 534: #endif ! 535: putchar('\n'); ! 536: fflush(stdout); ! 537: _prskel (skp->leftchild, lvl+2); ! 538: _prskel (skp->sibling, lvl); ! 539: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.