|
|
1.1 root 1: /*
2: * See Aho, Corasick Comm ACM 18:6, and Hoffman and O'Donell JACM 29:1
3: * for detail of the matching algorithm
4: */
5:
6: /* turn this flag on if your machine has a fast byte string zero operation */
7: /*#define BZERO 1*/
8: int mtDebug = 0;
9: int treedebug = 0;
10: extern int _machstep();
11: extern short int mtStart;
12: extern short int mtTable[], mtAccept[], mtMap[], mtPaths[], mtPathStart[];
13: #ifdef LINE_XREF
14: extern short int mtLine[];
15: #endif
16: extern NODEPTR mtGetNodes(), mtAction();
17: extern skeleton *_allocskel();
18: extern __match *_allocmatch();
19: extern __partial *_allocpartial();
20:
21: #define MPBLOCKSIZ 3000
22: __match *_mpblock[MPBLOCKSIZ], **_mpbtop;
23:
24: /*
25: * See sym.h in the preprocessor for details
26: * Basically used to support eh $%n$ construct.
27: */
28: __match **
29: _getleaves (mp, skp)
30: register __match *mp; register skeleton *skp;
31: {
32: skeleton *stack[MAXDEPTH];
33: skeleton **stp = stack;
34: register short int *sip = &mtPaths[mtPathStart[mp->tree]];
35: register __match **mmp = _mpbtop;
36: __match **mmp2 = mmp;
37: if((_mpbtop += *sip++ + 1) > &_mpblock[MPBLOCKSIZ]) {
38: printf ("ich: %d\n", _mpbtop-_mpblock);
39: assert(0);
40: }
41:
42: for(;;)
43: switch(*sip++) {
44: case ePUSH:
45: *stp++ = skp;
46: skp = skp->leftchild;
47: break;
48:
49: case eNEXT:
50: skp = skp->sibling;
51: break;
52:
53: case eEVAL:
54: if ((mp = skp->succ[M_DETAG(*sip++)])==NULL) abort();
55: *mmp++ = mp;
56: break;
57:
58: case ePOP:
59: skp = *--stp;
60: break;
61:
62: case eSTOP:
63: *mmp = NULL;
64: return (mmp2);
65: }
66: }
67:
68: _evalleaves (mpp)
69: register __match **mpp;
70: {
71: for (; *mpp!=NULL; mpp++) {
72: register __match *mp = *mpp;
73: _do (mp->skel, mp, 1);
74: }
75: }
76:
77:
78: _do(sp, winner, evalall)
79: skeleton *sp; register __match *winner; int evalall;
80: {
81: register __match **mpp;
82: register skeleton *skel = winner->skel;
83: if(winner==NULL) {
84: _prskel(sp, 0);
85: puts ("no winner");
86: return;
87: }
88: if(winner->mode==xDEFER || (evalall && winner->mode!=xTOPDOWN))
89: REORDER (winner->lleaves);
90: mtSetNodes (skel->parent, skel->nson,
91: skel->root=mtAction (winner->tree, winner->lleaves, sp));
92: }
93:
94: skeleton *
95: _walk(sp, ostate)
96: register skeleton *sp;
97: int ostate;
98: {
99: int state, nstate, nson;
100: register __partial *pp;
101: register __match *mp, *mp2;
102: register skeleton *nsp, *lastchild = NULL;
103: NODEPTR son, root;
104:
105: root = sp->root;
106: nson = 1; sp->mincost = INFINITY;
107: state = _machstep (ostate, root, mtValue(root));
108:
109: while((son = mtGetNodes(root, nson))!=NULL) {
110: nstate = _machstep (state, NULL, MV_BRANCH(nson));
111: nsp = _allocskel();
112: nsp->root = son;
113: nsp->parent = root;
114: nsp->nson = nson;
115: _walk(nsp, nstate);
116: if(COSTLESS(nsp->mincost, INFINITY)) {
117: assert (nsp->winner->mode==xREWRITE);
118: if (mtDebug || treedebug) {
119: printf ("rewrite\n");
120: _prskel (nsp, 0);
121: }
122: _do(nsp, nsp->winner, 0);
123: _freeskel(nsp);
124: continue;
125: }
126: _merge (sp, nsp);
127: if (lastchild==NULL) sp->leftchild = nsp;
128: else lastchild->sibling = nsp;
129: lastchild = nsp;
130: nson++;
131: }
132:
133: for (pp = sp->partial; pp < &sp->partial[sp->treecnt]; pp++)
134: if (pp->bits&01==1) {
135: mp = _allocmatch();
136: mp->tree = pp->treeno;
137: _addmatches (ostate, sp, mp);
138: }
139: if(son==NULL && nson==1)
140: _closure (state, ostate, sp);
141:
142: sp->rightchild = lastchild;
143: if (root==NULL) {
144: COST c; __match *win; int i; nsp = sp->leftchild;
145: c = INFINITY;
146: win = NULL;
147: for (i = 0; i < MAXLABELS; i++) {
148: mp = nsp->succ[i];
149: if (mp!=NULL && COSTLESS (mp->cost, c))
150: { c = mp->cost; win = mp; }
151: }
152: if (mtDebug || treedebug)
153: _prskel (nsp, 0);
154: _do (nsp, win, 0);
155: }
156: if (mtDebug)
157: _prskel (sp, 0);
158: return(sp);
159: }
160:
161: static short int _nodetab[MAXNDVAL], _labeltab[MAXLABELS];
162:
163: /*
164: * Convert the start state which has a large branching factor into
165: * a index table. This must be called before the matcher is used.
166: */
167: _matchinit()
168: {
169: short int *sp;
170: struct pair { short int val, where } *pp;
171: for (sp=_nodetab; sp < &_nodetab[MAXNDVAL]; sp++) *sp = HANG;
172: for (sp=_labeltab; sp < &_labeltab[MAXLABELS]; sp++) *sp = HANG;
173: sp = &mtTable[mtStart];
174: assert (*sp==TABLE);
175: for (pp = (struct pair *) ++sp; pp->val!=EOF; pp++) {
176: if (MI_NODE(pp->val))
177: _nodetab[M_DETAG(pp->val)] = pp->where;
178: else if (MI_LABEL(pp->val))
179: _labeltab[M_DETAG(pp->val)] = pp->where;
180: }
181: }
182:
183: int
184: _machstep(state, root, input)
185: short int state, input;
186: NODEPTR root;
187: {
188: register short int *stp = &mtTable[state];
189: int start = 0;
190: if (state==HANG)
191: return (input==(MV_BRANCH(1)) ? mtStart : HANG);
192: rescan:
193: if (stp==&mtTable[mtStart]) {
194: if (MI_NODE(input)) return(_nodetab[M_DETAG(input)]);
195: if (MI_LABEL(input)) return(_labeltab[M_DETAG(input)]);
196: }
197:
198: for(;;) {
199: if(*stp==ACCEPT) stp += 2;
200:
201: if(*stp==TABLE) {
202: stp++;
203: while(*stp!=EOT) {
204: if(input==*stp) {
205: return(*++stp);
206: }
207: stp += 2;
208: }
209: stp++;
210: }
211: if(*stp!=FAIL) {
212: if (start) return (HANG);
213: else { stp = &mtTable[mtStart]; start = 1; goto rescan;}
214: } else {
215: stp++;
216: stp = &mtTable[*stp];
217: goto rescan;
218: }
219: }
220: }
221:
222: _addmatches(ostate, sp, np)
223: int ostate;
224: register skeleton *sp;
225: register __match *np;
226: {
227: int label;
228: int state, qstate;
229: register __match *mp;
230: static short int quick[128][MAXLABELS];
231: static short int qtag[128][MAXLABELS];
232:
233: label = mtMap[np->tree];
234:
235: /*
236: * The following lines replace the line:
237: *
238: * state = _machstep(ostate, NULL, MV_LABEL(label));
239: *
240: * Statistics show that a large percentage (approx 2/3) of calls to
241: * _machstep occur in this function. The lines below attempt
242: * to reduce the number of these calls by using an unsophisticated
243: * but apparently adequate caching scheme.
244: */
245: qstate = ((ostate>>2)+2)&0177;
246: if(ostate != qtag[qstate][label]) {
247: state = _machstep(ostate, NULL, MV_LABEL(label));
248: quick[qstate][label] = state;
249: qtag[qstate][label] = ostate;
250: } else state = quick[qstate][label];
251:
252: /*
253: * this is a very poor substitute for good design of the DFA.
254: * What we need is a special case that allows any label to be accepted
255: * by the start state but we don't want the start state to recognize
256: * them after a failure.
257: */
258: if (ostate!=mtStart && state==HANG) {
259: _freematch(np);
260: return;
261: }
262:
263: np->lleaves = _getleaves (np, sp);
264: np->skel = sp;
265: if((np->mode = mtEvalCost(np, np->lleaves, sp))==xABORT)
266: {
267: _freematch(np);
268: return;
269: }
270:
271: if ((mp = sp->succ[label])!=NULL) {
272: if (!COSTLESS (np->cost, mp->cost))
273: { _freematch(np); return; }
274: else _freematch(mp);
275: }
276: if(COSTLESS(np->cost,sp->mincost)) {
277: if(np->mode==xREWRITE) {
278: sp->mincost = np->cost; sp->winner = np; }
279: else { sp->mincost = INFINITY; sp->winner = NULL; }
280: }
281: sp->succ[label] = np;
282: _closure(state, ostate, sp);
283: }
284:
285: _closure(state, ostate, skp)
286: int state, ostate;
287: skeleton *skp;
288: {
289: register struct ap { short int tree, depth; } *ap;
290: short int *sp = &mtTable[state];
291: register __match *mp;
292:
293: if(state==HANG || *sp!=ACCEPT) return(NULL);
294:
295: for(ap = (struct ap *) &mtAccept[*++sp]; ap->tree!=-1; ap++)
296: if (ap->depth==0) {
297: mp = _allocmatch();
298: mp->tree = ap->tree;
299: _addmatches (ostate, skp, mp);
300: } else {
301: register __partial *pp, *lim = &skp->partial[skp->treecnt];
302: for(pp=skp->partial; pp < lim; pp++)
303: if(pp->treeno==ap->tree)
304: break;
305: if(pp==lim) {
306: skp->treecnt++;
307: pp->treeno = ap->tree;
308: pp->bits = (1<<(ap->depth));
309: } else pp->bits |= (1<<(ap->depth));
310: }
311: }
312:
313: _merge (old, new)
314: skeleton *old, *new;
315: {
316: register __partial *op = old->partial, *np = new->partial;
317: int nson = new->nson;
318: register __partial *lim = np + new->treecnt;
319: if (nson==1) {
320: old->treecnt = new->treecnt;
321: for(; np < lim; op++, np++) {
322: op->treeno = np->treeno;
323: op->bits = np->bits/2;
324: }
325: } else {
326: __partial *newer = _allocpartial();
327: register __partial *newerp = newer;
328: register int cnt;
329: lim = op+old->treecnt;
330: for(cnt=new->treecnt; cnt-- ; np++) {
331: for(op = old->partial; op < lim; op++)
332: if (op->treeno == np->treeno) {
333: newerp->treeno = op->treeno;
334: newerp++->bits = op->bits & (np->bits/2);
335: break;
336: }
337: }
338: _freepartial(old->partial);
339: old->partial = newer;
340: old->treecnt = newerp-newer;
341: }
342: }
343:
344: /* Save and Allocate Matches */
345: #define BLKF 100
346: __match *freep = NULL;
347: static int _count = 0;
348: static __match *_blockp = NULL;
349: #ifdef CHECKMEM
350: int x_matches, f_matches;
351: #endif
352:
353: __match *
354: _allocmatch()
355: {
356: __match *mp;
357:
358: if(freep!=NULL) {
359: mp = freep;
360: freep = ((__match *)((struct freeb *) freep)->next);
361: #ifdef CHECKMEM
362: f_matches--;
363: #endif
364: } else {
365: if(_count==0) {
366: _blockp = (__match *) malloc (BLKF*sizeof (__match));
367: _count = BLKF;
368: #ifdef CHECKMEM
369: x_matches += BLKF;
370: #endif
371: }
372: mp = _blockp++;
373: _count--;
374: }
375: return(mp);
376: }
377:
378: _freematch(mp)
379: __match *mp;
380: {
381: ((struct freeb *) mp)->next = (struct freeb *) freep;
382: freep = mp;
383: #ifdef CHECKMEM
384: f_matches++;
385: #endif
386: }
387:
388: static __partial *pfreep = NULL;
389: static int pcount = 0;
390: static __partial *pblock = NULL;
391: #ifdef CHECKMEM
392: static int x_partials, f_partials;
393: #endif
394:
395: __partial *
396: _allocpartial()
397: {
398: __partial *pp;
399: if (pfreep!=NULL) {
400: pp = pfreep;
401: pfreep = *(__partial **) pp;
402: #ifdef CHECKMEM
403: f_partials--;
404: #endif
405: } else {
406: if (pcount==0) {
407: pblock=(__partial *)malloc(BLKF*MAXTREES*sizeof(__partial));
408: pcount = BLKF;
409: #ifdef CHECKMEM
410: x_partials += BLKF;
411: #endif
412: }
413: pp = pblock;
414: pblock += MAXTREES;
415: pcount--;
416: }
417: return(pp);
418: }
419:
420: _freepartial(pp)
421: __partial *pp;
422: {
423: * (__partial **)pp = pfreep;
424: pfreep = pp;
425: #ifdef CHECKMEM
426: f_partials++;
427: #endif
428: }
429:
430:
431: /* storage management */
432: static skeleton *sfreep = NULL;
433: static int scount = 0;
434: static skeleton * sblock = NULL;
435:
436: skeleton *
437: _allocskel()
438: {
439: skeleton *sp;
440: int i;
441: if(sfreep!=NULL) {
442: sp = sfreep;
443: sfreep = sp->sibling;
444: } else {
445: if(scount==0) {
446: sblock = (skeleton *) malloc (BLKF * sizeof (skeleton));
447: scount = BLKF;
448: }
449: sp = sblock++;
450: scount--;
451: }
452: sp->sibling = NULL; sp->leftchild = NULL;
453: for (i=0; i < MAXLABELS; sp->succ[i++] = NULL);
454: sp->treecnt = 0;
455: sp->partial = _allocpartial();
456: return(sp);
457: }
458:
459: _freeskel(sp)
460: skeleton *sp;
461: {
462: int i;
463: __match *mp;
464: if(sp==NULL)
465: return;
466: if(sp->leftchild)
467: _freeskel(sp->leftchild);
468: if(sp->sibling)
469: _freeskel(sp->sibling);
470: _freepartial (sp->partial);
471: for (i=0; i < MAXLABELS; i++)
472: if ((mp = sp->succ[i])!=NULL) _freematch (mp);
473: sp->sibling = sfreep;
474: sfreep = sp;
475: }
476:
477: _match()
478: {
479: skeleton *sp;
480: sp = _allocskel();
481: sp->root = NULL;
482: _mpbtop = _mpblock;
483: _freeskel(_walk(sp, HANG));
484: #ifdef CHECKMEM
485: if(f_matches+_count!=x_matches) {
486: printf(";#m %d %d %d\n", f_matches, _count, x_matches);
487: assert(0);
488: }
489: if(f_partials+pcount!=x_partials) {
490: printf(";#p %d %d %d\n", f_partials, pcount, x_partials);
491: assert(0);
492: }
493: #endif
494: }
495:
496: NODEPTR
497: _mtG(root,i)
498: NODEPTR root;
499: int i;
500: {
501: int *p = &i;
502: while(*p!=-1)
503: root = mtGetNodes(root, *p++);
504: return(root);
505: }
506:
507: /* diagnostic routines */
508:
509: _prskel (skp, lvl)
510: skeleton *skp; int lvl;
511: {
512: int i; __match *mp;
513: __partial *pp;
514: if(skp==NULL) return;
515: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
516: printf ("###\n");
517: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
518: for (i = 0; i < MAXLABELS; i++)
519: if ((mp=skp->succ[i])!=NULL)
520: #ifdef LINE_XREF
521: printf ("[%d<%d> %d]", mp->tree,
522: mtLine[mp->tree], mp->cost);
523: #else
524: printf ("[%d %d]", mp->tree, mp->cost);
525: #endif
526: putchar('\n');
527: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
528: for (i = 0, pp=skp->partial; i < skp->treecnt; i++, pp++)
529: #ifdef LINE_XREF
530: printf("(%d<%d> %x)", pp->treeno, mtLine[pp->treeno],
531: pp->bits);
532: #else
533: printf ("(%d %x)", pp->treeno, pp->bits);
534: #endif
535: putchar('\n');
536: fflush(stdout);
537: _prskel (skp->leftchild, lvl+2);
538: _prskel (skp->sibling, lvl);
539: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.