|
|
1.1 root 1: /*
2: * See Aho, Corasick Comm ACM 18:6, and Hoffman and O'Donell JACM 29:1
3: * for detail of the matching algorithm
4: */
5:
6: /* turn this flag on if your machine has a fast byte string zero operation */
7: /*#define BZERO 1*/
8: int mtDebug = 0;
9: int treedebug = 0;
10: extern int _machstep();
11: extern short int mtStart;
12: extern short int mtTable[], mtAccept[], mtMap[], mtPaths[], mtPathStart[];
13: #ifdef LINE_XREF
14: extern short int mtLine[];
15: #endif
16: extern NODEPTR mtGetNodes(), mtAction();
17: extern skeleton *_allocskel();
18: extern __match *_allocmatch();
19: extern __partial *_allocpartial();
20:
21: #define MPBLOCKSIZ 3000
22: __match *_mpblock[MPBLOCKSIZ], **_mpbtop;
23:
24: /*
25: * See sym.h in the preprocessor for details
26: * Basically used to support eh $%n$ construct.
27: */
28: __match **
29: _getleaves (mp, skp)
30: register __match *mp; register skeleton *skp;
31: {
32: skeleton *stack[MAXDEPTH];
33: skeleton **stp = stack;
34: register short int *sip = &mtPaths[mtPathStart[mp->tree]];
35: register __match **mmp = _mpbtop;
36: __match **mmp2 = mmp;
37: if((_mpbtop += *sip++ + 1) > &_mpblock[MPBLOCKSIZ]) {
38: printf ("ich: %d\n", _mpbtop-_mpblock);
39: assert(0);
40: }
41:
42: for(;;)
43: switch(*sip++) {
44: case ePUSH:
45: *stp++ = skp;
46: skp = skp->leftchild;
47: break;
48:
49: case eNEXT:
50: skp = skp->sibling;
51: break;
52:
53: case eEVAL:
54: if ((mp = skp->succ[M_DETAG(*sip++)])==NULL) abort();
55: *mmp++ = mp;
56: break;
57:
58: case ePOP:
59: skp = *--stp;
60: break;
61:
62: case eSTOP:
63: *mmp = NULL;
64: return (mmp2);
65: }
66: }
67:
68: _evalleaves (mpp)
69: register __match **mpp;
70: {
71: for (; *mpp!=NULL; mpp++) {
72: register __match *mp = *mpp;
73: _do (mp->skel, mp, 1);
74: }
75: }
76:
77:
78: _do(sp, winner, evalall)
79: skeleton *sp; register __match *winner; int evalall;
80: {
81: register __match **mpp;
82: register skeleton *skel = winner->skel;
83: if(winner==NULL) {
84: _prskel(sp, 0);
85: puts ("no winner");
86: return;
87: }
88: if(winner->mode==xDEFER || (evalall && winner->mode!=xTOPDOWN))
89: REORDER (winner->lleaves);
90: mtSetNodes (skel->parent, skel->nson,
91: skel->root=mtAction (winner->tree, winner->lleaves, sp));
92: }
93:
94: skeleton *
95: _walk(sp, ostate)
96: register skeleton *sp;
97: int ostate;
98: {
99: int state, nstate, nson;
100: register __partial *pp;
101: register __match *mp, *mp2;
102: register skeleton *nsp, *lastchild = NULL;
103: NODEPTR son, root;
104:
105: root = sp->root;
106: nson = 1; sp->mincost = INFINITY;
107: state = _machstep (ostate, root, mtValue(root));
108:
109: while((son = mtGetNodes(root, nson))!=NULL) {
110: nstate = _machstep (state, NULL, MV_BRANCH(nson));
111: nsp = _allocskel();
112: nsp->root = son;
113: nsp->parent = root;
114: nsp->nson = nson;
115: _walk(nsp, nstate);
116: if(COSTLESS(nsp->mincost, INFINITY)) {
117: assert (nsp->winner->mode==xREWRITE);
118: if (mtDebug || treedebug) {
119: printf ("rewrite\n");
120: _prskel (nsp, 0);
121: }
122: _do(nsp, nsp->winner, 0);
123: _freeskel(nsp);
124: continue;
125: }
126: _merge (sp, nsp);
127: if (lastchild==NULL) sp->leftchild = nsp;
128: else lastchild->sibling = nsp;
129: lastchild = nsp;
130: nson++;
131: }
132:
133: for (pp = sp->partial; pp < &sp->partial[sp->treecnt]; pp++)
134: if (pp->bits&01==1) {
135: mp = _allocmatch();
136: mp->tree = pp->treeno;
137: _addmatches (ostate, sp, mp);
138: }
139: if(son==NULL && nson==1)
140: _closure (state, ostate, sp);
141:
142: sp->rightchild = lastchild;
143: if (root==NULL) {
144: COST c; __match *win; int i; nsp = sp->leftchild;
145: c = INFINITY;
146: win = NULL;
147: for (i = 0; i < MAXLABELS; i++) {
148: mp = nsp->succ[i];
149: if (mp!=NULL && COSTLESS (mp->cost, c))
150: { c = mp->cost; win = mp; }
151: }
152: if (mtDebug || treedebug)
153: _prskel (nsp, 0);
154: _do (nsp, win, 0);
155: }
156: if (mtDebug)
157: _prskel (sp, 0);
158: return(sp);
159: }
160:
161: static short int _nodetab[MAXNDVAL], _labeltab[MAXLABELS];
162:
163: /*
164: * Convert the start state which has a large branching factor into
165: * a index table. This must be called before the matcher is used.
166: */
167: _matchinit()
168: {
169: short int *sp;
170: struct pair { short int val, where } *pp;
171: for (sp=_nodetab; sp < &_nodetab[MAXNDVAL]; sp++) *sp = HANG;
172: for (sp=_labeltab; sp < &_labeltab[MAXLABELS]; sp++) *sp = HANG;
173: sp = &mtTable[mtStart];
174: assert (*sp==TABLE);
175: for (pp = (struct pair *) ++sp; pp->val!=EOF; pp++) {
176: if (MI_NODE(pp->val))
177: _nodetab[M_DETAG(pp->val)] = pp->where;
178: else if (MI_LABEL(pp->val))
179: _labeltab[M_DETAG(pp->val)] = pp->where;
180: }
181: }
182:
183: int
184: _machstep(state, root, input)
185: short int state, input;
186: NODEPTR root;
187: {
188: register short int *stp = &mtTable[state];
189: int start = 0;
190: if (state==HANG)
191: return (input==(MV_BRANCH(1)) ? mtStart : HANG);
192: rescan:
193: if (stp==&mtTable[mtStart]) {
194: if (MI_NODE(input)) return(_nodetab[M_DETAG(input)]);
195: if (MI_LABEL(input)) return(_labeltab[M_DETAG(input)]);
196: }
197:
198: for(;;) {
199: if(*stp==ACCEPT) stp += 2;
200:
201: if(*stp==TABLE) {
202: stp++;
203: while(*stp!=EOT) {
204: if(input==*stp) {
205: return(*++stp);
206: }
207: stp += 2;
208: }
209: stp++;
210: }
211: if(*stp!=FAIL) {
212: if (start) return (HANG);
213: else { stp = &mtTable[mtStart]; start = 1; goto rescan;}
214: } else {
215: stp++;
216: stp = &mtTable[*stp];
217: goto rescan;
218: }
219: }
220: }
221:
222: _addmatches(ostate, sp, np)
223: int ostate;
224: register skeleton *sp;
225: register __match *np;
226: {
227: int label;
228: int state;
229: register __match *mp;
230:
231: label = mtMap[np->tree];
232:
233: /*
234: * this is a very poor substitute for good design of the DFA.
235: * What we need is a special case that allows any label to be accepted
236: * by the start state but we don't want the start state to recognize
237: * them after a failure.
238: */
239: state = _machstep(ostate, NULL, MV_LABEL(label));
240: if (ostate!=mtStart && state==HANG) {
241: _freematch(np);
242: return;
243: }
244:
245: np->lleaves = _getleaves (np, sp);
246: np->skel = sp;
247: if((np->mode = mtEvalCost(np, np->lleaves, sp))==xABORT)
248: {
249: _freematch(np);
250: return;
251: }
252:
253: /*
254: if(np->mode==xREWRITE && COSTLESS(np->cost, sp->mincost)) {
255: sp->mincost = np->cost;
256: sp->winner = np;
257: }
258: */
259: if ((mp = sp->succ[label])!=NULL) {
260: if (!COSTLESS (np->cost, mp->cost))
261: { _freematch(np); return; }
262: else _freematch(mp);
263: }
264: if(COSTLESS(np->cost,sp->mincost)) {
265: if(np->mode==xREWRITE) {
266: sp->mincost = np->cost; sp->winner = np; }
267: else { sp->mincost = INFINITY; sp->winner = NULL; }
268: }
269: sp->succ[label] = np;
270: _closure(state, ostate, sp);
271: }
272:
273: _closure(state, ostate, skp)
274: int state, ostate;
275: skeleton *skp;
276: {
277: register struct ap { short int tree, depth; } *ap;
278: short int *sp = &mtTable[state];
279: register __match *mp;
280:
281: if(state==HANG || *sp!=ACCEPT) return(NULL);
282:
283: for(ap = (struct ap *) &mtAccept[*++sp]; ap->tree!=-1; ap++)
284: if (ap->depth==0) {
285: mp = _allocmatch();
286: mp->tree = ap->tree;
287: _addmatches (ostate, skp, mp);
288: } else {
289: register __partial *pp, *lim = &skp->partial[skp->treecnt];
290: for(pp=skp->partial; pp < lim; pp++)
291: if(pp->treeno==ap->tree)
292: break;
293: if(pp==lim) {
294: skp->treecnt++;
295: pp->treeno = ap->tree;
296: pp->bits = (1<<(ap->depth));
297: } else pp->bits |= (1<<(ap->depth));
298: }
299: }
300:
301: _merge (old, new)
302: skeleton *old, *new;
303: {
304: register __partial *op = old->partial, *np = new->partial;
305: int nson = new->nson;
306: register __partial *lim = np + new->treecnt;
307: if (nson==1) {
308: old->treecnt = new->treecnt;
309: for(; np < lim; op++, np++) {
310: op->treeno = np->treeno;
311: op->bits = np->bits/2;
312: }
313: } else {
314: __partial *newer = _allocpartial();
315: register __partial *newerp = newer;
316: register int cnt;
317: lim = op+old->treecnt;
318: for(cnt=new->treecnt; cnt-- ; np++) {
319: for(op = old->partial; op < lim; op++)
320: if (op->treeno == np->treeno) {
321: newerp->treeno = op->treeno;
322: newerp++->bits = op->bits & (np->bits/2);
323: break;
324: }
325: }
326: _freepartial(old->partial);
327: old->partial = newer;
328: old->treecnt = newerp-newer;
329: }
330: }
331:
332: /* Save and Allocate Matches */
333: #define BLKF 100
334: __match *freep = NULL;
335: static int _count = 0;
336: static __match *_blockp = NULL;
337: #ifdef CHECKMEM
338: int x_matches, f_matches;
339: #endif
340:
341: __match *
342: _allocmatch()
343: {
344: __match *mp;
345:
346: if(freep!=NULL) {
347: mp = freep;
348: freep = ((__match *)((struct freeb *) freep)->next);
349: #ifdef CHECKMEM
350: f_matches--;
351: #endif
352: } else {
353: if(_count==0) {
354: _blockp = (__match *) malloc (BLKF*sizeof (__match));
355: _count = BLKF;
356: #ifdef CHECKMEM
357: x_matches += BLKF;
358: #endif
359: }
360: mp = _blockp++;
361: _count--;
362: }
363: return(mp);
364: }
365:
366: _freematch(mp)
367: __match *mp;
368: {
369: ((struct freeb *) mp)->next = (struct freeb *) freep;
370: freep = mp;
371: #ifdef CHECKMEM
372: f_matches++;
373: #endif
374: }
375:
376: static __partial *pfreep = NULL;
377: static int pcount = 0;
378: static __partial *pblock = NULL;
379: #ifdef CHECKMEM
380: static int x_partials, f_partials;
381: #endif
382:
383: __partial *
384: _allocpartial()
385: {
386: __partial *pp;
387: if (pfreep!=NULL) {
388: pp = pfreep;
389: pfreep = *(__partial **) pp;
390: #ifdef CHECKMEM
391: f_partials--;
392: #endif
393: } else {
394: if (pcount==0) {
395: pblock=(__partial *)malloc(BLKF*MAXTREES*sizeof(__partial));
396: pcount = BLKF;
397: #ifdef CHECKMEM
398: x_partials += BLKF;
399: #endif
400: }
401: pp = pblock;
402: pblock += MAXTREES;
403: pcount--;
404: }
405: return(pp);
406: }
407:
408: _freepartial(pp)
409: __partial *pp;
410: {
411: * (__partial **)pp = pfreep;
412: pfreep = pp;
413: #ifdef CHECKMEM
414: f_partials++;
415: #endif
416: }
417:
418:
419: /* storage management */
420: static skeleton *sfreep = NULL;
421: static int scount = 0;
422: static skeleton * sblock = NULL;
423:
424: skeleton *
425: _allocskel()
426: {
427: skeleton *sp;
428: int i;
429: if(sfreep!=NULL) {
430: sp = sfreep;
431: sfreep = sp->sibling;
432: } else {
433: if(scount==0) {
434: sblock = (skeleton *) malloc (BLKF * sizeof (skeleton));
435: scount = BLKF;
436: }
437: sp = sblock++;
438: scount--;
439: }
440: sp->sibling = NULL; sp->leftchild = NULL;
441: for (i=0; i < MAXLABELS; sp->succ[i++] = NULL);
442: sp->treecnt = 0;
443: sp->partial = _allocpartial();
444: return(sp);
445: }
446:
447: _freeskel(sp)
448: skeleton *sp;
449: {
450: int i;
451: __match *mp;
452: if(sp==NULL)
453: return;
454: if(sp->leftchild)
455: _freeskel(sp->leftchild);
456: if(sp->sibling)
457: _freeskel(sp->sibling);
458: _freepartial (sp->partial);
459: for (i=0; i < MAXLABELS; i++)
460: if ((mp = sp->succ[i])!=NULL) _freematch (mp);
461: sp->sibling = sfreep;
462: sfreep = sp;
463: }
464:
465: _match()
466: {
467: skeleton *sp;
468: sp = _allocskel();
469: sp->root = NULL;
470: _mpbtop = _mpblock;
471: _freeskel(_walk(sp, HANG));
472: #ifdef CHECKMEM
473: if(f_matches+_count!=x_matches) {
474: printf(";#m %d %d %d\n", f_matches, _count, x_matches);
475: assert(0);
476: }
477: if(f_partials+pcount!=x_partials) {
478: printf(";#p %d %d %d\n", f_partials, pcount, x_partials);
479: assert(0);
480: }
481: #endif
482: }
483:
484: NODEPTR
485: _mtG(root,i)
486: NODEPTR root;
487: int i;
488: {
489: int *p = &i;
490: while(*p!=-1)
491: root = mtGetNodes(root, *p++);
492: return(root);
493: }
494:
495: /* diagnostic routines */
496:
497: _prskel (skp, lvl)
498: skeleton *skp; int lvl;
499: {
500: int i; __match *mp;
501: __partial *pp;
502: if(skp==NULL) return;
503: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
504: printf ("###\n");
505: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
506: for (i = 0; i < MAXLABELS; i++)
507: if ((mp=skp->succ[i])!=NULL)
508: #ifdef LINE_XREF
509: printf ("[%d<%d> %d]", mp->tree,
510: mtLine[mp->tree], mp->cost);
511: #else
512: printf ("[%d %d]", mp->tree, mp->cost);
513: #endif
514: putchar('\n');
515: for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
516: for (i = 0, pp=skp->partial; i < skp->treecnt; i++, pp++)
517: #ifdef LINE_XREF
518: printf("(%d<%d> %x)", pp->treeno, mtLine[pp->treeno],
519: pp->bits);
520: #else
521: printf ("(%d %x)", pp->treeno, pp->bits);
522: #endif
523: putchar('\n');
524: fflush(stdout);
525: _prskel (skp->leftchild, lvl+2);
526: _prskel (skp->sibling, lvl);
527: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.