|
|
1.1 root 1: #ifndef lint
2: static char sccsid[] = "@(#)b.c 4.3 7/7/86";
3: #endif
4:
5: #include "awk.def"
6: #include "stdio.h"
7: #include "awk.h"
8:
9: extern node *op2();
10: extern struct fa *cgotofn();
11: #define MAXLIN 256
12: #define NCHARS 128
13: #define NSTATES 256
14:
15: #define type(v) v->nobj
16: #define left(v) v->narg[0]
17: #define right(v) v->narg[1]
18: #define parent(v) v->nnext
19:
20: #define LEAF case CCL: case NCCL: case CHAR: case DOT:
21: #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
22:
23: /* encoding in tree nodes:
24: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
25: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
26: binary (CAT, OR): left and right are children
27: parent contains pointer to parent
28: */
29:
30: struct fa {
31: int cch;
32: struct fa *st;
33: };
34:
35: int *state[NSTATES];
36: int *foll[MAXLIN];
37: char chars[MAXLIN];
38: int setvec[MAXLIN];
39: node *point[MAXLIN];
40:
41: int setcnt;
42: int line;
43: int maxfoll; /* index of highest foll[] entry set by cfoll() */
44:
45:
46: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
47: node *p;
48: {
49: node *p1;
50: struct fa *fap;
51: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
52: /* put DOT STAR in front of reg. exp. */
53: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
54:
55: line = 0;
56: penter(p1); /* enter parent pointers and leaf indices */
57: point[line] = p1; /* FINAL node */
58: setvec[0] = 1; /* for initial DOT STAR */
59: cfoll(p1); /* set up follow sets */
60: fap = cgotofn();
61: freetr(p1); /* add this when alloc works */
62: return(fap);
63: }
64:
65: penter(p) /* set up parent pointers and leaf indices */
66: node *p;
67: {
68: switch(type(p)) {
69: LEAF
70: left(p) = (node *) line;
71: point[line++] = p;
72: break;
73: UNARY
74: penter(left(p));
75: parent(left(p)) = p;
76: break;
77: case CAT:
78: case OR:
79: penter(left(p));
80: penter(right(p));
81: parent(left(p)) = p;
82: parent(right(p)) = p;
83: break;
84: default:
85: error(FATAL, "unknown type %d in penter\n", type(p));
86: break;
87: }
88: }
89:
90: freetr(p) /* free parse tree and follow sets */
91: node *p;
92: {
93: switch(type(p)) {
94: LEAF
95: foll_free((int) left(p));
96: xfree(p);
97: break;
98: UNARY
99: freetr(left(p));
100: xfree(p);
101: break;
102: case CAT:
103: case OR:
104: freetr(left(p));
105: freetr(right(p));
106: xfree(p);
107: break;
108: default:
109: error(FATAL, "unknown type %d in freetr", type(p));
110: break;
111: }
112: }
113: char *cclenter(p)
114: register char *p;
115: {
116: register i, c;
117: char *op;
118:
119: op = p;
120: i = 0;
121: while ((c = *p++) != 0) {
122: if (c == '-' && i > 0 && chars[i-1] != 0) {
123: if (*p != 0) {
124: c = chars[i-1];
125: while (c < *p) {
126: if (i >= MAXLIN)
127: overflo();
128: chars[i++] = ++c;
129: }
130: p++;
131: continue;
132: }
133: }
134: if (i >= MAXLIN)
135: overflo();
136: chars[i++] = c;
137: }
138: chars[i++] = '\0';
139: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
140: xfree(op);
141: return(tostring(chars));
142: }
143:
144: overflo()
145: {
146: error(FATAL, "regular expression too long\n");
147: }
148:
149: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
150: register node *v;
151: {
152: register i;
153: int prev;
154: int *add();
155:
156: maxfoll = -1;
157: switch(type(v)) {
158: LEAF
159: setcnt = 0;
160: for (i=1; i<=line; i++)
161: setvec[i] = 0;
162: follow(v);
163: if (notin(foll, ( (int) left(v))-1, &prev)) {
164: foll[(int) left(v)] = add(setcnt);
165: }
166: else
167: foll[ (int) left(v)] = foll[prev];
168: if ((int)left(v) > maxfoll)
169: maxfoll = (int)left(v);
170: break;
171: UNARY
172: cfoll(left(v));
173: break;
174: case CAT:
175: case OR:
176: cfoll(left(v));
177: cfoll(right(v));
178: break;
179: default:
180: error(FATAL, "unknown type %d in cfoll", type(v));
181: }
182: }
183:
184: first(p) /* collects initially active leaves of p into setvec */
185: register node *p; /* returns 0 or 1 depending on whether p matches empty string */
186: {
187: register b;
188:
189: switch(type(p)) {
190: LEAF
191: if (setvec[(int) left(p)] != 1) {
192: setvec[(int) left(p)] = 1;
193: setcnt++;
194: }
195: if (type(p) == CCL && (*(char *) right(p)) == '\0')
196: return(0); /* empty CCL */
197: else return(1);
198: case FINAL:
199: case PLUS:
200: if (first(left(p)) == 0) return(0);
201: return(1);
202: case STAR:
203: case QUEST:
204: first(left(p));
205: return(0);
206: case CAT:
207: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
208: return(1);
209: case OR:
210: b = first(right(p));
211: if (first(left(p)) == 0 || b == 0) return(0);
212: return(1);
213: }
214: error(FATAL, "unknown type %d in first\n", type(p));
215: return(-1);
216: }
217:
218: follow(v)
219: node *v; /* collects leaves that can follow v into setvec */
220: {
221: node *p;
222:
223: if (type(v) == FINAL)
224: return;
225: p = parent(v);
226: switch (type(p)) {
227: case STAR:
228: case PLUS: first(v);
229: follow(p);
230: return;
231:
232: case OR:
233: case QUEST: follow(p);
234: return;
235:
236: case CAT: if (v == left(p)) { /* v is left child of p */
237: if (first(right(p)) == 0) {
238: follow(p);
239: return;
240: }
241: }
242: else /* v is right child */
243: follow(p);
244: return;
245: case FINAL: if (setvec[line] != 1) {
246: setvec[line] = 1;
247: setcnt++;
248: }
249: return;
250: }
251: }
252:
253: member(c, s) /* is c in s? */
254: register char c, *s;
255: {
256: while (*s)
257: if (c == *s++)
258: return(1);
259: return(0);
260: }
261:
262: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
263: int **array;
264: int *prev; {
265: register i, j;
266: int *ptr;
267: for (i=0; i<=n; i++) {
268: ptr = array[i];
269: if (*ptr == setcnt) {
270: for (j=0; j < setcnt; j++)
271: if (setvec[*(++ptr)] != 1) goto nxt;
272: *prev = i;
273: return(0);
274: }
275: nxt: ;
276: }
277: return(1);
278: }
279:
280: int *add(n) { /* remember setvec */
281: int *ptr, *p;
282: register i;
283: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
284: overflo();
285: *ptr = n;
286: dprintf("add(%d)\n", n, NULL, NULL);
287: for (i=1; i <= line; i++)
288: if (setvec[i] == 1) {
289: *(++ptr) = i;
290: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
291: }
292: dprintf("\n", NULL, NULL, NULL);
293: return(p);
294: }
295:
296: struct fa *cgotofn()
297: {
298: register i, k;
299: register int *ptr;
300: char c;
301: char *p;
302: node *cp;
303: int j, n, s, ind, numtrans;
304: int finflg;
305: int curpos, num, prev;
306: struct fa *where[NSTATES];
307:
308: int fatab[257];
309: struct fa *pfa;
310:
311: char index[MAXLIN];
312: char iposns[MAXLIN];
313: int sposns[MAXLIN];
314: int spmax, spinit;
315:
316: char symbol[NCHARS];
317: char isyms[NCHARS];
318: char ssyms[NCHARS];
319: int ssmax, ssinit;
320:
321: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
322: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
323: setcnt = 0;
324: /* compute initial positions and symbols of state 0 */
325: ssmax = 0;
326: ptr = state[0] = foll[0];
327: spinit = *ptr;
328: for (i=0; i<spinit; i++) {
329: curpos = *(++ptr);
330: sposns[i] = curpos;
331: iposns[curpos] = 1;
332: cp = point[curpos];
333: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
334: switch (type(cp)) {
335: case CHAR:
336: k = (int) right(cp);
337: if (isyms[k] != 1) {
338: isyms[k] = 1;
339: ssyms[ssmax++] = k;
340: }
341: break;
342: case DOT:
343: for (k=1; k<NCHARS; k++) {
344: if (k != HAT) {
345: if (isyms[k] != 1) {
346: isyms[k] = 1;
347: ssyms[ssmax++] = k;
348: }
349: }
350: }
351: break;
352: case CCL:
353: for (p = (char *) right(cp); *p; p++) {
354: if (*p != HAT) {
355: if (isyms[*p] != 1) {
356: isyms[*p] = 1;
357: ssyms[ssmax++] = *p;
358: }
359: }
360: }
361: break;
362: case NCCL:
363: for (k=1; k<NCHARS; k++) {
364: if (k != HAT && !member(k, (char *) right(cp))) {
365: if (isyms[k] != 1) {
366: isyms[k] = 1;
367: ssyms[ssmax++] = k;
368: }
369: }
370: }
371: }
372: }
373: ssinit = ssmax;
374: n = 0;
375: for (s=0; s<=n; s++) {
376: dprintf("s = %d\n", s, NULL, NULL);
377: ind = 0;
378: numtrans = 0;
379: finflg = 0;
380: if (*(state[s] + *state[s]) == line) { /* s final? */
381: finflg = 1;
382: goto tenter;
383: }
384: spmax = spinit;
385: ssmax = ssinit;
386: ptr = state[s];
387: num = *ptr;
388: for (i=0; i<num; i++) {
389: curpos = *(++ptr);
390: if (iposns[curpos] != 1 && index[curpos] != 1) {
391: index[curpos] = 1;
392: sposns[spmax++] = curpos;
393: }
394: cp = point[curpos];
395: switch (type(cp)) {
396: case CHAR:
397: k = (int) right(cp);
398: if (isyms[k] == 0 && symbol[k] == 0) {
399: symbol[k] = 1;
400: ssyms[ssmax++] = k;
401: }
402: break;
403: case DOT:
404: for (k=1; k<NCHARS; k++) {
405: if (k != HAT) {
406: if (isyms[k] == 0 && symbol[k] == 0) {
407: symbol[k] = 1;
408: ssyms[ssmax++] = k;
409: }
410: }
411: }
412: break;
413: case CCL:
414: for (p = (char *) right(cp); *p; p++) {
415: if (*p != HAT) {
416: if (isyms[*p] == 0 && symbol[*p] == 0) {
417: symbol[*p] = 1;
418: ssyms[ssmax++] = *p;
419: }
420: }
421: }
422: break;
423: case NCCL:
424: for (k=1; k<NCHARS; k++) {
425: if (k != HAT && !member(k, (char *) right(cp))) {
426: if (isyms[k] == 0 && symbol[k] == 0) {
427: symbol[k] = 1;
428: ssyms[ssmax++] = k;
429: }
430: }
431: }
432: }
433: }
434: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
435: c = ssyms[j];
436: symbol[c] = 0;
437: setcnt = 0;
438: for (k=0; k<=line; k++) setvec[k] = 0;
439: for (i=0; i<spmax; i++) {
440: index[sposns[i]] = 0;
441: cp = point[sposns[i]];
442: if ((k = type(cp)) != FINAL)
443: if (k == CHAR && c == (int) right(cp)
444: || k == DOT
445: || k == CCL && member(c, (char *) right(cp))
446: || k == NCCL && !member(c, (char *) right(cp))) {
447: ptr = foll[sposns[i]];
448: num = *ptr;
449: for (k=0; k<num; k++) {
450: if (setvec[*(++ptr)] != 1
451: && iposns[*ptr] != 1) {
452: setvec[*ptr] = 1;
453: setcnt++;
454: }
455: }
456: }
457: } /* end nextstate */
458: if (notin(state, n, &prev)) {
459: if (n >= NSTATES) {
460: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
461: overflo();
462: }
463: state[++n] = add(setcnt);
464: dprintf(" delta(%d,%o) = %d", s,c,n);
465: dprintf(", ind = %d\n", ind+1, NULL, NULL);
466: fatab[++ind] = c;
467: fatab[++ind] = n;
468: numtrans++;
469: }
470: else {
471: if (prev != 0) {
472: dprintf(" delta(%d,%o) = %d", s,c,prev);
473: dprintf(", ind = %d\n", ind+1, NULL, NULL);
474: fatab[++ind] = c;
475: fatab[++ind] = prev;
476: numtrans++;
477: }
478: }
479: }
480: tenter:
481: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
482: overflo();
483: where[s] = pfa;
484: if (finflg)
485: pfa->cch = -1; /* s is a final state */
486: else
487: pfa->cch = numtrans;
488: pfa->st = 0;
489: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
490: pfa->cch = fatab[2*i-1];
491: pfa->st = (struct fa *) fatab[2*i];
492: }
493: }
494: for (i=0; i<=n; i++) {
495: /* N.b. state[0] == foll[0], not separately allocated */
496: if (i>0)
497: xfree(state[i]); /* free state[i] */
498: pfa = where[i];
499: pfa->st = where[0];
500: dprintf("state %d: (%o)\n", i, pfa, NULL);
501: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
502: for (k=1; k<=pfa->cch; k++) {
503: (pfa+k)->st = where[ (int) (pfa+k)->st];
504: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
505: }
506: }
507: pfa = where[0];
508: if ((num = pfa->cch) < 0)
509: return(where[0]);
510: for (pfa += num; num; num--, pfa--)
511: if (pfa->cch == HAT) {
512: return(pfa->st);
513: }
514: return(where[0]);
515: }
516:
517: match(pfa, p)
518: register struct fa *pfa;
519: register char *p;
520: {
521: register count;
522: char c;
523: if (p == 0) return(0);
524: if (pfa->cch == 1) { /* fast test for first character, if possible */
525: c = (++pfa)->cch;
526: do
527: if (c == *p) {
528: p++;
529: pfa = pfa->st;
530: goto adv;
531: }
532: while (*p++ != 0);
533: return(0);
534: }
535: adv: if ((count = pfa->cch) < 0) return(1);
536: do {
537: for (pfa += count; count; count--, pfa--)
538: if (pfa->cch == *p) {
539: break;
540: }
541: pfa = pfa->st;
542: if ((count = pfa->cch) < 0) return(1);
543: } while(*p++ != 0);
544: return(0);
545: }
546:
547: /*
548: * Free foll[i], taking into account identical foll[] entries.
549: * This is necessary because cfoll() uses the same physical follow set for
550: * several foll[] entries when the set is identical. Called by freetr().
551: */
552: foll_free(i)
553: int i;
554: {
555: register int j;
556: int *p = foll[i];
557: if (p==NULL) return;
558: for (j=0; j<=maxfoll; j++)
559: if (foll[j]==p) foll[j]=NULL;
560: xfree(p);
561: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.