|
|
1.1 root 1: #ifndef lint
2: static char sccsid[] = "@(#)b.c 4.2 8/11/83";
3: #endif
4:
5: #include "awk.def"
6: #include "stdio.h"
7: #include "awk.h"
8:
9: extern node *op2();
10: extern struct fa *cgotofn();
11: #define MAXLIN 256
12: #define NCHARS 128
13: #define NSTATES 256
14:
15: #define type(v) v->nobj
16: #define left(v) v->narg[0]
17: #define right(v) v->narg[1]
18: #define parent(v) v->nnext
19:
20: #define LEAF case CCL: case NCCL: case CHAR: case DOT:
21: #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
22:
23: /* encoding in tree nodes:
24: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
25: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
26: binary (CAT, OR): left and right are children
27: parent contains pointer to parent
28: */
29:
30: struct fa {
31: int cch;
32: struct fa *st;
33: };
34:
35: int *state[NSTATES];
36: int *foll[MAXLIN];
37: char chars[MAXLIN];
38: int setvec[MAXLIN];
39: node *point[MAXLIN];
40:
41: int setcnt;
42: int line;
43:
44:
45: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
46: node *p;
47: {
48: node *p1;
49: struct fa *fap;
50: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
51: /* put DOT STAR in front of reg. exp. */
52: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
53:
54: line = 0;
55: penter(p1); /* enter parent pointers and leaf indices */
56: point[line] = p1; /* FINAL node */
57: setvec[0] = 1; /* for initial DOT STAR */
58: cfoll(p1); /* set up follow sets */
59: fap = cgotofn();
60: freetr(p1); /* add this when alloc works */
61: return(fap);
62: }
63:
64: penter(p) /* set up parent pointers and leaf indices */
65: node *p;
66: {
67: switch(type(p)) {
68: LEAF
69: left(p) = (node *) line;
70: point[line++] = p;
71: break;
72: UNARY
73: penter(left(p));
74: parent(left(p)) = p;
75: break;
76: case CAT:
77: case OR:
78: penter(left(p));
79: penter(right(p));
80: parent(left(p)) = p;
81: parent(right(p)) = p;
82: break;
83: default:
84: error(FATAL, "unknown type %d in penter\n", type(p));
85: break;
86: }
87: }
88:
89: freetr(p) /* free parse tree and follow sets */
90: node *p;
91: {
92: switch(type(p)) {
93: LEAF
94: xfree(foll[(int) left(p)]);
95: xfree(p);
96: break;
97: UNARY
98: freetr(left(p));
99: xfree(p);
100: break;
101: case CAT:
102: case OR:
103: freetr(left(p));
104: freetr(right(p));
105: xfree(p);
106: break;
107: default:
108: error(FATAL, "unknown type %d in freetr", type(p));
109: break;
110: }
111: }
112: char *cclenter(p)
113: register char *p;
114: {
115: register i, c;
116: char *op;
117:
118: op = p;
119: i = 0;
120: while ((c = *p++) != 0) {
121: if (c == '-' && i > 0 && chars[i-1] != 0) {
122: if (*p != 0) {
123: c = chars[i-1];
124: while (c < *p) {
125: if (i >= MAXLIN)
126: overflo();
127: chars[i++] = ++c;
128: }
129: p++;
130: continue;
131: }
132: }
133: if (i >= MAXLIN)
134: overflo();
135: chars[i++] = c;
136: }
137: chars[i++] = '\0';
138: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
139: xfree(op);
140: return(tostring(chars));
141: }
142:
143: overflo()
144: {
145: error(FATAL, "regular expression too long\n");
146: }
147:
148: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
149: register node *v;
150: {
151: register i;
152: int prev;
153: int *add();
154:
155: switch(type(v)) {
156: LEAF
157: setcnt = 0;
158: for (i=1; i<=line; i++)
159: setvec[i] = 0;
160: follow(v);
161: if (notin(foll, ( (int) left(v))-1, &prev)) {
162: foll[(int) left(v)] = add(setcnt);
163: }
164: else
165: foll[ (int) left(v)] = foll[prev];
166: break;
167: UNARY
168: cfoll(left(v));
169: break;
170: case CAT:
171: case OR:
172: cfoll(left(v));
173: cfoll(right(v));
174: break;
175: default:
176: error(FATAL, "unknown type %d in cfoll", type(v));
177: }
178: }
179:
180: first(p) /* collects initially active leaves of p into setvec */
181: register node *p; /* returns 0 or 1 depending on whether p matches empty string */
182: {
183: register b;
184:
185: switch(type(p)) {
186: LEAF
187: if (setvec[(int) left(p)] != 1) {
188: setvec[(int) left(p)] = 1;
189: setcnt++;
190: }
191: if (type(p) == CCL && (*(char *) right(p)) == '\0')
192: return(0); /* empty CCL */
193: else return(1);
194: case FINAL:
195: case PLUS:
196: if (first(left(p)) == 0) return(0);
197: return(1);
198: case STAR:
199: case QUEST:
200: first(left(p));
201: return(0);
202: case CAT:
203: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
204: return(1);
205: case OR:
206: b = first(right(p));
207: if (first(left(p)) == 0 || b == 0) return(0);
208: return(1);
209: }
210: error(FATAL, "unknown type %d in first\n", type(p));
211: return(-1);
212: }
213:
214: follow(v)
215: node *v; /* collects leaves that can follow v into setvec */
216: {
217: node *p;
218:
219: if (type(v) == FINAL)
220: return;
221: p = parent(v);
222: switch (type(p)) {
223: case STAR:
224: case PLUS: first(v);
225: follow(p);
226: return;
227:
228: case OR:
229: case QUEST: follow(p);
230: return;
231:
232: case CAT: if (v == left(p)) { /* v is left child of p */
233: if (first(right(p)) == 0) {
234: follow(p);
235: return;
236: }
237: }
238: else /* v is right child */
239: follow(p);
240: return;
241: case FINAL: if (setvec[line] != 1) {
242: setvec[line] = 1;
243: setcnt++;
244: }
245: return;
246: }
247: }
248:
249: member(c, s) /* is c in s? */
250: register char c, *s;
251: {
252: while (*s)
253: if (c == *s++)
254: return(1);
255: return(0);
256: }
257:
258: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
259: int **array;
260: int *prev; {
261: register i, j;
262: int *ptr;
263: for (i=0; i<=n; i++) {
264: ptr = array[i];
265: if (*ptr == setcnt) {
266: for (j=0; j < setcnt; j++)
267: if (setvec[*(++ptr)] != 1) goto nxt;
268: *prev = i;
269: return(0);
270: }
271: nxt: ;
272: }
273: return(1);
274: }
275:
276: int *add(n) { /* remember setvec */
277: int *ptr, *p;
278: register i;
279: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
280: overflo();
281: *ptr = n;
282: dprintf("add(%d)\n", n, NULL, NULL);
283: for (i=1; i <= line; i++)
284: if (setvec[i] == 1) {
285: *(++ptr) = i;
286: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
287: }
288: dprintf("\n", NULL, NULL, NULL);
289: return(p);
290: }
291:
292: struct fa *cgotofn()
293: {
294: register i, k;
295: register int *ptr;
296: char c;
297: char *p;
298: node *cp;
299: int j, n, s, ind, numtrans;
300: int finflg;
301: int curpos, num, prev;
302: struct fa *where[NSTATES];
303:
304: int fatab[257];
305: struct fa *pfa;
306:
307: char index[MAXLIN];
308: char iposns[MAXLIN];
309: int sposns[MAXLIN];
310: int spmax, spinit;
311:
312: char symbol[NCHARS];
313: char isyms[NCHARS];
314: char ssyms[NCHARS];
315: int ssmax, ssinit;
316:
317: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
318: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
319: setcnt = 0;
320: /* compute initial positions and symbols of state 0 */
321: ssmax = 0;
322: ptr = state[0] = foll[0];
323: spinit = *ptr;
324: for (i=0; i<spinit; i++) {
325: curpos = *(++ptr);
326: sposns[i] = curpos;
327: iposns[curpos] = 1;
328: cp = point[curpos];
329: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
330: switch (type(cp)) {
331: case CHAR:
332: k = (int) right(cp);
333: if (isyms[k] != 1) {
334: isyms[k] = 1;
335: ssyms[ssmax++] = k;
336: }
337: break;
338: case DOT:
339: for (k=1; k<NCHARS; k++) {
340: if (k != HAT) {
341: if (isyms[k] != 1) {
342: isyms[k] = 1;
343: ssyms[ssmax++] = k;
344: }
345: }
346: }
347: break;
348: case CCL:
349: for (p = (char *) right(cp); *p; p++) {
350: if (*p != HAT) {
351: if (isyms[*p] != 1) {
352: isyms[*p] = 1;
353: ssyms[ssmax++] = *p;
354: }
355: }
356: }
357: break;
358: case NCCL:
359: for (k=1; k<NCHARS; k++) {
360: if (k != HAT && !member(k, (char *) right(cp))) {
361: if (isyms[k] != 1) {
362: isyms[k] = 1;
363: ssyms[ssmax++] = k;
364: }
365: }
366: }
367: }
368: }
369: ssinit = ssmax;
370: n = 0;
371: for (s=0; s<=n; s++) {
372: dprintf("s = %d\n", s, NULL, NULL);
373: ind = 0;
374: numtrans = 0;
375: finflg = 0;
376: if (*(state[s] + *state[s]) == line) { /* s final? */
377: finflg = 1;
378: goto tenter;
379: }
380: spmax = spinit;
381: ssmax = ssinit;
382: ptr = state[s];
383: num = *ptr;
384: for (i=0; i<num; i++) {
385: curpos = *(++ptr);
386: if (iposns[curpos] != 1 && index[curpos] != 1) {
387: index[curpos] = 1;
388: sposns[spmax++] = curpos;
389: }
390: cp = point[curpos];
391: switch (type(cp)) {
392: case CHAR:
393: k = (int) right(cp);
394: if (isyms[k] == 0 && symbol[k] == 0) {
395: symbol[k] = 1;
396: ssyms[ssmax++] = k;
397: }
398: break;
399: case DOT:
400: for (k=1; k<NCHARS; k++) {
401: if (k != HAT) {
402: if (isyms[k] == 0 && symbol[k] == 0) {
403: symbol[k] = 1;
404: ssyms[ssmax++] = k;
405: }
406: }
407: }
408: break;
409: case CCL:
410: for (p = (char *) right(cp); *p; p++) {
411: if (*p != HAT) {
412: if (isyms[*p] == 0 && symbol[*p] == 0) {
413: symbol[*p] = 1;
414: ssyms[ssmax++] = *p;
415: }
416: }
417: }
418: break;
419: case NCCL:
420: for (k=1; k<NCHARS; k++) {
421: if (k != HAT && !member(k, (char *) right(cp))) {
422: if (isyms[k] == 0 && symbol[k] == 0) {
423: symbol[k] = 1;
424: ssyms[ssmax++] = k;
425: }
426: }
427: }
428: }
429: }
430: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
431: c = ssyms[j];
432: symbol[c] = 0;
433: setcnt = 0;
434: for (k=0; k<=line; k++) setvec[k] = 0;
435: for (i=0; i<spmax; i++) {
436: index[sposns[i]] = 0;
437: cp = point[sposns[i]];
438: if ((k = type(cp)) != FINAL)
439: if (k == CHAR && c == (int) right(cp)
440: || k == DOT
441: || k == CCL && member(c, (char *) right(cp))
442: || k == NCCL && !member(c, (char *) right(cp))) {
443: ptr = foll[sposns[i]];
444: num = *ptr;
445: for (k=0; k<num; k++) {
446: if (setvec[*(++ptr)] != 1
447: && iposns[*ptr] != 1) {
448: setvec[*ptr] = 1;
449: setcnt++;
450: }
451: }
452: }
453: } /* end nextstate */
454: if (notin(state, n, &prev)) {
455: if (n >= NSTATES) {
456: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
457: overflo();
458: }
459: state[++n] = add(setcnt);
460: dprintf(" delta(%d,%o) = %d", s,c,n);
461: dprintf(", ind = %d\n", ind+1, NULL, NULL);
462: fatab[++ind] = c;
463: fatab[++ind] = n;
464: numtrans++;
465: }
466: else {
467: if (prev != 0) {
468: dprintf(" delta(%d,%o) = %d", s,c,prev);
469: dprintf(", ind = %d\n", ind+1, NULL, NULL);
470: fatab[++ind] = c;
471: fatab[++ind] = prev;
472: numtrans++;
473: }
474: }
475: }
476: tenter:
477: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
478: overflo();
479: where[s] = pfa;
480: if (finflg)
481: pfa->cch = -1; /* s is a final state */
482: else
483: pfa->cch = numtrans;
484: pfa->st = 0;
485: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
486: pfa->cch = fatab[2*i-1];
487: pfa->st = (struct fa *) fatab[2*i];
488: }
489: }
490: for (i=0; i<=n; i++) {
491: xfree(state[i]); /* free state[i] */
492: pfa = where[i];
493: pfa->st = where[0];
494: dprintf("state %d: (%o)\n", i, pfa, NULL);
495: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
496: for (k=1; k<=pfa->cch; k++) {
497: (pfa+k)->st = where[ (int) (pfa+k)->st];
498: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
499: }
500: }
501: pfa = where[0];
502: if ((num = pfa->cch) < 0)
503: return(where[0]);
504: for (pfa += num; num; num--, pfa--)
505: if (pfa->cch == HAT) {
506: return(pfa->st);
507: }
508: return(where[0]);
509: }
510:
511: match(pfa, p)
512: register struct fa *pfa;
513: register char *p;
514: {
515: register count;
516: char c;
517: if (p == 0) return(0);
518: if (pfa->cch == 1) { /* fast test for first character, if possible */
519: c = (++pfa)->cch;
520: do
521: if (c == *p) {
522: p++;
523: pfa = pfa->st;
524: goto adv;
525: }
526: while (*p++ != 0);
527: return(0);
528: }
529: adv: if ((count = pfa->cch) < 0) return(1);
530: do {
531: for (pfa += count; count; count--, pfa--)
532: if (pfa->cch == *p) {
533: break;
534: }
535: pfa = pfa->st;
536: if ((count = pfa->cch) < 0) return(1);
537: } while(*p++ != 0);
538: return(0);
539: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.