|
|
1.1 root 1: #include "awk.def"
2: #include "stdio.h"
3: #include "awk.h"
4:
5: extern node *op2();
6: extern struct fa *cgotofn();
7: #define MAXLIN 256
8: #define NCHARS 128
9: #define NSTATES 256
10:
11: #define type(v) v->nobj
12: #define left(v) v->narg[0]
13: #define right(v) v->narg[1]
14: #define parent(v) v->nnext
15:
16: #define LEAF case CCL: case NCCL: case CHAR: case DOT:
17: #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
18:
19: /* encoding in tree nodes:
20: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
21: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
22: binary (CAT, OR): left and right are children
23: parent contains pointer to parent
24: */
25:
26: struct fa {
27: int cch;
28: struct fa *st;
29: };
30:
31: int *state[NSTATES];
32: int *foll[MAXLIN];
33: char chars[MAXLIN];
34: int setvec[MAXLIN];
35: node *point[MAXLIN];
36:
37: int setcnt;
38: int line;
39:
40:
41: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
42: node *p;
43: {
44: node *p1;
45: struct fa *fap;
46: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
47: /* put DOT STAR in front of reg. exp. */
48: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
49:
50: line = 0;
51: penter(p1); /* enter parent pointers and leaf indices */
52: point[line] = p1; /* FINAL node */
53: setvec[0] = 1; /* for initial DOT STAR */
54: cfoll(p1); /* set up follow sets */
55: fap = cgotofn();
56: freetr(p1); /* add this when alloc works */
57: return(fap);
58: }
59:
60: penter(p) /* set up parent pointers and leaf indices */
61: node *p;
62: {
63: switch(type(p)) {
64: LEAF
65: left(p) = (node *) line;
66: point[line++] = p;
67: break;
68: UNARY
69: penter(left(p));
70: parent(left(p)) = p;
71: break;
72: case CAT:
73: case OR:
74: penter(left(p));
75: penter(right(p));
76: parent(left(p)) = p;
77: parent(right(p)) = p;
78: break;
79: default:
80: error(FATAL, "unknown type %d in penter\n", type(p));
81: break;
82: }
83: }
84:
85: freetr(p) /* free parse tree and follow sets */
86: node *p;
87: {
88: switch(type(p)) {
89: LEAF
90: xfree(foll[(int) left(p)]);
91: xfree(p);
92: break;
93: UNARY
94: freetr(left(p));
95: xfree(p);
96: break;
97: case CAT:
98: case OR:
99: freetr(left(p));
100: freetr(right(p));
101: xfree(p);
102: break;
103: default:
104: error(FATAL, "unknown type %d in freetr", type(p));
105: break;
106: }
107: }
108: char *cclenter(p)
109: register char *p;
110: {
111: register i, c;
112: char *op;
113:
114: op = p;
115: i = 0;
116: while ((c = *p++) != 0) {
117: if (c == '-' && i > 0 && chars[i-1] != 0) {
118: if (*p != 0) {
119: c = chars[i-1];
120: while (c < *p) {
121: if (i >= MAXLIN)
122: overflo();
123: chars[i++] = ++c;
124: }
125: p++;
126: continue;
127: }
128: }
129: if (i >= MAXLIN)
130: overflo();
131: chars[i++] = c;
132: }
133: chars[i++] = '\0';
134: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
135: xfree(op);
136: return(tostring(chars));
137: }
138:
139: overflo()
140: {
141: error(FATAL, "regular expression too long\n");
142: }
143:
144: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
145: register node *v;
146: {
147: register i;
148: int prev;
149: int *add();
150:
151: switch(type(v)) {
152: LEAF
153: setcnt = 0;
154: for (i=1; i<=line; i++)
155: setvec[i] = 0;
156: follow(v);
157: if (notin(foll, ( (int) left(v))-1, &prev)) {
158: foll[(int) left(v)] = add(setcnt);
159: }
160: else
161: foll[ (int) left(v)] = foll[prev];
162: break;
163: UNARY
164: cfoll(left(v));
165: break;
166: case CAT:
167: case OR:
168: cfoll(left(v));
169: cfoll(right(v));
170: break;
171: default:
172: error(FATAL, "unknown type %d in cfoll", type(v));
173: }
174: }
175:
176: first(p) /* collects initially active leaves of p into setvec */
177: register node *p; /* returns 0 or 1 depending on whether p matches empty string */
178: {
179: register b;
180:
181: switch(type(p)) {
182: LEAF
183: if (setvec[(int) left(p)] != 1) {
184: setvec[(int) left(p)] = 1;
185: setcnt++;
186: }
187: if (type(p) == CCL && (*(char *) right(p)) == '\0')
188: return(0); /* empty CCL */
189: else return(1);
190: case FINAL:
191: case PLUS:
192: if (first(left(p)) == 0) return(0);
193: return(1);
194: case STAR:
195: case QUEST:
196: first(left(p));
197: return(0);
198: case CAT:
199: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
200: return(1);
201: case OR:
202: b = first(right(p));
203: if (first(left(p)) == 0 || b == 0) return(0);
204: return(1);
205: }
206: error(FATAL, "unknown type %d in first\n", type(p));
207: return(-1);
208: }
209:
210: follow(v)
211: node *v; /* collects leaves that can follow v into setvec */
212: {
213: node *p;
214:
215: if (type(v) == FINAL)
216: return;
217: p = parent(v);
218: switch (type(p)) {
219: case STAR:
220: case PLUS: first(v);
221: follow(p);
222: return;
223:
224: case OR:
225: case QUEST: follow(p);
226: return;
227:
228: case CAT: if (v == left(p)) { /* v is left child of p */
229: if (first(right(p)) == 0) {
230: follow(p);
231: return;
232: }
233: }
234: else /* v is right child */
235: follow(p);
236: return;
237: case FINAL: if (setvec[line] != 1) {
238: setvec[line] = 1;
239: setcnt++;
240: }
241: return;
242: }
243: }
244:
245: member(c, s) /* is c in s? */
246: register char c, *s;
247: {
248: while (*s)
249: if (c == *s++)
250: return(1);
251: return(0);
252: }
253:
254: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
255: int **array;
256: int *prev; {
257: register i, j;
258: int *ptr;
259: for (i=0; i<=n; i++) {
260: ptr = array[i];
261: if (*ptr == setcnt) {
262: for (j=0; j < setcnt; j++)
263: if (setvec[*(++ptr)] != 1) goto nxt;
264: *prev = i;
265: return(0);
266: }
267: nxt: ;
268: }
269: return(1);
270: }
271:
272: int *add(n) { /* remember setvec */
273: int *ptr, *p;
274: register i;
275: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
276: overflo();
277: *ptr = n;
278: dprintf("add(%d)\n", n, NULL, NULL);
279: for (i=1; i <= line; i++)
280: if (setvec[i] == 1) {
281: *(++ptr) = i;
282: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
283: }
284: dprintf("\n", NULL, NULL, NULL);
285: return(p);
286: }
287:
288: struct fa *cgotofn()
289: {
290: register i, k;
291: register int *ptr;
292: char c;
293: char *p;
294: node *cp;
295: int j, n, s, ind, numtrans;
296: int finflg;
297: int curpos, num, prev;
298: struct fa *where[NSTATES];
299:
300: int fatab[257];
301: struct fa *pfa;
302:
303: char index[MAXLIN];
304: char iposns[MAXLIN];
305: int sposns[MAXLIN];
306: int spmax, spinit;
307:
308: char symbol[NCHARS];
309: char isyms[NCHARS];
310: char ssyms[NCHARS];
311: int ssmax, ssinit;
312:
313: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
314: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
315: setcnt = 0;
316: /* compute initial positions and symbols of state 0 */
317: ssmax = 0;
318: ptr = state[0] = foll[0];
319: spinit = *ptr;
320: for (i=0; i<spinit; i++) {
321: curpos = *(++ptr);
322: sposns[i] = curpos;
323: iposns[curpos] = 1;
324: cp = point[curpos];
325: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
326: switch (type(cp)) {
327: case CHAR:
328: k = (int) right(cp);
329: if (isyms[k] != 1) {
330: isyms[k] = 1;
331: ssyms[ssmax++] = k;
332: }
333: break;
334: case DOT:
335: for (k=1; k<NCHARS; k++) {
336: if (k != HAT) {
337: if (isyms[k] != 1) {
338: isyms[k] = 1;
339: ssyms[ssmax++] = k;
340: }
341: }
342: }
343: break;
344: case CCL:
345: for (p = (char *) right(cp); *p; p++) {
346: if (*p != HAT) {
347: if (isyms[*p] != 1) {
348: isyms[*p] = 1;
349: ssyms[ssmax++] = *p;
350: }
351: }
352: }
353: break;
354: case NCCL:
355: for (k=1; k<NCHARS; k++) {
356: if (k != HAT && !member(k, (char *) right(cp))) {
357: if (isyms[k] != 1) {
358: isyms[k] = 1;
359: ssyms[ssmax++] = k;
360: }
361: }
362: }
363: }
364: }
365: ssinit = ssmax;
366: n = 0;
367: for (s=0; s<=n; s++) {
368: dprintf("s = %d\n", s, NULL, NULL);
369: ind = 0;
370: numtrans = 0;
371: finflg = 0;
372: if (*(state[s] + *state[s]) == line) { /* s final? */
373: finflg = 1;
374: goto tenter;
375: }
376: spmax = spinit;
377: ssmax = ssinit;
378: ptr = state[s];
379: num = *ptr;
380: for (i=0; i<num; i++) {
381: curpos = *(++ptr);
382: if (iposns[curpos] != 1 && index[curpos] != 1) {
383: index[curpos] = 1;
384: sposns[spmax++] = curpos;
385: }
386: cp = point[curpos];
387: switch (type(cp)) {
388: case CHAR:
389: k = (int) right(cp);
390: if (isyms[k] == 0 && symbol[k] == 0) {
391: symbol[k] = 1;
392: ssyms[ssmax++] = k;
393: }
394: break;
395: case DOT:
396: for (k=1; k<NCHARS; k++) {
397: if (k != HAT) {
398: if (isyms[k] == 0 && symbol[k] == 0) {
399: symbol[k] = 1;
400: ssyms[ssmax++] = k;
401: }
402: }
403: }
404: break;
405: case CCL:
406: for (p = (char *) right(cp); *p; p++) {
407: if (*p != HAT) {
408: if (isyms[*p] == 0 && symbol[*p] == 0) {
409: symbol[*p] = 1;
410: ssyms[ssmax++] = *p;
411: }
412: }
413: }
414: break;
415: case NCCL:
416: for (k=1; k<NCHARS; k++) {
417: if (k != HAT && !member(k, (char *) right(cp))) {
418: if (isyms[k] == 0 && symbol[k] == 0) {
419: symbol[k] = 1;
420: ssyms[ssmax++] = k;
421: }
422: }
423: }
424: }
425: }
426: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
427: c = ssyms[j];
428: symbol[c] = 0;
429: setcnt = 0;
430: for (k=0; k<=line; k++) setvec[k] = 0;
431: for (i=0; i<spmax; i++) {
432: index[sposns[i]] = 0;
433: cp = point[sposns[i]];
434: if ((k = type(cp)) != FINAL)
435: if (k == CHAR && c == (int) right(cp)
436: || k == DOT
437: || k == CCL && member(c, (char *) right(cp))
438: || k == NCCL && !member(c, (char *) right(cp))) {
439: ptr = foll[sposns[i]];
440: num = *ptr;
441: for (k=0; k<num; k++) {
442: if (setvec[*(++ptr)] != 1
443: && iposns[*ptr] != 1) {
444: setvec[*ptr] = 1;
445: setcnt++;
446: }
447: }
448: }
449: } /* end nextstate */
450: if (notin(state, n, &prev)) {
451: if (n >= NSTATES) {
452: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
453: overflo();
454: }
455: state[++n] = add(setcnt);
456: dprintf(" delta(%d,%o) = %d", s,c,n);
457: dprintf(", ind = %d\n", ind+1, NULL, NULL);
458: fatab[++ind] = c;
459: fatab[++ind] = n;
460: numtrans++;
461: }
462: else {
463: if (prev != 0) {
464: dprintf(" delta(%d,%o) = %d", s,c,prev);
465: dprintf(", ind = %d\n", ind+1, NULL, NULL);
466: fatab[++ind] = c;
467: fatab[++ind] = prev;
468: numtrans++;
469: }
470: }
471: }
472: tenter:
473: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
474: overflo();
475: where[s] = pfa;
476: if (finflg)
477: pfa->cch = -1; /* s is a final state */
478: else
479: pfa->cch = numtrans;
480: pfa->st = 0;
481: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
482: pfa->cch = fatab[2*i-1];
483: pfa->st = (struct fa *) fatab[2*i];
484: }
485: }
486: for (i=0; i<=n; i++) {
487: xfree(state[i]); /* free state[i] */
488: pfa = where[i];
489: pfa->st = where[0];
490: dprintf("state %d: (%o)\n", i, pfa, NULL);
491: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
492: for (k=1; k<=pfa->cch; k++) {
493: (pfa+k)->st = where[ (int) (pfa+k)->st];
494: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
495: }
496: }
497: pfa = where[0];
498: if ((num = pfa->cch) < 0)
499: return(where[0]);
500: for (pfa += num; num; num--, pfa--)
501: if (pfa->cch == HAT) {
502: return(pfa->st);
503: }
504: return(where[0]);
505: }
506:
507: match(pfa, p)
508: register struct fa *pfa;
509: register char *p;
510: {
511: register count;
512: char c;
513: if (p == 0) return(0);
514: if (pfa->cch == 1) { /* fast test for first character, if possible */
515: c = (++pfa)->cch;
516: do
517: if (c == *p) {
518: p++;
519: pfa = pfa->st;
520: goto adv;
521: }
522: while (*p++ != 0);
523: return(0);
524: }
525: adv: if ((count = pfa->cch) < 0) return(1);
526: do {
527: for (pfa += count; count; count--, pfa--)
528: if (pfa->cch == *p) {
529: break;
530: }
531: pfa = pfa->st;
532: if ((count = pfa->cch) < 0) return(1);
533: } while(*p++ != 0);
534: return(0);
535: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.