|
|
1.1 root 1: #include "awk.def"
2: #include "stdio.h"
3: #include "awk.h"
4:
5: extern node *op2();
6: extern struct fa *cgotofn();
7: #define MAXLIN 256
8: #define NCHARS 128
9: #define NSTATES 256
10:
11: #define type(v) v->nobj
12: #define left(v) v->narg[0]
13: #define right(v) v->narg[1]
14: #define parent(v) v->nnext
15:
16: #define LEAF case CCL: case NCCL: case CHAR: case DOT:
17: #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
18:
19: /* encoding in tree nodes:
20: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
21: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
22: binary (CAT, OR): left and right are children
23: parent contains pointer to parent
24: */
25:
26: struct fa {
27: int cch;
28: struct fa *st;
29: };
30:
31: int *state[NSTATES];
32: int *foll[MAXLIN];
33: char chars[MAXLIN];
34: int setvec[MAXLIN];
35: node *point[MAXLIN];
36:
37: int setcnt;
38: int line;
39:
40:
41: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
42: node *p;
43: {
44: node *p1;
45: struct fa *fap;
46: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
47: /* put DOT STAR in front of reg. exp. */
48: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
49:
50: line = 0;
51: penter(p1); /* enter parent pointers and leaf indices */
52: point[line] = p1; /* FINAL node */
53: setvec[0] = 1; /* for initial DOT STAR */
54: cfoll(p1); /* set up follow sets */
55: fap = cgotofn();
56: freetr(p1); /* add this when alloc works */
57: return(fap);
58: }
59:
60: penter(p) /* set up parent pointers and leaf indices */
61: node *p;
62: {
63: switch(type(p)) {
64: LEAF
65: left(p) = (node *) line;
66: point[line++] = p;
67: break;
68: UNARY
69: penter(left(p));
70: parent(left(p)) = p;
71: break;
72: case CAT:
73: case OR:
74: penter(left(p));
75: penter(right(p));
76: parent(left(p)) = p;
77: parent(right(p)) = p;
78: break;
79: default:
80: error(FATAL, "unknown type %d in penter\n", type(p));
81: break;
82: }
83: }
84:
85: freetr(p) /* free parse tree and follow sets */
86: node *p;
87: {
88: switch(type(p)) {
89: LEAF
90: xfree(foll[(int) left(p)]);
91: xfree(p);
92: break;
93: UNARY
94: freetr(left(p));
95: xfree(p);
96: break;
97: case CAT:
98: case OR:
99: freetr(left(p));
100: freetr(right(p));
101: xfree(p);
102: break;
103: default:
104: error(FATAL, "unknown type %d in freetr", type(p));
105: break;
106: }
107: }
108: char *cclenter(p)
109: register char *p;
110: {
111: register i, c;
112: char *op;
113:
114: op = p;
115: i = 0;
116: while ((c = *p++) != 0) {
117: if (c == '-' && i > 0 && chars[i-1] != 0) {
118: if (*p != 0) {
119: c = chars[i-1];
120: while (c < *p) {
121: if (i >= MAXLIN)
122: overflo();
123: chars[i++] = ++c;
124: }
125: p++;
126: continue;
127: }
128: }
129: if (i >= MAXLIN)
130: overflo();
131: chars[i++] = c;
132: }
133: chars[i++] = '\0';
134: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
135: xfree(op);
136: return(tostring(chars));
137: }
138:
139: overflo()
140: {
141: error(FATAL, "regular expression too long\n");
142: }
143:
144: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
145: register node *v;
146: {
147: register i;
148: int prev;
149: int *add();
150:
151: switch(type(v)) {
152: LEAF
153: setcnt = 0;
154: for (i=1; i<=line; i++)
155: setvec[i] = 0;
156: follow(v);
157: if (notin(foll, ( (int) left(v))-1, &prev)) {
158: foll[(int) left(v)] = add(setcnt);
159: }
160: else
161: foll[ (int) left(v)] = foll[prev];
162: break;
163: UNARY
164: cfoll(left(v));
165: break;
166: case CAT:
167: case OR:
168: cfoll(left(v));
169: cfoll(right(v));
170: break;
171: default:
172: error(FATAL, "unknown type %d in cfoll", type(v));
173: }
174: }
175:
176: first(p) /* collects initially active leaves of p into setvec */
177: register node *p; /* returns 0 or 1 depending on whether p matches empty string */
178: {
179: register b;
180:
181: switch(type(p)) {
182: LEAF
183: if (setvec[(int) left(p)] != 1) {
184: setvec[(int) left(p)] = 1;
185: setcnt++;
186: }
187: if (type(p) == CCL && (*(char *) right(p)) == '\0')
188: return(0); /* empty CCL */
189: else return(1);
190: case FINAL:
191: case PLUS:
192: if (first(left(p)) == 0) return(0);
193: return(1);
194: case STAR:
195: case QUEST:
196: first(left(p));
197: return(0);
198: case CAT:
199: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
200: return(1);
201: case OR:
202: b = first(right(p));
203: if (first(left(p)) == 0 || b == 0) return(0);
204: return(1);
205: }
206: error(FATAL, "unknown type %d in first\n", type(p));
207: return(-1);
208: }
209:
210: follow(v)
211: node *v; /* collects leaves that can follow v into setvec */
212: {
213: node *p;
214:
215: if (type(v) == FINAL)
216: return;
217: p = parent(v);
218: switch (type(p)) {
219: case STAR:
220: case PLUS: first(v);
221: follow(p);
222: return;
223:
224: case OR:
225: case QUEST: follow(p);
226: return;
227:
228: case CAT: if (v == left(p)) { /* v is left child of p */
229: if (first(right(p)) == 0) {
230: follow(p);
231: return;
232: }
233: }
234: else /* v is right child */
235: follow(p);
236: return;
237: case FINAL: if (setvec[line] != 1) {
238: setvec[line] = 1;
239: setcnt++;
240: }
241: return;
242: }
243: }
244:
245: member(c, s) /* is c in s? */
246: register char c, *s;
247: {
248: while (*s)
249: if (c == *s++)
250: return(1);
251: return(0);
252: }
253:
254: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
255: int **array;
256: int *prev; {
257: register i, j;
258: int *ptr;
259: for (i=0; i<=n; i++) {
260: ptr = array[i];
261: if (*ptr == setcnt) {
262: for (j=0; j < setcnt; j++)
263: if (setvec[*(++ptr)] != 1) goto nxt;
264: *prev = i;
265: return(0);
266: }
267: nxt: ;
268: }
269: return(1);
270: }
271:
272: int *add(n) { /* remember setvec */
273: int *ptr, *p;
274: register i;
275: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
276: overflo();
277: *ptr = n;
278: dprintf("add(%d)\n", n, NULL, NULL);
279: for (i=1; i <= line; i++)
280: if (setvec[i] == 1) {
281: *(++ptr) = i;
282: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
283: }
284: dprintf("\n", NULL, NULL, NULL);
285: return(p);
286: }
287:
288: struct fa *cgotofn()
289: {
290: register i, k;
291: register int *ptr;
292: char c;
293: char *p;
294: node *cp;
295: int j, n, s, ind, numtrans;
296: int finflg;
297: int curpos, num, prev;
298: struct fa *where[NSTATES];
299:
300: int fatab[257];
301: struct fa *pfa;
302:
303: char index[MAXLIN];
304: char iposns[MAXLIN];
305: int sposns[MAXLIN];
306: int spmax, spinit;
307:
308: char symbol[NCHARS];
309: char isyms[NCHARS];
310: char ssyms[NCHARS];
311: int ssmax, ssinit;
312:
313: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
314: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
315: setcnt = 0;
316: state[0] = add(0);
317: /* compute initial positions and symbols of state 0 */
318: ssmax = 0;
319: ptr = foll[0];
320: spinit = *ptr;
321: for (i=0; i<spinit; i++) {
322: curpos = *(++ptr);
323: sposns[i] = curpos;
324: iposns[curpos] = 1;
325: cp = point[curpos];
326: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
327: switch (type(cp)) {
328: case CHAR:
329: k = (int) right(cp);
330: if (isyms[k] != 1) {
331: isyms[k] = 1;
332: ssyms[ssmax++] = k;
333: }
334: break;
335: case DOT:
336: for (k=1; k<NCHARS; k++) {
337: if (k != HAT) {
338: if (isyms[k] != 1) {
339: isyms[k] = 1;
340: ssyms[ssmax++] = k;
341: }
342: }
343: }
344: break;
345: case CCL:
346: for (p = (char *) right(cp); *p; p++) {
347: if (*p != HAT) {
348: if (isyms[*p] != 1) {
349: isyms[*p] = 1;
350: ssyms[ssmax++] = *p;
351: }
352: }
353: }
354: break;
355: case NCCL:
356: for (k=1; k<NCHARS; k++) {
357: if (k != HAT && !member(k, (char *) right(cp))) {
358: if (isyms[k] != 1) {
359: isyms[k] = 1;
360: ssyms[ssmax++] = k;
361: }
362: }
363: }
364: }
365: }
366: ssinit = ssmax;
367: n = 0;
368: for (s=0; s<=n; s++) {
369: dprintf("s = %d\n", s, NULL, NULL);
370: ind = 0;
371: numtrans = 0;
372: finflg = 0;
373: if (*(state[s] + *state[s]) == line) { /* s final? */
374: finflg = 1;
375: goto tenter;
376: }
377: spmax = spinit;
378: ssmax = ssinit;
379: ptr = state[s];
380: num = *ptr;
381: for (i=0; i<num; i++) {
382: curpos = *(++ptr);
383: if (iposns[curpos] != 1 && index[curpos] != 1) {
384: index[curpos] = 1;
385: sposns[spmax++] = curpos;
386: }
387: cp = point[curpos];
388: switch (type(cp)) {
389: case CHAR:
390: k = (int) right(cp);
391: if (isyms[k] == 0 && symbol[k] == 0) {
392: symbol[k] = 1;
393: ssyms[ssmax++] = k;
394: }
395: break;
396: case DOT:
397: for (k=1; k<NCHARS; k++) {
398: if (k != HAT) {
399: if (isyms[k] == 0 && symbol[k] == 0) {
400: symbol[k] = 1;
401: ssyms[ssmax++] = k;
402: }
403: }
404: }
405: break;
406: case CCL:
407: for (p = (char *) right(cp); *p; p++) {
408: if (*p != HAT) {
409: if (isyms[*p] == 0 && symbol[*p] == 0) {
410: symbol[*p] = 1;
411: ssyms[ssmax++] = *p;
412: }
413: }
414: }
415: break;
416: case NCCL:
417: for (k=1; k<NCHARS; k++) {
418: if (k != HAT && !member(k, (char *) right(cp))) {
419: if (isyms[k] == 0 && symbol[k] == 0) {
420: symbol[k] = 1;
421: ssyms[ssmax++] = k;
422: }
423: }
424: }
425: }
426: }
427: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
428: c = ssyms[j];
429: symbol[c] = 0;
430: setcnt = 0;
431: for (k=0; k<=line; k++) setvec[k] = 0;
432: for (i=0; i<spmax; i++) {
433: index[sposns[i]] = 0;
434: cp = point[sposns[i]];
435: if ((k = type(cp)) != FINAL)
436: if (k == CHAR && c == (int) right(cp)
437: || k == DOT
438: || k == CCL && member(c, (char *) right(cp))
439: || k == NCCL && !member(c, (char *) right(cp))) {
440: ptr = foll[sposns[i]];
441: num = *ptr;
442: for (k=0; k<num; k++) {
443: if (setvec[*(++ptr)] != 1
444: && iposns[*ptr] != 1) {
445: setvec[*ptr] = 1;
446: setcnt++;
447: }
448: }
449: }
450: } /* end nextstate */
451: if (notin(state, n, &prev)) {
452: if (n >= NSTATES) {
453: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
454: overflo();
455: }
456: state[++n] = add(setcnt);
457: dprintf(" delta(%d,%o) = %d", s,c,n);
458: dprintf(", ind = %d\n", ind+1, NULL, NULL);
459: fatab[++ind] = c;
460: fatab[++ind] = n;
461: numtrans++;
462: }
463: else {
464: if (prev != 0) {
465: dprintf(" delta(%d,%o) = %d", s,c,prev);
466: dprintf(", ind = %d\n", ind+1, NULL, NULL);
467: fatab[++ind] = c;
468: fatab[++ind] = prev;
469: numtrans++;
470: }
471: }
472: }
473: tenter:
474: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
475: overflo();
476: where[s] = pfa;
477: if (finflg)
478: pfa->cch = -1; /* s is a final state */
479: else
480: pfa->cch = numtrans;
481: pfa->st = 0;
482: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
483: pfa->cch = fatab[2*i-1];
484: pfa->st = (struct fa *) fatab[2*i];
485: }
486: }
487: for (i=0; i<=n; i++) {
488: xfree(state[i]); /* free state[i] */
489: pfa = where[i];
490: pfa->st = where[0];
491: dprintf("state %d: (%o)\n", i, pfa, NULL);
492: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
493: for (k=1; k<=pfa->cch; k++) {
494: (pfa+k)->st = where[ (int) (pfa+k)->st];
495: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
496: }
497: }
498: pfa = where[0];
499: if ((num = pfa->cch) < 0)
500: return(where[0]);
501: for (pfa += num; num; num--, pfa--)
502: if (pfa->cch == HAT) {
503: return(pfa->st);
504: }
505: return(where[0]);
506: }
507:
508: match(pfa, p)
509: register struct fa *pfa;
510: register char *p;
511: {
512: register count;
513: char c;
514: if (p == 0) return(0);
515: if (pfa->cch == 1) { /* fast test for first character, if possible */
516: c = (++pfa)->cch;
517: do
518: if (c == *p) {
519: p++;
520: pfa = pfa->st;
521: goto adv;
522: }
523: while (*p++ != 0);
524: return(0);
525: }
526: adv: if ((count = pfa->cch) < 0) return(1);
527: do {
528: for (pfa += count; count; count--, pfa--)
529: if (pfa->cch == *p) {
530: break;
531: }
532: pfa = pfa->st;
533: if ((count = pfa->cch) < 0) return(1);
534: } while(*p++ != 0);
535: return(0);
536: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.