|
|
1.1 root 1: /*
2: * egrep -- print lines containing (or not containing) a regular expression
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10: %left OR
11: %left CHAR DOT CCL NCCL '('
12: %left CAT
13: %left STAR PLUS QUEST
14:
15: %{
16: static char *sccsid = "@(#)egrep.y 4.1 (Berkeley) 10/1/80";
17: #include <stdio.h>
18:
19: #define MAXLIN 350
20: #define MAXPOS 4000
21: #define NCHARS 128
22: #define NSTATES 128
23: #define FINAL -1
24: char gotofn[NSTATES][NCHARS];
25: int state[NSTATES];
26: char out[NSTATES];
27: int line = 1;
28: int name[MAXLIN];
29: int left[MAXLIN];
30: int right[MAXLIN];
31: int parent[MAXLIN];
32: int foll[MAXLIN];
33: int positions[MAXPOS];
34: char chars[MAXLIN];
35: int nxtpos;
36: int nxtchar = 0;
37: int tmpstat[MAXLIN];
38: int initstat[MAXLIN];
39: int xstate;
40: int count;
41: int icount;
42: char *input;
43:
44: long lnum;
45: int bflag;
46: int cflag;
47: int fflag;
48: int lflag;
49: int nflag;
50: int hflag = 1;
51: int sflag;
52: int vflag;
53: int nfile;
54: int blkno;
55: long tln;
56: int nsucc;
57:
58: int f;
59: int fname;
60: %}
61:
62: %%
63: s: t
64: ={ unary(FINAL, $1);
65: line--;
66: }
67: ;
68: t: b r
69: ={ $$ = node(CAT, $1, $2); }
70: | OR b r OR
71: ={ $$ = node(CAT, $2, $3); }
72: | OR b r
73: ={ $$ = node(CAT, $2, $3); }
74: | b r OR
75: ={ $$ = node(CAT, $1, $2); }
76: ;
77: b:
78: ={ $$ = enter(DOT);
79: $$ = unary(STAR, $$); }
80: ;
81: r: CHAR
82: ={ $$ = enter($1); }
83: | DOT
84: ={ $$ = enter(DOT); }
85: | CCL
86: ={ $$ = cclenter(CCL); }
87: | NCCL
88: ={ $$ = cclenter(NCCL); }
89: ;
90:
91: r: r OR r
92: ={ $$ = node(OR, $1, $3); }
93: | r r %prec CAT
94: ={ $$ = node(CAT, $1, $2); }
95: | r STAR
96: ={ $$ = unary(STAR, $1); }
97: | r PLUS
98: ={ $$ = unary(PLUS, $1); }
99: | r QUEST
100: ={ $$ = unary(QUEST, $1); }
101: | '(' r ')'
102: ={ $$ = $2; }
103: | error
104: ;
105:
106: %%
107: yyerror(s) {
108: fprintf(stderr, "egrep: %s\n", s);
109: exit(2);
110: }
111:
112: yylex() {
113: extern int yylval;
114: int cclcnt, x;
115: register char c, d;
116: switch(c = nextch()) {
117: case '$':
118: case '^': c = '\n';
119: goto defchar;
120: case '|': return (OR);
121: case '*': return (STAR);
122: case '+': return (PLUS);
123: case '?': return (QUEST);
124: case '(': return (c);
125: case ')': return (c);
126: case '.': return (DOT);
127: case '\0': return (0);
128: case '\n': return (OR);
129: case '[':
130: x = CCL;
131: cclcnt = 0;
132: count = nxtchar++;
133: if ((c = nextch()) == '^') {
134: x = NCCL;
135: c = nextch();
136: }
137: do {
138: if (c == '\0') synerror();
139: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
140: if ((d = nextch()) != 0) {
141: c = chars[nxtchar-1];
142: while (c < d) {
143: if (nxtchar >= MAXLIN) overflo();
144: chars[nxtchar++] = ++c;
145: cclcnt++;
146: }
147: continue;
148: }
149: }
150: if (nxtchar >= MAXLIN) overflo();
151: chars[nxtchar++] = c;
152: cclcnt++;
153: } while ((c = nextch()) != ']');
154: chars[count] = cclcnt;
155: return (x);
156: case '\\':
157: if ((c = nextch()) == '\0') synerror();
158: defchar:
159: default: yylval = c; return (CHAR);
160: }
161: }
162: nextch() {
163: register char c;
164: if (fflag) {
165: if ((c = getc(stdin)) == EOF) return(0);
166: }
167: else c = *input++;
168: return(c);
169: }
170:
171: synerror() {
172: fprintf(stderr, "egrep: syntax error\n");
173: exit(2);
174: }
175:
176: enter(x) int x; {
177: if(line >= MAXLIN) overflo();
178: name[line] = x;
179: left[line] = 0;
180: right[line] = 0;
181: return(line++);
182: }
183:
184: cclenter(x) int x; {
185: register linno;
186: linno = enter(x);
187: right[linno] = count;
188: return (linno);
189: }
190:
191: node(x, l, r) {
192: if(line >= MAXLIN) overflo();
193: name[line] = x;
194: left[line] = l;
195: right[line] = r;
196: parent[l] = line;
197: parent[r] = line;
198: return(line++);
199: }
200:
201: unary(x, d) {
202: if(line >= MAXLIN) overflo();
203: name[line] = x;
204: left[line] = d;
205: right[line] = 0;
206: parent[d] = line;
207: return(line++);
208: }
209: overflo() {
210: fprintf(stderr, "egrep: regular expression too long\n");
211: exit(2);
212: }
213:
214: cfoll(v) {
215: register i;
216: if (left[v] == 0) {
217: count = 0;
218: for (i=1; i<=line; i++) tmpstat[i] = 0;
219: follow(v);
220: add(foll, v);
221: }
222: else if (right[v] == 0) cfoll(left[v]);
223: else {
224: cfoll(left[v]);
225: cfoll(right[v]);
226: }
227: }
228: cgotofn() {
229: register c, i, k;
230: int n, s;
231: char symbol[NCHARS];
232: int j, nc, pc, pos;
233: int curpos, num;
234: int number, newpos;
235: count = 0;
236: for (n=3; n<=line; n++) tmpstat[n] = 0;
237: if (cstate(line-1)==0) {
238: tmpstat[line] = 1;
239: count++;
240: out[0] = 1;
241: }
242: for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
243: count--; /*leave out position 1 */
244: icount = count;
245: tmpstat[1] = 0;
246: add(state, 0);
247: n = 0;
248: for (s=0; s<=n; s++) {
249: if (out[s] == 1) continue;
250: for (i=0; i<NCHARS; i++) symbol[i] = 0;
251: num = positions[state[s]];
252: count = icount;
253: for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
254: pos = state[s] + 1;
255: for (i=0; i<num; i++) {
256: curpos = positions[pos];
257: if ((c = name[curpos]) >= 0) {
258: if (c < NCHARS) symbol[c] = 1;
259: else if (c == DOT) {
260: for (k=0; k<NCHARS; k++)
261: if (k!='\n') symbol[k] = 1;
262: }
263: else if (c == CCL) {
264: nc = chars[right[curpos]];
265: pc = right[curpos] + 1;
266: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
267: }
268: else if (c == NCCL) {
269: nc = chars[right[curpos]];
270: for (j = 0; j < NCHARS; j++) {
271: pc = right[curpos] + 1;
272: for (k = 0; k < nc; k++)
273: if (j==chars[pc++]) goto cont;
274: if (j!='\n') symbol[j] = 1;
275: cont:;
276: }
277: }
278: else printf("something's funny\n");
279: }
280: pos++;
281: }
282: for (c=0; c<NCHARS; c++) {
283: if (symbol[c] == 1) { /* nextstate(s,c) */
284: count = icount;
285: for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
286: pos = state[s] + 1;
287: for (i=0; i<num; i++) {
288: curpos = positions[pos];
289: if ((k = name[curpos]) >= 0)
290: if (
291: (k == c)
292: | (k == DOT)
293: | (k == CCL && member(c, right[curpos], 1))
294: | (k == NCCL && member(c, right[curpos], 0))
295: ) {
296: number = positions[foll[curpos]];
297: newpos = foll[curpos] + 1;
298: for (k=0; k<number; k++) {
299: if (tmpstat[positions[newpos]] != 1) {
300: tmpstat[positions[newpos]] = 1;
301: count++;
302: }
303: newpos++;
304: }
305: }
306: pos++;
307: } /* end nextstate */
308: if (notin(n)) {
309: if (n >= NSTATES) overflo();
310: add(state, ++n);
311: if (tmpstat[line] == 1) out[n] = 1;
312: gotofn[s][c] = n;
313: }
314: else {
315: gotofn[s][c] = xstate;
316: }
317: }
318: }
319: }
320: }
321:
322: cstate(v) {
323: register b;
324: if (left[v] == 0) {
325: if (tmpstat[v] != 1) {
326: tmpstat[v] = 1;
327: count++;
328: }
329: return(1);
330: }
331: else if (right[v] == 0) {
332: if (cstate(left[v]) == 0) return (0);
333: else if (name[v] == PLUS) return (1);
334: else return (0);
335: }
336: else if (name[v] == CAT) {
337: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
338: else return (1);
339: }
340: else { /* name[v] == OR */
341: b = cstate(right[v]);
342: if (cstate(left[v]) == 0 || b == 0) return (0);
343: else return (1);
344: }
345: }
346:
347:
348: member(symb, set, torf) {
349: register i, num, pos;
350: num = chars[set];
351: pos = set + 1;
352: for (i=0; i<num; i++)
353: if (symb == chars[pos++]) return (torf);
354: return (!torf);
355: }
356:
357: notin(n) {
358: register i, j, pos;
359: for (i=0; i<=n; i++) {
360: if (positions[state[i]] == count) {
361: pos = state[i] + 1;
362: for (j=0; j < count; j++)
363: if (tmpstat[positions[pos++]] != 1) goto nxt;
364: xstate = i;
365: return (0);
366: }
367: nxt: ;
368: }
369: return (1);
370: }
371:
372: add(array, n) int *array; {
373: register i;
374: if (nxtpos + count > MAXPOS) overflo();
375: array[n] = nxtpos;
376: positions[nxtpos++] = count;
377: for (i=3; i <= line; i++) {
378: if (tmpstat[i] == 1) {
379: positions[nxtpos++] = i;
380: }
381: }
382: }
383:
384: follow(v) int v; {
385: int p;
386: if (v == line) return;
387: p = parent[v];
388: switch(name[p]) {
389: case STAR:
390: case PLUS: cstate(v);
391: follow(p);
392: return;
393:
394: case OR:
395: case QUEST: follow(p);
396: return;
397:
398: case CAT: if (v == left[p]) {
399: if (cstate(right[p]) == 0) {
400: follow(p);
401: return;
402: }
403: }
404: else follow(p);
405: return;
406: case FINAL: if (tmpstat[line] != 1) {
407: tmpstat[line] = 1;
408: count++;
409: }
410: return;
411: }
412: }
413:
414:
415: main(argc, argv)
416: char **argv;
417: {
418: while (--argc > 0 && (++argv)[0][0]=='-')
419: switch (argv[0][1]) {
420:
421: case 's':
422: sflag++;
423: continue;
424:
425: case 'h':
426: hflag = 0;
427: continue;
428:
429: case 'b':
430: bflag++;
431: continue;
432:
433: case 'c':
434: cflag++;
435: continue;
436:
437: case 'e':
438: argc--;
439: argv++;
440: goto out;
441:
442: case 'f':
443: fflag++;
444: continue;
445:
446: case 'l':
447: lflag++;
448: continue;
449:
450: case 'n':
451: nflag++;
452: continue;
453:
454: case 'v':
455: vflag++;
456: continue;
457:
458: default:
459: fprintf(stderr, "egrep: unknown flag\n");
460: continue;
461: }
462: out:
463: if (argc<=0)
464: exit(2);
465: if (fflag) {
466: if (freopen(fname = *argv, "r", stdin) == NULL) {
467: fprintf(stderr, "egrep: can't open %s\n", fname);
468: exit(2);
469: }
470: }
471: else input = *argv;
472: argc--;
473: argv++;
474:
475: yyparse();
476:
477: cfoll(line-1);
478: cgotofn();
479: nfile = argc;
480: if (argc<=0) {
481: if (lflag) exit(1);
482: execute(0);
483: }
484: else while (--argc >= 0) {
485: execute(*argv);
486: argv++;
487: }
488: exit(nsucc == 0);
489: }
490:
491: execute(file)
492: char *file;
493: {
494: register char *p;
495: register cstat;
496: register ccount;
497: char buf[1024];
498: char *nlp;
499: int istat;
500: if (file) {
501: if ((f = open(file, 0)) < 0) {
502: fprintf(stderr, "egrep: can't open %s\n", file);
503: exit(2);
504: }
505: }
506: else f = 0;
507: ccount = 0;
508: lnum = 1;
509: tln = 0;
510: blkno = 0;
511: p = buf;
512: nlp = p;
513: if ((ccount = read(f,p,512))<=0) goto done;
514: istat = cstat = gotofn[0]['\n'];
515: if (out[cstat]) goto found;
516: for (;;) {
517: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
518: if (out[cstat]) {
519: found: for(;;) {
520: if (*p++ == '\n') {
521: if (vflag == 0) {
522: succeed: nsucc = 1;
523: if (cflag) tln++;
524: else if (sflag)
525: ; /* ugh */
526: else if (lflag) {
527: printf("%s\n", file);
528: close(f);
529: return;
530: }
531: else {
532: if (nfile > 1 && hflag) printf("%s:", file);
533: if (bflag) printf("%d:", blkno);
534: if (nflag) printf("%ld:", lnum);
535: if (p <= nlp) {
536: while (nlp < &buf[1024]) putchar(*nlp++);
537: nlp = buf;
538: }
539: while (nlp < p) putchar(*nlp++);
540: }
541: }
542: lnum++;
543: nlp = p;
544: if ((out[(cstat=istat)]) == 0) goto brk2;
545: }
546: cfound:
547: if (--ccount <= 0) {
548: if (p <= &buf[512]) {
549: if ((ccount = read(f, p, 512)) <= 0) goto done;
550: }
551: else if (p == &buf[1024]) {
552: p = buf;
553: if ((ccount = read(f, p, 512)) <= 0) goto done;
554: }
555: else {
556: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
557: }
558: blkno++;
559: }
560: }
561: }
562: if (*p++ == '\n') {
563: if (vflag) goto succeed;
564: else {
565: lnum++;
566: nlp = p;
567: if (out[(cstat=istat)]) goto cfound;
568: }
569: }
570: brk2:
571: if (--ccount <= 0) {
572: if (p <= &buf[512]) {
573: if ((ccount = read(f, p, 512)) <= 0) break;
574: }
575: else if (p == &buf[1024]) {
576: p = buf;
577: if ((ccount = read(f, p, 512)) <= 0) break;
578: }
579: else {
580: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
581: }
582: blkno++;
583: }
584: }
585: done: close(f);
586: if (cflag) {
587: if (nfile > 1)
588: printf("%s:", file);
589: printf("%ld\n", tln);
590: }
591: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.