|
|
1.1 root 1: /*
2: * egrep -- print lines containing (or not containing) a regular expression
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10: %left OR
11: %left CHAR DOT CCL NCCL '('
12: %left CAT
13: %left STAR PLUS QUEST
14:
15: %{
16: static char *sccsid = "@(#)egrep.y 4.3 (Berkeley) 3/13/83";
17: #include <stdio.h>
18:
19: #define MAXLIN 350
20: #define MAXPOS 4000
21: #define NCHARS 128
22: #define NSTATES 128
23: #define FINAL -1
24: char gotofn[NSTATES][NCHARS];
25: int state[NSTATES];
26: char out[NSTATES];
27: int line = 1;
28: int name[MAXLIN];
29: int left[MAXLIN];
30: int right[MAXLIN];
31: int parent[MAXLIN];
32: int foll[MAXLIN];
33: int positions[MAXPOS];
34: char chars[MAXLIN];
35: int nxtpos;
36: int nxtchar = 0;
37: int tmpstat[MAXLIN];
38: int initstat[MAXLIN];
39: int xstate;
40: int count;
41: int icount;
42: char *input;
43: FILE *exprfile;
44:
45: long lnum;
46: int bflag;
47: int cflag;
48: int fflag;
49: int lflag;
50: int nflag;
51: int hflag = 1;
52: int sflag;
53: int vflag;
54: int retcode = 0;
55: int nfile;
56: int blkno;
57: long tln;
58: int nsucc;
59:
60: int f;
61: char *fname;
62: %}
63:
64: %%
65: s: t
66: ={ unary(FINAL, $1);
67: line--;
68: }
69: ;
70: t: b r
71: ={ $$ = node(CAT, $1, $2); }
72: | OR b r OR
73: ={ $$ = node(CAT, $2, $3); }
74: | OR b r
75: ={ $$ = node(CAT, $2, $3); }
76: | b r OR
77: ={ $$ = node(CAT, $1, $2); }
78: ;
79: b:
80: ={ $$ = enter(DOT);
81: $$ = unary(STAR, $$); }
82: ;
83: r: CHAR
84: ={ $$ = enter($1); }
85: | DOT
86: ={ $$ = enter(DOT); }
87: | CCL
88: ={ $$ = cclenter(CCL); }
89: | NCCL
90: ={ $$ = cclenter(NCCL); }
91: ;
92:
93: r: r OR r
94: ={ $$ = node(OR, $1, $3); }
95: | r r %prec CAT
96: ={ $$ = node(CAT, $1, $2); }
97: | r STAR
98: ={ $$ = unary(STAR, $1); }
99: | r PLUS
100: ={ $$ = unary(PLUS, $1); }
101: | r QUEST
102: ={ $$ = unary(QUEST, $1); }
103: | '(' r ')'
104: ={ $$ = $2; }
105: | error
106: ;
107:
108: %%
109: yyerror(s) {
110: fprintf(stderr, "egrep: %s\n", s);
111: exit(2);
112: }
113:
114: yylex() {
115: extern int yylval;
116: int cclcnt, x;
117: register char c, d;
118: switch(c = nextch()) {
119: case '$':
120: case '^': c = '\n';
121: goto defchar;
122: case '|': return (OR);
123: case '*': return (STAR);
124: case '+': return (PLUS);
125: case '?': return (QUEST);
126: case '(': return (c);
127: case ')': return (c);
128: case '.': return (DOT);
129: case '\0': return (0);
130: case '\n': return (OR);
131: case '[':
132: x = CCL;
133: cclcnt = 0;
134: count = nxtchar++;
135: if ((c = nextch()) == '^') {
136: x = NCCL;
137: c = nextch();
138: }
139: do {
140: if (c == '\0') synerror();
141: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
142: if ((d = nextch()) != 0) {
143: c = chars[nxtchar-1];
144: while (c < d) {
145: if (nxtchar >= MAXLIN) overflo();
146: chars[nxtchar++] = ++c;
147: cclcnt++;
148: }
149: continue;
150: }
151: }
152: if (nxtchar >= MAXLIN) overflo();
153: chars[nxtchar++] = c;
154: cclcnt++;
155: } while ((c = nextch()) != ']');
156: chars[count] = cclcnt;
157: return (x);
158: case '\\':
159: if ((c = nextch()) == '\0') synerror();
160: defchar:
161: default: yylval = c; return (CHAR);
162: }
163: }
164: nextch() {
165: register char c;
166: if (fflag) {
167: if ((c = getc(exprfile)) == EOF) {
168: fclose(exprfile);
169: return(0);
170: }
171: }
172: else c = *input++;
173: return(c);
174: }
175:
176: synerror() {
177: fprintf(stderr, "egrep: syntax error\n");
178: exit(2);
179: }
180:
181: enter(x) int x; {
182: if(line >= MAXLIN) overflo();
183: name[line] = x;
184: left[line] = 0;
185: right[line] = 0;
186: return(line++);
187: }
188:
189: cclenter(x) int x; {
190: register linno;
191: linno = enter(x);
192: right[linno] = count;
193: return (linno);
194: }
195:
196: node(x, l, r) {
197: if(line >= MAXLIN) overflo();
198: name[line] = x;
199: left[line] = l;
200: right[line] = r;
201: parent[l] = line;
202: parent[r] = line;
203: return(line++);
204: }
205:
206: unary(x, d) {
207: if(line >= MAXLIN) overflo();
208: name[line] = x;
209: left[line] = d;
210: right[line] = 0;
211: parent[d] = line;
212: return(line++);
213: }
214: overflo() {
215: fprintf(stderr, "egrep: regular expression too long\n");
216: exit(2);
217: }
218:
219: cfoll(v) {
220: register i;
221: if (left[v] == 0) {
222: count = 0;
223: for (i=1; i<=line; i++) tmpstat[i] = 0;
224: follow(v);
225: add(foll, v);
226: }
227: else if (right[v] == 0) cfoll(left[v]);
228: else {
229: cfoll(left[v]);
230: cfoll(right[v]);
231: }
232: }
233: cgotofn() {
234: register c, i, k;
235: int n, s;
236: char symbol[NCHARS];
237: int j, nc, pc, pos;
238: int curpos, num;
239: int number, newpos;
240: count = 0;
241: for (n=3; n<=line; n++) tmpstat[n] = 0;
242: if (cstate(line-1)==0) {
243: tmpstat[line] = 1;
244: count++;
245: out[0] = 1;
246: }
247: for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
248: count--; /*leave out position 1 */
249: icount = count;
250: tmpstat[1] = 0;
251: add(state, 0);
252: n = 0;
253: for (s=0; s<=n; s++) {
254: if (out[s] == 1) continue;
255: for (i=0; i<NCHARS; i++) symbol[i] = 0;
256: num = positions[state[s]];
257: count = icount;
258: for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
259: pos = state[s] + 1;
260: for (i=0; i<num; i++) {
261: curpos = positions[pos];
262: if ((c = name[curpos]) >= 0) {
263: if (c < NCHARS) symbol[c] = 1;
264: else if (c == DOT) {
265: for (k=0; k<NCHARS; k++)
266: if (k!='\n') symbol[k] = 1;
267: }
268: else if (c == CCL) {
269: nc = chars[right[curpos]];
270: pc = right[curpos] + 1;
271: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
272: }
273: else if (c == NCCL) {
274: nc = chars[right[curpos]];
275: for (j = 0; j < NCHARS; j++) {
276: pc = right[curpos] + 1;
277: for (k = 0; k < nc; k++)
278: if (j==chars[pc++]) goto cont;
279: if (j!='\n') symbol[j] = 1;
280: cont:;
281: }
282: }
283: else printf("something's funny\n");
284: }
285: pos++;
286: }
287: for (c=0; c<NCHARS; c++) {
288: if (symbol[c] == 1) { /* nextstate(s,c) */
289: count = icount;
290: for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
291: pos = state[s] + 1;
292: for (i=0; i<num; i++) {
293: curpos = positions[pos];
294: if ((k = name[curpos]) >= 0)
295: if (
296: (k == c)
297: | (k == DOT)
298: | (k == CCL && member(c, right[curpos], 1))
299: | (k == NCCL && member(c, right[curpos], 0))
300: ) {
301: number = positions[foll[curpos]];
302: newpos = foll[curpos] + 1;
303: for (k=0; k<number; k++) {
304: if (tmpstat[positions[newpos]] != 1) {
305: tmpstat[positions[newpos]] = 1;
306: count++;
307: }
308: newpos++;
309: }
310: }
311: pos++;
312: } /* end nextstate */
313: if (notin(n)) {
314: if (n >= NSTATES) overflo();
315: add(state, ++n);
316: if (tmpstat[line] == 1) out[n] = 1;
317: gotofn[s][c] = n;
318: }
319: else {
320: gotofn[s][c] = xstate;
321: }
322: }
323: }
324: }
325: }
326:
327: cstate(v) {
328: register b;
329: if (left[v] == 0) {
330: if (tmpstat[v] != 1) {
331: tmpstat[v] = 1;
332: count++;
333: }
334: return(1);
335: }
336: else if (right[v] == 0) {
337: if (cstate(left[v]) == 0) return (0);
338: else if (name[v] == PLUS) return (1);
339: else return (0);
340: }
341: else if (name[v] == CAT) {
342: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
343: else return (1);
344: }
345: else { /* name[v] == OR */
346: b = cstate(right[v]);
347: if (cstate(left[v]) == 0 || b == 0) return (0);
348: else return (1);
349: }
350: }
351:
352:
353: member(symb, set, torf) {
354: register i, num, pos;
355: num = chars[set];
356: pos = set + 1;
357: for (i=0; i<num; i++)
358: if (symb == chars[pos++]) return (torf);
359: return (!torf);
360: }
361:
362: notin(n) {
363: register i, j, pos;
364: for (i=0; i<=n; i++) {
365: if (positions[state[i]] == count) {
366: pos = state[i] + 1;
367: for (j=0; j < count; j++)
368: if (tmpstat[positions[pos++]] != 1) goto nxt;
369: xstate = i;
370: return (0);
371: }
372: nxt: ;
373: }
374: return (1);
375: }
376:
377: add(array, n) int *array; {
378: register i;
379: if (nxtpos + count > MAXPOS) overflo();
380: array[n] = nxtpos;
381: positions[nxtpos++] = count;
382: for (i=3; i <= line; i++) {
383: if (tmpstat[i] == 1) {
384: positions[nxtpos++] = i;
385: }
386: }
387: }
388:
389: follow(v) int v; {
390: int p;
391: if (v == line) return;
392: p = parent[v];
393: switch(name[p]) {
394: case STAR:
395: case PLUS: cstate(v);
396: follow(p);
397: return;
398:
399: case OR:
400: case QUEST: follow(p);
401: return;
402:
403: case CAT: if (v == left[p]) {
404: if (cstate(right[p]) == 0) {
405: follow(p);
406: return;
407: }
408: }
409: else follow(p);
410: return;
411: case FINAL: if (tmpstat[line] != 1) {
412: tmpstat[line] = 1;
413: count++;
414: }
415: return;
416: }
417: }
418:
419:
420: main(argc, argv)
421: char **argv;
422: {
423: while (--argc > 0 && (++argv)[0][0]=='-')
424: switch (argv[0][1]) {
425:
426: case 's':
427: sflag++;
428: continue;
429:
430: case 'h':
431: hflag = 0;
432: continue;
433:
434: case 'b':
435: bflag++;
436: continue;
437:
438: case 'c':
439: cflag++;
440: continue;
441:
442: case 'e':
443: argc--;
444: argv++;
445: goto out;
446:
447: case 'f':
448: fflag++;
449: continue;
450:
451: case 'l':
452: lflag++;
453: continue;
454:
455: case 'n':
456: nflag++;
457: continue;
458:
459: case 'v':
460: vflag++;
461: continue;
462:
463: default:
464: fprintf(stderr, "egrep: unknown flag\n");
465: continue;
466: }
467: out:
468: if (argc<=0)
469: exit(2);
470: if (fflag) {
471: fname = *argv;
472: exprfile = fopen(fname, "r");
473: if (exprfile == (FILE *)NULL) {
474: fprintf(stderr, "egrep: can't open %s\n", fname);
475: exit(2);
476: }
477: }
478: else input = *argv;
479: argc--;
480: argv++;
481:
482: yyparse();
483:
484: cfoll(line-1);
485: cgotofn();
486: nfile = argc;
487: if (argc<=0) {
488: if (lflag) exit(1);
489: execute(0);
490: }
491: else while (--argc >= 0) {
492: execute(*argv);
493: argv++;
494: }
495: exit(retcode != 0 ? retcode : nsucc == 0);
496: }
497:
498: execute(file)
499: char *file;
500: {
501: register char *p;
502: register cstat;
503: register ccount;
504: char buf[1024];
505: char *nlp;
506: int istat;
507: if (file) {
508: if ((f = open(file, 0)) < 0) {
509: fprintf(stderr, "egrep: can't open %s\n", file);
510: retcode = 2;
511: return;
512: }
513: }
514: else f = 0;
515: ccount = 0;
516: lnum = 1;
517: tln = 0;
518: blkno = 0;
519: p = buf;
520: nlp = p;
521: if ((ccount = read(f,p,512))<=0) goto done;
522: istat = cstat = gotofn[0]['\n'];
523: if (out[cstat]) goto found;
524: for (;;) {
525: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
526: if (out[cstat]) {
527: found: for(;;) {
528: if (*p++ == '\n') {
529: if (vflag == 0) {
530: succeed: nsucc = 1;
531: if (cflag) tln++;
532: else if (sflag)
533: ; /* ugh */
534: else if (lflag) {
535: printf("%s\n", file);
536: close(f);
537: return;
538: }
539: else {
540: if (nfile > 1 && hflag) printf("%s:", file);
541: if (bflag) printf("%d:", blkno);
542: if (nflag) printf("%ld:", lnum);
543: if (p <= nlp) {
544: while (nlp < &buf[1024]) putchar(*nlp++);
545: nlp = buf;
546: }
547: while (nlp < p) putchar(*nlp++);
548: }
549: }
550: lnum++;
551: nlp = p;
552: if ((out[(cstat=istat)]) == 0) goto brk2;
553: }
554: cfound:
555: if (--ccount <= 0) {
556: if (p <= &buf[512]) {
557: if ((ccount = read(f, p, 512)) <= 0) goto done;
558: }
559: else if (p == &buf[1024]) {
560: p = buf;
561: if ((ccount = read(f, p, 512)) <= 0) goto done;
562: }
563: else {
564: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
565: }
566: blkno++;
567: }
568: }
569: }
570: if (*p++ == '\n') {
571: if (vflag) goto succeed;
572: else {
573: lnum++;
574: nlp = p;
575: if (out[(cstat=istat)]) goto cfound;
576: }
577: }
578: brk2:
579: if (--ccount <= 0) {
580: if (p <= &buf[512]) {
581: if ((ccount = read(f, p, 512)) <= 0) break;
582: }
583: else if (p == &buf[1024]) {
584: p = buf;
585: if ((ccount = read(f, p, 512)) <= 0) break;
586: }
587: else {
588: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
589: }
590: blkno++;
591: }
592: }
593: done: close(f);
594: if (cflag) {
595: if (nfile > 1)
596: printf("%s:", file);
597: printf("%ld\n", tln);
598: }
599: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.