|
|
1.1 root 1: /*
2: * egrep -- print lines containing (or not containing) a regular expression
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10: %left OR
11: %left CHAR DOT CCL NCCL '('
12: %left CAT
13: %left STAR PLUS QUEST
14:
15: %{
16: static char *sccsid = "@(#)egrep.y 4.4 (Berkeley) 5/29/85";
17: #include <stdio.h>
18: #include <sys/types.h>
19: #include <sys/stat.h>
20:
21: #define BLKSIZE 8192
22: #define MAXLIN 350
23: #define MAXPOS 4000
24: #define NCHARS 128
25: #define NSTATES 128
26: #define FINAL -1
27: char gotofn[NSTATES][NCHARS];
28: int state[NSTATES];
29: char out[NSTATES];
30: int line = 1;
31: int name[MAXLIN];
32: int left[MAXLIN];
33: int right[MAXLIN];
34: int parent[MAXLIN];
35: int foll[MAXLIN];
36: int positions[MAXPOS];
37: char chars[MAXLIN];
38: int nxtpos;
39: int nxtchar = 0;
40: int tmpstat[MAXLIN];
41: int initstat[MAXLIN];
42: int xstate;
43: int count;
44: int icount;
45: char *input;
46: FILE *exprfile;
47:
48: long lnum;
49: int bflag;
50: int cflag;
51: int fflag;
52: int lflag;
53: int nflag;
54: int hflag = 1;
55: int sflag;
56: int vflag;
57: int retcode = 0;
58: int nfile;
59: int blkno;
60: long tln;
61: int nsucc;
62:
63: int f;
64: char *fname;
65: %}
66:
67: %%
68: s: t
69: ={ unary(FINAL, $1);
70: line--;
71: }
72: ;
73: t: b r
74: ={ $$ = node(CAT, $1, $2); }
75: | OR b r OR
76: ={ $$ = node(CAT, $2, $3); }
77: | OR b r
78: ={ $$ = node(CAT, $2, $3); }
79: | b r OR
80: ={ $$ = node(CAT, $1, $2); }
81: ;
82: b:
83: ={ $$ = enter(DOT);
84: $$ = unary(STAR, $$); }
85: ;
86: r: CHAR
87: ={ $$ = enter($1); }
88: | DOT
89: ={ $$ = enter(DOT); }
90: | CCL
91: ={ $$ = cclenter(CCL); }
92: | NCCL
93: ={ $$ = cclenter(NCCL); }
94: ;
95:
96: r: r OR r
97: ={ $$ = node(OR, $1, $3); }
98: | r r %prec CAT
99: ={ $$ = node(CAT, $1, $2); }
100: | r STAR
101: ={ $$ = unary(STAR, $1); }
102: | r PLUS
103: ={ $$ = unary(PLUS, $1); }
104: | r QUEST
105: ={ $$ = unary(QUEST, $1); }
106: | '(' r ')'
107: ={ $$ = $2; }
108: | error
109: ;
110:
111: %%
112: yyerror(s) {
113: fprintf(stderr, "egrep: %s\n", s);
114: exit(2);
115: }
116:
117: yylex() {
118: extern int yylval;
119: int cclcnt, x;
120: register char c, d;
121: switch(c = nextch()) {
122: case '$':
123: case '^': c = '\n';
124: goto defchar;
125: case '|': return (OR);
126: case '*': return (STAR);
127: case '+': return (PLUS);
128: case '?': return (QUEST);
129: case '(': return (c);
130: case ')': return (c);
131: case '.': return (DOT);
132: case '\0': return (0);
133: case '\n': return (OR);
134: case '[':
135: x = CCL;
136: cclcnt = 0;
137: count = nxtchar++;
138: if ((c = nextch()) == '^') {
139: x = NCCL;
140: c = nextch();
141: }
142: do {
143: if (c == '\0') synerror();
144: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
145: if ((d = nextch()) != 0) {
146: c = chars[nxtchar-1];
147: while (c < d) {
148: if (nxtchar >= MAXLIN) overflo();
149: chars[nxtchar++] = ++c;
150: cclcnt++;
151: }
152: continue;
153: }
154: }
155: if (nxtchar >= MAXLIN) overflo();
156: chars[nxtchar++] = c;
157: cclcnt++;
158: } while ((c = nextch()) != ']');
159: chars[count] = cclcnt;
160: return (x);
161: case '\\':
162: if ((c = nextch()) == '\0') synerror();
163: defchar:
164: default: yylval = c; return (CHAR);
165: }
166: }
167: nextch() {
168: register char c;
169: if (fflag) {
170: if ((c = getc(exprfile)) == EOF) {
171: fclose(exprfile);
172: return(0);
173: }
174: }
175: else c = *input++;
176: return(c);
177: }
178:
179: synerror() {
180: fprintf(stderr, "egrep: syntax error\n");
181: exit(2);
182: }
183:
184: enter(x) int x; {
185: if(line >= MAXLIN) overflo();
186: name[line] = x;
187: left[line] = 0;
188: right[line] = 0;
189: return(line++);
190: }
191:
192: cclenter(x) int x; {
193: register linno;
194: linno = enter(x);
195: right[linno] = count;
196: return (linno);
197: }
198:
199: node(x, l, r) {
200: if(line >= MAXLIN) overflo();
201: name[line] = x;
202: left[line] = l;
203: right[line] = r;
204: parent[l] = line;
205: parent[r] = line;
206: return(line++);
207: }
208:
209: unary(x, d) {
210: if(line >= MAXLIN) overflo();
211: name[line] = x;
212: left[line] = d;
213: right[line] = 0;
214: parent[d] = line;
215: return(line++);
216: }
217: overflo() {
218: fprintf(stderr, "egrep: regular expression too long\n");
219: exit(2);
220: }
221:
222: cfoll(v) {
223: register i;
224: if (left[v] == 0) {
225: count = 0;
226: for (i=1; i<=line; i++) tmpstat[i] = 0;
227: follow(v);
228: add(foll, v);
229: }
230: else if (right[v] == 0) cfoll(left[v]);
231: else {
232: cfoll(left[v]);
233: cfoll(right[v]);
234: }
235: }
236: cgotofn() {
237: register c, i, k;
238: int n, s;
239: char symbol[NCHARS];
240: int j, nc, pc, pos;
241: int curpos, num;
242: int number, newpos;
243: count = 0;
244: for (n=3; n<=line; n++) tmpstat[n] = 0;
245: if (cstate(line-1)==0) {
246: tmpstat[line] = 1;
247: count++;
248: out[0] = 1;
249: }
250: for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
251: count--; /*leave out position 1 */
252: icount = count;
253: tmpstat[1] = 0;
254: add(state, 0);
255: n = 0;
256: for (s=0; s<=n; s++) {
257: if (out[s] == 1) continue;
258: for (i=0; i<NCHARS; i++) symbol[i] = 0;
259: num = positions[state[s]];
260: count = icount;
261: for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
262: pos = state[s] + 1;
263: for (i=0; i<num; i++) {
264: curpos = positions[pos];
265: if ((c = name[curpos]) >= 0) {
266: if (c < NCHARS) symbol[c] = 1;
267: else if (c == DOT) {
268: for (k=0; k<NCHARS; k++)
269: if (k!='\n') symbol[k] = 1;
270: }
271: else if (c == CCL) {
272: nc = chars[right[curpos]];
273: pc = right[curpos] + 1;
274: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
275: }
276: else if (c == NCCL) {
277: nc = chars[right[curpos]];
278: for (j = 0; j < NCHARS; j++) {
279: pc = right[curpos] + 1;
280: for (k = 0; k < nc; k++)
281: if (j==chars[pc++]) goto cont;
282: if (j!='\n') symbol[j] = 1;
283: cont:;
284: }
285: }
286: else printf("something's funny\n");
287: }
288: pos++;
289: }
290: for (c=0; c<NCHARS; c++) {
291: if (symbol[c] == 1) { /* nextstate(s,c) */
292: count = icount;
293: for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
294: pos = state[s] + 1;
295: for (i=0; i<num; i++) {
296: curpos = positions[pos];
297: if ((k = name[curpos]) >= 0)
298: if (
299: (k == c)
300: | (k == DOT)
301: | (k == CCL && member(c, right[curpos], 1))
302: | (k == NCCL && member(c, right[curpos], 0))
303: ) {
304: number = positions[foll[curpos]];
305: newpos = foll[curpos] + 1;
306: for (k=0; k<number; k++) {
307: if (tmpstat[positions[newpos]] != 1) {
308: tmpstat[positions[newpos]] = 1;
309: count++;
310: }
311: newpos++;
312: }
313: }
314: pos++;
315: } /* end nextstate */
316: if (notin(n)) {
317: if (n >= NSTATES) overflo();
318: add(state, ++n);
319: if (tmpstat[line] == 1) out[n] = 1;
320: gotofn[s][c] = n;
321: }
322: else {
323: gotofn[s][c] = xstate;
324: }
325: }
326: }
327: }
328: }
329:
330: cstate(v) {
331: register b;
332: if (left[v] == 0) {
333: if (tmpstat[v] != 1) {
334: tmpstat[v] = 1;
335: count++;
336: }
337: return(1);
338: }
339: else if (right[v] == 0) {
340: if (cstate(left[v]) == 0) return (0);
341: else if (name[v] == PLUS) return (1);
342: else return (0);
343: }
344: else if (name[v] == CAT) {
345: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
346: else return (1);
347: }
348: else { /* name[v] == OR */
349: b = cstate(right[v]);
350: if (cstate(left[v]) == 0 || b == 0) return (0);
351: else return (1);
352: }
353: }
354:
355:
356: member(symb, set, torf) {
357: register i, num, pos;
358: num = chars[set];
359: pos = set + 1;
360: for (i=0; i<num; i++)
361: if (symb == chars[pos++]) return (torf);
362: return (!torf);
363: }
364:
365: notin(n) {
366: register i, j, pos;
367: for (i=0; i<=n; i++) {
368: if (positions[state[i]] == count) {
369: pos = state[i] + 1;
370: for (j=0; j < count; j++)
371: if (tmpstat[positions[pos++]] != 1) goto nxt;
372: xstate = i;
373: return (0);
374: }
375: nxt: ;
376: }
377: return (1);
378: }
379:
380: add(array, n) int *array; {
381: register i;
382: if (nxtpos + count > MAXPOS) overflo();
383: array[n] = nxtpos;
384: positions[nxtpos++] = count;
385: for (i=3; i <= line; i++) {
386: if (tmpstat[i] == 1) {
387: positions[nxtpos++] = i;
388: }
389: }
390: }
391:
392: follow(v) int v; {
393: int p;
394: if (v == line) return;
395: p = parent[v];
396: switch(name[p]) {
397: case STAR:
398: case PLUS: cstate(v);
399: follow(p);
400: return;
401:
402: case OR:
403: case QUEST: follow(p);
404: return;
405:
406: case CAT: if (v == left[p]) {
407: if (cstate(right[p]) == 0) {
408: follow(p);
409: return;
410: }
411: }
412: else follow(p);
413: return;
414: case FINAL: if (tmpstat[line] != 1) {
415: tmpstat[line] = 1;
416: count++;
417: }
418: return;
419: }
420: }
421:
422:
423: main(argc, argv)
424: char **argv;
425: {
426: while (--argc > 0 && (++argv)[0][0]=='-')
427: switch (argv[0][1]) {
428:
429: case 's':
430: sflag++;
431: continue;
432:
433: case 'h':
434: hflag = 0;
435: continue;
436:
437: case 'b':
438: bflag++;
439: continue;
440:
441: case 'c':
442: cflag++;
443: continue;
444:
445: case 'e':
446: argc--;
447: argv++;
448: goto out;
449:
450: case 'f':
451: fflag++;
452: continue;
453:
454: case 'l':
455: lflag++;
456: continue;
457:
458: case 'n':
459: nflag++;
460: continue;
461:
462: case 'v':
463: vflag++;
464: continue;
465:
466: default:
467: fprintf(stderr, "egrep: unknown flag\n");
468: continue;
469: }
470: out:
471: if (argc<=0)
472: exit(2);
473: if (fflag) {
474: fname = *argv;
475: exprfile = fopen(fname, "r");
476: if (exprfile == (FILE *)NULL) {
477: fprintf(stderr, "egrep: can't open %s\n", fname);
478: exit(2);
479: }
480: }
481: else input = *argv;
482: argc--;
483: argv++;
484:
485: yyparse();
486:
487: cfoll(line-1);
488: cgotofn();
489: nfile = argc;
490: if (argc<=0) {
491: if (lflag) exit(1);
492: execute(0);
493: }
494: else while (--argc >= 0) {
495: execute(*argv);
496: argv++;
497: }
498: exit(retcode != 0 ? retcode : nsucc == 0);
499: }
500:
501: execute(file)
502: char *file;
503: {
504: register char *p;
505: register cstat;
506: register ccount;
507: static char *buf;
508: static int blksize;
509: struct stat stb;
510: char *nlp;
511: int istat;
512: if (file) {
513: if ((f = open(file, 0)) < 0) {
514: fprintf(stderr, "egrep: can't open %s\n", file);
515: retcode = 2;
516: return;
517: }
518: }
519: else f = 0;
520: if (buf == NULL) {
521: if (fstat(f, &stb) > 0 && stb.st_blksize > 0)
522: blksize = stb.st_blksize;
523: else
524: blksize = BLKSIZE;
525: buf = (char *)malloc(2*blksize);
526: if (buf == NULL) {
527: fprintf(stderr, "egrep: no memory for %s\n", file);
528: retcode = 2;
529: return;
530: }
531: }
532: ccount = 0;
533: lnum = 1;
534: tln = 0;
535: blkno = 0;
536: p = buf;
537: nlp = p;
538: if ((ccount = read(f,p,blksize))<=0) goto done;
539: istat = cstat = gotofn[0]['\n'];
540: if (out[cstat]) goto found;
541: for (;;) {
542: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
543: if (out[cstat]) {
544: found: for(;;) {
545: if (*p++ == '\n') {
546: if (vflag == 0) {
547: succeed: nsucc = 1;
548: if (cflag) tln++;
549: else if (sflag)
550: ; /* ugh */
551: else if (lflag) {
552: printf("%s\n", file);
553: close(f);
554: return;
555: }
556: else {
557: if (nfile > 1 && hflag) printf("%s:", file);
558: if (bflag) printf("%d:", blkno);
559: if (nflag) printf("%ld:", lnum);
560: if (p <= nlp) {
561: while (nlp < &buf[2*blksize]) putchar(*nlp++);
562: nlp = buf;
563: }
564: while (nlp < p) putchar(*nlp++);
565: }
566: }
567: lnum++;
568: nlp = p;
569: if ((out[(cstat=istat)]) == 0) goto brk2;
570: }
571: cfound:
572: if (--ccount <= 0) {
573: if (p <= &buf[blksize]) {
574: if ((ccount = read(f, p, blksize)) <= 0) goto done;
575: }
576: else if (p == &buf[2*blksize]) {
577: p = buf;
578: if ((ccount = read(f, p, blksize)) <= 0) goto done;
579: }
580: else {
581: if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done;
582: }
583: blkno += ccount / 512;
584: }
585: }
586: }
587: if (*p++ == '\n') {
588: if (vflag) goto succeed;
589: else {
590: lnum++;
591: nlp = p;
592: if (out[(cstat=istat)]) goto cfound;
593: }
594: }
595: brk2:
596: if (--ccount <= 0) {
597: if (p <= &buf[blksize]) {
598: if ((ccount = read(f, p, blksize)) <= 0) break;
599: }
600: else if (p == &buf[2*blksize]) {
601: p = buf;
602: if ((ccount = read(f, p, blksize)) <= 0) break;
603: }
604: else {
605: if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break;
606: }
607: blkno += ccount / 512;
608: }
609: }
610: done: close(f);
611: if (cflag) {
612: if (nfile > 1)
613: printf("%s:", file);
614: printf("%ld\n", tln);
615: }
616: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.