|
|
1.1 root 1: /*
2: * egrep -- print lines containing (or not containing) a regular expression
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10: %left OR
11: %left CHAR DOT CCL NCCL '('
12: %left CAT
13: %left STAR PLUS QUEST
14:
15: %{
16: static char *sccsid = "@(#)old.egrep.y 4.6 (Berkeley) 10/7/87";
17: #include <stdio.h>
18: #include <sys/types.h>
19: #include <sys/stat.h>
20: #include <ctype.h>
21:
22: #define BLKSIZE 8192
23: #define MAXLIN 350
24: #define MAXPOS 4000
25: #define NCHARS 128
26: #define NSTATES 128
27: #define FINAL -1
28: char gotofn[NSTATES][NCHARS];
29: char cmap[256];
30: int state[NSTATES];
31: char out[NSTATES];
32: int line = 1;
33: int name[MAXLIN];
34: int left[MAXLIN];
35: int right[MAXLIN];
36: int parent[MAXLIN];
37: int foll[MAXLIN];
38: int positions[MAXPOS];
39: char chars[MAXLIN];
40: int nxtpos;
41: int nxtchar = 0;
42: int tmpstat[MAXLIN];
43: int initstat[MAXLIN];
44: int xstate;
45: int count;
46: int icount;
47: char *input;
48: FILE *exprfile;
49:
50: long lnum;
51: int bflag;
52: int cflag;
53: int fflag;
54: int iflag;
55: int lflag;
56: int nflag;
57: int hflag = 1;
58: int oflag;
59: int sflag;
60: int vflag;
61: int retcode = 0;
62: int nfile;
63: int blkno;
64: long tln;
65: int nsucc;
66:
67: int f;
68: char *fname;
69: %}
70:
71: %%
72: s: t
73: ={ unary(FINAL, $1);
74: line--;
75: }
76: ;
77: t: b r
78: ={ $$ = node(CAT, $1, $2); }
79: | OR b r OR
80: ={ $$ = node(CAT, $2, $3); }
81: | OR b r
82: ={ $$ = node(CAT, $2, $3); }
83: | b r OR
84: ={ $$ = node(CAT, $1, $2); }
85: ;
86: b:
87: ={ $$ = enter(DOT);
88: $$ = unary(STAR, $$); }
89: ;
90: r: CHAR
91: ={ $$ = enter($1); }
92: | DOT
93: ={ $$ = enter(DOT); }
94: | CCL
95: ={ $$ = cclenter(CCL); }
96: | NCCL
97: ={ $$ = cclenter(NCCL); }
98: ;
99:
100: r: r OR r
101: ={ $$ = node(OR, $1, $3); }
102: | r r %prec CAT
103: ={ $$ = node(CAT, $1, $2); }
104: | r STAR
105: ={ $$ = unary(STAR, $1); }
106: | r PLUS
107: ={ $$ = unary(PLUS, $1); }
108: | r QUEST
109: ={ $$ = unary(QUEST, $1); }
110: | '(' r ')'
111: ={ $$ = $2; }
112: | error
113: ;
114:
115: %%
116: yyerror(s) {
117: fprintf(stderr, "egrep: %s\n", s);
118: exit(2);
119: }
120:
121: yylex() {
122: extern int yylval;
123: int cclcnt, x;
124: register char c, d;
125: switch(c = nextch()) {
126: case '$':
127: case '^': c = '\n';
128: goto defchar;
129: case '|': return (OR);
130: case '*': return (STAR);
131: case '+': return (PLUS);
132: case '?': return (QUEST);
133: case '(': return (c);
134: case ')': return (c);
135: case '.': return (DOT);
136: case '\0': return (0);
137: case '\n': return (OR);
138: case '[':
139: x = CCL;
140: cclcnt = 0;
141: count = nxtchar++;
142: if ((c = nextch()) == '^') {
143: x = NCCL;
144: c = nextch();
145: }
146: do {
147: if (c == '\0') synerror();
148: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
149: if ((d = nextch()) != 0) {
150: c = chars[nxtchar-1];
151: while (c < d) {
152: if (nxtchar >= MAXLIN) overflo();
153: chars[nxtchar++] = ++c;
154: cclcnt++;
155: }
156: continue;
157: }
158: }
159: if (nxtchar >= MAXLIN) overflo();
160: chars[nxtchar++] = c;
161: cclcnt++;
162: } while ((c = nextch()) != ']');
163: chars[count] = cclcnt;
164: return (x);
165: case '\\':
166: if ((c = nextch()) == '\0') synerror();
167: defchar:
168: default: yylval = c; return (CHAR);
169: }
170: }
171: nextch() {
172: register int c;
173: if (fflag) {
174: if ((c = getc(exprfile)) == EOF) {
175: fclose(exprfile);
176: return(0);
177: }
178: }
179: else c = *input++;
180: return(c);
181: }
182:
183: synerror() {
184: fprintf(stderr, "egrep: syntax error\n");
185: exit(2);
186: }
187:
188: enter(x) int x; {
189: if(line >= MAXLIN) overflo();
190: name[line] = x;
191: left[line] = 0;
192: right[line] = 0;
193: return(line++);
194: }
195:
196: cclenter(x) int x; {
197: register linno;
198: linno = enter(x);
199: right[linno] = count;
200: return (linno);
201: }
202:
203: node(x, l, r) {
204: if(line >= MAXLIN) overflo();
205: name[line] = x;
206: left[line] = l;
207: right[line] = r;
208: parent[l] = line;
209: parent[r] = line;
210: return(line++);
211: }
212:
213: unary(x, d) {
214: if(line >= MAXLIN) overflo();
215: name[line] = x;
216: left[line] = d;
217: right[line] = 0;
218: parent[d] = line;
219: return(line++);
220: }
221: overflo() {
222: fprintf(stderr, "egrep: regular expression too long\n");
223: exit(2);
224: }
225:
226: cfoll(v) {
227: register i;
228: if (left[v] == 0) {
229: count = 0;
230: for (i=1; i<=line; i++) tmpstat[i] = 0;
231: follow(v);
232: add(foll, v);
233: }
234: else if (right[v] == 0) cfoll(left[v]);
235: else {
236: cfoll(left[v]);
237: cfoll(right[v]);
238: }
239: }
240: cgotofn() {
241: register c, i, k;
242: int n, s;
243: char symbol[NCHARS];
244: int j, nc, pc, pos;
245: int curpos, num;
246: int number, newpos;
247: count = 0;
248: for (n=3; n<=line; n++) tmpstat[n] = 0;
249: if (cstate(line-1)==0) {
250: tmpstat[line] = 1;
251: count++;
252: out[0] = 1;
253: }
254: for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
255: count--; /*leave out position 1 */
256: icount = count;
257: tmpstat[1] = 0;
258: add(state, 0);
259: n = 0;
260: for (s=0; s<=n; s++) {
261: if (out[s] == 1) continue;
262: for (i=0; i<NCHARS; i++) symbol[i] = 0;
263: num = positions[state[s]];
264: count = icount;
265: for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
266: pos = state[s] + 1;
267: for (i=0; i<num; i++) {
268: curpos = positions[pos];
269: if ((c = name[curpos]) >= 0) {
270: if (c < NCHARS) symbol[c] = 1;
271: else if (c == DOT) {
272: for (k=0; k<NCHARS; k++)
273: if (k!='\n') symbol[k] = 1;
274: }
275: else if (c == CCL) {
276: nc = chars[right[curpos]];
277: pc = right[curpos] + 1;
278: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
279: }
280: else if (c == NCCL) {
281: nc = chars[right[curpos]];
282: for (j = 0; j < NCHARS; j++) {
283: pc = right[curpos] + 1;
284: for (k = 0; k < nc; k++)
285: if (j==chars[pc++]) goto cont;
286: if (j!='\n') symbol[j] = 1;
287: cont:;
288: }
289: }
290: else printf("something's funny\n");
291: }
292: pos++;
293: }
294: for (c=0; c<NCHARS; c++) {
295: if (symbol[c] == 1) { /* nextstate(s,c) */
296: count = icount;
297: for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
298: pos = state[s] + 1;
299: for (i=0; i<num; i++) {
300: curpos = positions[pos];
301: if ((k = name[curpos]) >= 0)
302: if (
303: (k == c)
304: | (k == DOT)
305: | (k == CCL && member(c, right[curpos], 1))
306: | (k == NCCL && member(c, right[curpos], 0))
307: ) {
308: number = positions[foll[curpos]];
309: newpos = foll[curpos] + 1;
310: for (k=0; k<number; k++) {
311: if (tmpstat[positions[newpos]] != 1) {
312: tmpstat[positions[newpos]] = 1;
313: count++;
314: }
315: newpos++;
316: }
317: }
318: pos++;
319: } /* end nextstate */
320: if (notin(n)) {
321: if (n >= NSTATES) overflo();
322: add(state, ++n);
323: if (tmpstat[line] == 1) out[n] = 1;
324: gotofn[s][c] = n;
325: }
326: else {
327: gotofn[s][c] = xstate;
328: }
329: }
330: }
331: }
332: }
333:
334: cstate(v) {
335: register b;
336: if (left[v] == 0) {
337: if (tmpstat[v] != 1) {
338: tmpstat[v] = 1;
339: count++;
340: }
341: return(1);
342: }
343: else if (right[v] == 0) {
344: if (cstate(left[v]) == 0) return (0);
345: else if (name[v] == PLUS) return (1);
346: else return (0);
347: }
348: else if (name[v] == CAT) {
349: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
350: else return (1);
351: }
352: else { /* name[v] == OR */
353: b = cstate(right[v]);
354: if (cstate(left[v]) == 0 || b == 0) return (0);
355: else return (1);
356: }
357: }
358:
359:
360: member(symb, set, torf) {
361: register i, num, pos;
362: num = chars[set];
363: pos = set + 1;
364: for (i=0; i<num; i++)
365: if (symb == chars[pos++]) return (torf);
366: return (!torf);
367: }
368:
369: notin(n) {
370: register i, j, pos;
371: for (i=0; i<=n; i++) {
372: if (positions[state[i]] == count) {
373: pos = state[i] + 1;
374: for (j=0; j < count; j++)
375: if (tmpstat[positions[pos++]] != 1) goto nxt;
376: xstate = i;
377: return (0);
378: }
379: nxt: ;
380: }
381: return (1);
382: }
383:
384: add(array, n) int *array; {
385: register i;
386: if (nxtpos + count > MAXPOS) overflo();
387: array[n] = nxtpos;
388: positions[nxtpos++] = count;
389: for (i=3; i <= line; i++) {
390: if (tmpstat[i] == 1) {
391: positions[nxtpos++] = i;
392: }
393: }
394: }
395:
396: follow(v) int v; {
397: int p;
398: if (v == line) return;
399: p = parent[v];
400: switch(name[p]) {
401: case STAR:
402: case PLUS: cstate(v);
403: follow(p);
404: return;
405:
406: case OR:
407: case QUEST: follow(p);
408: return;
409:
410: case CAT: if (v == left[p]) {
411: if (cstate(right[p]) == 0) {
412: follow(p);
413: return;
414: }
415: }
416: else follow(p);
417: return;
418: case FINAL: if (tmpstat[line] != 1) {
419: tmpstat[line] = 1;
420: count++;
421: }
422: return;
423: }
424: }
425:
426:
427: main(argc, argv)
428: char **argv;
429: {
430: register int i;
431:
432: while (--argc > 0 && (++argv)[0][0]=='-')
433: switch (argv[0][1]) {
434:
435: case 's':
436: sflag++;
437: continue;
438:
439: case 'h':
440: hflag = 0;
441: continue;
442:
443: case 'o':
444: oflag++;
445: continue;
446:
447: case 'b':
448: bflag++;
449: continue;
450:
451: case 'c':
452: cflag++;
453: continue;
454:
455: case 'e':
456: argc--;
457: argv++;
458: goto out;
459:
460: case 'f':
461: fflag++;
462: continue;
463:
464: case 'i':
465: iflag++;
466: for ( i = 'A'; i <= 'Z'; i++ )
467: cmap[i] = (char) tolower ( i );
468: continue;
469:
470: case 'l':
471: lflag++;
472: continue;
473:
474: case 'n':
475: nflag++;
476: continue;
477:
478: case 'v':
479: vflag++;
480: continue;
481:
482: default:
483: fprintf(stderr, "egrep: unknown flag\n");
484: continue;
485: }
486: out:
487: if (argc<=0)
488: exit(2);
489:
490: for (i = 0; i < 256; ++i)
491: cmap[i] = (char)i;
492:
493: if (fflag) {
494: fname = *argv;
495: exprfile = fopen(fname, "r");
496: if (exprfile == (FILE *)NULL) {
497: fprintf(stderr, "egrep: can't open %s\n", fname);
498: exit(2);
499: }
500: }
501: else input = *argv;
502: if ( iflag ) {
503: register char *s;
504: for ( s = input; *s != '\0'; s++ )
505: if ( isupper ( (int)(*s) ) )
506: *s = (char) tolower ( (int)(*s) );
507: }
508: argc--;
509: argv++;
510:
511: yyparse();
512:
513: cfoll(line-1);
514: cgotofn();
515: nfile = argc;
516: if (argc<=0) {
517: if (lflag) exit(1);
518: execute(0);
519: }
520: else while (--argc >= 0) {
521: execute(*argv);
522: argv++;
523: }
524: exit(retcode != 0 ? retcode : nsucc == 0);
525: }
526:
527: execute(file)
528: char *file;
529: {
530: register char *p;
531: register cstat;
532: register ccount;
533: register char *cmapr = cmap;
534: static char *buf;
535: static int blksize;
536: struct stat stb;
537: char *nlp;
538: int istat;
539: if (file) {
540: if ((f = open(file, 0)) < 0) {
541: fprintf(stderr, "egrep: can't open %s\n", file);
542: retcode = 2;
543: return;
544: }
545: }
546: else f = 0;
547: if (buf == NULL) {
548: if (fstat(f, &stb) > 0 && stb.st_blksize > 0)
549: blksize = stb.st_blksize;
550: else
551: blksize = BLKSIZE;
552: buf = (char *)malloc(2*blksize);
553: if (buf == NULL) {
554: fprintf(stderr, "egrep: no memory for %s\n", file);
555: retcode = 2;
556: return;
557: }
558: }
559: ccount = 0;
560: lnum = 1;
561: tln = 0;
562: blkno = 0;
563: p = buf;
564: nlp = p;
565: if ((ccount = read(f,p,blksize))<=0) goto done;
566: istat = cstat = gotofn[0]['\n'];
567: if (out[cstat]) goto found;
568: for (;;) {
569: cstat = gotofn[cstat][(unsigned char)cmapr[*(unsigned char *)p]];
570: if (out[cstat]) {
571: found: for(;;) {
572: if (*p++ == '\n') {
573: if (vflag == 0) {
574: succeed: nsucc = 1;
575: if (cflag) tln++;
576: else if (sflag)
577: ; /* ugh */
578: else if (lflag) {
579: printf("%s\n", file);
580: close(f);
581: return;
582: }
583: else {
584: if (nfile > 1 && hflag || oflag) printf("%s:", file);
585: if (bflag) printf("%d:", blkno);
586: if (nflag) printf("%ld:", lnum);
587: if (p <= nlp) {
588: while (nlp < &buf[2*blksize]) putchar(*nlp++);
589: nlp = buf;
590: }
591: while (nlp < p) putchar(*nlp++);
592: }
593: }
594: lnum++;
595: nlp = p;
596: if ((out[(cstat=istat)]) == 0) goto brk2;
597: }
598: cfound:
599: if (--ccount <= 0) {
600: if (p <= &buf[blksize]) {
601: if ((ccount = read(f, p, blksize)) <= 0) goto done;
602: }
603: else if (p == &buf[2*blksize]) {
604: p = buf;
605: if ((ccount = read(f, p, blksize)) <= 0) goto done;
606: }
607: else {
608: if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done;
609: }
610: blkno += ccount / 512;
611: }
612: }
613: }
614: if (*p++ == '\n') {
615: if (vflag) goto succeed;
616: else {
617: lnum++;
618: nlp = p;
619: if (out[(cstat=istat)]) goto cfound;
620: }
621: }
622: brk2:
623: if (--ccount <= 0) {
624: if (p <= &buf[blksize]) {
625: if ((ccount = read(f, p, blksize)) <= 0) break;
626: }
627: else if (p == &buf[2*blksize]) {
628: p = buf;
629: if ((ccount = read(f, p, blksize)) <= 0) break;
630: }
631: else {
632: if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break;
633: }
634: blkno += ccount / 512;
635: }
636: }
637: done: close(f);
638: if (cflag) {
639: if (nfile > 1)
640: printf("%s:", file);
641: printf("%ld\n", tln);
642: }
643: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.