|
|
1.1 root 1: /*
2: * egrep -- print lines containing (or not containing) a regular expression
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10: %left OR
11: %left CHAR DOT CCL NCCL '('
12: %left CAT
13: %left STAR PLUS QUEST
14:
15: %{
16: #include <stdio.h>
17:
18: #define MAXLIN 350
19: #define MAXPOS 4000
20: #define NCHARS 128
21: #define NSTATES 128
22: #define FINAL -1
23: char gotofn[NSTATES][NCHARS];
24: int state[NSTATES];
25: char out[NSTATES];
26: int line = 1;
27: int name[MAXLIN];
28: int left[MAXLIN];
29: int right[MAXLIN];
30: int parent[MAXLIN];
31: int foll[MAXLIN];
32: int positions[MAXPOS];
33: char chars[MAXLIN];
34: int nxtpos;
35: int nxtchar = 0;
36: int tmpstat[MAXLIN];
37: int initstat[MAXLIN];
38: int xstate;
39: int count;
40: int icount;
41: char *input;
42:
43: long lnum;
44: int bflag;
45: int cflag;
46: int fflag;
47: int lflag;
48: int nflag;
49: int hflag = 1;
50: int sflag;
51: int vflag;
52: int nfile;
53: int blkno;
54: long tln;
55: int nsucc;
56:
57: int f;
58: int fname;
59: %}
60:
61: %%
62: s: t
63: ={ unary(FINAL, $1);
64: line--;
65: }
66: ;
67: t: b r
68: ={ $$ = node(CAT, $1, $2); }
69: | OR b r OR
70: ={ $$ = node(CAT, $2, $3); }
71: | OR b r
72: ={ $$ = node(CAT, $2, $3); }
73: | b r OR
74: ={ $$ = node(CAT, $1, $2); }
75: ;
76: b:
77: ={ $$ = enter(DOT);
78: $$ = unary(STAR, $$); }
79: ;
80: r: CHAR
81: ={ $$ = enter($1); }
82: | DOT
83: ={ $$ = enter(DOT); }
84: | CCL
85: ={ $$ = cclenter(CCL); }
86: | NCCL
87: ={ $$ = cclenter(NCCL); }
88: ;
89:
90: r: r OR r
91: ={ $$ = node(OR, $1, $3); }
92: | r r %prec CAT
93: ={ $$ = node(CAT, $1, $2); }
94: | r STAR
95: ={ $$ = unary(STAR, $1); }
96: | r PLUS
97: ={ $$ = unary(PLUS, $1); }
98: | r QUEST
99: ={ $$ = unary(QUEST, $1); }
100: | '(' r ')'
101: ={ $$ = $2; }
102: | error
103: ;
104:
105: %%
106: yyerror(s) {
107: fprintf(stderr, "egrep: %s\n", s);
108: exit(2);
109: }
110:
111: yylex() {
112: extern int yylval;
113: int cclcnt, x;
114: register char c, d;
115: switch(c = nextch()) {
116: case '$':
117: case '^': c = '\n';
118: goto defchar;
119: case '|': return (OR);
120: case '*': return (STAR);
121: case '+': return (PLUS);
122: case '?': return (QUEST);
123: case '(': return (c);
124: case ')': return (c);
125: case '.': return (DOT);
126: case '\0': return (0);
127: case '\n': return (OR);
128: case '[':
129: x = CCL;
130: cclcnt = 0;
131: count = nxtchar++;
132: if ((c = nextch()) == '^') {
133: x = NCCL;
134: c = nextch();
135: }
136: do {
137: if (c == '\0') synerror();
138: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
139: if ((d = nextch()) != 0) {
140: c = chars[nxtchar-1];
141: while (c < d) {
142: if (nxtchar >= MAXLIN) overflo();
143: chars[nxtchar++] = ++c;
144: cclcnt++;
145: }
146: continue;
147: }
148: }
149: if (nxtchar >= MAXLIN) overflo();
150: chars[nxtchar++] = c;
151: cclcnt++;
152: } while ((c = nextch()) != ']');
153: chars[count] = cclcnt;
154: return (x);
155: case '\\':
156: if ((c = nextch()) == '\0') synerror();
157: defchar:
158: default: yylval = c; return (CHAR);
159: }
160: }
161: nextch() {
162: register char c;
163: if (fflag) {
164: if ((c = getc(stdin)) == EOF) return(0);
165: }
166: else c = *input++;
167: return(c);
168: }
169:
170: synerror() {
171: fprintf(stderr, "egrep: syntax error\n");
172: exit(2);
173: }
174:
175: enter(x) int x; {
176: if(line >= MAXLIN) overflo();
177: name[line] = x;
178: left[line] = 0;
179: right[line] = 0;
180: return(line++);
181: }
182:
183: cclenter(x) int x; {
184: register linno;
185: linno = enter(x);
186: right[linno] = count;
187: return (linno);
188: }
189:
190: node(x, l, r) {
191: if(line >= MAXLIN) overflo();
192: name[line] = x;
193: left[line] = l;
194: right[line] = r;
195: parent[l] = line;
196: parent[r] = line;
197: return(line++);
198: }
199:
200: unary(x, d) {
201: if(line >= MAXLIN) overflo();
202: name[line] = x;
203: left[line] = d;
204: right[line] = 0;
205: parent[d] = line;
206: return(line++);
207: }
208: overflo() {
209: fprintf(stderr, "egrep: regular expression too long\n");
210: exit(2);
211: }
212:
213: cfoll(v) {
214: register i;
215: if (left[v] == 0) {
216: count = 0;
217: for (i=1; i<=line; i++) tmpstat[i] = 0;
218: follow(v);
219: add(foll, v);
220: }
221: else if (right[v] == 0) cfoll(left[v]);
222: else {
223: cfoll(left[v]);
224: cfoll(right[v]);
225: }
226: }
227: cgotofn() {
228: register c, i, k;
229: int n, s;
230: char symbol[NCHARS];
231: int j, nc, pc, pos;
232: int curpos, num;
233: int number, newpos;
234: count = 0;
235: for (n=3; n<=line; n++) tmpstat[n] = 0;
236: if (cstate(line-1)==0) {
237: tmpstat[line] = 1;
238: count++;
239: out[0] = 1;
240: }
241: for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
242: count--; /*leave out position 1 */
243: icount = count;
244: tmpstat[1] = 0;
245: add(state, 0);
246: n = 0;
247: for (s=0; s<=n; s++) {
248: if (out[s] == 1) continue;
249: for (i=0; i<NCHARS; i++) symbol[i] = 0;
250: num = positions[state[s]];
251: count = icount;
252: for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
253: pos = state[s] + 1;
254: for (i=0; i<num; i++) {
255: curpos = positions[pos];
256: if ((c = name[curpos]) >= 0) {
257: if (c < NCHARS) symbol[c] = 1;
258: else if (c == DOT) {
259: for (k=0; k<NCHARS; k++)
260: if (k!='\n') symbol[k] = 1;
261: }
262: else if (c == CCL) {
263: nc = chars[right[curpos]];
264: pc = right[curpos] + 1;
265: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
266: }
267: else if (c == NCCL) {
268: nc = chars[right[curpos]];
269: for (j = 0; j < NCHARS; j++) {
270: pc = right[curpos] + 1;
271: for (k = 0; k < nc; k++)
272: if (j==chars[pc++]) goto cont;
273: if (j!='\n') symbol[j] = 1;
274: cont:;
275: }
276: }
277: else printf("something's funny\n");
278: }
279: pos++;
280: }
281: for (c=0; c<NCHARS; c++) {
282: if (symbol[c] == 1) { /* nextstate(s,c) */
283: count = icount;
284: for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
285: pos = state[s] + 1;
286: for (i=0; i<num; i++) {
287: curpos = positions[pos];
288: if ((k = name[curpos]) >= 0)
289: if (
290: (k == c)
291: | (k == DOT)
292: | (k == CCL && member(c, right[curpos], 1))
293: | (k == NCCL && member(c, right[curpos], 0))
294: ) {
295: number = positions[foll[curpos]];
296: newpos = foll[curpos] + 1;
297: for (k=0; k<number; k++) {
298: if (tmpstat[positions[newpos]] != 1) {
299: tmpstat[positions[newpos]] = 1;
300: count++;
301: }
302: newpos++;
303: }
304: }
305: pos++;
306: } /* end nextstate */
307: if (notin(n)) {
308: if (n >= NSTATES) overflo();
309: add(state, ++n);
310: if (tmpstat[line] == 1) out[n] = 1;
311: gotofn[s][c] = n;
312: }
313: else {
314: gotofn[s][c] = xstate;
315: }
316: }
317: }
318: }
319: }
320:
321: cstate(v) {
322: register b;
323: if (left[v] == 0) {
324: if (tmpstat[v] != 1) {
325: tmpstat[v] = 1;
326: count++;
327: }
328: return(1);
329: }
330: else if (right[v] == 0) {
331: if (cstate(left[v]) == 0) return (0);
332: else if (name[v] == PLUS) return (1);
333: else return (0);
334: }
335: else if (name[v] == CAT) {
336: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
337: else return (1);
338: }
339: else { /* name[v] == OR */
340: b = cstate(right[v]);
341: if (cstate(left[v]) == 0 || b == 0) return (0);
342: else return (1);
343: }
344: }
345:
346:
347: member(symb, set, torf) {
348: register i, num, pos;
349: num = chars[set];
350: pos = set + 1;
351: for (i=0; i<num; i++)
352: if (symb == chars[pos++]) return (torf);
353: return (!torf);
354: }
355:
356: notin(n) {
357: register i, j, pos;
358: for (i=0; i<=n; i++) {
359: if (positions[state[i]] == count) {
360: pos = state[i] + 1;
361: for (j=0; j < count; j++)
362: if (tmpstat[positions[pos++]] != 1) goto nxt;
363: xstate = i;
364: return (0);
365: }
366: nxt: ;
367: }
368: return (1);
369: }
370:
371: add(array, n) int *array; {
372: register i;
373: if (nxtpos + count > MAXPOS) overflo();
374: array[n] = nxtpos;
375: positions[nxtpos++] = count;
376: for (i=3; i <= line; i++) {
377: if (tmpstat[i] == 1) {
378: positions[nxtpos++] = i;
379: }
380: }
381: }
382:
383: follow(v) int v; {
384: int p;
385: if (v == line) return;
386: p = parent[v];
387: switch(name[p]) {
388: case STAR:
389: case PLUS: cstate(v);
390: follow(p);
391: return;
392:
393: case OR:
394: case QUEST: follow(p);
395: return;
396:
397: case CAT: if (v == left[p]) {
398: if (cstate(right[p]) == 0) {
399: follow(p);
400: return;
401: }
402: }
403: else follow(p);
404: return;
405: case FINAL: if (tmpstat[line] != 1) {
406: tmpstat[line] = 1;
407: count++;
408: }
409: return;
410: }
411: }
412:
413:
414: main(argc, argv)
415: char **argv;
416: {
417: while (--argc > 0 && (++argv)[0][0]=='-')
418: switch (argv[0][1]) {
419:
420: case 's':
421: sflag++;
422: continue;
423:
424: case 'h':
425: hflag = 0;
426: continue;
427:
428: case 'b':
429: bflag++;
430: continue;
431:
432: case 'c':
433: cflag++;
434: continue;
435:
436: case 'e':
437: argc--;
438: argv++;
439: goto out;
440:
441: case 'f':
442: fflag++;
443: continue;
444:
445: case 'l':
446: lflag++;
447: continue;
448:
449: case 'n':
450: nflag++;
451: continue;
452:
453: case 'v':
454: vflag++;
455: continue;
456:
457: default:
458: fprintf(stderr, "egrep: unknown flag\n");
459: continue;
460: }
461: out:
462: if (argc<=0)
463: exit(2);
464: if (fflag) {
465: if (freopen(fname = *argv, "r", stdin) == NULL) {
466: fprintf(stderr, "egrep: can't open %s\n", fname);
467: exit(2);
468: }
469: }
470: else input = *argv;
471: argc--;
472: argv++;
473:
474: yyparse();
475:
476: cfoll(line-1);
477: cgotofn();
478: nfile = argc;
479: if (argc<=0) {
480: if (lflag) exit(1);
481: execute(0);
482: }
483: else while (--argc >= 0) {
484: execute(*argv);
485: argv++;
486: }
487: exit(nsucc == 0);
488: }
489:
490: execute(file)
491: char *file;
492: {
493: register char *p;
494: register cstat;
495: register ccount;
496: char buf[1024];
497: char *nlp;
498: int istat;
499: if (file) {
500: if ((f = open(file, 0)) < 0) {
501: fprintf(stderr, "egrep: can't open %s\n", file);
502: exit(2);
503: }
504: }
505: else f = 0;
506: ccount = 0;
507: lnum = 1;
508: tln = 0;
509: blkno = 0;
510: p = buf;
511: nlp = p;
512: if ((ccount = read(f,p,512))<=0) goto done;
513: istat = cstat = gotofn[0]['\n'];
514: if (out[cstat]) goto found;
515: for (;;) {
516: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
517: if (out[cstat]) {
518: found: for(;;) {
519: if (*p++ == '\n') {
520: if (vflag == 0) {
521: succeed: nsucc = 1;
522: if (cflag) tln++;
523: else if (sflag)
524: ; /* ugh */
525: else if (lflag) {
526: printf("%s\n", file);
527: close(f);
528: return;
529: }
530: else {
531: if (nfile > 1 && hflag) printf("%s:", file);
532: if (bflag) printf("%d:", blkno);
533: if (nflag) printf("%ld:", lnum);
534: if (p <= nlp) {
535: while (nlp < &buf[1024]) putchar(*nlp++);
536: nlp = buf;
537: }
538: while (nlp < p) putchar(*nlp++);
539: }
540: }
541: lnum++;
542: nlp = p;
543: if ((out[(cstat=istat)]) == 0) goto brk2;
544: }
545: cfound:
546: if (--ccount <= 0) {
547: if (p <= &buf[512]) {
548: if ((ccount = read(f, p, 512)) <= 0) goto done;
549: }
550: else if (p == &buf[1024]) {
551: p = buf;
552: if ((ccount = read(f, p, 512)) <= 0) goto done;
553: }
554: else {
555: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
556: }
557: blkno++;
558: }
559: }
560: }
561: if (*p++ == '\n') {
562: if (vflag) goto succeed;
563: else {
564: lnum++;
565: nlp = p;
566: if (out[(cstat=istat)]) goto cfound;
567: }
568: }
569: brk2:
570: if (--ccount <= 0) {
571: if (p <= &buf[512]) {
572: if ((ccount = read(f, p, 512)) <= 0) break;
573: }
574: else if (p == &buf[1024]) {
575: p = buf;
576: if ((ccount = read(f, p, 512)) <= 0) break;
577: }
578: else {
579: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
580: }
581: blkno++;
582: }
583: }
584: done: close(f);
585: if (cflag) {
586: if (nfile > 1)
587: printf("%s:", file);
588: printf("%ld\n", tln);
589: }
590: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.