|
|
1.1 ! root 1: /* ! 2: * egrep -- print lines containing (or not containing) a regular expression ! 3: * ! 4: * status returns: ! 5: * 0 - ok, and some matches ! 6: * 1 - ok, but no matches ! 7: * 2 - some error ! 8: */ ! 9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST ! 10: %left OR ! 11: %left CHAR DOT CCL NCCL '(' ! 12: %left CAT ! 13: %left STAR PLUS QUEST ! 14: ! 15: %{ ! 16: static char *sccsid = "@(#)egrep.y 4.1 (Berkeley) 10/1/80"; ! 17: #include <stdio.h> ! 18: ! 19: #define MAXLIN 350 ! 20: #define MAXPOS 4000 ! 21: #define NCHARS 128 ! 22: #define NSTATES 128 ! 23: #define FINAL -1 ! 24: char gotofn[NSTATES][NCHARS]; ! 25: int state[NSTATES]; ! 26: char out[NSTATES]; ! 27: int line = 1; ! 28: int name[MAXLIN]; ! 29: int left[MAXLIN]; ! 30: int right[MAXLIN]; ! 31: int parent[MAXLIN]; ! 32: int foll[MAXLIN]; ! 33: int positions[MAXPOS]; ! 34: char chars[MAXLIN]; ! 35: int nxtpos; ! 36: int nxtchar = 0; ! 37: int tmpstat[MAXLIN]; ! 38: int initstat[MAXLIN]; ! 39: int xstate; ! 40: int count; ! 41: int icount; ! 42: char *input; ! 43: ! 44: long lnum; ! 45: int bflag; ! 46: int cflag; ! 47: int fflag; ! 48: int lflag; ! 49: int nflag; ! 50: int hflag = 1; ! 51: int sflag; ! 52: int vflag; ! 53: int nfile; ! 54: int blkno; ! 55: long tln; ! 56: int nsucc; ! 57: ! 58: int f; ! 59: int fname; ! 60: %} ! 61: ! 62: %% ! 63: s: t ! 64: ={ unary(FINAL, $1); ! 65: line--; ! 66: } ! 67: ; ! 68: t: b r ! 69: ={ $$ = node(CAT, $1, $2); } ! 70: | OR b r OR ! 71: ={ $$ = node(CAT, $2, $3); } ! 72: | OR b r ! 73: ={ $$ = node(CAT, $2, $3); } ! 74: | b r OR ! 75: ={ $$ = node(CAT, $1, $2); } ! 76: ; ! 77: b: ! 78: ={ $$ = enter(DOT); ! 79: $$ = unary(STAR, $$); } ! 80: ; ! 81: r: CHAR ! 82: ={ $$ = enter($1); } ! 83: | DOT ! 84: ={ $$ = enter(DOT); } ! 85: | CCL ! 86: ={ $$ = cclenter(CCL); } ! 87: | NCCL ! 88: ={ $$ = cclenter(NCCL); } ! 89: ; ! 90: ! 91: r: r OR r ! 92: ={ $$ = node(OR, $1, $3); } ! 93: | r r %prec CAT ! 94: ={ $$ = node(CAT, $1, $2); } ! 95: | r STAR ! 96: ={ $$ = unary(STAR, $1); } ! 97: | r PLUS ! 98: ={ $$ = unary(PLUS, $1); } ! 99: | r QUEST ! 100: ={ $$ = unary(QUEST, $1); } ! 101: | '(' r ')' ! 102: ={ $$ = $2; } ! 103: | error ! 104: ; ! 105: ! 106: %% ! 107: yyerror(s) { ! 108: fprintf(stderr, "egrep: %s\n", s); ! 109: exit(2); ! 110: } ! 111: ! 112: yylex() { ! 113: extern int yylval; ! 114: int cclcnt, x; ! 115: register char c, d; ! 116: switch(c = nextch()) { ! 117: case '$': ! 118: case '^': c = '\n'; ! 119: goto defchar; ! 120: case '|': return (OR); ! 121: case '*': return (STAR); ! 122: case '+': return (PLUS); ! 123: case '?': return (QUEST); ! 124: case '(': return (c); ! 125: case ')': return (c); ! 126: case '.': return (DOT); ! 127: case '\0': return (0); ! 128: case '\n': return (OR); ! 129: case '[': ! 130: x = CCL; ! 131: cclcnt = 0; ! 132: count = nxtchar++; ! 133: if ((c = nextch()) == '^') { ! 134: x = NCCL; ! 135: c = nextch(); ! 136: } ! 137: do { ! 138: if (c == '\0') synerror(); ! 139: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { ! 140: if ((d = nextch()) != 0) { ! 141: c = chars[nxtchar-1]; ! 142: while (c < d) { ! 143: if (nxtchar >= MAXLIN) overflo(); ! 144: chars[nxtchar++] = ++c; ! 145: cclcnt++; ! 146: } ! 147: continue; ! 148: } ! 149: } ! 150: if (nxtchar >= MAXLIN) overflo(); ! 151: chars[nxtchar++] = c; ! 152: cclcnt++; ! 153: } while ((c = nextch()) != ']'); ! 154: chars[count] = cclcnt; ! 155: return (x); ! 156: case '\\': ! 157: if ((c = nextch()) == '\0') synerror(); ! 158: defchar: ! 159: default: yylval = c; return (CHAR); ! 160: } ! 161: } ! 162: nextch() { ! 163: register char c; ! 164: if (fflag) { ! 165: if ((c = getc(stdin)) == EOF) return(0); ! 166: } ! 167: else c = *input++; ! 168: return(c); ! 169: } ! 170: ! 171: synerror() { ! 172: fprintf(stderr, "egrep: syntax error\n"); ! 173: exit(2); ! 174: } ! 175: ! 176: enter(x) int x; { ! 177: if(line >= MAXLIN) overflo(); ! 178: name[line] = x; ! 179: left[line] = 0; ! 180: right[line] = 0; ! 181: return(line++); ! 182: } ! 183: ! 184: cclenter(x) int x; { ! 185: register linno; ! 186: linno = enter(x); ! 187: right[linno] = count; ! 188: return (linno); ! 189: } ! 190: ! 191: node(x, l, r) { ! 192: if(line >= MAXLIN) overflo(); ! 193: name[line] = x; ! 194: left[line] = l; ! 195: right[line] = r; ! 196: parent[l] = line; ! 197: parent[r] = line; ! 198: return(line++); ! 199: } ! 200: ! 201: unary(x, d) { ! 202: if(line >= MAXLIN) overflo(); ! 203: name[line] = x; ! 204: left[line] = d; ! 205: right[line] = 0; ! 206: parent[d] = line; ! 207: return(line++); ! 208: } ! 209: overflo() { ! 210: fprintf(stderr, "egrep: regular expression too long\n"); ! 211: exit(2); ! 212: } ! 213: ! 214: cfoll(v) { ! 215: register i; ! 216: if (left[v] == 0) { ! 217: count = 0; ! 218: for (i=1; i<=line; i++) tmpstat[i] = 0; ! 219: follow(v); ! 220: add(foll, v); ! 221: } ! 222: else if (right[v] == 0) cfoll(left[v]); ! 223: else { ! 224: cfoll(left[v]); ! 225: cfoll(right[v]); ! 226: } ! 227: } ! 228: cgotofn() { ! 229: register c, i, k; ! 230: int n, s; ! 231: char symbol[NCHARS]; ! 232: int j, nc, pc, pos; ! 233: int curpos, num; ! 234: int number, newpos; ! 235: count = 0; ! 236: for (n=3; n<=line; n++) tmpstat[n] = 0; ! 237: if (cstate(line-1)==0) { ! 238: tmpstat[line] = 1; ! 239: count++; ! 240: out[0] = 1; ! 241: } ! 242: for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; ! 243: count--; /*leave out position 1 */ ! 244: icount = count; ! 245: tmpstat[1] = 0; ! 246: add(state, 0); ! 247: n = 0; ! 248: for (s=0; s<=n; s++) { ! 249: if (out[s] == 1) continue; ! 250: for (i=0; i<NCHARS; i++) symbol[i] = 0; ! 251: num = positions[state[s]]; ! 252: count = icount; ! 253: for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; ! 254: pos = state[s] + 1; ! 255: for (i=0; i<num; i++) { ! 256: curpos = positions[pos]; ! 257: if ((c = name[curpos]) >= 0) { ! 258: if (c < NCHARS) symbol[c] = 1; ! 259: else if (c == DOT) { ! 260: for (k=0; k<NCHARS; k++) ! 261: if (k!='\n') symbol[k] = 1; ! 262: } ! 263: else if (c == CCL) { ! 264: nc = chars[right[curpos]]; ! 265: pc = right[curpos] + 1; ! 266: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; ! 267: } ! 268: else if (c == NCCL) { ! 269: nc = chars[right[curpos]]; ! 270: for (j = 0; j < NCHARS; j++) { ! 271: pc = right[curpos] + 1; ! 272: for (k = 0; k < nc; k++) ! 273: if (j==chars[pc++]) goto cont; ! 274: if (j!='\n') symbol[j] = 1; ! 275: cont:; ! 276: } ! 277: } ! 278: else printf("something's funny\n"); ! 279: } ! 280: pos++; ! 281: } ! 282: for (c=0; c<NCHARS; c++) { ! 283: if (symbol[c] == 1) { /* nextstate(s,c) */ ! 284: count = icount; ! 285: for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; ! 286: pos = state[s] + 1; ! 287: for (i=0; i<num; i++) { ! 288: curpos = positions[pos]; ! 289: if ((k = name[curpos]) >= 0) ! 290: if ( ! 291: (k == c) ! 292: | (k == DOT) ! 293: | (k == CCL && member(c, right[curpos], 1)) ! 294: | (k == NCCL && member(c, right[curpos], 0)) ! 295: ) { ! 296: number = positions[foll[curpos]]; ! 297: newpos = foll[curpos] + 1; ! 298: for (k=0; k<number; k++) { ! 299: if (tmpstat[positions[newpos]] != 1) { ! 300: tmpstat[positions[newpos]] = 1; ! 301: count++; ! 302: } ! 303: newpos++; ! 304: } ! 305: } ! 306: pos++; ! 307: } /* end nextstate */ ! 308: if (notin(n)) { ! 309: if (n >= NSTATES) overflo(); ! 310: add(state, ++n); ! 311: if (tmpstat[line] == 1) out[n] = 1; ! 312: gotofn[s][c] = n; ! 313: } ! 314: else { ! 315: gotofn[s][c] = xstate; ! 316: } ! 317: } ! 318: } ! 319: } ! 320: } ! 321: ! 322: cstate(v) { ! 323: register b; ! 324: if (left[v] == 0) { ! 325: if (tmpstat[v] != 1) { ! 326: tmpstat[v] = 1; ! 327: count++; ! 328: } ! 329: return(1); ! 330: } ! 331: else if (right[v] == 0) { ! 332: if (cstate(left[v]) == 0) return (0); ! 333: else if (name[v] == PLUS) return (1); ! 334: else return (0); ! 335: } ! 336: else if (name[v] == CAT) { ! 337: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); ! 338: else return (1); ! 339: } ! 340: else { /* name[v] == OR */ ! 341: b = cstate(right[v]); ! 342: if (cstate(left[v]) == 0 || b == 0) return (0); ! 343: else return (1); ! 344: } ! 345: } ! 346: ! 347: ! 348: member(symb, set, torf) { ! 349: register i, num, pos; ! 350: num = chars[set]; ! 351: pos = set + 1; ! 352: for (i=0; i<num; i++) ! 353: if (symb == chars[pos++]) return (torf); ! 354: return (!torf); ! 355: } ! 356: ! 357: notin(n) { ! 358: register i, j, pos; ! 359: for (i=0; i<=n; i++) { ! 360: if (positions[state[i]] == count) { ! 361: pos = state[i] + 1; ! 362: for (j=0; j < count; j++) ! 363: if (tmpstat[positions[pos++]] != 1) goto nxt; ! 364: xstate = i; ! 365: return (0); ! 366: } ! 367: nxt: ; ! 368: } ! 369: return (1); ! 370: } ! 371: ! 372: add(array, n) int *array; { ! 373: register i; ! 374: if (nxtpos + count > MAXPOS) overflo(); ! 375: array[n] = nxtpos; ! 376: positions[nxtpos++] = count; ! 377: for (i=3; i <= line; i++) { ! 378: if (tmpstat[i] == 1) { ! 379: positions[nxtpos++] = i; ! 380: } ! 381: } ! 382: } ! 383: ! 384: follow(v) int v; { ! 385: int p; ! 386: if (v == line) return; ! 387: p = parent[v]; ! 388: switch(name[p]) { ! 389: case STAR: ! 390: case PLUS: cstate(v); ! 391: follow(p); ! 392: return; ! 393: ! 394: case OR: ! 395: case QUEST: follow(p); ! 396: return; ! 397: ! 398: case CAT: if (v == left[p]) { ! 399: if (cstate(right[p]) == 0) { ! 400: follow(p); ! 401: return; ! 402: } ! 403: } ! 404: else follow(p); ! 405: return; ! 406: case FINAL: if (tmpstat[line] != 1) { ! 407: tmpstat[line] = 1; ! 408: count++; ! 409: } ! 410: return; ! 411: } ! 412: } ! 413: ! 414: ! 415: main(argc, argv) ! 416: char **argv; ! 417: { ! 418: while (--argc > 0 && (++argv)[0][0]=='-') ! 419: switch (argv[0][1]) { ! 420: ! 421: case 's': ! 422: sflag++; ! 423: continue; ! 424: ! 425: case 'h': ! 426: hflag = 0; ! 427: continue; ! 428: ! 429: case 'b': ! 430: bflag++; ! 431: continue; ! 432: ! 433: case 'c': ! 434: cflag++; ! 435: continue; ! 436: ! 437: case 'e': ! 438: argc--; ! 439: argv++; ! 440: goto out; ! 441: ! 442: case 'f': ! 443: fflag++; ! 444: continue; ! 445: ! 446: case 'l': ! 447: lflag++; ! 448: continue; ! 449: ! 450: case 'n': ! 451: nflag++; ! 452: continue; ! 453: ! 454: case 'v': ! 455: vflag++; ! 456: continue; ! 457: ! 458: default: ! 459: fprintf(stderr, "egrep: unknown flag\n"); ! 460: continue; ! 461: } ! 462: out: ! 463: if (argc<=0) ! 464: exit(2); ! 465: if (fflag) { ! 466: if (freopen(fname = *argv, "r", stdin) == NULL) { ! 467: fprintf(stderr, "egrep: can't open %s\n", fname); ! 468: exit(2); ! 469: } ! 470: } ! 471: else input = *argv; ! 472: argc--; ! 473: argv++; ! 474: ! 475: yyparse(); ! 476: ! 477: cfoll(line-1); ! 478: cgotofn(); ! 479: nfile = argc; ! 480: if (argc<=0) { ! 481: if (lflag) exit(1); ! 482: execute(0); ! 483: } ! 484: else while (--argc >= 0) { ! 485: execute(*argv); ! 486: argv++; ! 487: } ! 488: exit(nsucc == 0); ! 489: } ! 490: ! 491: execute(file) ! 492: char *file; ! 493: { ! 494: register char *p; ! 495: register cstat; ! 496: register ccount; ! 497: char buf[1024]; ! 498: char *nlp; ! 499: int istat; ! 500: if (file) { ! 501: if ((f = open(file, 0)) < 0) { ! 502: fprintf(stderr, "egrep: can't open %s\n", file); ! 503: exit(2); ! 504: } ! 505: } ! 506: else f = 0; ! 507: ccount = 0; ! 508: lnum = 1; ! 509: tln = 0; ! 510: blkno = 0; ! 511: p = buf; ! 512: nlp = p; ! 513: if ((ccount = read(f,p,512))<=0) goto done; ! 514: istat = cstat = gotofn[0]['\n']; ! 515: if (out[cstat]) goto found; ! 516: for (;;) { ! 517: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ ! 518: if (out[cstat]) { ! 519: found: for(;;) { ! 520: if (*p++ == '\n') { ! 521: if (vflag == 0) { ! 522: succeed: nsucc = 1; ! 523: if (cflag) tln++; ! 524: else if (sflag) ! 525: ; /* ugh */ ! 526: else if (lflag) { ! 527: printf("%s\n", file); ! 528: close(f); ! 529: return; ! 530: } ! 531: else { ! 532: if (nfile > 1 && hflag) printf("%s:", file); ! 533: if (bflag) printf("%d:", blkno); ! 534: if (nflag) printf("%ld:", lnum); ! 535: if (p <= nlp) { ! 536: while (nlp < &buf[1024]) putchar(*nlp++); ! 537: nlp = buf; ! 538: } ! 539: while (nlp < p) putchar(*nlp++); ! 540: } ! 541: } ! 542: lnum++; ! 543: nlp = p; ! 544: if ((out[(cstat=istat)]) == 0) goto brk2; ! 545: } ! 546: cfound: ! 547: if (--ccount <= 0) { ! 548: if (p <= &buf[512]) { ! 549: if ((ccount = read(f, p, 512)) <= 0) goto done; ! 550: } ! 551: else if (p == &buf[1024]) { ! 552: p = buf; ! 553: if ((ccount = read(f, p, 512)) <= 0) goto done; ! 554: } ! 555: else { ! 556: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; ! 557: } ! 558: blkno++; ! 559: } ! 560: } ! 561: } ! 562: if (*p++ == '\n') { ! 563: if (vflag) goto succeed; ! 564: else { ! 565: lnum++; ! 566: nlp = p; ! 567: if (out[(cstat=istat)]) goto cfound; ! 568: } ! 569: } ! 570: brk2: ! 571: if (--ccount <= 0) { ! 572: if (p <= &buf[512]) { ! 573: if ((ccount = read(f, p, 512)) <= 0) break; ! 574: } ! 575: else if (p == &buf[1024]) { ! 576: p = buf; ! 577: if ((ccount = read(f, p, 512)) <= 0) break; ! 578: } ! 579: else { ! 580: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; ! 581: } ! 582: blkno++; ! 583: } ! 584: } ! 585: done: close(f); ! 586: if (cflag) { ! 587: if (nfile > 1) ! 588: printf("%s:", file); ! 589: printf("%ld\n", tln); ! 590: } ! 591: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.