|
|
1.1 ! root 1: /* ! 2: * egrep -- print lines containing (or not containing) a regular expression ! 3: * ! 4: * status returns: ! 5: * 0 - ok, and some matches ! 6: * 1 - ok, but no matches ! 7: * 2 - some error ! 8: */ ! 9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST ! 10: %left OR ! 11: %left CHAR DOT CCL NCCL '(' ! 12: %left CAT ! 13: %left STAR PLUS QUEST ! 14: ! 15: %{ ! 16: static char *sccsid = "@(#)egrep.y 4.4 (Berkeley) 5/29/85"; ! 17: #include <stdio.h> ! 18: #include <sys/types.h> ! 19: #include <sys/stat.h> ! 20: ! 21: #define BLKSIZE 8192 ! 22: #define MAXLIN 350 ! 23: #define MAXPOS 4000 ! 24: #define NCHARS 128 ! 25: #define NSTATES 128 ! 26: #define FINAL -1 ! 27: char gotofn[NSTATES][NCHARS]; ! 28: int state[NSTATES]; ! 29: char out[NSTATES]; ! 30: int line = 1; ! 31: int name[MAXLIN]; ! 32: int left[MAXLIN]; ! 33: int right[MAXLIN]; ! 34: int parent[MAXLIN]; ! 35: int foll[MAXLIN]; ! 36: int positions[MAXPOS]; ! 37: char chars[MAXLIN]; ! 38: int nxtpos; ! 39: int nxtchar = 0; ! 40: int tmpstat[MAXLIN]; ! 41: int initstat[MAXLIN]; ! 42: int xstate; ! 43: int count; ! 44: int icount; ! 45: char *input; ! 46: FILE *exprfile; ! 47: ! 48: long lnum; ! 49: int bflag; ! 50: int cflag; ! 51: int fflag; ! 52: int lflag; ! 53: int nflag; ! 54: int hflag = 1; ! 55: int sflag; ! 56: int vflag; ! 57: int retcode = 0; ! 58: int nfile; ! 59: int blkno; ! 60: long tln; ! 61: int nsucc; ! 62: ! 63: int f; ! 64: char *fname; ! 65: %} ! 66: ! 67: %% ! 68: s: t ! 69: ={ unary(FINAL, $1); ! 70: line--; ! 71: } ! 72: ; ! 73: t: b r ! 74: ={ $$ = node(CAT, $1, $2); } ! 75: | OR b r OR ! 76: ={ $$ = node(CAT, $2, $3); } ! 77: | OR b r ! 78: ={ $$ = node(CAT, $2, $3); } ! 79: | b r OR ! 80: ={ $$ = node(CAT, $1, $2); } ! 81: ; ! 82: b: ! 83: ={ $$ = enter(DOT); ! 84: $$ = unary(STAR, $$); } ! 85: ; ! 86: r: CHAR ! 87: ={ $$ = enter($1); } ! 88: | DOT ! 89: ={ $$ = enter(DOT); } ! 90: | CCL ! 91: ={ $$ = cclenter(CCL); } ! 92: | NCCL ! 93: ={ $$ = cclenter(NCCL); } ! 94: ; ! 95: ! 96: r: r OR r ! 97: ={ $$ = node(OR, $1, $3); } ! 98: | r r %prec CAT ! 99: ={ $$ = node(CAT, $1, $2); } ! 100: | r STAR ! 101: ={ $$ = unary(STAR, $1); } ! 102: | r PLUS ! 103: ={ $$ = unary(PLUS, $1); } ! 104: | r QUEST ! 105: ={ $$ = unary(QUEST, $1); } ! 106: | '(' r ')' ! 107: ={ $$ = $2; } ! 108: | error ! 109: ; ! 110: ! 111: %% ! 112: yyerror(s) { ! 113: fprintf(stderr, "egrep: %s\n", s); ! 114: exit(2); ! 115: } ! 116: ! 117: yylex() { ! 118: extern int yylval; ! 119: int cclcnt, x; ! 120: register char c, d; ! 121: switch(c = nextch()) { ! 122: case '$': ! 123: case '^': c = '\n'; ! 124: goto defchar; ! 125: case '|': return (OR); ! 126: case '*': return (STAR); ! 127: case '+': return (PLUS); ! 128: case '?': return (QUEST); ! 129: case '(': return (c); ! 130: case ')': return (c); ! 131: case '.': return (DOT); ! 132: case '\0': return (0); ! 133: case '\n': return (OR); ! 134: case '[': ! 135: x = CCL; ! 136: cclcnt = 0; ! 137: count = nxtchar++; ! 138: if ((c = nextch()) == '^') { ! 139: x = NCCL; ! 140: c = nextch(); ! 141: } ! 142: do { ! 143: if (c == '\0') synerror(); ! 144: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { ! 145: if ((d = nextch()) != 0) { ! 146: c = chars[nxtchar-1]; ! 147: while (c < d) { ! 148: if (nxtchar >= MAXLIN) overflo(); ! 149: chars[nxtchar++] = ++c; ! 150: cclcnt++; ! 151: } ! 152: continue; ! 153: } ! 154: } ! 155: if (nxtchar >= MAXLIN) overflo(); ! 156: chars[nxtchar++] = c; ! 157: cclcnt++; ! 158: } while ((c = nextch()) != ']'); ! 159: chars[count] = cclcnt; ! 160: return (x); ! 161: case '\\': ! 162: if ((c = nextch()) == '\0') synerror(); ! 163: defchar: ! 164: default: yylval = c; return (CHAR); ! 165: } ! 166: } ! 167: nextch() { ! 168: register char c; ! 169: if (fflag) { ! 170: if ((c = getc(exprfile)) == EOF) { ! 171: fclose(exprfile); ! 172: return(0); ! 173: } ! 174: } ! 175: else c = *input++; ! 176: return(c); ! 177: } ! 178: ! 179: synerror() { ! 180: fprintf(stderr, "egrep: syntax error\n"); ! 181: exit(2); ! 182: } ! 183: ! 184: enter(x) int x; { ! 185: if(line >= MAXLIN) overflo(); ! 186: name[line] = x; ! 187: left[line] = 0; ! 188: right[line] = 0; ! 189: return(line++); ! 190: } ! 191: ! 192: cclenter(x) int x; { ! 193: register linno; ! 194: linno = enter(x); ! 195: right[linno] = count; ! 196: return (linno); ! 197: } ! 198: ! 199: node(x, l, r) { ! 200: if(line >= MAXLIN) overflo(); ! 201: name[line] = x; ! 202: left[line] = l; ! 203: right[line] = r; ! 204: parent[l] = line; ! 205: parent[r] = line; ! 206: return(line++); ! 207: } ! 208: ! 209: unary(x, d) { ! 210: if(line >= MAXLIN) overflo(); ! 211: name[line] = x; ! 212: left[line] = d; ! 213: right[line] = 0; ! 214: parent[d] = line; ! 215: return(line++); ! 216: } ! 217: overflo() { ! 218: fprintf(stderr, "egrep: regular expression too long\n"); ! 219: exit(2); ! 220: } ! 221: ! 222: cfoll(v) { ! 223: register i; ! 224: if (left[v] == 0) { ! 225: count = 0; ! 226: for (i=1; i<=line; i++) tmpstat[i] = 0; ! 227: follow(v); ! 228: add(foll, v); ! 229: } ! 230: else if (right[v] == 0) cfoll(left[v]); ! 231: else { ! 232: cfoll(left[v]); ! 233: cfoll(right[v]); ! 234: } ! 235: } ! 236: cgotofn() { ! 237: register c, i, k; ! 238: int n, s; ! 239: char symbol[NCHARS]; ! 240: int j, nc, pc, pos; ! 241: int curpos, num; ! 242: int number, newpos; ! 243: count = 0; ! 244: for (n=3; n<=line; n++) tmpstat[n] = 0; ! 245: if (cstate(line-1)==0) { ! 246: tmpstat[line] = 1; ! 247: count++; ! 248: out[0] = 1; ! 249: } ! 250: for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; ! 251: count--; /*leave out position 1 */ ! 252: icount = count; ! 253: tmpstat[1] = 0; ! 254: add(state, 0); ! 255: n = 0; ! 256: for (s=0; s<=n; s++) { ! 257: if (out[s] == 1) continue; ! 258: for (i=0; i<NCHARS; i++) symbol[i] = 0; ! 259: num = positions[state[s]]; ! 260: count = icount; ! 261: for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; ! 262: pos = state[s] + 1; ! 263: for (i=0; i<num; i++) { ! 264: curpos = positions[pos]; ! 265: if ((c = name[curpos]) >= 0) { ! 266: if (c < NCHARS) symbol[c] = 1; ! 267: else if (c == DOT) { ! 268: for (k=0; k<NCHARS; k++) ! 269: if (k!='\n') symbol[k] = 1; ! 270: } ! 271: else if (c == CCL) { ! 272: nc = chars[right[curpos]]; ! 273: pc = right[curpos] + 1; ! 274: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; ! 275: } ! 276: else if (c == NCCL) { ! 277: nc = chars[right[curpos]]; ! 278: for (j = 0; j < NCHARS; j++) { ! 279: pc = right[curpos] + 1; ! 280: for (k = 0; k < nc; k++) ! 281: if (j==chars[pc++]) goto cont; ! 282: if (j!='\n') symbol[j] = 1; ! 283: cont:; ! 284: } ! 285: } ! 286: else printf("something's funny\n"); ! 287: } ! 288: pos++; ! 289: } ! 290: for (c=0; c<NCHARS; c++) { ! 291: if (symbol[c] == 1) { /* nextstate(s,c) */ ! 292: count = icount; ! 293: for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; ! 294: pos = state[s] + 1; ! 295: for (i=0; i<num; i++) { ! 296: curpos = positions[pos]; ! 297: if ((k = name[curpos]) >= 0) ! 298: if ( ! 299: (k == c) ! 300: | (k == DOT) ! 301: | (k == CCL && member(c, right[curpos], 1)) ! 302: | (k == NCCL && member(c, right[curpos], 0)) ! 303: ) { ! 304: number = positions[foll[curpos]]; ! 305: newpos = foll[curpos] + 1; ! 306: for (k=0; k<number; k++) { ! 307: if (tmpstat[positions[newpos]] != 1) { ! 308: tmpstat[positions[newpos]] = 1; ! 309: count++; ! 310: } ! 311: newpos++; ! 312: } ! 313: } ! 314: pos++; ! 315: } /* end nextstate */ ! 316: if (notin(n)) { ! 317: if (n >= NSTATES) overflo(); ! 318: add(state, ++n); ! 319: if (tmpstat[line] == 1) out[n] = 1; ! 320: gotofn[s][c] = n; ! 321: } ! 322: else { ! 323: gotofn[s][c] = xstate; ! 324: } ! 325: } ! 326: } ! 327: } ! 328: } ! 329: ! 330: cstate(v) { ! 331: register b; ! 332: if (left[v] == 0) { ! 333: if (tmpstat[v] != 1) { ! 334: tmpstat[v] = 1; ! 335: count++; ! 336: } ! 337: return(1); ! 338: } ! 339: else if (right[v] == 0) { ! 340: if (cstate(left[v]) == 0) return (0); ! 341: else if (name[v] == PLUS) return (1); ! 342: else return (0); ! 343: } ! 344: else if (name[v] == CAT) { ! 345: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); ! 346: else return (1); ! 347: } ! 348: else { /* name[v] == OR */ ! 349: b = cstate(right[v]); ! 350: if (cstate(left[v]) == 0 || b == 0) return (0); ! 351: else return (1); ! 352: } ! 353: } ! 354: ! 355: ! 356: member(symb, set, torf) { ! 357: register i, num, pos; ! 358: num = chars[set]; ! 359: pos = set + 1; ! 360: for (i=0; i<num; i++) ! 361: if (symb == chars[pos++]) return (torf); ! 362: return (!torf); ! 363: } ! 364: ! 365: notin(n) { ! 366: register i, j, pos; ! 367: for (i=0; i<=n; i++) { ! 368: if (positions[state[i]] == count) { ! 369: pos = state[i] + 1; ! 370: for (j=0; j < count; j++) ! 371: if (tmpstat[positions[pos++]] != 1) goto nxt; ! 372: xstate = i; ! 373: return (0); ! 374: } ! 375: nxt: ; ! 376: } ! 377: return (1); ! 378: } ! 379: ! 380: add(array, n) int *array; { ! 381: register i; ! 382: if (nxtpos + count > MAXPOS) overflo(); ! 383: array[n] = nxtpos; ! 384: positions[nxtpos++] = count; ! 385: for (i=3; i <= line; i++) { ! 386: if (tmpstat[i] == 1) { ! 387: positions[nxtpos++] = i; ! 388: } ! 389: } ! 390: } ! 391: ! 392: follow(v) int v; { ! 393: int p; ! 394: if (v == line) return; ! 395: p = parent[v]; ! 396: switch(name[p]) { ! 397: case STAR: ! 398: case PLUS: cstate(v); ! 399: follow(p); ! 400: return; ! 401: ! 402: case OR: ! 403: case QUEST: follow(p); ! 404: return; ! 405: ! 406: case CAT: if (v == left[p]) { ! 407: if (cstate(right[p]) == 0) { ! 408: follow(p); ! 409: return; ! 410: } ! 411: } ! 412: else follow(p); ! 413: return; ! 414: case FINAL: if (tmpstat[line] != 1) { ! 415: tmpstat[line] = 1; ! 416: count++; ! 417: } ! 418: return; ! 419: } ! 420: } ! 421: ! 422: ! 423: main(argc, argv) ! 424: char **argv; ! 425: { ! 426: while (--argc > 0 && (++argv)[0][0]=='-') ! 427: switch (argv[0][1]) { ! 428: ! 429: case 's': ! 430: sflag++; ! 431: continue; ! 432: ! 433: case 'h': ! 434: hflag = 0; ! 435: continue; ! 436: ! 437: case 'b': ! 438: bflag++; ! 439: continue; ! 440: ! 441: case 'c': ! 442: cflag++; ! 443: continue; ! 444: ! 445: case 'e': ! 446: argc--; ! 447: argv++; ! 448: goto out; ! 449: ! 450: case 'f': ! 451: fflag++; ! 452: continue; ! 453: ! 454: case 'l': ! 455: lflag++; ! 456: continue; ! 457: ! 458: case 'n': ! 459: nflag++; ! 460: continue; ! 461: ! 462: case 'v': ! 463: vflag++; ! 464: continue; ! 465: ! 466: default: ! 467: fprintf(stderr, "egrep: unknown flag\n"); ! 468: continue; ! 469: } ! 470: out: ! 471: if (argc<=0) ! 472: exit(2); ! 473: if (fflag) { ! 474: fname = *argv; ! 475: exprfile = fopen(fname, "r"); ! 476: if (exprfile == (FILE *)NULL) { ! 477: fprintf(stderr, "egrep: can't open %s\n", fname); ! 478: exit(2); ! 479: } ! 480: } ! 481: else input = *argv; ! 482: argc--; ! 483: argv++; ! 484: ! 485: yyparse(); ! 486: ! 487: cfoll(line-1); ! 488: cgotofn(); ! 489: nfile = argc; ! 490: if (argc<=0) { ! 491: if (lflag) exit(1); ! 492: execute(0); ! 493: } ! 494: else while (--argc >= 0) { ! 495: execute(*argv); ! 496: argv++; ! 497: } ! 498: exit(retcode != 0 ? retcode : nsucc == 0); ! 499: } ! 500: ! 501: execute(file) ! 502: char *file; ! 503: { ! 504: register char *p; ! 505: register cstat; ! 506: register ccount; ! 507: static char *buf; ! 508: static int blksize; ! 509: struct stat stb; ! 510: char *nlp; ! 511: int istat; ! 512: if (file) { ! 513: if ((f = open(file, 0)) < 0) { ! 514: fprintf(stderr, "egrep: can't open %s\n", file); ! 515: retcode = 2; ! 516: return; ! 517: } ! 518: } ! 519: else f = 0; ! 520: if (buf == NULL) { ! 521: if (fstat(f, &stb) > 0 && stb.st_blksize > 0) ! 522: blksize = stb.st_blksize; ! 523: else ! 524: blksize = BLKSIZE; ! 525: buf = (char *)malloc(2*blksize); ! 526: if (buf == NULL) { ! 527: fprintf(stderr, "egrep: no memory for %s\n", file); ! 528: retcode = 2; ! 529: return; ! 530: } ! 531: } ! 532: ccount = 0; ! 533: lnum = 1; ! 534: tln = 0; ! 535: blkno = 0; ! 536: p = buf; ! 537: nlp = p; ! 538: if ((ccount = read(f,p,blksize))<=0) goto done; ! 539: istat = cstat = gotofn[0]['\n']; ! 540: if (out[cstat]) goto found; ! 541: for (;;) { ! 542: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ ! 543: if (out[cstat]) { ! 544: found: for(;;) { ! 545: if (*p++ == '\n') { ! 546: if (vflag == 0) { ! 547: succeed: nsucc = 1; ! 548: if (cflag) tln++; ! 549: else if (sflag) ! 550: ; /* ugh */ ! 551: else if (lflag) { ! 552: printf("%s\n", file); ! 553: close(f); ! 554: return; ! 555: } ! 556: else { ! 557: if (nfile > 1 && hflag) printf("%s:", file); ! 558: if (bflag) printf("%d:", blkno); ! 559: if (nflag) printf("%ld:", lnum); ! 560: if (p <= nlp) { ! 561: while (nlp < &buf[2*blksize]) putchar(*nlp++); ! 562: nlp = buf; ! 563: } ! 564: while (nlp < p) putchar(*nlp++); ! 565: } ! 566: } ! 567: lnum++; ! 568: nlp = p; ! 569: if ((out[(cstat=istat)]) == 0) goto brk2; ! 570: } ! 571: cfound: ! 572: if (--ccount <= 0) { ! 573: if (p <= &buf[blksize]) { ! 574: if ((ccount = read(f, p, blksize)) <= 0) goto done; ! 575: } ! 576: else if (p == &buf[2*blksize]) { ! 577: p = buf; ! 578: if ((ccount = read(f, p, blksize)) <= 0) goto done; ! 579: } ! 580: else { ! 581: if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; ! 582: } ! 583: blkno += ccount / 512; ! 584: } ! 585: } ! 586: } ! 587: if (*p++ == '\n') { ! 588: if (vflag) goto succeed; ! 589: else { ! 590: lnum++; ! 591: nlp = p; ! 592: if (out[(cstat=istat)]) goto cfound; ! 593: } ! 594: } ! 595: brk2: ! 596: if (--ccount <= 0) { ! 597: if (p <= &buf[blksize]) { ! 598: if ((ccount = read(f, p, blksize)) <= 0) break; ! 599: } ! 600: else if (p == &buf[2*blksize]) { ! 601: p = buf; ! 602: if ((ccount = read(f, p, blksize)) <= 0) break; ! 603: } ! 604: else { ! 605: if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; ! 606: } ! 607: blkno += ccount / 512; ! 608: } ! 609: } ! 610: done: close(f); ! 611: if (cflag) { ! 612: if (nfile > 1) ! 613: printf("%s:", file); ! 614: printf("%ld\n", tln); ! 615: } ! 616: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.