|
|
1.1 ! root 1: /* ! 2: * egrep -- print lines containing (or not containing) a regular expression ! 3: * ! 4: * status returns: ! 5: * 0 - ok, and some matches ! 6: * 1 - ok, but no matches ! 7: * 2 - some error ! 8: */ ! 9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST ! 10: %left OR ! 11: %left CHAR DOT CCL NCCL '(' ! 12: %left CAT ! 13: %left STAR PLUS QUEST ! 14: ! 15: %{ ! 16: #include <stdio.h> ! 17: ! 18: #define MAXLIN 350 ! 19: #define MAXPOS 4000 ! 20: #define NCHARS 128 ! 21: #define NSTATES 128 ! 22: #define FINAL -1 ! 23: char gotofn[NSTATES][NCHARS]; ! 24: int state[NSTATES]; ! 25: char out[NSTATES]; ! 26: int line = 1; ! 27: int name[MAXLIN]; ! 28: int left[MAXLIN]; ! 29: int right[MAXLIN]; ! 30: int parent[MAXLIN]; ! 31: int foll[MAXLIN]; ! 32: int positions[MAXPOS]; ! 33: char chars[MAXLIN]; ! 34: int nxtpos; ! 35: int nxtchar = 0; ! 36: int tmpstat[MAXLIN]; ! 37: int initstat[MAXLIN]; ! 38: int xstate; ! 39: int count; ! 40: int icount; ! 41: char *input; ! 42: ! 43: long lnum; ! 44: int bflag; ! 45: int cflag; ! 46: int fflag; ! 47: int lflag; ! 48: int nflag; ! 49: int hflag = 1; ! 50: int sflag; ! 51: int vflag; ! 52: int nfile; ! 53: int blkno; ! 54: long tln; ! 55: int nsucc; ! 56: ! 57: int f; ! 58: int fname; ! 59: %} ! 60: ! 61: %% ! 62: s: t ! 63: ={ unary(FINAL, $1); ! 64: line--; ! 65: } ! 66: ; ! 67: t: b r ! 68: ={ $$ = node(CAT, $1, $2); } ! 69: | OR b r OR ! 70: ={ $$ = node(CAT, $2, $3); } ! 71: | OR b r ! 72: ={ $$ = node(CAT, $2, $3); } ! 73: | b r OR ! 74: ={ $$ = node(CAT, $1, $2); } ! 75: ; ! 76: b: ! 77: ={ $$ = enter(DOT); ! 78: $$ = unary(STAR, $$); } ! 79: ; ! 80: r: CHAR ! 81: ={ $$ = enter($1); } ! 82: | DOT ! 83: ={ $$ = enter(DOT); } ! 84: | CCL ! 85: ={ $$ = cclenter(CCL); } ! 86: | NCCL ! 87: ={ $$ = cclenter(NCCL); } ! 88: ; ! 89: ! 90: r: r OR r ! 91: ={ $$ = node(OR, $1, $3); } ! 92: | r r %prec CAT ! 93: ={ $$ = node(CAT, $1, $2); } ! 94: | r STAR ! 95: ={ $$ = unary(STAR, $1); } ! 96: | r PLUS ! 97: ={ $$ = unary(PLUS, $1); } ! 98: | r QUEST ! 99: ={ $$ = unary(QUEST, $1); } ! 100: | '(' r ')' ! 101: ={ $$ = $2; } ! 102: | error ! 103: ; ! 104: ! 105: %% ! 106: yyerror(s) { ! 107: fprintf(stderr, "egrep: %s\n", s); ! 108: exit(2); ! 109: } ! 110: ! 111: yylex() { ! 112: extern int yylval; ! 113: int cclcnt, x; ! 114: register char c, d; ! 115: switch(c = nextch()) { ! 116: case '$': ! 117: case '^': c = '\n'; ! 118: goto defchar; ! 119: case '|': return (OR); ! 120: case '*': return (STAR); ! 121: case '+': return (PLUS); ! 122: case '?': return (QUEST); ! 123: case '(': return (c); ! 124: case ')': return (c); ! 125: case '.': return (DOT); ! 126: case '\0': return (0); ! 127: case '\n': return (OR); ! 128: case '[': ! 129: x = CCL; ! 130: cclcnt = 0; ! 131: count = nxtchar++; ! 132: if ((c = nextch()) == '^') { ! 133: x = NCCL; ! 134: c = nextch(); ! 135: } ! 136: do { ! 137: if (c == '\0') synerror(); ! 138: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { ! 139: if ((d = nextch()) != 0) { ! 140: c = chars[nxtchar-1]; ! 141: while (c < d) { ! 142: if (nxtchar >= MAXLIN) overflo(); ! 143: chars[nxtchar++] = ++c; ! 144: cclcnt++; ! 145: } ! 146: continue; ! 147: } ! 148: } ! 149: if (nxtchar >= MAXLIN) overflo(); ! 150: chars[nxtchar++] = c; ! 151: cclcnt++; ! 152: } while ((c = nextch()) != ']'); ! 153: chars[count] = cclcnt; ! 154: return (x); ! 155: case '\\': ! 156: if ((c = nextch()) == '\0') synerror(); ! 157: defchar: ! 158: default: yylval = c; return (CHAR); ! 159: } ! 160: } ! 161: nextch() { ! 162: register char c; ! 163: if (fflag) { ! 164: if ((c = getc(stdin)) == EOF) return(0); ! 165: } ! 166: else c = *input++; ! 167: return(c); ! 168: } ! 169: ! 170: synerror() { ! 171: fprintf(stderr, "egrep: syntax error\n"); ! 172: exit(2); ! 173: } ! 174: ! 175: enter(x) int x; { ! 176: if(line >= MAXLIN) overflo(); ! 177: name[line] = x; ! 178: left[line] = 0; ! 179: right[line] = 0; ! 180: return(line++); ! 181: } ! 182: ! 183: cclenter(x) int x; { ! 184: register linno; ! 185: linno = enter(x); ! 186: right[linno] = count; ! 187: return (linno); ! 188: } ! 189: ! 190: node(x, l, r) { ! 191: if(line >= MAXLIN) overflo(); ! 192: name[line] = x; ! 193: left[line] = l; ! 194: right[line] = r; ! 195: parent[l] = line; ! 196: parent[r] = line; ! 197: return(line++); ! 198: } ! 199: ! 200: unary(x, d) { ! 201: if(line >= MAXLIN) overflo(); ! 202: name[line] = x; ! 203: left[line] = d; ! 204: right[line] = 0; ! 205: parent[d] = line; ! 206: return(line++); ! 207: } ! 208: overflo() { ! 209: fprintf(stderr, "egrep: regular expression too long\n"); ! 210: exit(2); ! 211: } ! 212: ! 213: cfoll(v) { ! 214: register i; ! 215: if (left[v] == 0) { ! 216: count = 0; ! 217: for (i=1; i<=line; i++) tmpstat[i] = 0; ! 218: follow(v); ! 219: add(foll, v); ! 220: } ! 221: else if (right[v] == 0) cfoll(left[v]); ! 222: else { ! 223: cfoll(left[v]); ! 224: cfoll(right[v]); ! 225: } ! 226: } ! 227: cgotofn() { ! 228: register c, i, k; ! 229: int n, s; ! 230: char symbol[NCHARS]; ! 231: int j, nc, pc, pos; ! 232: int curpos, num; ! 233: int number, newpos; ! 234: count = 0; ! 235: for (n=3; n<=line; n++) tmpstat[n] = 0; ! 236: if (cstate(line-1)==0) { ! 237: tmpstat[line] = 1; ! 238: count++; ! 239: out[0] = 1; ! 240: } ! 241: for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; ! 242: count--; /*leave out position 1 */ ! 243: icount = count; ! 244: tmpstat[1] = 0; ! 245: add(state, 0); ! 246: n = 0; ! 247: for (s=0; s<=n; s++) { ! 248: if (out[s] == 1) continue; ! 249: for (i=0; i<NCHARS; i++) symbol[i] = 0; ! 250: num = positions[state[s]]; ! 251: count = icount; ! 252: for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; ! 253: pos = state[s] + 1; ! 254: for (i=0; i<num; i++) { ! 255: curpos = positions[pos]; ! 256: if ((c = name[curpos]) >= 0) { ! 257: if (c < NCHARS) symbol[c] = 1; ! 258: else if (c == DOT) { ! 259: for (k=0; k<NCHARS; k++) ! 260: if (k!='\n') symbol[k] = 1; ! 261: } ! 262: else if (c == CCL) { ! 263: nc = chars[right[curpos]]; ! 264: pc = right[curpos] + 1; ! 265: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; ! 266: } ! 267: else if (c == NCCL) { ! 268: nc = chars[right[curpos]]; ! 269: for (j = 0; j < NCHARS; j++) { ! 270: pc = right[curpos] + 1; ! 271: for (k = 0; k < nc; k++) ! 272: if (j==chars[pc++]) goto cont; ! 273: if (j!='\n') symbol[j] = 1; ! 274: cont:; ! 275: } ! 276: } ! 277: else printf("something's funny\n"); ! 278: } ! 279: pos++; ! 280: } ! 281: for (c=0; c<NCHARS; c++) { ! 282: if (symbol[c] == 1) { /* nextstate(s,c) */ ! 283: count = icount; ! 284: for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; ! 285: pos = state[s] + 1; ! 286: for (i=0; i<num; i++) { ! 287: curpos = positions[pos]; ! 288: if ((k = name[curpos]) >= 0) ! 289: if ( ! 290: (k == c) ! 291: | (k == DOT) ! 292: | (k == CCL && member(c, right[curpos], 1)) ! 293: | (k == NCCL && member(c, right[curpos], 0)) ! 294: ) { ! 295: number = positions[foll[curpos]]; ! 296: newpos = foll[curpos] + 1; ! 297: for (k=0; k<number; k++) { ! 298: if (tmpstat[positions[newpos]] != 1) { ! 299: tmpstat[positions[newpos]] = 1; ! 300: count++; ! 301: } ! 302: newpos++; ! 303: } ! 304: } ! 305: pos++; ! 306: } /* end nextstate */ ! 307: if (notin(n)) { ! 308: if (n >= NSTATES) overflo(); ! 309: add(state, ++n); ! 310: if (tmpstat[line] == 1) out[n] = 1; ! 311: gotofn[s][c] = n; ! 312: } ! 313: else { ! 314: gotofn[s][c] = xstate; ! 315: } ! 316: } ! 317: } ! 318: } ! 319: } ! 320: ! 321: cstate(v) { ! 322: register b; ! 323: if (left[v] == 0) { ! 324: if (tmpstat[v] != 1) { ! 325: tmpstat[v] = 1; ! 326: count++; ! 327: } ! 328: return(1); ! 329: } ! 330: else if (right[v] == 0) { ! 331: if (cstate(left[v]) == 0) return (0); ! 332: else if (name[v] == PLUS) return (1); ! 333: else return (0); ! 334: } ! 335: else if (name[v] == CAT) { ! 336: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); ! 337: else return (1); ! 338: } ! 339: else { /* name[v] == OR */ ! 340: b = cstate(right[v]); ! 341: if (cstate(left[v]) == 0 || b == 0) return (0); ! 342: else return (1); ! 343: } ! 344: } ! 345: ! 346: ! 347: member(symb, set, torf) { ! 348: register i, num, pos; ! 349: num = chars[set]; ! 350: pos = set + 1; ! 351: for (i=0; i<num; i++) ! 352: if (symb == chars[pos++]) return (torf); ! 353: return (!torf); ! 354: } ! 355: ! 356: notin(n) { ! 357: register i, j, pos; ! 358: for (i=0; i<=n; i++) { ! 359: if (positions[state[i]] == count) { ! 360: pos = state[i] + 1; ! 361: for (j=0; j < count; j++) ! 362: if (tmpstat[positions[pos++]] != 1) goto nxt; ! 363: xstate = i; ! 364: return (0); ! 365: } ! 366: nxt: ; ! 367: } ! 368: return (1); ! 369: } ! 370: ! 371: add(array, n) int *array; { ! 372: register i; ! 373: if (nxtpos + count > MAXPOS) overflo(); ! 374: array[n] = nxtpos; ! 375: positions[nxtpos++] = count; ! 376: for (i=3; i <= line; i++) { ! 377: if (tmpstat[i] == 1) { ! 378: positions[nxtpos++] = i; ! 379: } ! 380: } ! 381: } ! 382: ! 383: follow(v) int v; { ! 384: int p; ! 385: if (v == line) return; ! 386: p = parent[v]; ! 387: switch(name[p]) { ! 388: case STAR: ! 389: case PLUS: cstate(v); ! 390: follow(p); ! 391: return; ! 392: ! 393: case OR: ! 394: case QUEST: follow(p); ! 395: return; ! 396: ! 397: case CAT: if (v == left[p]) { ! 398: if (cstate(right[p]) == 0) { ! 399: follow(p); ! 400: return; ! 401: } ! 402: } ! 403: else follow(p); ! 404: return; ! 405: case FINAL: if (tmpstat[line] != 1) { ! 406: tmpstat[line] = 1; ! 407: count++; ! 408: } ! 409: return; ! 410: } ! 411: } ! 412: ! 413: ! 414: main(argc, argv) ! 415: char **argv; ! 416: { ! 417: while (--argc > 0 && (++argv)[0][0]=='-') ! 418: switch (argv[0][1]) { ! 419: ! 420: case 's': ! 421: sflag++; ! 422: continue; ! 423: ! 424: case 'h': ! 425: hflag = 0; ! 426: continue; ! 427: ! 428: case 'b': ! 429: bflag++; ! 430: continue; ! 431: ! 432: case 'c': ! 433: cflag++; ! 434: continue; ! 435: ! 436: case 'e': ! 437: argc--; ! 438: argv++; ! 439: goto out; ! 440: ! 441: case 'f': ! 442: fflag++; ! 443: continue; ! 444: ! 445: case 'l': ! 446: lflag++; ! 447: continue; ! 448: ! 449: case 'n': ! 450: nflag++; ! 451: continue; ! 452: ! 453: case 'v': ! 454: vflag++; ! 455: continue; ! 456: ! 457: default: ! 458: fprintf(stderr, "egrep: unknown flag\n"); ! 459: continue; ! 460: } ! 461: out: ! 462: if (argc<=0) ! 463: exit(2); ! 464: if (fflag) { ! 465: if (freopen(fname = *argv, "r", stdin) == NULL) { ! 466: fprintf(stderr, "egrep: can't open %s\n", fname); ! 467: exit(2); ! 468: } ! 469: } ! 470: else input = *argv; ! 471: argc--; ! 472: argv++; ! 473: ! 474: yyparse(); ! 475: ! 476: cfoll(line-1); ! 477: cgotofn(); ! 478: nfile = argc; ! 479: if (argc<=0) { ! 480: if (lflag) exit(1); ! 481: execute(0); ! 482: } ! 483: else while (--argc >= 0) { ! 484: execute(*argv); ! 485: argv++; ! 486: } ! 487: exit(nsucc == 0); ! 488: } ! 489: ! 490: execute(file) ! 491: char *file; ! 492: { ! 493: register char *p; ! 494: register cstat; ! 495: register ccount; ! 496: char buf[1024]; ! 497: char *nlp; ! 498: int istat; ! 499: if (file) { ! 500: if ((f = open(file, 0)) < 0) { ! 501: fprintf(stderr, "egrep: can't open %s\n", file); ! 502: exit(2); ! 503: } ! 504: } ! 505: else f = 0; ! 506: ccount = 0; ! 507: lnum = 1; ! 508: tln = 0; ! 509: blkno = 0; ! 510: p = buf; ! 511: nlp = p; ! 512: if ((ccount = read(f,p,512))<=0) goto done; ! 513: istat = cstat = gotofn[0]['\n']; ! 514: if (out[cstat]) goto found; ! 515: for (;;) { ! 516: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ ! 517: if (out[cstat]) { ! 518: found: for(;;) { ! 519: if (*p++ == '\n') { ! 520: if (vflag == 0) { ! 521: succeed: nsucc = 1; ! 522: if (cflag) tln++; ! 523: else if (sflag) ! 524: ; /* ugh */ ! 525: else if (lflag) { ! 526: printf("%s\n", file); ! 527: close(f); ! 528: return; ! 529: } ! 530: else { ! 531: if (nfile > 1 && hflag) printf("%s:", file); ! 532: if (bflag) printf("%d:", blkno); ! 533: if (nflag) printf("%ld:", lnum); ! 534: if (p <= nlp) { ! 535: while (nlp < &buf[1024]) putchar(*nlp++); ! 536: nlp = buf; ! 537: } ! 538: while (nlp < p) putchar(*nlp++); ! 539: } ! 540: } ! 541: lnum++; ! 542: nlp = p; ! 543: if ((out[(cstat=istat)]) == 0) goto brk2; ! 544: } ! 545: cfound: ! 546: if (--ccount <= 0) { ! 547: if (p <= &buf[512]) { ! 548: if ((ccount = read(f, p, 512)) <= 0) goto done; ! 549: } ! 550: else if (p == &buf[1024]) { ! 551: p = buf; ! 552: if ((ccount = read(f, p, 512)) <= 0) goto done; ! 553: } ! 554: else { ! 555: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; ! 556: } ! 557: blkno++; ! 558: } ! 559: } ! 560: } ! 561: if (*p++ == '\n') { ! 562: if (vflag) goto succeed; ! 563: else { ! 564: lnum++; ! 565: nlp = p; ! 566: if (out[(cstat=istat)]) goto cfound; ! 567: } ! 568: } ! 569: brk2: ! 570: if (--ccount <= 0) { ! 571: if (p <= &buf[512]) { ! 572: if ((ccount = read(f, p, 512)) <= 0) break; ! 573: } ! 574: else if (p == &buf[1024]) { ! 575: p = buf; ! 576: if ((ccount = read(f, p, 512)) <= 0) break; ! 577: } ! 578: else { ! 579: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; ! 580: } ! 581: blkno++; ! 582: } ! 583: } ! 584: done: close(f); ! 585: if (cflag) { ! 586: if (nfile > 1) ! 587: printf("%s:", file); ! 588: printf("%ld\n", tln); ! 589: } ! 590: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.