|
|
1.1 ! root 1: /* ! 2: * diction -- print all sentences containing one of default phrases ! 3: * ! 4: * status returns: ! 5: * 0 - ok, and some matches ! 6: * 1 - ok, but no matches ! 7: * 2 - some error ! 8: */ ! 9: ! 10: #include <stdio.h> ! 11: #include <ctype.h> ! 12: ! 13: #define MAXSIZ 6500 ! 14: #define QSIZE 650 ! 15: int linemsg; ! 16: long olcount; ! 17: long lcount; ! 18: struct words { ! 19: char inp; ! 20: char out; ! 21: struct words *nst; ! 22: struct words *link; ! 23: struct words *fail; ! 24: } w[MAXSIZ], *smax, *q; ! 25: ! 26: char table[128] = { ! 27: 0, 0, 0, 0, 0, 0, 0, 0, ! 28: 0, 0, ' ', 0, 0, 0, 0, 0, ! 29: 0, 0, 0, 0, 0, 0, 0, 0, ! 30: 0, 0, 0, 0, 0, 0, 0, 0, ! 31: ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', ! 32: ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', ! 33: '0', '1', '2', '3', '4', '5', '6', '7', ! 34: '8', '9', ' ', ' ', ' ', ' ', ' ', '.', ! 35: ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', ! 36: 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', ! 37: 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', ! 38: 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', ! 39: ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', ! 40: 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', ! 41: 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', ! 42: 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' ! 43: }; ! 44: int caps = 0; ! 45: int lineno = 0; ! 46: int fflag= 0; ! 47: int nflag = 1; /*use default file*/ ! 48: char *filename; ! 49: int mflg = 0; /*don't catch output*/ ! 50: int nfile; ! 51: int nsucc; ! 52: long nsent = 0; ! 53: long nhits = 0; ! 54: char *nlp; ! 55: char *begp, *endp; ! 56: int beg, last; ! 57: char *myst; ! 58: int myct = 0; ! 59: int oct = 0; ! 60: FILE *wordf; ! 61: FILE *mine; ! 62: FILE *fl; ! 63: char *listn; ! 64: int list = 0; ! 65: char *argptr; ! 66: long tl = 0; ! 67: long th = 0; ! 68: ! 69: main(argc, argv) ! 70: char *argv[]; ! 71: { ! 72: int sv; ! 73: char cc; ! 74: while (--argc > 0 && (++argv)[0][0]=='-') ! 75: switch (argv[0][1]) { ! 76: ! 77: case 'f': ! 78: fflag++; ! 79: filename = (++argv)[0]; ! 80: argc--; ! 81: continue; ! 82: ! 83: case 'n': ! 84: nflag = 0; ! 85: continue; ! 86: case 'd': ! 87: mflg=0; ! 88: continue; ! 89: case 'c': ! 90: caps++; ! 91: continue; ! 92: case 'l': ! 93: lineno++; ! 94: continue; ! 95: case 'A': /* for acro */ ! 96: for(cc='A';cc<='Z';cc++) ! 97: table[cc] = cc; ! 98: continue; ! 99: case 'o': /*to put hits to file*/ ! 100: listn = (++argv)[0]; ! 101: argc--; ! 102: list++; ! 103: if((fl=fopen(listn,"a"))== NULL){ ! 104: fprintf(stderr,"diction: can't open file %s\n", ! 105: listn); ! 106: exit(2); ! 107: } ! 108: continue; ! 109: default: ! 110: fprintf(stderr, "diction: unknown flag\n"); ! 111: continue; ! 112: } ! 113: out: ! 114: if(nflag){ ! 115: wordf = fopen(DICT,"r"); ! 116: if(wordf == NULL){ ! 117: fprintf(stderr,"diction: can't open default dictionary\n"); ! 118: exit(2); ! 119: } ! 120: } ! 121: else { ! 122: wordf = fopen(filename,"r"); ! 123: if(wordf == NULL){ ! 124: fprintf(stderr,"diction: can't open %s\n",filename); ! 125: exit(2); ! 126: } ! 127: } ! 128: ! 129: #ifdef CATCH ! 130: if(fopen(CATCH,"r") != NULL){ ! 131: if((mine=fopen(CATCH,"a"))==NULL)mflg=0; ! 132: else mflg = 1; ! 133: } ! 134: #else ! 135: mflg = 0; ! 136: #endif ! 137: #ifdef MACS ! 138: if(caps){ ! 139: printf(".so "); ! 140: printf(MACS); ! 141: printf("\n"); ! 142: } ! 143: #endif ! 144: cgotofn(); ! 145: cfail(); ! 146: nfile = argc; ! 147: if (argc<=0) { ! 148: execute((char *)NULL); ! 149: } ! 150: else while (--argc >= 0) { ! 151: execute(*argv); ! 152: if(lineno){ ! 153: printf("file %s: number of lines %ld number of phrases found %ld\n", ! 154: *argv, lcount-1, nhits); ! 155: tl += lcount-1; ! 156: th += nhits; ! 157: sv = lcount-1; ! 158: lcount = nhits = 0; ! 159: } ! 160: argv++; ! 161: } ! 162: if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); ! 163: if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); ! 164: else if(tl != sv) ! 165: if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); ! 166: exit(nsucc == 0); ! 167: } ! 168: ! 169: execute(file) ! 170: char *file; ! 171: { ! 172: register char *p; ! 173: register struct words *c; ! 174: register ccount; ! 175: int count1; ! 176: char *beg1; ! 177: struct words *savc; ! 178: char *savp; ! 179: int savct; ! 180: int scr; ! 181: char buf[1024]; ! 182: int f; ! 183: int hit; ! 184: last = 0; ! 185: if (file) { ! 186: if ((f = open(file, 0)) < 0) { ! 187: fprintf(stderr, "diction: can't open %s\n", file); ! 188: exit(2); ! 189: } ! 190: } ! 191: else f = 0; ! 192: lcount = olcount = 1; ! 193: linemsg = 1; ! 194: ccount = 0; ! 195: count1 = -1; ! 196: p = buf; ! 197: nlp = p; ! 198: c = w; ! 199: oct = hit = 0; ! 200: savc = (struct words *) 0; ! 201: savp = (char *) 0; ! 202: for (;;) { ! 203: if(--ccount <= 0) { ! 204: if (p == &buf[1024]) p = buf; ! 205: if (p > &buf[512]) { ! 206: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; ! 207: } ! 208: else if ((ccount = read(f, p, 512)) <= 0) break; ! 209: if(caps && (count1 > 0)) ! 210: fwrite(beg1,sizeof(*beg1),count1,stdout); ! 211: count1 = ccount; ! 212: beg1 = p; ! 213: } ! 214: if(p == &buf[1024])p=buf; ! 215: nstate: ! 216: if (c->inp == table[*p]) { ! 217: c = c->nst; ! 218: } ! 219: else if (c->link != 0) { ! 220: c = c->link; ! 221: goto nstate; ! 222: } ! 223: else { ! 224: if(savp != 0){ ! 225: c=savc; ! 226: p=savp; ! 227: if(ccount > savct)ccount += savct; ! 228: else ccount = savct; ! 229: savc = (struct words *) 0; ! 230: savp = (char *) 0; ! 231: goto hadone; ! 232: } ! 233: c = c->fail; ! 234: if (c==0) { ! 235: c = w; ! 236: istate: ! 237: if (c->inp == table[*p]) { ! 238: c = c->nst; ! 239: } ! 240: else if (c->link != 0) { ! 241: c = c->link; ! 242: goto istate; ! 243: } ! 244: } ! 245: else goto nstate; ! 246: } ! 247: if(c->out){ ! 248: if((c->inp == table[*(p+1)]) && (c->nst != 0)){ ! 249: savp=p; ! 250: savc=c; ! 251: savct=ccount; ! 252: goto cont; ! 253: } ! 254: else if(c->link != 0){ ! 255: savc=c; ! 256: while((savc=savc->link)!= 0){ ! 257: if(savc->inp == table[*(p+1)]){ ! 258: savp=p; ! 259: savc=c; ! 260: savct=ccount; ! 261: goto cont; ! 262: } ! 263: } ! 264: } ! 265: hadone: ! 266: savc = (struct words *) 0; ! 267: savp = (char *) 0; ! 268: if(c->out == (char)(0377)){ ! 269: c=w; ! 270: goto nstate; ! 271: } ! 272: begp = p - (c->out); ! 273: if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); ! 274: endp=p; ! 275: if(mflg){ ! 276: if(begp-20 < &buf[0]){ ! 277: myst = &buf[1024]-20; ! 278: if(nlp < &buf[512])myst=nlp; ! 279: } ! 280: else myst = begp-20; ! 281: if(myst < nlp)myst = nlp; ! 282: beg = 0; ! 283: } ! 284: hit = 1; ! 285: nhits++; ! 286: if(*p == '\n')lcount++; ! 287: if (table[*p++] == '.') { ! 288: linemsg = 1; ! 289: if (--ccount <= 0) { ! 290: if (p == &buf[1024]) p = buf; ! 291: if (p > &buf[512]) { ! 292: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; ! 293: } ! 294: else if ((ccount = read(f, p, 512)) <= 0) break; ! 295: if(caps && (count1 > 0)) ! 296: fwrite(beg1,sizeof(*beg1),count1,stdout); ! 297: count1=ccount; ! 298: beg1=p; ! 299: } ! 300: } ! 301: succeed: nsucc = 1; ! 302: { ! 303: if (p <= nlp) { ! 304: outc(&buf[1024],file); ! 305: nlp = buf; ! 306: } ! 307: outc(p,file); ! 308: } ! 309: if(mflg)last=1; ! 310: nomatch: ! 311: nlp = p; ! 312: c = w; ! 313: begp = endp = 0; ! 314: continue; ! 315: } ! 316: cont: ! 317: if(*p == '\n')lcount++; ! 318: if (table[*p++] == '.'){ ! 319: if(hit){ ! 320: if(p <= nlp){ ! 321: outc(&buf[1024],file); ! 322: nlp = buf; ! 323: } ! 324: outc(p,file); ! 325: if(!caps)printf("\n\n"); ! 326: if(mflg && last){putc('\n',mine);myct = 0;} ! 327: } ! 328: linemsg = 1; ! 329: if(*p == '\n')olcount = lcount+1; ! 330: else ! 331: olcount=lcount; ! 332: last = 0; ! 333: hit = 0; ! 334: oct = 0; ! 335: nlp = p; ! 336: c = w; ! 337: begp = endp = 0; ! 338: nsent++; ! 339: } ! 340: } ! 341: if(caps && (count1 > 0)) ! 342: fwrite(beg1,sizeof(*beg1),count1,stdout); ! 343: close(f); ! 344: } ! 345: ! 346: getargc() ! 347: { ! 348: register c; ! 349: if (wordf){ ! 350: if((c=getc(wordf))==EOF){ ! 351: fclose(wordf); ! 352: if(nflag && fflag){ ! 353: nflag=0; ! 354: wordf=fopen(filename,"r"); ! 355: if(wordf == NULL){ ! 356: fprintf(stderr,"diction can't open %s\n",filename); ! 357: exit(2); ! 358: } ! 359: return(getc(wordf)); ! 360: } ! 361: else return(EOF); ! 362: } ! 363: else return(c); ! 364: } ! 365: if ((c = *argptr++) == '\0') ! 366: return(EOF); ! 367: return(c); ! 368: } ! 369: ! 370: cgotofn() { ! 371: register c; ! 372: register struct words *s; ! 373: register ct; ! 374: int neg; ! 375: ! 376: s = smax = w; ! 377: neg = ct = 0; ! 378: nword: for(;;) { ! 379: c = getargc(); ! 380: if(c == '~'){ ! 381: neg++; ! 382: c = getargc(); ! 383: } ! 384: if (c==EOF) ! 385: return; ! 386: if (c == '\n') { ! 387: if(neg)s->out = 0377; ! 388: else s->out = ct-1; ! 389: neg = ct = 0; ! 390: s = w; ! 391: } else { ! 392: loop: if (s->inp == c) { ! 393: s = s->nst; ! 394: ct++; ! 395: continue; ! 396: } ! 397: if (s->inp == 0) goto enter; ! 398: if (s->link == 0) { ! 399: if (smax >= &w[MAXSIZ - 1]) overflo(); ! 400: s->link = ++smax; ! 401: s = smax; ! 402: goto enter; ! 403: } ! 404: s = s->link; ! 405: goto loop; ! 406: } ! 407: } ! 408: ! 409: enter: ! 410: do { ! 411: s->inp = c; ! 412: ct++; ! 413: if (smax >= &w[MAXSIZ - 1]) overflo(); ! 414: s->nst = ++smax; ! 415: s = smax; ! 416: } while ((c = getargc()) != '\n' && c!=EOF); ! 417: if(neg)smax->out = 0377; ! 418: else smax->out = ct-1; ! 419: neg = ct = 0; ! 420: s = w; ! 421: if (c != EOF) ! 422: goto nword; ! 423: } ! 424: ! 425: overflo() { ! 426: fprintf(stderr, "wordlist too large\n"); ! 427: exit(2); ! 428: } ! 429: cfail() { ! 430: struct words *queue[QSIZE]; ! 431: struct words **front, **rear; ! 432: struct words *state; ! 433: int bstart; ! 434: register char c; ! 435: register struct words *s; ! 436: s = w; ! 437: front = rear = queue; ! 438: init: if ((s->inp) != 0) { ! 439: *rear++ = s->nst; ! 440: if (rear >= &queue[QSIZE - 1]) overflo(); ! 441: } ! 442: if ((s = s->link) != 0) { ! 443: goto init; ! 444: } ! 445: ! 446: while (rear!=front) { ! 447: s = *front; ! 448: if (front == &queue[QSIZE-1]) ! 449: front = queue; ! 450: else front++; ! 451: cloop: if ((c = s->inp) != 0) { ! 452: bstart=0; ! 453: *rear = (q = s->nst); ! 454: if (front < rear) ! 455: if (rear >= &queue[QSIZE-1]) ! 456: if (front == queue) overflo(); ! 457: else rear = queue; ! 458: else rear++; ! 459: else ! 460: if (++rear == front) overflo(); ! 461: state = s->fail; ! 462: floop: if (state == 0){ state = w;bstart=1;} ! 463: if (state->inp == c) { ! 464: qloop: q->fail = state->nst; ! 465: if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; ! 466: if((q=q->link) != 0)goto qloop; ! 467: } ! 468: else if((state->link) != 0){ ! 469: state = state->link; ! 470: goto floop; ! 471: } ! 472: else if((state = state->fail) != 0) ! 473: goto floop; ! 474: else if(bstart==0){state=0; goto floop;} ! 475: } ! 476: if ((s = s->link) != 0) ! 477: goto cloop; ! 478: } ! 479: /* for(s=w;s<=smax;s++) ! 480: printf("s %d ch %c out %d nst %d link %d fail %d\n",s, ! 481: s->inp,s->out,s->nst,s->link,s->fail); ! 482: */ ! 483: } ! 484: outc(addr,file) ! 485: char *addr; ! 486: char *file; ! 487: { ! 488: static inside = 0; ! 489: ! 490: if(!caps && lineno && linemsg){ ! 491: printf("beginning line %ld",olcount); ! 492: if(file != (char *)NULL)printf(" %s\n",file); ! 493: else printf("\n"); ! 494: linemsg = 0; ! 495: } ! 496: while(nlp < addr){ ! 497: if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ ! 498: oct=0; ! 499: putchar('\n'); ! 500: } ! 501: if(nlp == begp){ ! 502: if(caps)inside++; ! 503: else { ! 504: if(list)inside++; ! 505: if( oct >45){putchar('\n'); ! 506: oct=0; ! 507: } ! 508: if( oct==0 || table[*nlp] != ' '){ ! 509: printf("*["); ! 510: oct+=2; ! 511: } ! 512: else {printf(" *[");; ! 513: oct+=3; ! 514: } ! 515: } ! 516: if(mflg)putc('[',mine); ! 517: } ! 518: if(inside && caps){ ! 519: if(islower(*nlp))*nlp = toupper(*nlp); ! 520: } ! 521: else { ! 522: if(inside && list)putc(table[*nlp],fl); ! 523: if(!caps && *nlp == '\n')*nlp = ' '; ! 524: if(*nlp == ' ' && oct==0); ! 525: else if(!caps) {putchar(*nlp); oct++;} ! 526: } ! 527: if(nlp == endp){ ! 528: if(caps) ! 529: inside= 0; ! 530: else { ! 531: if(list && inside){ ! 532: inside = 0; ! 533: putc('\n',fl); ! 534: } ! 535: if(*(nlp) != ' '){printf("]*"); ! 536: oct+=2; ! 537: } ! 538: else {printf("]* "); ! 539: oct+=3; ! 540: } ! 541: if(oct >60){putchar('\n'); ! 542: oct=0; ! 543: } ! 544: } ! 545: if(mflg)putc(']',mine); ! 546: beg = 0; ! 547: } ! 548: if(mflg){ ! 549: if(nlp == myst)beg = 1; ! 550: if(beg || last){ ! 551: putc(*nlp,mine); ! 552: if(myct++ >= 72 || last == 20){ ! 553: putc('\n',mine); ! 554: if(last == 20)last=myct=0; ! 555: else myct=0; ! 556: } ! 557: if(last)last++; ! 558: } ! 559: } ! 560: nlp++; ! 561: } ! 562: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.