|
|
/*
 * diction -- print all sentences containing one of default phrases
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 *
 * Options handled by main():
 *	-f file	read phrases from `file` (in addition to the default
 *		dictionary, unless -n is also given)
 *	-n	do not read the default dictionary
 *	-d	accepted and ignored
 *
 * Phrases are read one per line.  A phrase starting with '~' is marked
 * with the special output value NEGOUT; when the matcher reaches such a
 * phrase it restarts from the root instead of reporting a hit.
 */

#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * DICT is the path of the default phrase file.  This file never defines
 * it, so it is expected to come from the build (-DDICT=\"...\").
 * NOTE(review): the fallback below is an assumption about the historical
 * install location -- confirm against the Makefile before relying on it.
 */
#ifndef DICT
#define DICT "/usr/lib/dict.d"
#endif

#define MAXSIZ 6500		/* number of automaton nodes available */
#define QSIZE 650		/* capacity of the work queue in cfail() */
#define BUFSZ 1024		/* size of the circular input buffer */
#define NEGOUT ((char)0377)	/* `out` value marking a '~' phrase */

/*
 * One node of the phrase automaton.
 *	inp	character that must match to follow `nst`
 *	out	0 if no phrase ends here; otherwise the phrase length
 *		minus one (see cgotofn), or NEGOUT for a '~' phrase
 *	nst	node reached when `inp` matches
 *	link	next alternative to try at the same position
 *	fail	node to fall back to on mismatch (filled in by cfail)
 */
struct words {
	char	inp;
	char	out;
	struct	words *nst;
	struct	words *link;
	struct	words *fail;
} w[MAXSIZ], *smax, *q;

int	fflag;
int	nflag = 1;	/*use default file*/
char	*filename;
int	nfile;
int	nsucc;
long	nsent = 0;
long	nhits = 0;
char	*nlp;
char	*begp, *endp;
int	oct = 0;
FILE	*wordf;
char	*argptr;

void	execute(char *file);
int	getargc(void);
void	cgotofn(void);
void	overflo(void);
void	cfail(void);
void	convert(char *p, int ccount);
void	outc(char *addr);

/*
 * Parse the flags, open the phrase file, build the automaton, then scan
 * each named file (or standard input when none is named).
 * Returns 0 if any phrase matched, 1 otherwise; errors exit with 2.
 */
int
main(int argc, char **argv)
{
	while (--argc > 0 && (++argv)[0][0] == '-') {
		switch (argv[0][1]) {

		case 'f':
			/* The file name is the NEXT argument; it must exist. */
			if (argc < 2) {
				fprintf(stderr, "diction: -f requires a file name\n");
				exit(2);
			}
			fflag++;
			filename = *++argv;
			argc--;
			continue;

		case 'n':
			nflag = 0;
			continue;
		case 'd':
			continue;
		default:
			fprintf(stderr, "diction: unknown flag\n");
			continue;
		}
	}

	if (nflag) {
		wordf = fopen(DICT, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open default dictionary\n");
			exit(2);
		}
	} else {
		/* -n without -f leaves no phrase source at all. */
		if (filename == NULL) {
			fprintf(stderr, "diction: no phrase file given\n");
			exit(2);
		}
		wordf = fopen(filename, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open %s\n", filename);
			exit(2);
		}
	}

	cgotofn();
	cfail();
	nfile = argc;
	if (argc <= 0) {
		execute((char *)NULL);
	} else {
		while (--argc >= 0) {
			execute(*argv);
			argv++;
		}
	}
	printf("number of sentences %ld number of hits %ld\n", nsent, nhits);
	return nsucc == 0;
}

/*
 * Read the next chunk of input into the circular buffer at *pp (wrapping
 * *pp to the start of `buf` first if it sits at the end) and normalise
 * it with convert().  Returns the read() result: the number of bytes
 * obtained, or <= 0 on end of file / error.
 */
static int
refill(int fd, char *buf, char **pp)
{
	char *p = *pp;
	int n;

	if (p == &buf[BUFSZ])
		p = buf;
	if (p > &buf[BUFSZ / 2])
		n = (int)read(fd, p, (size_t)(&buf[BUFSZ] - p));
	else
		n = (int)read(fd, p, BUFSZ / 2);
	*pp = p;
	if (n > 0)
		convert(p, n);
	return n;
}

/*
 * Scan one input file (standard input when `file` is NULL) and print
 * every sentence in which a phrase matched, with the match bracketed.
 * Exits with status 2 if the file cannot be opened.
 *
 * The matching loop is kept statement-for-statement from the original;
 * only the refill code was factored out and the buffer arithmetic made
 * well defined.
 */
void
execute(char *file)
{
	char *p;
	struct words *c;
	int ccount;
	struct words *savc;
	char *savp;
	int savct = 0;
	long off;
	/*
	 * One spare, always-zero byte follows the BUFSZ data bytes so the
	 * one-character lookahead *(p+1) never reads outside the array;
	 * zero-filling keeps lookahead into unread slots deterministic.
	 */
	char buf[BUFSZ + 1] = {0};
	int f;
	int hit;

	if (file) {
		if ((f = open(file, O_RDONLY)) < 0) {
			fprintf(stderr, "diction: can't open %s\n", file);
			exit(2);
		}
	} else {
		f = 0;
	}
	ccount = 0;
	p = buf;
	nlp = p;
	c = w;
	oct = hit = 0;
	savc = NULL;
	savp = NULL;
	for (;;) {
		if (--ccount <= 0) {
			if ((ccount = refill(f, buf, &p)) <= 0)
				break;
		}
		if (p == &buf[BUFSZ])
			p = buf;
	nstate:
		if (c->inp == *p) {
			c = c->nst;
		} else if (c->link != 0) {
			c = c->link;
			goto nstate;
		} else {
			if (savp != 0) {
				/*
				 * A longer match was being attempted past a
				 * remembered shorter one and failed: go back
				 * and report the remembered match.
				 */
				c = savc;
				p = savp;
				/* NOTE(review): count restore kept exactly as in the original. */
				if (ccount > savct)
					ccount += savct;
				else
					ccount = savct;
				savc = NULL;
				savp = NULL;
				goto hadone;
			}
			c = c->fail;
			if (c == 0) {
				c = w;
			istate:
				if (c->inp == *p) {
					c = c->nst;
				} else if (c->link != 0) {
					c = c->link;
					goto istate;
				}
			} else {
				goto nstate;
			}
		}
		if (c->out) {
			/*
			 * A phrase ends here.  If the next character could
			 * extend the match, remember this position and keep
			 * going instead of reporting immediately.
			 */
			if ((c->inp == *(p + 1)) && (c->nst != 0)) {
				savp = p;
				savc = c;
				savct = ccount;
				goto cont;
			} else if (c->link != 0) {
				savc = c;
				while ((savc = savc->link) != 0) {
					if (savc->inp == *(p + 1)) {
						savp = p;
						savc = c;
						savct = ccount;
						goto cont;
					}
				}
			}
		hadone:
			savc = NULL;
			savp = NULL;
			if (c->out == NEGOUT) {
				c = w;
				goto nstate;
			}
			/*
			 * Start of the match is c->out characters before p,
			 * wrapping around the circular buffer.  Computed on
			 * offsets so no pointer before `buf` is ever formed.
			 */
			off = (long)(p - buf) - c->out;
			if (off < 0)
				off += BUFSZ;
			begp = buf + off;
			endp = p;
			hit = 1;
			nhits++;
			if (*p++ == '.') {
				if (--ccount <= 0) {
					if ((ccount = refill(f, buf, &p)) <= 0)
						break;
				}
			}
			nsucc = 1;
			if (p <= nlp) {
				/* Pending text wraps: flush to buffer end first. */
				outc(&buf[BUFSZ]);
				nlp = buf;
			}
			outc(p);
			nlp = p;
			c = w;
			begp = endp = 0;
			continue;
		}
	cont:
		if (*p++ == '.') {
			/* End of sentence: print the rest of it if it had a hit. */
			if (hit) {
				if (p <= nlp) {
					outc(&buf[BUFSZ]);
					nlp = buf;
				}
				outc(p);
				putchar('\n');
				putchar('\n');
			}
			hit = 0;
			oct = 0;
			nlp = p;
			c = w;
			begp = endp = 0;
		}
	}
	close(f);
}

/*
 * Return the next character of phrase input, or EOF when exhausted.
 * While `wordf` is set, characters come from that stream; when the
 * default dictionary ends and -f was given, reading continues with the
 * user's file.  Otherwise characters come from the string `argptr`.
 */
int
getargc(void)
{
	int c;

	if (wordf) {
		if ((c = getc(wordf)) != EOF)
			return c;
		fclose(wordf);
		if (nflag && fflag) {
			nflag = 0;
			wordf = fopen(filename, "r");
			if (wordf == NULL) {
				fprintf(stderr, "can't open %s\n", filename);
				exit(2);
			}
			return getc(wordf);
		}
		return EOF;
	}
	/* Nothing in this file sets argptr; treat an unset pointer as empty. */
	if (argptr == NULL)
		return EOF;
	if ((c = *argptr++) == '\0')
		return EOF;
	return c;
}

/*
 * Build the goto part of the automaton in w[] from the phrase input:
 * one path of nodes per phrase, sharing common prefixes.  The node that
 * ends a phrase gets out = (length - 1), or NEGOUT for a '~' phrase.
 * Calls overflo() if w[] is exhausted.
 */
void
cgotofn(void)
{
	int c;
	struct words *s;
	int ct;
	int neg;

	s = smax = w;
	neg = ct = 0;
nword:
	for (;;) {
		c = getargc();
		if (c == '~') {
			neg++;
			c = getargc();
		}
		if (c == EOF)
			return;
		if (c == '\n') {
			/* Phrase is a prefix of one already entered. */
			if (neg)
				s->out = NEGOUT;
			else
				s->out = ct - 1;
			neg = ct = 0;
			s = w;
		} else {
		loop:
			if (s->inp == c) {
				s = s->nst;
				ct++;
				continue;
			}
			if (s->inp == 0)
				goto enter;
			if (s->link == 0) {
				if (smax >= &w[MAXSIZ - 1])
					overflo();
				s->link = ++smax;
				s = smax;
				goto enter;
			}
			s = s->link;
			goto loop;
		}
	}

enter:
	/* No existing path continues: append fresh nodes for the rest of the line. */
	do {
		s->inp = c;
		ct++;
		if (smax >= &w[MAXSIZ - 1])
			overflo();
		s->nst = ++smax;
		s = smax;
	} while ((c = getargc()) != '\n' && c != EOF);
	if (neg)
		smax->out = NEGOUT;
	else
		smax->out = ct - 1;
	neg = ct = 0;
	s = w;
	if (c != EOF)
		goto nword;
}

/* Report that the phrase list does not fit and exit with status 2. */
void
overflo(void)
{
	fprintf(stderr, "wordlist too large\n");
	exit(2);
}

/*
 * Fill in the `fail` links of the automaton built by cgotofn(), visiting
 * nodes through a circular queue seeded with the successors of the root
 * alternatives.  A node whose fail target ends a phrase inherits that
 * target's `out` if it has none of its own.  Calls overflo() if the
 * queue fills up.
 */
void
cfail(void)
{
	struct words *queue[QSIZE];
	struct words **front, **rear;
	struct words *state;
	int bstart;
	char c;
	struct words *s;

	s = w;
	front = rear = queue;
init:
	if ((s->inp) != 0) {
		*rear++ = s->nst;
		if (rear >= &queue[QSIZE - 1])
			overflo();
	}
	if ((s = s->link) != 0) {
		goto init;
	}

	while (rear != front) {
		s = *front;
		if (front == &queue[QSIZE - 1])
			front = queue;
		else
			front++;
	cloop:
		if ((c = s->inp) != 0) {
			bstart = 0;
			*rear = (q = s->nst);
			/* Advance rear around the ring, detecting a full queue. */
			if (front < rear) {
				if (rear >= &queue[QSIZE - 1]) {
					if (front == queue)
						overflo();
					else
						rear = queue;
				} else {
					rear++;
				}
			} else {
				if (++rear == front)
					overflo();
			}
			state = s->fail;
		floop:
			/* bstart records that the search has restarted from the root. */
			if (state == 0) {
				state = w;
				bstart = 1;
			}
			if (state->inp == c) {
			qloop:
				q->fail = state->nst;
				if ((state->nst)->out != 0 && q->out == 0)
					q->out = (state->nst)->out;
				if ((q = q->link) != 0)
					goto qloop;
			} else if ((state = state->link) != 0) {
				goto floop;
			} else if (bstart == 0) {
				state = 0;
				goto floop;
			}
		}
		if ((s = s->link) != 0)
			goto cloop;
	}
}

/*
 * Normalise `ccount` characters starting at `p`, in place:
 *	upper case        -> lower case
 *	white space       -> ' '
 *	'.', '?' and '!'  -> '.'  (and nsent is incremented)
 *	other punctuation -> ' '
 */
void
convert(char *p, int ccount)
{
	int ct;
	char *pt;

	for (pt = p, ct = ccount; --ct >= 0; pt++) {
		/* <ctype.h> functions require an unsigned char value. */
		unsigned char ch = (unsigned char)*pt;

		if (isupper(ch)) {
			*pt = (char)tolower(ch);
		} else if (isspace(ch)) {
			*pt = ' ';
		} else if (*pt == '.' || *pt == '?' || *pt == '!') {
			*pt = '.';
			nsent++;
		} else if (ispunct(ch)) {
			*pt = ' ';
		}
	}
}

/*
 * Print the characters from `nlp` up to (not including) `addr`,
 * advancing `nlp` as it goes.  '[' is written before the character at
 * `begp` and ']' after the one at `endp`.  Once more than 70 characters
 * have gone out since the last break, a newline is inserted before the
 * next space that is not at `begp` or `endp`.
 */
void
outc(char *addr)
{
	while (nlp < addr) {
		if (oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp) {
			oct = 0;
			putchar('\n');
		}
		if (nlp == begp) {
			putchar('[');
		}
		putchar(*nlp);
		if (nlp == endp) {
			putchar(']');
		}
		nlp++;
	}
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.