|
|
1.1 root 1: /*
2: * diction -- print all sentences containing one of default phrases
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9:
10: #include <stdio.h>
11: #include <ctype.h>
12:
#define MAXSIZ 6500	/* number of entries available in the state array w[] */
#define QSIZE 650	/* number of slots in the work queue used by cfail() */

int linemsg;		/* non-zero: outc() still owes a "beginning line" message */
long olcount, lcount;	/* line number to report for the sentence / current line number */

/*
 * One entry of the phrase-matching machine.  cgotofn() fills in inp, out,
 * nst and link while reading the phrase list; cfail() fills in fail.
 */
struct words {
	char inp;		/* character that selects this entry; 0 when unused */
	char out;		/* 0: no phrase ends here; 0377: "~" phrase; else length-1 */
	struct words *nst;	/* entry to move to when inp matches */
	struct words *link;	/* next alternative to try when inp does not match */
	struct words *fail;	/* entry to fall back to when no alternative matches */
};

struct words w[MAXSIZ];		/* the machine itself; w[0] is the start entry */
struct words *smax;		/* last entry of w[] handed out so far */
struct words *q;		/* scratch pointer used by cfail() */
25:
/*
 * Character classes for the 7-bit character set, 16 entries per row.
 * Letters of either case map to the lower-case letter, digits map to
 * themselves, '.', '!' and '?' map to '.', newline and the remaining
 * printable characters map to ' ', and every other control character
 * (tab included) maps to 0.  The -A option in main() overwrites the
 * upper-case entries so that capitals map to themselves.
 */
char table[128] = {
	/* 000-017: control characters; only newline (012) is a blank */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ' ', 0, 0, 0, 0, 0,
	/* 020-037: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 040-057: space and punctuation; '!' and '.' are class '.' */
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 060-077: digits, then punctuation; '?' is class '.' */
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* 0100-0117: at-sign, then A through O */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 0120-0137: P through Z, then punctuation */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* 0140-0157: grave accent, then a through o */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 0160-0177: p through z, then punctuation and DEL */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
/* Command-line options (set in main). */
int caps = 0;		/* -c: matched text is upper-cased and the input echoed */
int lineno = 0;		/* -l: report line numbers and per-file line counts */
int fflag= 0;		/* -f: a phrase file name was supplied */
int nflag = 1;		/* use default file; cleared by -n */
char *filename;		/* phrase file named by -f */
int mflg = 0;		/* don't catch output; set when the CATCH file can be appended */
int list = 0;		/* -o: matched phrases are also written to fl */
char *listn;		/* name of the -o file */

/* Run statistics. */
int nfile;		/* number of input file arguments, recorded by main */
int nsucc;		/* set to 1 once any phrase is found; drives the exit status */
long nsent = 0, nhits = 0;	/* sentences seen / phrases found */
long tl = 0, th = 0;	/* line and hit totals accumulated over all files */

/* Positions inside execute()'s input buffer, shared with outc(). */
char *nlp;		/* next character outc() has yet to emit */
char *begp, *endp;	/* first and last character of the phrase just matched */
char *myst;		/* where copying to the catch file starts */

/* Output bookkeeping. */
int beg, last;		/* catch-file copy in progress / trailing-context counter */
int myct = 0;		/* characters on the current catch-file line */
int oct = 0;		/* characters on the current stdout line */

/* Streams. */
FILE *wordf;		/* phrase list being read by getargc() */
FILE *mine;		/* catch file */
FILE *fl;		/* -o output file */

char *argptr;		/* in-memory phrase text read by getargc() when wordf is null */
68:
69: main(argc, argv)
70: char *argv[];
71: {
72: int sv;
73: char cc;
74: while (--argc > 0 && (++argv)[0][0]=='-')
75: switch (argv[0][1]) {
76:
77: case 'f':
78: fflag++;
79: filename = (++argv)[0];
80: argc--;
81: continue;
82:
83: case 'n':
84: nflag = 0;
85: continue;
86: case 'd':
87: mflg=0;
88: continue;
89: case 'c':
90: caps++;
91: continue;
92: case 'l':
93: lineno++;
94: continue;
95: case 'A': /* for acro */
96: for(cc='A';cc<='Z';cc++)
97: table[cc] = cc;
98: continue;
99: case 'o': /*to put hits to file*/
100: listn = (++argv)[0];
101: argc--;
102: list++;
103: if((fl=fopen(listn,"a"))== NULL){
104: fprintf(stderr,"diction: can't open file %s\n",
105: listn);
106: exit(2);
107: }
108: continue;
109: default:
110: fprintf(stderr, "diction: unknown flag\n");
111: continue;
112: }
113: out:
114: if(nflag){
115: wordf = fopen(DICT,"r");
116: if(wordf == NULL){
117: fprintf(stderr,"diction: can't open default dictionary\n");
118: exit(2);
119: }
120: }
121: else {
122: wordf = fopen(filename,"r");
123: if(wordf == NULL){
124: fprintf(stderr,"diction: can't open %s\n",filename);
125: exit(2);
126: }
127: }
128:
129: #ifdef CATCH
130: if(fopen(CATCH,"r") != NULL){
131: if((mine=fopen(CATCH,"a"))==NULL)mflg=0;
132: else mflg = 1;
133: }
134: #else
135: mflg = 0;
136: #endif
137: #ifdef MACS
138: if(caps){
139: printf(".so ");
140: printf(MACS);
141: printf("\n");
142: }
143: #endif
144: cgotofn();
145: cfail();
146: nfile = argc;
147: if (argc<=0) {
148: execute((char *)NULL);
149: }
150: else while (--argc >= 0) {
151: execute(*argv);
152: if(lineno){
153: printf("file %s: number of lines %ld number of phrases found %ld\n",
154: *argv, lcount-1, nhits);
155: tl += lcount-1;
156: th += nhits;
157: sv = lcount-1;
158: lcount = nhits = 0;
159: }
160: argv++;
161: }
162: if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
163: if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
164: else if(tl != sv)
165: if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
166: exit(nsucc == 0);
167: }
168:
169: execute(file)
170: char *file;
171: {
172: register char *p;
173: register struct words *c;
174: register ccount;
175: int count1;
176: char *beg1;
177: struct words *savc;
178: char *savp;
179: int savct;
180: int scr;
181: char buf[1024];
182: int f;
183: int hit;
184: last = 0;
185: if (file) {
186: if ((f = open(file, 0)) < 0) {
187: fprintf(stderr, "diction: can't open %s\n", file);
188: exit(2);
189: }
190: }
191: else f = 0;
192: lcount = olcount = 1;
193: linemsg = 1;
194: ccount = 0;
195: count1 = -1;
196: p = buf;
197: nlp = p;
198: c = w;
199: oct = hit = 0;
200: savc = (struct words *) 0;
201: savp = (char *) 0;
202: for (;;) {
203: if(--ccount <= 0) {
204: if (p == &buf[1024]) p = buf;
205: if (p > &buf[512]) {
206: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
207: }
208: else if ((ccount = read(f, p, 512)) <= 0) break;
209: if(caps && (count1 > 0))
210: fwrite(beg1,sizeof(*beg1),count1,stdout);
211: count1 = ccount;
212: beg1 = p;
213: }
214: if(p == &buf[1024])p=buf;
215: nstate:
216: if (c->inp == table[*p]) {
217: c = c->nst;
218: }
219: else if (c->link != 0) {
220: c = c->link;
221: goto nstate;
222: }
223: else {
224: if(savp != 0){
225: c=savc;
226: p=savp;
227: if(ccount > savct)ccount += savct;
228: else ccount = savct;
229: savc = (struct words *) 0;
230: savp = (char *) 0;
231: goto hadone;
232: }
233: c = c->fail;
234: if (c==0) {
235: c = w;
236: istate:
237: if (c->inp == table[*p]) {
238: c = c->nst;
239: }
240: else if (c->link != 0) {
241: c = c->link;
242: goto istate;
243: }
244: }
245: else goto nstate;
246: }
247: if(c->out){
248: if((c->inp == table[*(p+1)]) && (c->nst != 0)){
249: savp=p;
250: savc=c;
251: savct=ccount;
252: goto cont;
253: }
254: else if(c->link != 0){
255: savc=c;
256: while((savc=savc->link)!= 0){
257: if(savc->inp == table[*(p+1)]){
258: savp=p;
259: savc=c;
260: savct=ccount;
261: goto cont;
262: }
263: }
264: }
265: hadone:
266: savc = (struct words *) 0;
267: savp = (char *) 0;
268: if(c->out == (char)(0377)){
269: c=w;
270: goto nstate;
271: }
272: begp = p - (c->out);
273: if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
274: endp=p;
275: if(mflg){
276: if(begp-20 < &buf[0]){
277: myst = &buf[1024]-20;
278: if(nlp < &buf[512])myst=nlp;
279: }
280: else myst = begp-20;
281: if(myst < nlp)myst = nlp;
282: beg = 0;
283: }
284: hit = 1;
285: nhits++;
286: if(*p == '\n')lcount++;
287: if (table[*p++] == '.') {
288: linemsg = 1;
289: if (--ccount <= 0) {
290: if (p == &buf[1024]) p = buf;
291: if (p > &buf[512]) {
292: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
293: }
294: else if ((ccount = read(f, p, 512)) <= 0) break;
295: if(caps && (count1 > 0))
296: fwrite(beg1,sizeof(*beg1),count1,stdout);
297: count1=ccount;
298: beg1=p;
299: }
300: }
301: succeed: nsucc = 1;
302: {
303: if (p <= nlp) {
304: outc(&buf[1024],file);
305: nlp = buf;
306: }
307: outc(p,file);
308: }
309: if(mflg)last=1;
310: nomatch:
311: nlp = p;
312: c = w;
313: begp = endp = 0;
314: continue;
315: }
316: cont:
317: if(*p == '\n')lcount++;
318: if (table[*p++] == '.'){
319: if(hit){
320: if(p <= nlp){
321: outc(&buf[1024],file);
322: nlp = buf;
323: }
324: outc(p,file);
325: if(!caps)printf("\n\n");
326: if(mflg && last){putc('\n',mine);myct = 0;}
327: }
328: linemsg = 1;
329: if(*p == '\n')olcount = lcount+1;
330: else
331: olcount=lcount;
332: last = 0;
333: hit = 0;
334: oct = 0;
335: nlp = p;
336: c = w;
337: begp = endp = 0;
338: nsent++;
339: }
340: }
341: if(caps && (count1 > 0))
342: fwrite(beg1,sizeof(*beg1),count1,stdout);
343: close(f);
344: }
345:
346: getargc()
347: {
348: register c;
349: if (wordf){
350: if((c=getc(wordf))==EOF){
351: fclose(wordf);
352: if(nflag && fflag){
353: nflag=0;
354: wordf=fopen(filename,"r");
355: if(wordf == NULL){
356: fprintf(stderr,"diction can't open %s\n",filename);
357: exit(2);
358: }
359: return(getc(wordf));
360: }
361: else return(EOF);
362: }
363: else return(c);
364: }
365: if ((c = *argptr++) == '\0')
366: return(EOF);
367: return(c);
368: }
369:
370: cgotofn() {
371: register c;
372: register struct words *s;
373: register ct;
374: int neg;
375:
376: s = smax = w;
377: neg = ct = 0;
378: nword: for(;;) {
379: c = getargc();
380: if(c == '~'){
381: neg++;
382: c = getargc();
383: }
384: if (c==EOF)
385: return;
386: if (c == '\n') {
387: if(neg)s->out = 0377;
388: else s->out = ct-1;
389: neg = ct = 0;
390: s = w;
391: } else {
392: loop: if (s->inp == c) {
393: s = s->nst;
394: ct++;
395: continue;
396: }
397: if (s->inp == 0) goto enter;
398: if (s->link == 0) {
399: if (smax >= &w[MAXSIZ - 1]) overflo();
400: s->link = ++smax;
401: s = smax;
402: goto enter;
403: }
404: s = s->link;
405: goto loop;
406: }
407: }
408:
409: enter:
410: do {
411: s->inp = c;
412: ct++;
413: if (smax >= &w[MAXSIZ - 1]) overflo();
414: s->nst = ++smax;
415: s = smax;
416: } while ((c = getargc()) != '\n' && c!=EOF);
417: if(neg)smax->out = 0377;
418: else smax->out = ct-1;
419: neg = ct = 0;
420: s = w;
421: if (c != EOF)
422: goto nword;
423: }
424:
/*
 * overflo -- report that the phrase list does not fit and exit with
 * status 2.  Does not return.
 */
int
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
429: cfail() {
430: struct words *queue[QSIZE];
431: struct words **front, **rear;
432: struct words *state;
433: int bstart;
434: register char c;
435: register struct words *s;
436: s = w;
437: front = rear = queue;
438: init: if ((s->inp) != 0) {
439: *rear++ = s->nst;
440: if (rear >= &queue[QSIZE - 1]) overflo();
441: }
442: if ((s = s->link) != 0) {
443: goto init;
444: }
445:
446: while (rear!=front) {
447: s = *front;
448: if (front == &queue[QSIZE-1])
449: front = queue;
450: else front++;
451: cloop: if ((c = s->inp) != 0) {
452: bstart=0;
453: *rear = (q = s->nst);
454: if (front < rear)
455: if (rear >= &queue[QSIZE-1])
456: if (front == queue) overflo();
457: else rear = queue;
458: else rear++;
459: else
460: if (++rear == front) overflo();
461: state = s->fail;
462: floop: if (state == 0){ state = w;bstart=1;}
463: if (state->inp == c) {
464: qloop: q->fail = state->nst;
465: if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
466: if((q=q->link) != 0)goto qloop;
467: }
468: else if((state->link) != 0){
469: state = state->link;
470: goto floop;
471: }
472: else if((state = state->fail) != 0)
473: goto floop;
474: else if(bstart==0){state=0; goto floop;}
475: }
476: if ((s = s->link) != 0)
477: goto cloop;
478: }
479: /* for(s=w;s<=smax;s++)
480: printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
481: s->inp,s->out,s->nst,s->link,s->fail);
482: */
483: }
484: outc(addr,file)
485: char *addr;
486: char *file;
487: {
488: static inside = 0;
489:
490: if(!caps && lineno && linemsg){
491: printf("beginning line %ld",olcount);
492: if(file != (char *)NULL)printf(" %s\n",file);
493: else printf("\n");
494: linemsg = 0;
495: }
496: while(nlp < addr){
497: if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
498: oct=0;
499: putchar('\n');
500: }
501: if(nlp == begp){
502: if(caps)inside++;
503: else {
504: if(list)inside++;
505: if( oct >45){putchar('\n');
506: oct=0;
507: }
508: if( oct==0 || table[*nlp] != ' '){
509: printf("*[");
510: oct+=2;
511: }
512: else {printf(" *[");;
513: oct+=3;
514: }
515: }
516: if(mflg)putc('[',mine);
517: }
518: if(inside && caps){
519: if(islower(*nlp))*nlp = toupper(*nlp);
520: }
521: else {
522: if(inside && list)putc(table[*nlp],fl);
523: if(!caps && *nlp == '\n')*nlp = ' ';
524: if(*nlp == ' ' && oct==0);
525: else if(!caps) {putchar(*nlp); oct++;}
526: }
527: if(nlp == endp){
528: if(caps)
529: inside= 0;
530: else {
531: if(list && inside){
532: inside = 0;
533: putc('\n',fl);
534: }
535: if(*(nlp) != ' '){printf("]*");
536: oct+=2;
537: }
538: else {printf("]* ");
539: oct+=3;
540: }
541: if(oct >60){putchar('\n');
542: oct=0;
543: }
544: }
545: if(mflg)putc(']',mine);
546: beg = 0;
547: }
548: if(mflg){
549: if(nlp == myst)beg = 1;
550: if(beg || last){
551: putc(*nlp,mine);
552: if(myct++ >= 72 || last == 20){
553: putc('\n',mine);
554: if(last == 20)last=myct=0;
555: else myct=0;
556: }
557: if(last)last++;
558: }
559: }
560: nlp++;
561: }
562: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.