|
|
#ifndef lint
/* SCCS identification string; left out when the source is run through lint. */
static char sccsid[] = "@(#)diction.c 4.2 (Berkeley) 82/11/06";
#endif /* not lint */
4:
5: /*
6: * diction -- print all sentences containing one of default phrases
7: *
8: * status returns:
9: * 0 - ok, and some matches
10: * 1 - ok, but no matches
11: * 2 - some error
12: */
13:
14: #include <stdio.h>
15: #include <ctype.h>
16:
#define MAXSIZ	6500	/* number of automaton nodes available in w[] */
#define QSIZE	650	/* capacity of the work queue used by cfail() */

int	linemsg;	/* nonzero: outc() still owes a "beginning line" message */
long	olcount;	/* line number reported in that message */
long	lcount;		/* running line counter for the file being scanned */

/*
 * One node of the phrase-matching automaton built by cgotofn() and
 * completed by cfail().
 */
struct words {
	char	inp;		/* character that is matched at this node */
	char	out;		/* nonzero once a phrase ends at this node:
				 * phrase length - 1, or 0377 for '~' phrases */
	struct words *nst;	/* node entered after inp has matched */
	struct words *link;	/* next alternative node at the same level */
	struct words *fail;	/* failure link, filled in by cfail() */
};

struct words w[MAXSIZ];	/* node pool; w[0] is the start state */
struct words *smax;	/* highest node of w[] handed out so far */
struct words *q;	/* scratch node pointer used by cfail() */
29:
/*
 * Character classification / folding table, indexed by a 7-bit character.
 *	0	character that has no class (most control characters)
 *	' '	word separator (newline, blank, most punctuation)
 *	'.'	sentence terminator ('!', '.', '?')
 *	other	the character itself, with letters folded to lower case
 */
char table[128] = {
	/* 000-007 */	0,   0,   0,   0,   0,   0,   0,   0,
	/* 010-017 */	0,   0,   ' ', 0,   0,   0,   0,   0,
	/* 020-027 */	0,   0,   0,   0,   0,   0,   0,   0,
	/* 030-037 */	0,   0,   0,   0,   0,   0,   0,   0,
	/* sp ! " # $ % & ' */
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	/* ( ) * + , - . / */
	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 0 - 7 */
	'0', '1', '2', '3', '4', '5', '6', '7',
	/* 8 9 : ; < = > ? */
	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* @ A - G */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* H - O */
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* P - W */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* X Y Z and the five characters after Z */
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* ` a - g */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* h - o */
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* p - w */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* x y z and the five characters after z */
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
/* command-line options */
int	caps = 0;	/* -c given */
int	lineno = 0;	/* -l given: report line numbers and per-file counts */
int	fflag;		/* -f given: filename names a phrase file */
int	nflag = 1;	/* use default file */
char	*filename;	/* argument of -f */
int	mflg = 0;	/* don't catch output */

/* counters */
int	nfile;		/* number of file arguments left after the options */
int	nsucc;		/* set to 1 once any phrase has been found */
long	nsent = 0;	/* sentences seen */
long	nhits = 0;	/* phrases found */

/* positions inside execute()'s input buffer, shared with outc() */
char	*nlp;		/* next character outc() has yet to emit */
char	*begp;		/* first character of the phrase being reported */
char	*endp;		/* last character of the phrase being reported */

/* bookkeeping for the copy written to the catch file "mine" */
int	beg;
int	last;
char	*myst;
int	myct = 0;

int	oct = 0;	/* characters printed so far on the current output line */

FILE	*wordf;		/* phrase dictionary being read by getargc() */
FILE	*mine;		/* catch file, written only while mflg is set */
char	*argptr;	/* phrase text read by getargc() when wordf is null */

/* totals accumulated over all files in -l mode */
long	tl = 0;		/* lines */
long	th = 0;		/* phrases found */
69:
70: main(argc, argv)
71: char *argv[];
72: {
73: int sv;
74: while (--argc > 0 && (++argv)[0][0]=='-')
75: switch (argv[0][1]) {
76:
77: case 'f':
78: fflag++;
79: filename = (++argv)[0];
80: argc--;
81: continue;
82:
83: case 'n':
84: nflag = 0;
85: continue;
86: case 'd':
87: mflg=0;
88: continue;
89: case 'c':
90: caps++;
91: continue;
92: case 'l':
93: lineno++;
94: continue;
95: default:
96: fprintf(stderr, "diction: unknown flag\n");
97: continue;
98: }
99: out:
100: if(nflag){
101: wordf = fopen(DICT,"r");
102: if(wordf == NULL){
103: fprintf(stderr,"diction: can't open default dictionary\n");
104: exit(2);
105: }
106: }
107: else {
108: wordf = fopen(filename,"r");
109: if(wordf == NULL){
110: fprintf(stderr,"diction: can't open %s\n",filename);
111: exit(2);
112: }
113: }
114:
115: #ifdef CATCH
116: if(fopen(CATCH,"r") != NULL)
117: if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
118: #endif
119: #ifdef MACS
120: if(caps){
121: printf(".so ");
122: printf(MACS);
123: printf("\n");
124: }
125: #endif
126: cgotofn();
127: cfail();
128: nfile = argc;
129: if (argc<=0) {
130: execute((char *)NULL);
131: }
132: else while (--argc >= 0) {
133: execute(*argv);
134: if(lineno){
135: printf("file %s: number of lines %ld number of phrases found %ld\n",
136: *argv, lcount-1, nhits);
137: tl += lcount-1;
138: th += nhits;
139: sv = lcount-1;
140: lcount = nhits = 0;
141: }
142: argv++;
143: }
144: if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
145: if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
146: else if(tl != sv)
147: if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
148: exit(nsucc == 0);
149: }
150:
151: execute(file)
152: char *file;
153: {
154: register char *p;
155: register struct words *c;
156: register ccount;
157: int count1;
158: char *beg1;
159: struct words *savc;
160: char *savp;
161: int savct;
162: int scr;
163: char buf[1024];
164: int f;
165: int hit;
166: last = 0;
167: if (file) {
168: if ((f = open(file, 0)) < 0) {
169: fprintf(stderr, "diction: can't open %s\n", file);
170: exit(2);
171: }
172: }
173: else f = 0;
174: lcount = olcount = 1;
175: linemsg = 1;
176: ccount = 0;
177: count1 = -1;
178: p = buf;
179: nlp = p;
180: c = w;
181: oct = hit = 0;
182: savc = (struct words *) 0;
183: savp = (char *) 0;
184: for (;;) {
185: if(--ccount <= 0) {
186: if (p == &buf[1024]) p = buf;
187: if (p > &buf[512]) {
188: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
189: }
190: else if ((ccount = read(f, p, 512)) <= 0) break;
191: if(caps && (count1 > 0))
192: fwrite(beg1,sizeof(*beg1),count1,stdout);
193: count1 = ccount;
194: beg1 = p;
195: }
196: if(p == &buf[1024])p=buf;
197: nstate:
198: if (c->inp == table[*p]) {
199: c = c->nst;
200: }
201: else if (c->link != 0) {
202: c = c->link;
203: goto nstate;
204: }
205: else {
206: if(savp != 0){
207: c=savc;
208: p=savp;
209: if(ccount > savct)ccount += savct;
210: else ccount = savct;
211: savc = (struct words *) 0;
212: savp = (char *) 0;
213: goto hadone;
214: }
215: c = c->fail;
216: if (c==0) {
217: c = w;
218: istate:
219: if (c->inp == table[*p]) {
220: c = c->nst;
221: }
222: else if (c->link != 0) {
223: c = c->link;
224: goto istate;
225: }
226: }
227: else goto nstate;
228: }
229: if(c->out){
230: if((c->inp == table[*(p+1)]) && (c->nst != 0)){
231: savp=p;
232: savc=c;
233: savct=ccount;
234: goto cont;
235: }
236: else if(c->link != 0){
237: savc=c;
238: while((savc=savc->link)!= 0){
239: if(savc->inp == table[*(p+1)]){
240: savp=p;
241: savc=c;
242: savct=ccount;
243: goto cont;
244: }
245: }
246: }
247: hadone:
248: savc = (struct words *) 0;
249: savp = (char *) 0;
250: if(c->out == (char)(0377)){
251: c=w;
252: goto nstate;
253: }
254: begp = p - (c->out);
255: if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
256: endp=p;
257: if(mflg){
258: if(begp-20 < &buf[0]){
259: myst = &buf[1024]-20;
260: if(nlp < &buf[512])myst=nlp;
261: }
262: else myst = begp-20;
263: if(myst < nlp)myst = nlp;
264: beg = 0;
265: }
266: hit = 1;
267: nhits++;
268: if(*p == '\n')lcount++;
269: if (table[*p++] == '.') {
270: linemsg = 1;
271: if (--ccount <= 0) {
272: if (p == &buf[1024]) p = buf;
273: if (p > &buf[512]) {
274: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
275: }
276: else if ((ccount = read(f, p, 512)) <= 0) break;
277: if(caps && (count1 > 0))
278: fwrite(beg1,sizeof(*beg1),count1,stdout);
279: count1=ccount;
280: beg1=p;
281: }
282: }
283: succeed: nsucc = 1;
284: {
285: if (p <= nlp) {
286: outc(&buf[1024],file);
287: nlp = buf;
288: }
289: outc(p,file);
290: }
291: if(mflg)last=1;
292: nomatch:
293: nlp = p;
294: c = w;
295: begp = endp = 0;
296: continue;
297: }
298: cont:
299: if(*p == '\n')lcount++;
300: if (table[*p++] == '.'){
301: if(hit){
302: if(p <= nlp){
303: outc(&buf[1024],file);
304: nlp = buf;
305: }
306: outc(p,file);
307: if(!caps)printf("\n\n");
308: if(mflg && last){putc('\n',mine);myct = 0;}
309: }
310: linemsg = 1;
311: if(*p == '\n')olcount = lcount+1;
312: else
313: olcount=lcount;
314: last = 0;
315: hit = 0;
316: oct = 0;
317: nlp = p;
318: c = w;
319: begp = endp = 0;
320: nsent++;
321: }
322: }
323: if(caps && (count1 > 0))
324: fwrite(beg1,sizeof(*beg1),count1,stdout);
325: close(f);
326: }
327:
328: getargc()
329: {
330: register c;
331: if (wordf){
332: if((c=getc(wordf))==EOF){
333: fclose(wordf);
334: if(nflag && fflag){
335: nflag=0;
336: wordf=fopen(filename,"r");
337: if(wordf == NULL){
338: fprintf("diction can't open %s\n",filename);
339: exit(2);
340: }
341: return(getc(wordf));
342: }
343: else return(EOF);
344: }
345: else return(c);
346: }
347: if ((c = *argptr++) == '\0')
348: return(EOF);
349: return(c);
350: }
351:
352: cgotofn() {
353: register c;
354: register struct words *s;
355: register ct;
356: int neg;
357:
358: s = smax = w;
359: neg = ct = 0;
360: nword: for(;;) {
361: c = getargc();
362: if(c == '~'){
363: neg++;
364: c = getargc();
365: }
366: if (c==EOF)
367: return;
368: if (c == '\n') {
369: if(neg)s->out = 0377;
370: else s->out = ct-1;
371: neg = ct = 0;
372: s = w;
373: } else {
374: loop: if (s->inp == c) {
375: s = s->nst;
376: ct++;
377: continue;
378: }
379: if (s->inp == 0) goto enter;
380: if (s->link == 0) {
381: if (smax >= &w[MAXSIZ - 1]) overflo();
382: s->link = ++smax;
383: s = smax;
384: goto enter;
385: }
386: s = s->link;
387: goto loop;
388: }
389: }
390:
391: enter:
392: do {
393: s->inp = c;
394: ct++;
395: if (smax >= &w[MAXSIZ - 1]) overflo();
396: s->nst = ++smax;
397: s = smax;
398: } while ((c = getargc()) != '\n' && c!=EOF);
399: if(neg)smax->out = 0377;
400: else smax->out = ct-1;
401: neg = ct = 0;
402: s = w;
403: if (c != EOF)
404: goto nword;
405: }
406:
/*
 * overflo -- report that the phrase list does not fit (node pool or
 * work queue exhausted) and terminate with exit status 2.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
411: cfail() {
412: struct words *queue[QSIZE];
413: struct words **front, **rear;
414: struct words *state;
415: int bstart;
416: register char c;
417: register struct words *s;
418: s = w;
419: front = rear = queue;
420: init: if ((s->inp) != 0) {
421: *rear++ = s->nst;
422: if (rear >= &queue[QSIZE - 1]) overflo();
423: }
424: if ((s = s->link) != 0) {
425: goto init;
426: }
427:
428: while (rear!=front) {
429: s = *front;
430: if (front == &queue[QSIZE-1])
431: front = queue;
432: else front++;
433: cloop: if ((c = s->inp) != 0) {
434: bstart=0;
435: *rear = (q = s->nst);
436: if (front < rear)
437: if (rear >= &queue[QSIZE-1])
438: if (front == queue) overflo();
439: else rear = queue;
440: else rear++;
441: else
442: if (++rear == front) overflo();
443: state = s->fail;
444: floop: if (state == 0){ state = w;bstart=1;}
445: if (state->inp == c) {
446: qloop: q->fail = state->nst;
447: if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
448: if((q=q->link) != 0)goto qloop;
449: }
450: else if ((state = state->link) != 0)
451: goto floop;
452: else if(bstart==0){state=0; goto floop;}
453: }
454: if ((s = s->link) != 0)
455: goto cloop;
456: }
457: /* for(s=w;s<=smax;s++)
458: printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
459: s->inp,s->out,s->nst,s->link,s->fail);
460: */
461: }
462: outc(addr,file)
463: char *addr;
464: char *file;
465: {
466: int inside;
467:
468: inside = 0;
469: if(!caps && lineno && linemsg){
470: printf("beginning line %ld",olcount);
471: if(file != (char *)NULL)printf(" %s\n",file);
472: else printf("\n");
473: linemsg = 0;
474: }
475: while(nlp < addr){
476: if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
477: oct=0;
478: putchar('\n');
479: }
480: if(nlp == begp){
481: if(caps)inside++;
482: else {
483: if( oct >45){putchar('\n');
484: oct=0;
485: }
486: if( oct==0 || table[*nlp] != ' '){
487: printf("*[");
488: oct+=2;
489: }
490: else {printf(" *[");;
491: oct+=3;
492: }
493: }
494: if(mflg)putc('[',mine);
495: }
496: if(inside){
497: if(islower(*nlp))*nlp = toupper(*nlp);
498: }
499: else {
500: if(!caps && *nlp == '\n')*nlp = ' ';
501: if(*nlp == ' ' && oct==0);
502: else if(!caps) {putchar(*nlp); oct++;}
503: }
504: if(nlp == endp){
505: if(caps)
506: inside= 0;
507: else {
508: if(*(nlp) != ' '){printf("]*");
509: oct+=2;
510: }
511: else {printf("]* ");
512: oct+=3;
513: }
514: if(oct >60){putchar('\n');
515: oct=0;
516: }
517: }
518: if(mflg)putc(']',mine);
519: beg = 0;
520: }
521: if(mflg){
522: if(nlp == myst)beg = 1;
523: if(beg || last){
524: putc(*nlp,mine);
525: if(myct++ >= 72 || last == 20){
526: putc('\n',mine);
527: if(last == 20)last=myct=0;
528: else myct=0;
529: }
530: if(last)last++;
531: }
532: }
533: nlp++;
534: }
535: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.