|
|
1.1 root 1: /*
2: * diction -- print all sentences containing one of the default phrases
3: *
4: * status returns:
5: * 0 - ok, and some matches
6: * 1 - ok, but no matches
7: * 2 - some error
8: */
9:
#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
12:
#define MAXSIZ 6500	/* number of entries in the state table w[] */
#define QSIZE 650	/* number of entries in cfail()'s work queue */

/*
 * One state of the phrase-matching automaton built by cgotofn()
 * and completed by cfail().
 */
struct words {
	char inp;		/* character that selects this state; 0 = unused */
	char out;		/* 0 = not a phrase end; else phrase length - 1,
				 * or 0377 for a phrase entered with a leading '~' */
	struct words *nst;	/* state to move to when inp matches */
	struct words *link;	/* next alternative to try when inp differs */
	struct words *fail;	/* fallback state, filled in by cfail() */
};

struct words w[MAXSIZ];		/* state table; w[0] is the start state */
struct words *smax;		/* last state handed out by cgotofn() */
struct words *q;		/* scratch state pointer used by cfail() */

int fflag;			/* incremented for each -f option */
int nflag = 1;			/* nonzero: read the default dictionary */
char *filename;			/* phrase file named with -f */
int nfile;			/* set by main() to the number of input files */
int nsucc;			/* set to 1 once a hit has been reported */
long nsent = 0;			/* sentence terminators seen by convert() */
long nhits = 0;			/* phrase hits found by execute() */
char *nlp;			/* next input character outc() will print */
char *begp;			/* first character of the current hit, or 0 */
char *endp;			/* last character of the current hit, or 0 */
int oct = 0;			/* characters printed since the last line break */
FILE *wordf;			/* stream the phrase list is read from */
char *argptr;			/* phrase text read by getargc() when wordf is NULL */
35:
36: main(argc, argv)
37: char **argv;
38: {
39: while (--argc > 0 && (++argv)[0][0]=='-')
40: switch (argv[0][1]) {
41:
42: case 'f':
43: fflag++;
44: filename = ++argv;
45: argc--;
46: continue;
47:
48: case 'n':
49: nflag = 0;
50: continue;
51: case 'd':
52: continue;
53: default:
54: fprintf(stderr, "diction: unknown flag\n");
55: continue;
56: }
57: out:
58: if(nflag){
59: wordf = fopen(DICT,"r");
60: if(wordf == NULL){
61: fprintf(stderr,"diction: can't open default dictionary\n");
62: exit(2);
63: }
64: }
65: else {
66: wordf = fopen(*filename,"r");
67: if(wordf == NULL){
68: fprintf(stderr,"diction: can't open %s\n",filename);
69: exit(2);
70: }
71: }
72:
73: cgotofn();
74: cfail();
75: nfile = argc;
76: if (argc<=0) {
77: execute((char *)NULL);
78: }
79: else while (--argc >= 0) {
80: execute(*argv);
81: argv++;
82: }
83: printf("number of sentences %ld number of hits %ld\n",nsent,nhits);
84: exit(nsucc == 0);
85: }
86:
87: execute(file)
88: char *file;
89: {
90: register char *p;
91: register struct words *c;
92: register ccount;
93: struct words *savc;
94: char *savp;
95: int savct;
96: int scr;
97: char buf[1024];
98: int f;
99: int hit;
100: if (file) {
101: if ((f = open(file, 0)) < 0) {
102: fprintf(stderr, "diction: can't open %s\n", file);
103: exit(2);
104: }
105: }
106: else f = 0;
107: ccount = 0;
108: p = buf;
109: nlp = p;
110: c = w;
111: oct = hit = 0;
112: savc = savp = 0;
113: for (;;) {
114: if (--ccount <= 0) {
115: if (p == &buf[1024]) p = buf;
116: if (p > &buf[512]) {
117: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
118: }
119: else if ((ccount = read(f, p, 512)) <= 0) break;
120: convert(p,ccount);
121: }
122: if(p == &buf[1024])p=buf;
123: nstate:
124: if (c->inp == *p) {
125: c = c->nst;
126: }
127: else if (c->link != 0) {
128: c = c->link;
129: goto nstate;
130: }
131: else {
132: if(savp != 0){
133: c=savc;
134: p=savp;
135: if(ccount > savct)ccount += savct;
136: else ccount = savct;
137: savc=savp=0;
138: goto hadone;
139: }
140: c = c->fail;
141: if (c==0) {
142: c = w;
143: istate:
144: if (c->inp == *p) {
145: c = c->nst;
146: }
147: else if (c->link != 0) {
148: c = c->link;
149: goto istate;
150: }
151: }
152: else goto nstate;
153: }
154: if(c->out){
155: if((c->inp == *(p+1)) && (c->nst != 0)){
156: savp=p;
157: savc=c;
158: savct=ccount;
159: goto cont;
160: }
161: else if(c->link != 0){
162: savc=c;
163: while((savc=savc->link)!= 0){
164: if(savc->inp == *(p+1)){
165: savp=p;
166: savc=c;
167: savct=ccount;
168: goto cont;
169: }
170: }
171: }
172: hadone:
173: savc=savp=0;
174: if(c->out == (char)(0377)){
175: c=w;
176: goto nstate;
177: }
178: begp = p - (c->out);
179: if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
180: endp=p;
181: hit = 1;
182: nhits++;
183: if (*p++ == '.') {
184: if (--ccount <= 0) {
185: if (p == &buf[1024]) p = buf;
186: if (p > &buf[512]) {
187: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
188: }
189: else if ((ccount = read(f, p, 512)) <= 0) break;
190: convert(p,ccount);
191: }
192: }
193: succeed: nsucc = 1;
194: {
195: if (p <= nlp) {
196: outc(&buf[1024]);
197: nlp = buf;
198: }
199: outc(p);
200: }
201: nomatch:
202: nlp = p;
203: c = w;
204: begp = endp = 0;
205: continue;
206: }
207: cont:
208: if (*p++ == '.'){
209: if(hit){
210: if(p <= nlp){
211: outc(&buf[1024]);
212: nlp = buf;
213: }
214: outc(p);
215: putchar('\n'); putchar('\n');
216: }
217: hit = 0;
218: oct = 0;
219: nlp = p;
220: c = w;
221: begp = endp = 0;
222: }
223: }
224: close(f);
225: }
226:
227: getargc()
228: {
229: register c;
230: if (wordf){
231: if((c=getc(wordf))==EOF){
232: fclose(wordf);
233: if(nflag && fflag){
234: nflag=0;
235: wordf=fopen(*filename,"r");
236: if(wordf == NULL){
237: fprintf("can't open %s\n",filename);
238: exit(2);
239: }
240: return(getc(wordf));
241: }
242: else return(EOF);
243: }
244: else return(c);
245: }
246: if ((c = *argptr++) == '\0')
247: return(EOF);
248: return(c);
249: }
250:
251: cgotofn() {
252: register c;
253: register struct words *s;
254: register ct;
255: int neg;
256:
257: s = smax = w;
258: neg = ct = 0;
259: nword: for(;;) {
260: c = getargc();
261: if(c == '~'){
262: neg++;
263: c = getargc();
264: }
265: if (c==EOF)
266: return;
267: if (c == '\n') {
268: if(neg)s->out = 0377;
269: else s->out = ct-1;
270: neg = ct = 0;
271: s = w;
272: } else {
273: loop: if (s->inp == c) {
274: s = s->nst;
275: ct++;
276: continue;
277: }
278: if (s->inp == 0) goto enter;
279: if (s->link == 0) {
280: if (smax >= &w[MAXSIZ - 1]) overflo();
281: s->link = ++smax;
282: s = smax;
283: goto enter;
284: }
285: s = s->link;
286: goto loop;
287: }
288: }
289:
290: enter:
291: do {
292: s->inp = c;
293: ct++;
294: if (smax >= &w[MAXSIZ - 1]) overflo();
295: s->nst = ++smax;
296: s = smax;
297: } while ((c = getargc()) != '\n' && c!=EOF);
298: if(neg)smax->out = 0377;
299: else smax->out = ct-1;
300: neg = ct = 0;
301: s = w;
302: if (c != EOF)
303: goto nword;
304: }
305:
/*
 * overflo -- report that a fixed-size table is full and exit with
 * status 2.  Does not return.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
310: cfail() {
311: struct words *queue[QSIZE];
312: struct words **front, **rear;
313: struct words *state;
314: int bstart;
315: register char c;
316: register struct words *s;
317: s = w;
318: front = rear = queue;
319: init: if ((s->inp) != 0) {
320: *rear++ = s->nst;
321: if (rear >= &queue[QSIZE - 1]) overflo();
322: }
323: if ((s = s->link) != 0) {
324: goto init;
325: }
326:
327: while (rear!=front) {
328: s = *front;
329: if (front == &queue[QSIZE-1])
330: front = queue;
331: else front++;
332: cloop: if ((c = s->inp) != 0) {
333: bstart=0;
334: *rear = (q = s->nst);
335: if (front < rear)
336: if (rear >= &queue[QSIZE-1])
337: if (front == queue) overflo();
338: else rear = queue;
339: else rear++;
340: else
341: if (++rear == front) overflo();
342: state = s->fail;
343: floop: if (state == 0){ state = w;bstart=1;}
344: if (state->inp == c) {
345: qloop: q->fail = state->nst;
346: if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
347: if((q=q->link) != 0)goto qloop;
348: }
349: else if ((state = state->link) != 0)
350: goto floop;
351: else if(bstart==0){state=0; goto floop;}
352: }
353: if ((s = s->link) != 0)
354: goto cloop;
355: }
356: /* for(s=w;s<=smax;s++)
357: printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
358: s->inp,s->out,s->nst,s->link,s->fail);
359: */
360: }
361: convert(p,ccount)
362: char *p;
363: {
364: int ct;
365: char *pt;
366: for(pt=p,ct=ccount;--ct>=0;pt++){
367: if(isupper(*pt))*pt=tolower(*pt);
368: else if(isspace(*pt))*pt=' ';
369: else if(*pt=='.' || *pt=='?'||*pt=='!'){
370: *pt='.';
371: nsent++;
372: }
373: else if(ispunct(*pt))*pt=' ';
374: }
375: }
376: outc(addr)
377: char *addr;
378: {
379:
380: while(nlp < addr){
381: if(oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp){
382: oct=0;
383: putchar('\n');
384: }
385: if(nlp == begp){
386: putchar('[');
387: }
388: putchar(*nlp);
389: if(nlp == endp){
390: putchar(']');
391: }
392: nlp++;
393: }
394: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.