|
|
1.1 root 1: #ifndef lint
2: static char sccsid[] = "@(#)spell.c 4.2 6/3/86";
3: #endif
4:
5: #include "spell.h"
#define DLEV 2		/* amount suffix() adds to the derivation level on entry */

/* library routine used when building the affix string */
char *strcat();

/* helper that returns a pointer into word[] */
char *skipv();

/* suffix-rule handlers; most are installed in suftab[] below */
int strip(), nop();
int an(), s(), es();
int ily(), ncy(), bility(), tion(), ize(), metry();
int CCe(), VCe();
int y_to_e(), i_to_y();
25:
26: struct suftab {
27: char *suf;
28: int (*p1)();
29: int n1;
30: char *d1;
31: char *a1;
32: int (*p2)();
33: int n2;
34: char *d2;
35: char *a2;
36: } suftab[] = {
37: {"ssen",ily,4,"-y+iness","+ness" },
38: {"ssel",ily,4,"-y+i+less","+less" },
39: {"se",s,1,"","+s", es,2,"-y+ies","+es" },
40: {"s'",s,2,"","+'s"},
41: {"s",s,1,"","+s"},
42: {"ecn",ncy,1,"","-t+ce"},
43: {"ycn",ncy,1,"","-cy+t"},
44: {"ytilb",nop,0,"",""},
45: {"ytilib",bility,5,"-le+ility",""},
46: {"elbaif",i_to_y,4,"-y+iable",""},
47: {"elba",CCe,4,"-e+able","+able"},
48: {"yti",CCe,3,"-e+ity","+ity"},
49: {"ylb",y_to_e,1,"-e+y",""},
50: {"yl",ily,2,"-y+ily","+ly"},
51: {"laci",strip,2,"","+al"},
52: {"latnem",strip,2,"","+al"},
53: {"lanoi",strip,2,"","+al"},
54: {"tnem",strip,4,"","+ment"},
55: {"gni",CCe,3,"-e+ing","+ing"},
56: {"reta",nop,0,"",""},
57: {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
58: {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
59: {"citsi",strip,2,"","+ic"},
60: {"cihparg",i_to_y,1,"-y+ic",""},
61: {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
62: {"cirtem",i_to_y,1,"-y+ic",""},
63: {"yrtem",metry,0,"-ry+er",""},
64: {"cigol",i_to_y,1,"-y+ic",""},
65: {"tsigol",i_to_y,2,"-y+ist",""},
66: {"tsi",VCe,3,"-e+ist","+ist"},
67: {"msi",VCe,3,"-e+ism","+ist"},
68: {"noitacif",i_to_y,6,"-y+ication",""},
69: {"noitazi",ize,5,"-e+ation",""},
70: {"rota",tion,2,"-e+or",""},
71: {"noit",tion,3,"-e+ion","+ion"},
72: {"naino",an,3,"","+ian"},
73: {"na",an,1,"","+n"},
74: {"evit",tion,3,"-e+ive","+ive"},
75: {"ezi",CCe,3,"-e+ize","+ize"},
76: {"pihs",strip,4,"","+ship"},
77: {"dooh",ily,4,"-y+hood","+hood"},
78: {"ekil",strip,4,"","+like"},
79: 0
80: };
81:
/*
 * Prefixes that lookuppref() may peel off the front of a word.
 * The list is scanned in order and ends with a 0 entry.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso",
	"kilo", "magneto", "meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over",
	"photo", "poly", "pre", "pseudo",
	"re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/*must precede un*/
	"un",
	0
};
119:
/* option counters, bumped in main() for -v and -x */
int vflag, xflag;

char word[100];		/* working copy of the current input word; edited in place */
char original[100];	/* the input word as read, echoed on output */

char *deriv[40];	/* derivation strings, indexed by level */
char affix[40];		/* text printed in front of the word on output */
126:
127: main(argc,argv)
128: char **argv;
129: {
130: register char *ep, *cp;
131: register char *dp;
132: int fold;
133: int j;
134: FILE *file, *found;
135: if(!prime(argc,argv)) {
136: fprintf(stderr,
137: "spell: cannot initialize hash table\n");
138: exit(1);
139: }
140: found = fopen(argv[2],"w");
141: for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
142: switch(argv[0][1]) {
143: case 'b':
144: ise();
145: break;
146: case 'v':
147: vflag++;
148: break;
149: case 'x':
150: xflag++;
151: break;
152: }
153: for(;; fprintf(file,"%s%s\n",affix,original)) {
154: affix[0] = 0;
155: file = found;
156: for(ep=word;(*ep=j=getchar())!='\n';ep++)
157: if(j == EOF) {
158: fclose(found);
159: exit(0);
160: }
161: for(cp=word,dp=original; cp<ep; )
162: *dp++ = *cp++;
163: *dp = 0;
164: fold = 0;
165: for(cp=word;cp<ep;cp++)
166: if(islower(*cp))
167: goto lcase;
168: if(putsuf(ep,".",0))
169: continue;
170: ++fold;
171: for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
172: *dp = Tolower(*cp);
173: lcase:
174: if(putsuf(ep,".",0)||suffix(ep,0))
175: continue;
176: if(isupper(word[0])) {
177: for(cp=original,dp=word; *dp = *cp++; dp++)
178: if (fold) *dp = Tolower(*dp);
179: word[0] = Tolower(word[0]);
180: goto lcase;
181: }
182: file = stdout;
183: }
184: }
185:
186: suffix(ep,lev)
187: char *ep;
188: {
189: register struct suftab *t;
190: register char *cp, *sp;
191: lev += DLEV;
192: deriv[lev] = deriv[lev-1] = 0;
193: for(t= &suftab[0];sp=t->suf;t++) {
194: cp = ep;
195: while(*sp)
196: if(*--cp!=*sp++)
197: goto next;
198: for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
199: if(sp<word)
200: return(0);
201: if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
202: return(1);
203: if(t->p2!=0) {
204: deriv[lev] = deriv[lev+1] = 0;
205: return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
206: }
207: return(0);
208: next: ;
209: }
210: return(0);
211: }
212:
/* Handler that never accepts: used by suftab[] rows that only block shorter rules. */
int
nop()
{
	return 0;
}
217:
/*
 * Accept the stem ending at ep either as it stands (recording a as its
 * derivation) or after stripping a further suffix.  d is unused here.
 * Returns 1 on success, 0 otherwise.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev)!=0);
}
223:
224: s(ep,d,a,lev)
225: char *ep,*d,*a;
226: {
227: if(lev>DLEV+1)
228: return(0);
229: if(*ep=='s'&&ep[-1]=='s')
230: return(0);
231: return(strip(ep,d,a,lev));
232: }
233:
234: an(ep,d,a,lev)
235: char *ep,*d,*a;
236: {
237: if(!isupper(*word)) /*must be proper name*/
238: return(0);
239: return(putsuf(ep,a,lev));
240: }
241:
/* Handler for "-ization": put an 'e' at ep and try the lengthened stem, reporting d. */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
248:
/* Store an 'e' at ep and try the stem that now ends just after it; d is the derivation reported. */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *end;

	end = ep;
	*end = 'e';
	end++;
	return(strip(end,"",d,lev));
}
255:
/* Stem ending in 'i' goes through i_to_y(); any other stem is stripped as is. */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
264:
265: ncy(ep,d,a,lev)
266: char *ep, *d, *a;
267: {
268: if(skipv(skipv(ep-1))<word)
269: return(0);
270: ep[-1] = 't';
271: return(strip(ep,d,a,lev));
272: }
273:
/* Handler for "-ibility": write 'l' at ep, then let y_to_e() append the 'e'. */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
280:
/*
 * If the stem ends in 'i', turn that into 'y' and report d as the
 * derivation; otherwise leave the stem alone and report a.
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *note;

	note = a;
	if(ep[-1]=='i') {
		ep[-1] = 'y';
		note = d;
	}
	return(strip(ep,"",note,lev));
}
290:
291: es(ep,d,a,lev)
292: char *ep,*d,*a;
293: {
294: if(lev>DLEV)
295: return(0);
296: switch(ep[-1]) {
297: default:
298: return(0);
299: case 'i':
300: return(i_to_y(ep,d,a,lev));
301: case 's':
302: case 'h':
303: case 'z':
304: case 'x':
305: return(strip(ep,d,a,lev));
306: }
307: }
308:
/* Handler for "-metry": overwrite the two characters before ep with "er" and try the result. */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	char *tail;

	tail = ep-2;
	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
316:
/*
 * Handler for "-tion", "-tive" and "-ator", keyed on the character two
 * before ep: 'c' or 'r' tries the stem as is, 'a' tries it with an 'e'
 * appended, anything else is rejected.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char key;

	key = ep[-2];
	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
329:
/* possible consonant-consonant-e ending*/
/*
 * Decides from the last two stem characters whether to try the stem
 * with an 'e' restored (y_to_e) before, or instead of, handing over to
 * VCe().  Returns nonzero if the word was accepted.
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last;
	int scg, scgvz;

	last = ep[-1];
	if(last=='l') {
		/* consonant other than l, r, w before the 'l': restored 'e' only */
		if(!vowel(ep[-2]) && ep[-2]!='l' && ep[-2]!='r' && ep[-2]!='w')
			return(y_to_e(ep,d,a,lev));
		return(VCe(ep,d,a,lev));
	}
	if(last=='s' && ep[-2]=='s')
		return(VCe(ep,d,a,lev));

	scg = (last=='s' || last=='c' || last=='g');
	scgvz = (scg || last=='v' || last=='z');

	if(scg && *ep=='a')
		return(0);
	if(scgvz && vowel(ep[-2]))
		return(VCe(ep,d,a,lev));
	if(scgvz || last=='u') {
		if(y_to_e(ep,d,a,lev))
			return(1);
		/* re-read the stem: the attempt above may have edited it */
		if(!(ep[-2]=='n' && ep[-1]=='g'))
			return(0);
	}
	return(VCe(ep,d,a,lev));
}
366:
/* possible consonant-vowel-consonant-e ending*/
/*
 * A stem already ending in 'e' is rejected.  If the stem ends vowel +
 * non-vowel, first try it with an 'e' appended; if that fails, put the
 * overwritten character back and try the bare stem.
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last=='e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev)||suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
385:
386: char *lookuppref(wp,ep)
387: char **wp;
388: char *ep;
389: {
390: register char **sp;
391: register char *bp,*cp;
392: for(sp=preftab;*sp;sp++) {
393: bp = *wp;
394: for(cp= *sp;*cp;cp++,bp++)
395: if(Tolower(*bp)!=*cp)
396: goto next;
397: for(cp=bp;cp<ep;cp++)
398: if(vowel(*cp)) {
399: *wp = bp;
400: return(*sp);
401: }
402: next: ;
403: }
404: return(0);
405: }
406:
407: putsuf(ep,a,lev)
408: char *ep,*a;
409: {
410: register char *cp;
411: char *bp;
412: register char *pp;
413: int val = 0;
414: char space[20];
415: deriv[lev] = a;
416: if(putw(word,ep,lev))
417: return(1);
418: bp = word;
419: pp = space;
420: deriv[lev+1] = pp;
421: while(cp=lookuppref(&bp,ep)) {
422: *pp++ = '+';
423: while(*pp = *cp++)
424: pp++;
425: if(putw(bp,ep,lev+1)) {
426: val = 1;
427: break;
428: }
429: }
430: deriv[lev+1] = deriv[lev+2] = 0;
431: return(val);
432: }
433:
434: putw(bp,ep,lev)
435: char *bp,*ep;
436: {
437: register i, j;
438: char duple[3];
439: if(ep-bp<=1)
440: return(0);
441: if(vowel(*ep)) {
442: if(monosyl(bp,ep))
443: return(0);
444: }
445: i = dict(bp,ep);
446: if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
447: ep--;
448: deriv[++lev] = duple;
449: duple[0] = '+';
450: duple[1] = *ep;
451: duple[2] = 0;
452: i = dict(bp,ep);
453: }
454: if(vflag==0||i==0)
455: return(i);
456: j = lev;
457: do {
458: if(deriv[j])
459: strcat(affix,deriv[j]);
460: } while(--j>0);
461: strcat(affix,"\t");
462: return(i);
463: }
464:
465:
/*
 * Returns 1 when bp..ep is at least two characters long, ends in
 * vowel + non-vowel where that last character is neither 'x' nor 'w',
 * and has no vowel anywhere before that pair; returns 0 otherwise.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *cp;

	if(ep<bp+2)
		return(0);
	if(vowel(ep[-1]))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(ep[-1]=='x' || ep[-1]=='w')
		return(0);
	for(cp=ep-3; cp>=bp; cp--)
		if(vowel(*cp))
			return(0);
	return(1);
}
479:
480: char *
481: skipv(s)
482: char *s;
483: {
484: if(s>=word&&vowel(*s))
485: s--;
486: while(s>=word&&!vowel(*s))
487: s--;
488: return(s);
489: }
490:
/* Returns 1 if c, after Tolower(), is one of a e i o u y; 0 otherwise. */
int
vowel(c)
int c;
{
	int lc;

	lc = Tolower(c);
	return(lc=='a' || lc=='e' || lc=='i' ||
	       lc=='o' || lc=='u' || lc=='y');
}
504:
505: /* crummy way to Britishise */
506: ise()
507: {
508: register struct suftab *p;
509: for(p = suftab;p->suf;p++) {
510: ztos(p->suf);
511: ztos(p->d1);
512: ztos(p->a1);
513: }
514: }
/* Overwrite every 'z' in the NUL-terminated string s with 's', in place. */
int
ztos(s)
char *s;
{
	register char *p;

	p = s;
	while(*p) {
		if(*p=='z')
			*p = 's';
		p++;
	}
}
522:
523: dict(bp,ep)
524: char *bp, *ep;
525: {
526: register char *wp;
527: long h;
528: register long *lp;
529: register i;
530: if(xflag)
531: printf("=%.*s\n",ep-bp,bp);
532: for(i=0; i<NP; i++) {
533: for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
534: h += *wp * *lp;
535: h += '\n' * *lp;
536: h %= p[i];
537: if(get(h)==0)
538: return(0);
539: }
540: return(1);
541: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.