|
|
1.1 root 1: #ifndef lint
2: static char sccsid[] = "@(#)spell.c 4.3 5/15/90";
3: #endif
4:
5: #include "spell.h"
6: #define DLEV 2
7:
/* Library routine, declared by hand in the pre-ANSI manner. */
char	*strcat();

/* Helper that returns a character pointer. */
char	*skipv();

/* Suffix handlers named in suftab[]; every one returns int. */
int	strip(), an(), s(), es();
int	ily(), ncy(), CCe(), VCe();
int	bility(), tion(), ize();
int	y_to_e(), i_to_y();
int	nop(), metry();

25:
26: struct suftab {
27: char *suf;
28: int (*p1)();
29: int n1;
30: char *d1;
31: char *a1;
32: int (*p2)();
33: int n2;
34: char *d2;
35: char *a2;
36: } suftab[] = {
37: {"ssen",ily,4,"-y+iness","+ness" },
38: {"ssel",ily,4,"-y+i+less","+less" },
39: {"se",s,1,"","+s", es,2,"-y+ies","+es" },
40: {"s'",s,2,"","+'s"},
41: {"s",s,1,"","+s"},
42: {"ecn",ncy,1,"","-t+ce"},
43: {"ycn",ncy,1,"","-cy+t"},
44: {"ytilb",nop,0,"",""},
45: {"ytilib",bility,5,"-le+ility",""},
46: {"elbaif",i_to_y,4,"-y+iable",""},
47: {"elba",CCe,4,"-e+able","+able"},
48: {"yti",CCe,3,"-e+ity","+ity"},
49: {"ylb",y_to_e,1,"-e+y",""},
50: {"yl",ily,2,"-y+ily","+ly"},
51: {"laci",strip,2,"","+al"},
52: {"latnem",strip,2,"","+al"},
53: {"lanoi",strip,2,"","+al"},
54: {"tnem",strip,4,"","+ment"},
55: {"gni",CCe,3,"-e+ing","+ing"},
56: {"reta",nop,0,"",""},
57: {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
58: {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
59: {"citsi",strip,2,"","+ic"},
60: {"cihparg",i_to_y,1,"-y+ic",""},
61: {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
62: {"cirtem",i_to_y,1,"-y+ic",""},
63: {"yrtem",metry,0,"-ry+er",""},
64: {"cigol",i_to_y,1,"-y+ic",""},
65: {"tsigol",i_to_y,2,"-y+ist",""},
66: {"tsi",VCe,3,"-e+ist","+ist"},
67: {"msi",VCe,3,"-e+ism","+ist"},
68: {"noitacif",i_to_y,6,"-y+ication",""},
69: {"noitazi",ize,5,"-e+ation",""},
70: {"rota",tion,2,"-e+or",""},
71: {"noit",tion,3,"-e+ion","+ion"},
72: {"naino",an,3,"","+ian"},
73: {"na",an,1,"","+n"},
74: {"evit",tion,3,"-e+ive","+ive"},
75: {"ezi",CCe,3,"-e+ize","+ize"},
76: {"pihs",strip,4,"","+ship"},
77: {"dooh",ily,4,"-y+hood","+hood"},
78: {"ekil",strip,4,"","+like"},
79: 0
80: };
81:
/*
 * Prefixes recognised by lookuppref().  The list is searched from the
 * top and ends at the null pointer.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/*must precede un*/
	"un",
	0
};
119:
int	vflag;		/* bumped by main for each -v; tested in putw() */
int	xflag;		/* bumped by main for each -x; tested in dict() */
char	word[100];	/* working copy of the input word, edited in place */
char	original[100];	/* input word as read, NUL-terminated, for output */
char	*deriv[40];	/* derivation strings, indexed by nesting level */
char	affix[40];	/* text main prints in front of the word */
126:
127: main(argc,argv)
128: char **argv;
129: {
130: register char *ep, *cp;
131: register char *dp;
132: int fold;
133: int j;
134: FILE *file, *found;
135: if(!prime(argc,argv)) {
136: fprintf(stderr,
137: "spell: cannot initialize hash table\n");
138: exit(1);
139: }
140: found = fopen(argv[2],"w");
141: for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
142: switch(argv[0][1]) {
143: case 'b':
144: ise();
145: break;
146: case 'v':
147: vflag++;
148: break;
149: case 'x':
150: xflag++;
151: break;
152: }
153: for(;; fprintf(file,"%s%s\n",affix,original)) {
154: affix[0] = 0;
155: file = found;
156: for(ep=word;(*ep=j=getchar())!='\n';ep++)
157: if(j == EOF) {
158: fclose(found);
159: exit(0);
160: }
161: for(cp=word,dp=original; cp<ep; )
162: *dp++ = *cp++;
163: *dp = 0;
164: fold = 0;
165: for(cp=word;cp<ep;cp++)
166: if(islower(*cp))
167: goto lcase;
168: if(putsuf(ep,".",0))
169: continue;
170: ++fold;
171: for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
172: *dp = Tolower(*cp);
173: lcase:
174: if(putsuf(ep,".",0)||suffix(ep,0))
175: continue;
176: if(isupper(word[0])) {
177: for(cp=original,dp=word; *dp = *cp++; dp++)
178: if (fold) *dp = Tolower(*dp);
179: word[0] = Tolower(word[0]);
180: goto lcase;
181: }
182: file = stdout;
183: }
184: }
185:
186: suffix(ep,lev)
187: char *ep;
188: {
189: register struct suftab *t;
190: register char *cp, *sp;
191: lev += DLEV;
192: deriv[lev] = deriv[lev-1] = 0;
193: for(t= &suftab[0];sp=t->suf;t++) {
194: cp = ep;
195: while(*sp)
196: if(*--cp!=*sp++)
197: goto next;
198: for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
199: if(sp<word)
200: return(0);
201: if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
202: return(1);
203: if(t->p2!=0) {
204: deriv[lev] = deriv[lev+1] = 0;
205: return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
206: }
207: return(0);
208: next: ;
209: }
210: return(0);
211: }
212:
/*
 * Handler that always fails; suftab[] uses it for endings that must
 * not be analysed further.
 */
int
nop()
{
	return 0;
}
217:
/*
 * Accept the text ending at ep if putsuf() recognises it with
 * derivation a, or failing that if suffix() can reduce it further.
 * d is not used.  Returns 1 on success, 0 otherwise.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
223:
224: s(ep,d,a,lev)
225: char *ep,*d,*a;
226: {
227: if(lev>DLEV+1)
228: return(0);
229: if(*ep=='s'&&ep[-1]=='s')
230: return(0);
231: return(strip(ep,d,a,lev));
232: }
233:
234: an(ep,d,a,lev)
235: char *ep,*d,*a;
236: {
237: if(!isupper(*word)) /*must be proper name*/
238: return(0);
239: return(putsuf(ep,a,lev));
240: }
241:
/*
 * Write 'e' at ep and hand the lengthened stem to strip(), using d as
 * the derivation string.  The overwritten character is not restored.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
248:
/*
 * Temporarily put 'e' at ep and try strip() on the lengthened stem
 * with d as the derivation string.  On failure the character that was
 * at ep is put back.  Returns 1 on success, 0 otherwise.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;

	saved = ep[0];
	ep[0] = 'e';
	if(!strip(ep+1,"",d,lev)) {
		ep[0] = saved;
		return (0);
	}
	return (1);
}
259:
/*
 * Stem ending in 'i' goes to i_to_y(); any other stem goes to strip().
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1] == 'i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
268:
269: ncy(ep,d,a,lev)
270: char *ep, *d, *a;
271: {
272: if(skipv(skipv(ep-1))<word)
273: return(0);
274: ep[-1] = 't';
275: return(strip(ep,d,a,lev));
276: }
277:
/*
 * Write 'l' at ep, then let y_to_e() try an 'e' after it.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
284:
/*
 * If the stem ends in 'i', change that letter to 'y' and report the
 * derivation d; otherwise leave the stem alone and report a.  Either
 * way the stem is then passed to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1] != 'i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
294:
295: es(ep,d,a,lev)
296: char *ep,*d,*a;
297: {
298: if(lev>DLEV)
299: return(0);
300: switch(ep[-1]) {
301: default:
302: return(0);
303: case 'i':
304: return(i_to_y(ep,d,a,lev));
305: case 's':
306: case 'h':
307: case 'z':
308: case 'x':
309: return(strip(ep,d,a,lev));
310: }
311: }
312:
/*
 * Overwrite the two letters before ep with "er" and pass the result
 * to strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	ep[-1] = 'r';
	ep[-2] = 'e';
	return(strip(ep,d,a,lev));
}
320:
/*
 * Dispatch on the letter two places before ep: 'c' or 'r' asks
 * putsuf() to recognise the stem directly, 'a' tries y_to_e(), and
 * any other letter fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char key;

	key = ep[-2];
	if(key == 'c' || key == 'r')
		return(putsuf(ep,a,lev));
	if(key == 'a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
333:
/* possible consonant-consonant-e ending*/
/*
 * Decide from the last two stem letters whether to try restoring a
 * final 'e' (y_to_e) or to fall back on VCe().
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, prev;

	last = ep[-1];
	prev = ep[-2];

	if(last == 'l') {
		/* consonant + l, except ll, rl and wl, takes the e */
		if(!vowel(prev) && prev != 'l' && prev != 'r' && prev != 'w')
			return(y_to_e(ep,d,a,lev));
		return(VCe(ep,d,a,lev));
	}
	if(last == 's' && prev == 's')
		return(VCe(ep,d,a,lev));

	switch(last) {
	case 's':
	case 'c':
	case 'g':
		if(*ep == 'a')
			return(0);
		/* fall through */
	case 'v':
	case 'z':
		if(vowel(prev))
			return(VCe(ep,d,a,lev));
		/* fall through */
	case 'u':
		break;
	default:
		return(VCe(ep,d,a,lev));
	}

	if(y_to_e(ep,d,a,lev))
		return(1);
	/* the attempt may have edited the stem, so look at it afresh */
	if(ep[-2] == 'n' && ep[-1] == 'g')
		return(VCe(ep,d,a,lev));
	return(0);
}
370:
/* possible consonant-vowel-consonant-e ending*/
/*
 * A stem ending in 'e' fails at once.  For a vowel followed by a
 * consonant, first try the stem with an 'e' put at ep (derivation d);
 * if that does not work, restore the letter and try the bare stem
 * through strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;

	if(ep[-1] == 'e')
		return(0);
	if(!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
389:
390: char *lookuppref(wp,ep)
391: char **wp;
392: char *ep;
393: {
394: register char **sp;
395: register char *bp,*cp;
396: for(sp=preftab;*sp;sp++) {
397: bp = *wp;
398: for(cp= *sp;*cp;cp++,bp++)
399: if(Tolower(*bp)!=*cp)
400: goto next;
401: for(cp=bp;cp<ep;cp++)
402: if(vowel(*cp)) {
403: *wp = bp;
404: return(*sp);
405: }
406: next: ;
407: }
408: return(0);
409: }
410:
411: putsuf(ep,a,lev)
412: char *ep,*a;
413: {
414: register char *cp;
415: char *bp;
416: register char *pp;
417: int val = 0;
418: char space[20];
419: deriv[lev] = a;
420: if(putw(word,ep,lev))
421: return(1);
422: bp = word;
423: pp = space;
424: deriv[lev+1] = pp;
425: while(cp=lookuppref(&bp,ep)) {
426: *pp++ = '+';
427: while(*pp = *cp++)
428: pp++;
429: if(putw(bp,ep,lev+1)) {
430: val = 1;
431: break;
432: }
433: }
434: deriv[lev+1] = deriv[lev+2] = 0;
435: return(val);
436: }
437:
438: putw(bp,ep,lev)
439: char *bp,*ep;
440: {
441: register i, j;
442: char duple[3];
443: if(ep-bp<=1)
444: return(0);
445: if(vowel(*ep)) {
446: if(monosyl(bp,ep))
447: return(0);
448: }
449: i = dict(bp,ep);
450: if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
451: ep--;
452: deriv[++lev] = duple;
453: duple[0] = '+';
454: duple[1] = *ep;
455: duple[2] = 0;
456: i = dict(bp,ep);
457: }
458: if(vflag==0||i==0)
459: return(i);
460: j = lev;
461: do {
462: if(deriv[j])
463: strcat(affix,deriv[j]);
464: } while(--j>0);
465: strcat(affix,"\t");
466: return(i);
467: }
468:
469:
/*
 * Return 1 if bp..ep has at least two characters, ends in a vowel
 * followed by a non-vowel other than 'x' or 'w', and has no vowel in
 * front of that pair; otherwise return 0.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *p;

	if(ep < bp+2)
		return(0);
	if(vowel(ep[-1]) || !vowel(ep[-2]))
		return(0);
	if(ep[-1] == 'x' || ep[-1] == 'w')
		return(0);
	for(p = ep-2; p > bp; )
		if(vowel(*--p))
			return(0);
	return(1);
}
483:
484: char *
485: skipv(s)
486: char *s;
487: {
488: if(s>=word&&vowel(*s))
489: s--;
490: while(s>=word&&!vowel(*s))
491: s--;
492: return(s);
493: }
494:
/*
 * Return 1 if c, after Tolower(), is one of a e i o u y; else 0.
 */
int
vowel(c)
int c;
{
	int lc;

	lc = Tolower(c);
	if(lc == 'a' || lc == 'e' || lc == 'i')
		return(1);
	if(lc == 'o' || lc == 'u' || lc == 'y')
		return(1);
	return(0);
}
508:
509: /* crummy way to Britishise */
510: ise()
511: {
512: register struct suftab *p;
513: for(p = suftab;p->suf;p++) {
514: ztos(p->suf);
515: ztos(p->d1);
516: ztos(p->a1);
517: }
518: }
/*
 * Replace every 'z' in the NUL-terminated string s with 's', in place.
 */
int
ztos(s)
char *s;
{
	register char *p;

	p = s;
	while(*p != 0) {
		if(*p == 'z')
			*p = 's';
		p++;
	}
}
526:
527: dict(bp,ep)
528: char *bp, *ep;
529: {
530: register char *wp;
531: long h;
532: register long *lp;
533: register i;
534: if(xflag)
535: printf("=%.*s\n",ep-bp,bp);
536: for(i=0; i<NP; i++) {
537: for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
538: h += *wp * *lp;
539: h += '\n' * *lp;
540: h %= p[i];
541: if(get(h)==0)
542: return(0);
543: }
544: return(1);
545: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.