|
|
1.1 root 1: #ifndef lint
2: static char sccsid[] = "@(#)spell.c 4.1 12/18/82";
3: #endif
4:
5: #include "spell.h"
6: #define DLEV 2
7:
/*
 * Forward declarations.
 * strcat() is the C library routine; skipv() is defined later in this file.
 */
char	*strcat();
char	*skipv();

/*
 * Suffix-rule routines.  Each is referenced from suftab[] below,
 * ahead of its definition.
 */
int	strip(), nop();
int	s(), es(), an();
int	ily(), ncy(), bility(), tion(), ize(), metry();
int	CCe(), VCe();
int	y_to_e(), i_to_y();
25:
26: struct suftab {
27: char *suf;
28: int (*p1)();
29: int n1;
30: char *d1;
31: char *a1;
32: int (*p2)();
33: int n2;
34: char *d2;
35: char *a2;
36: } suftab[] = {
37: {"ssen",ily,4,"-y+iness","+ness" },
38: {"ssel",ily,4,"-y+i+less","+less" },
39: {"se",s,1,"","+s", es,2,"-y+ies","+es" },
40: {"s'",s,2,"","+'s"},
41: {"s",s,1,"","+s"},
42: {"ecn",ncy,1,"","-t+ce"},
43: {"ycn",ncy,1,"","-cy+t"},
44: {"ytilb",nop,0,"",""},
45: {"ytilib",bility,5,"-le+ility",""},
46: {"elbaif",i_to_y,4,"-y+iable",""},
47: {"elba",CCe,4,"-e+able","+able"},
48: {"yti",CCe,3,"-e+ity","+ity"},
49: {"ylb",y_to_e,1,"-e+y",""},
50: {"yl",ily,2,"-y+ily","+ly"},
51: {"laci",strip,2,"","+al"},
52: {"latnem",strip,2,"","+al"},
53: {"lanoi",strip,2,"","+al"},
54: {"tnem",strip,4,"","+ment"},
55: {"gni",CCe,3,"-e+ing","+ing"},
56: {"reta",nop,0,"",""},
57: {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
58: {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
59: {"citsi",strip,2,"","+ic"},
60: {"cihparg",i_to_y,1,"-y+ic",""},
61: {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
62: {"cirtem",i_to_y,1,"-y+ic",""},
63: {"yrtem",metry,0,"-ry+er",""},
64: {"cigol",i_to_y,1,"-y+ic",""},
65: {"tsigol",i_to_y,2,"-y+ist",""},
66: {"tsi",VCe,3,"-e+ist","+ist"},
67: {"msi",VCe,3,"-e+ism","+ist"},
68: {"noitacif",i_to_y,6,"-y+ication",""},
69: {"noitazi",ize,5,"-e+ation",""},
70: {"rota",tion,2,"-e+or",""},
71: {"noit",tion,3,"-e+ion","+ion"},
72: {"naino",an,3,"","+ian"},
73: {"na",an,1,"","+n"},
74: {"evit",tion,3,"-e+ive","+ive"},
75: {"ezi",CCe,3,"-e+ize","+ize"},
76: {"pihs",strip,4,"","+ship"},
77: {"dooh",ily,4,"-y+hood","+hood"},
78: {"ekil",strip,4,"","+like"},
79: 0
80: };
81:
/*
 * Prefixes that lookuppref() will strip from the front of a word.
 * The list is searched in order and the first match is taken, so a
 * prefix must come before any shorter prefix it begins with.
 * A null pointer ends the list.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso", "kilo",
	"magneto", "meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over",
	"photo", "poly", "pre", "pseudo",
	"re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/* must precede un */
	"un",
	0
};
119:
/* Option counters: -v makes putw() record derivations, -x makes dict() print each probe. */
int	vflag, xflag;

/* word is the working copy that the rule routines edit; original is the line as read. */
char	word[100], original[100];

/* Derivation string recorded at each nesting level (0 = none). */
char	*deriv[40];

/* Text printed in front of the word on output; filled in by putw(). */
char	affix[40];
126:
127: main(argc,argv)
128: char **argv;
129: {
130: register char *ep, *cp;
131: register char *dp;
132: int fold;
133: int j;
134: FILE *file, *found;
135: if(!prime(argc,argv)) {
136: fprintf(stderr,
137: "spell: cannot initialize hash table\n");
138: exit(1);
139: }
140: found = fopen(argv[2],"w");
141: for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
142: switch(argv[0][1]) {
143: case 'b':
144: ise();
145: break;
146: case 'v':
147: vflag++;
148: break;
149: case 'x':
150: xflag++;
151: break;
152: }
153: for(;; fprintf(file,"%s%s\n",affix,original)) {
154: affix[0] = 0;
155: file = found;
156: for(ep=word;(*ep=j=getchar())!='\n';ep++)
157: if(j == EOF)
158: exit(0);
159: for(cp=word,dp=original; cp<ep; )
160: *dp++ = *cp++;
161: *dp = 0;
162: fold = 0;
163: for(cp=word;cp<ep;cp++)
164: if(islower(*cp))
165: goto lcase;
166: if(putsuf(ep,".",0))
167: continue;
168: ++fold;
169: for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
170: *dp = Tolower(*cp);
171: lcase:
172: if(putsuf(ep,".",0)||suffix(ep,0))
173: continue;
174: if(isupper(word[0])) {
175: for(cp=original,dp=word; *dp = *cp++; dp++)
176: if (fold) *dp = Tolower(*dp);
177: word[0] = Tolower(word[0]);
178: goto lcase;
179: }
180: file = stdout;
181: }
182: }
183:
184: suffix(ep,lev)
185: char *ep;
186: {
187: register struct suftab *t;
188: register char *cp, *sp;
189: lev += DLEV;
190: deriv[lev] = deriv[lev-1] = 0;
191: for(t= &suftab[0];sp=t->suf;t++) {
192: cp = ep;
193: while(*sp)
194: if(*--cp!=*sp++)
195: goto next;
196: for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
197: if(sp<word)
198: return(0);
199: if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
200: return(1);
201: if(t->p2!=0) {
202: deriv[lev] = deriv[lev+1] = 0;
203: return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
204: }
205: return(0);
206: next: ;
207: }
208: return(0);
209: }
210:
/*
 * Rule that never succeeds.  Used in suftab[] for endings that must
 * stop the search without being stripped.
 */
int
nop()
{
	return 0;
}
215:
/*
 * Accept the stem ending at ep if putsuf() finds it (recording a as
 * the derivation), or failing that if suffix() can strip it further.
 * d is not used.  Returns 1 on success, 0 on failure.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
221:
222: s(ep,d,a,lev)
223: char *ep,*d,*a;
224: {
225: if(lev>DLEV+1)
226: return(0);
227: if(*ep=='s'&&ep[-1]=='s')
228: return(0);
229: return(strip(ep,d,a,lev));
230: }
231:
232: an(ep,d,a,lev)
233: char *ep,*d,*a;
234: {
235: if(!isupper(*word)) /*must be proper name*/
236: return(0);
237: return(putsuf(ep,a,lev));
238: }
239:
/*
 * Rule for -ization: put an e at ep (overwriting what was there) and
 * strip the stem that now ends just after it, with d as derivation.
 * a is not used.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep + 1,"",d,lev));
}
246:
/*
 * Restore a dropped final e: store e at ep, lengthen the stem by one
 * character and hand it to strip() with d as the derivation.
 * a is not used.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	register char *end;

	end = ep;
	*end = 'e';
	end++;
	return(strip(end,"",d,lev));
}
253:
/*
 * If the stem ends in i, treat it as a changed y (i_to_y);
 * otherwise strip the suffix directly.
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1] == 'i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
262:
263: ncy(ep,d,a,lev)
264: char *ep, *d, *a;
265: {
266: if(skipv(skipv(ep-1))<word)
267: return(0);
268: ep[-1] = 't';
269: return(strip(ep,d,a,lev));
270: }
271:
/*
 * Rule for -ibility: store l at ep, then let y_to_e() add the e that
 * follows it, so the stem ends in "le".
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep + 1,d,a,lev));
}
278:
/*
 * If the stem ends in i, change it to y and strip with derivation d;
 * otherwise leave the stem alone and strip with derivation a.
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1] != 'i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
288:
289: es(ep,d,a,lev)
290: char *ep,*d,*a;
291: {
292: if(lev>DLEV)
293: return(0);
294: switch(ep[-1]) {
295: default:
296: return(0);
297: case 'i':
298: return(i_to_y(ep,d,a,lev));
299: case 's':
300: case 'h':
301: case 'z':
302: case 'x':
303: return(strip(ep,d,a,lev));
304: }
305: }
306:
/*
 * Rule for -metry: rewrite the two letters before ep as "er"
 * and strip the result.
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	register char *cp;

	cp = ep - 2;
	*cp++ = 'e';
	*cp = 'r';
	return(strip(ep,d,a,lev));
}
314:
/*
 * Rule for -tion/-tive/-ator, decided by the letter two places before
 * ep: c or r looks the stem up as is, a restores a final e, anything
 * else fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int key;

	key = ep[-2];
	if(key == 'c' || key == 'r')
		return(putsuf(ep,a,lev));
	if(key == 'a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
327:
/*
 * possible consonant-consonant-e ending
 *
 * Looks at the last stem letter (ep[-1]) and the one before it to
 * decide whether to try restoring a final e with y_to_e().  Cases that
 * `break' out of the switch, and letters not listed, go on to VCe().
 * The cases fall through into one another on purpose.
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	switch(ep[-1]) {
	case 'l':
		/* consonant other than l, r, w in front of the l: restore e */
		if(!vowel(ep[-2]) && ep[-2] != 'l' &&
		    ep[-2] != 'r' && ep[-2] != 'w')
			return(y_to_e(ep,d,a,lev));
		break;
	case 's':
		if(ep[-2] == 's')
			break;
		/* fall through */
	case 'c':
	case 'g':
		if(*ep == 'a')
			return(0);
		/* fall through */
	case 'v':
	case 'z':
		if(vowel(ep[-2]))
			break;
		/* fall through */
	case 'u':
		if(y_to_e(ep,d,a,lev))
			return(1);
		/* only a stem ending in "ng" gets the VCe() attempt as well */
		if(ep[-2] != 'n' || ep[-1] != 'g')
			return(0);
		break;
	}
	return(VCe(ep,d,a,lev));
}
364:
/*
 * possible consonant-vowel-consonant-e ending
 *
 * Fails if the stem already ends in e.  If the stem ends vowel +
 * non-vowel, first try it with an e appended (derivation d); if that
 * fails, put back the overwritten character.  Finally fall back on
 * strip() of the unmodified stem.
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last == 'e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if(putsuf(ep + 1,d,lev) || suffix(ep + 1,lev))
			return(1);
		*ep = saved;
	}
	return(strip(ep,d,a,lev));
}
383:
384: char *lookuppref(wp,ep)
385: char **wp;
386: char *ep;
387: {
388: register char **sp;
389: register char *bp,*cp;
390: for(sp=preftab;*sp;sp++) {
391: bp = *wp;
392: for(cp= *sp;*cp;cp++,bp++)
393: if(Tolower(*bp)!=*cp)
394: goto next;
395: for(cp=bp;cp<ep;cp++)
396: if(vowel(*cp)) {
397: *wp = bp;
398: return(*sp);
399: }
400: next: ;
401: }
402: return(0);
403: }
404:
405: putsuf(ep,a,lev)
406: char *ep,*a;
407: {
408: register char *cp;
409: char *bp;
410: register char *pp;
411: int val = 0;
412: char space[20];
413: deriv[lev] = a;
414: if(putw(word,ep,lev))
415: return(1);
416: bp = word;
417: pp = space;
418: deriv[lev+1] = pp;
419: while(cp=lookuppref(&bp,ep)) {
420: *pp++ = '+';
421: while(*pp = *cp++)
422: pp++;
423: if(putw(bp,ep,lev+1)) {
424: val = 1;
425: break;
426: }
427: }
428: deriv[lev+1] = deriv[lev+2] = 0;
429: return(val);
430: }
431:
432: putw(bp,ep,lev)
433: char *bp,*ep;
434: {
435: register i, j;
436: char duple[3];
437: if(ep-bp<=1)
438: return(0);
439: if(vowel(*ep)) {
440: if(monosyl(bp,ep))
441: return(0);
442: }
443: i = dict(bp,ep);
444: if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
445: ep--;
446: deriv[++lev] = duple;
447: duple[0] = '+';
448: duple[1] = *ep;
449: duple[2] = 0;
450: i = dict(bp,ep);
451: }
452: if(vflag==0||i==0)
453: return(i);
454: j = lev;
455: do {
456: if(deriv[j])
457: strcat(affix,deriv[j]);
458: } while(--j>0);
459: strcat(affix,"\t");
460: return(i);
461: }
462:
463:
/*
 * Return 1 if bp..ep has at least two characters, ends in a vowel
 * followed by a non-vowel other than x or w, and has no vowel before
 * that final vowel; otherwise return 0.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *cp;

	if(ep - bp < 2)
		return(0);
	if(vowel(ep[-1]))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(ep[-1] == 'x' || ep[-1] == 'w')
		return(0);
	/* everything in front of the final vowel must be a non-vowel */
	for(cp = ep - 2; cp > bp; )
		if(vowel(*--cp))
			return(0);
	return(1);
}
477:
478: char *
479: skipv(s)
480: char *s;
481: {
482: if(s>=word&&vowel(*s))
483: s--;
484: while(s>=word&&!vowel(*s))
485: s--;
486: return(s);
487: }
488:
/*
 * Return 1 if c, after Tolower, is one of a e i o u y; else 0.
 */
int
vowel(c)
int c;
{
	c = Tolower(c);
	return(c == 'a' || c == 'e' || c == 'i' ||
	    c == 'o' || c == 'u' || c == 'y');
}
502:
503: /* crummy way to Britishise */
504: ise()
505: {
506: register struct suftab *p;
507: for(p = suftab;p->suf;p++) {
508: ztos(p->suf);
509: ztos(p->d1);
510: ztos(p->a1);
511: }
512: }
/*
 * Change every 'z' in the NUL-terminated string s to 's', in place.
 * No meaningful return value.
 */
int
ztos(s)
char *s;
{
	register char *cp;

	cp = s;
	while(*cp != 0) {
		if(*cp == 'z')
			*cp = 's';
		cp++;
	}
}
520:
521: dict(bp,ep)
522: char *bp, *ep;
523: {
524: register char *wp;
525: long h;
526: register long *lp;
527: register i;
528: if(xflag)
529: printf("=%.*s\n",ep-bp,bp);
530: for(i=0; i<NP; i++) {
531: for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
532: h += *wp * *lp;
533: h += '\n' * *lp;
534: h %= p[i];
535: if(get(h)==0)
536: return(0);
537: }
538: return(1);
539: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.