|
|
1.1 root 1:
2: #include "code.h"
3:
4: #ifndef _POSIX_SOURCE
5:
6: #include <stdio.h>
7: #include <ctype.h>
8: #include <libc.h>
9:
10: #else
11:
12: #include <fcntl.h>
13: #include <stdio.h>
14: #include <ctype.h>
15: #include <stdlib.h>
16: #include <unistd.h>
17: #include <string.h>
18:
19: #endif
20:
/*
 * isvowel(c): nonzero when c is marked in voweltab.  The index is masked
 * to 7 bits so that a char with the high bit set (negative where char is
 * signed) cannot index outside the 128-entry table.  c is evaluated
 * exactly once; callers rely on that, e.g. isvowel(*--cp).
 */
#define isvowel(c) voweltab[(c) & 0x7f]
/*
 * Tolower(c): lower-case an upper-case letter, pass anything else through.
 * c is evaluated more than once, so it must be free of side effects.
 * The cast keeps the <ctype.h> argument representable as unsigned char.
 */
#define Tolower(c) (isupper((unsigned char)(c))? (c)-'A'+'a': (c))
/* pair(a,b): pack two character codes into one int (usable as a case label) */
#define pair(a,b) (((a)<<8) | (b))
#define DLEV 2	/* added to lev by trysuff for each suffix it strips */
#define DSIZ 40	/* derivation levels recorded in deriv[] */

typedef long Bits;	/* set of word-class / affix flag bits */
typedef unsigned char uchar;
/* Set(h, f): the bits of f that are present in h */
#define Set(h, f) ((long)(h) & (f))
30:
31: Bits nop(char*, char*, char*, int, int);
32: Bits strip(char*, char*, char*, int, int);
33: Bits ize(char*, char*, char*, int, int);
34: Bits i_to_y(char*, char*, char*, int, int);
35: Bits ily(char*, char*, char*, int, int);
36: Bits subst(char*, char*, char*, int, int);
37: Bits CCe(char*, char*, char*, int, int);
38: Bits tion(char*, char*, char*, int, int);
39: Bits an(char*, char*, char*, int, int);
40: Bits s(char*, char*, char*, int, int);
41: Bits es(char*, char*, char*, int, int);
42: Bits bility(char*, char*, char*, int, int);
43: Bits y_to_e(char*, char*, char*, int, int);
44: Bits VCe(char*, char*, char*, int, int);
45:
46: Bits trypref(char*, char*, int, int);
47: Bits tryword(char*, char*, int, int);
48: Bits trysuff(char*, int, int);
49: Bits dict(char*, char*);
50: void typeprint(Bits);
51: void pcomma(char*);
52:
53: void runout(char*);
54: void ise(void);
55: int ordinal(void);
56: char* skipv(char*);
57: int inun(char*, Bits);
58: char* ztos(char*);
59: void readdict(char*);
60:
61: char *strdupl(char *);
62:
/* One prefix-table entry; each table ends with an entry whose s is 0. */
typedef struct Ptab
{
	char *s;	/* prefix text, compared against the start of the word */
	int flag;	/* 0, or IN: trypref then consults inun() */
} Ptab;
69:
70: typedef struct Suftab Suftab;
71: struct Suftab
72: {
73: char *suf;
74: Bits (*p1)(char*, char*, char*, int, int);
75: int n1;
76: char *d1;
77: char *a1;
78: int flag;
79: int affixable;
80: Bits (*p2)(char*, char*, char*, int, int);
81: int n2;
82: char *d2;
83: char *a2;
84: };
85:
86: Suftab staba[] = {
87: {"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
88: 0
89: };
90:
91: Suftab stabc[] =
92: {
93: {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
94: {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
95: {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
96: {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
97: {"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
98: {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
99: {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
100: {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
101: {"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
102: 0
103: };
104: Suftab stabd[] =
105: {
106: {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
107: {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
108: 0
109: };
110: Suftab stabe[] =
111: {
112: /*
113: * V_affix for comment ->commence->commentment??
114: */
115: {"ecn",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
116: {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
117: {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
118: {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
119: {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
120: {"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
121: 0
122: };
123: Suftab stabg[] =
124: {
125: {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
126: {"gnikam",strip,6,"","+making",NOUN,NOUN},
127: {"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
128: {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
129: 0
130: };
131: Suftab stabl[] =
132: {
133: {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
134: {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
135: {"latnem",strip,2,"","+al",N_AFFIX,ADJ},
136: {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
137: {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
138: 0
139: };
140: Suftab stabm[] =
141: {
142: /* congregational + ism */
143: {"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
144: {"margo",subst,-1,"-ph+m","",NOUN,NOUN},
145: 0
146: };
147: Suftab stabn[] =
148: {
149: {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
150: {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
151: {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
152: {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
153: {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
154: {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
155: {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
156: {"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
157: {"nem",strip,3,"","+man",MAN,PROP_COLLECT},
158: 0
159: };
160: Suftab stabp[] =
161: {
162: {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
163: 0
164: };
165: Suftab stabr[] =
166: {
167: {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
168: {"reyhparg",nop,0,"","",0,NOUN},
169: {"rekam",strip,5,"","+maker",NOUN,NOUN},
170: {"repeek",strip,6,"","+keeper",NOUN,NOUN},
171: {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"},
172: {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
173: {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
174: {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
175: 0
176: };
177: Suftab stabs[] =
178: {
179: {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
180: {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
181: {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"},
182: {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
183: {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
184: 0
185: };
186: Suftab stabt[] =
187: {
188: {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
189: {"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" },
190: {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
191: {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
192: 0
193: };
194: Suftab staby[] =
195: {
196: {"ytilb",nop,0,"","",0,NOUN},
197: {"ycn",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
198: {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
199: {"ytisuo",nop,0,"","",NOUN},
200: {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
201: {"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
202: {"ylc",nop,0,"","",0},
203: {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
204: {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
205: {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
206: 0
207: };
208: Suftab stabz[] =
209: {
210: 0
211: };
212: Suftab* suftab[] =
213: {
214: staba,
215: stabz,
216: stabc,
217: stabd,
218: stabe,
219: stabz,
220: stabg,
221: stabz,
222: stabz,
223: stabz,
224: stabz,
225: stabl,
226: stabm,
227: stabn,
228: stabz,
229: stabp,
230: stabz,
231: stabr,
232: stabs,
233: stabt,
234: stabz,
235: stabz,
236: stabz,
237: stabz,
238: staby,
239: stabz,
240: };
241:
242: Ptab ptaba[] =
243: {
244: "anti", 0,
245: "auto", 0,
246: 0
247: };
248: Ptab ptabb[] =
249: {
250: "bio", 0,
251: 0
252: };
253: Ptab ptabc[] =
254: {
255: "counter", 0,
256: 0
257: };
258: Ptab ptabd[] =
259: {
260: "dis", 0,
261: 0
262: };
263: Ptab ptabe[] =
264: {
265: "electro", 0,
266: 0
267: };
268: Ptab ptabf[] =
269: {
270: "femto", 0,
271: 0
272: };
273: Ptab ptabg[] =
274: {
275: "geo", 0,
276: "giga", 0,
277: 0
278: };
279: Ptab ptabh[] =
280: {
281: "hyper", 0,
282: 0
283: };
284: Ptab ptabi[] =
285: {
286: "immuno", 0,
287: "im", IN,
288: "intra", 0,
289: "inter", 0,
290: "in", IN,
291: "ir", IN,
292: "iso", 0,
293: 0
294: };
295: Ptab ptabj[] =
296: {
297: 0
298: };
299: Ptab ptabk[] =
300: {
301: "kilo", 0,
302: 0
303: };
304: Ptab ptabl[] =
305: {
306: 0
307: };
308: Ptab ptabm[] =
309: {
310: "magneto", 0,
311: "mega", 0,
312: "meta", 0,
313: "micro", 0,
314: "mid", 0,
315: "milli", 0,
316: "mini", 0,
317: "mis", 0,
318: "mono", 0,
319: "multi", 0,
320: 0
321: };
322: Ptab ptabn[] =
323: {
324: "nano", 0,
325: "neuro", 0,
326: "non", 0,
327: 0
328: };
329: Ptab ptabo[] =
330: {
331: "out", 0,
332: "over", 0,
333: 0
334: };
335: Ptab ptabp[] =
336: {
337: "para", 0,
338: "photo", 0,
339: "pico", 0,
340: "poly", 0,
341: "pre", 0,
342: "pseudo", 0,
343: "psycho", 0,
344: 0
345: };
346: Ptab ptabq[] =
347: {
348: "quasi", 0,
349: 0
350: };
351: Ptab ptabr[] =
352: {
353: "radio", 0,
354: "re", 0,
355: 0
356: };
357: Ptab ptabs[] =
358: {
359: "semi", 0,
360: "stereo", 0,
361: "sub", 0,
362: "super", 0,
363: 0
364: };
365: Ptab ptabt[] =
366: {
367: "tele", 0,
368: "tera", 0,
369: "thermo", 0,
370: 0
371: };
372: Ptab ptabu[] =
373: {
374: "ultra", 0,
375: "under", 0, /*must precede un*/
376: "un", IN,
377: 0
378: };
379: Ptab ptabv[] =
380: {
381: 0
382: };
383: Ptab ptabw[] =
384: {
385: 0
386: };
387: Ptab ptabx[] =
388: {
389: 0
390: };
391: Ptab ptaby[] =
392: {
393: 0
394: };
395: Ptab ptabz[] =
396: {
397: 0
398: };
399:
400: Ptab* preftab[] =
401: {
402: ptaba,
403: ptabb,
404: ptabc,
405: ptabd,
406: ptabe,
407: ptabf,
408: ptabg,
409: ptabh,
410: ptabi,
411: ptabj,
412: ptabk,
413: ptabl,
414: ptabm,
415: ptabn,
416: ptabo,
417: ptabp,
418: ptabq,
419: ptabr,
420: ptabs,
421: ptabt,
422: ptabu,
423: ptabv,
424: ptabw,
425: ptabx,
426: ptaby,
427: ptabz,
428: };
429:
430: typedef struct {
431: char *mesg;
432: enum { NONE, SUFF, PREF} type;
433: } Deriv;
434:
435: int cflag;
436: int fflag;
437: int vflag;
438: int xflag;
439: char word[500];
440: char original[500];
441: Deriv deriv[DSIZ+3];
442: Deriv emptyderiv;
443: char affix[DSIZ*10]; /* 10 is longest affix message */
444: int prefcount;
445: int suffcount;
446: char space[300000]; /* must be as large as "words"+"space" in pcode run */
447: Bits encode[2048]; /* must be as long as "codes" in pcode run */
448: int nencode;
449: char voweltab[128];
450: char* spacep[128*128+1]; /* pointer to words starting with 'xx' */
451:
452: char* codefile = "/usr/lib/spell/amspell";
453: char* brfile = "/usr/lib/spell/brspell";
454:
455: main(int argc, char *argv[])
456: {
457: char *ep, *cp;
458: char *dp;
459: int j, i, c;
460: int low = 0;
461: Bits h;
462:
463: for(i=0; c = "aeiouyAEIOUY"[i]; i++)
464: voweltab[c] = 1;
465: while(argc > 1) {
466: if(argv[1][0] != '-')
467: break;
468: for(i=1; c = argv[1][i]; i++)
469: switch(c) {
470: default:
471: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n");
472: exit(1);
473:
474: case 'b':
475: ise();
476: if(!fflag)
477: codefile = brfile;
478: continue;
479:
480: case 'C':
481: vflag++;
482: case 'c':
483: setbuf(stdout,0);
484: cflag++;
485: continue;
486:
487: case 'v':
488: vflag++;
489: continue;
490:
491: case 'x':
492: xflag++;
493: continue;
494:
495: case 'f':
496: if(argc <= 2) {
497: fprintf(stderr, "spell: -f requires another argument\n");
498: exit(1);
499: }
500: argv++;
501: argc--;
502: fflag++;
503: codefile = argv[1];
504: goto brk;
505: }
506: brk:
507: argv++;
508: argc--;
509: }
510: readdict(codefile);
511: if(argc > 1) {
512: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n");
513: exit(1);
514: }
515: for(;;) {
516: loop:
517: affix[0] = 0;
518: for(ep=original; ; ep++) {
519: if(ep >= original + sizeof(original) - 1) {
520: *ep = 0;
521: runout(original);
522: goto loop;
523: }
524: j = getchar();
525: if(j == EOF)
526: exit(0);
527: if(j != '\n')
528: *ep = j;
529: else {
530: *ep = 0;
531: break;
532: }
533: }
534: low = 0;
535: for(ep=word,dp=original; j = *dp; ep++,dp++) {
536: if(islower(j))
537: low++;
538: if(ep >= word+sizeof(word)-1)
539: break;
540: *ep = j;
541: }
542: *ep = 0;
543:
544: h = ~STOP;
545: if(word[1] == 0 && isalnum(word[0]) ||
546: isdigit(word[0]) && ordinal())
547: goto check;
548:
549: h = 0;
550: if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
551: for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
552: *dp = Tolower(*cp);
553: if(!h)
554: for(;;) { /* at most twice */
555: if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
556: break;
557: if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
558: break;
559: if(!isupper(word[0]))
560: break;
561: cp = original;
562: dp = word;
563: while(*dp = *cp++) {
564: if(!low)
565: *dp = Tolower(*dp);
566: dp++;
567: }
568: word[0] = Tolower(word[0]);
569: }
570: check:
571: if(cflag) {
572: if(!h || Set(h,STOP))
573: putchar('-');
574: else if(!vflag)
575: putchar('+');
576: else
577: putchar('0' + (suffcount>0) +
578: (prefcount>4? 8: 2*prefcount));
579: } else if(!h || Set(h,STOP))
580: printf("%s\n", original);
581: else if(affix[0] != 0 && affix[0] != '.')
582: printf("%s\t%s\n", affix, original);
583: }
584: }
585:
586: /* strip exactly one suffix and do
587: * indicated routine(s), which may recursively
588: * strip suffixes
589: */
590: Bits
591: trysuff(char* ep, int lev, int flag)
592: {
593: Suftab *t;
594: char *cp, *sp;
595: Bits h = 0;
596: int initchar = ep[-1];
597:
598: flag &= ~MONO;
599: lev += DLEV;
600: if(lev<DSIZ)
601: deriv[lev] = deriv[lev-1] = emptyderiv;
602: if(!islower(initchar))
603: return h;
604: for(t=suftab[initchar-'a']; sp=t->suf; t++) {
605: cp = ep;
606: while(*sp)
607: if(*--cp != *sp++)
608: goto next;
609: for(sp=ep-t->n1; --sp >= word && !isvowel(*sp);)
610: ;
611: if(sp < word)
612: continue;
613: if(!(t->affixable & flag))
614: return 0;
615: h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
616: if(!h && t->p2!=0) {
617: if(lev<DSIZ)
618: deriv[lev] = deriv[lev+1] = emptyderiv;
619: h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
620: }
621: break;
622: next:;
623: }
624: return h;
625: }
626:
627: Bits
628: nop(char* ep, char* d, char* a, int lev, int flag)
629: {
630: #pragma ref ep
631: #pragma ref d
632: #pragma ref a
633: #pragma ref lev
634: #pragma ref flag
635: return 0;
636: }
637:
638: Bits
639: cstrip(char* ep, char* d, char* a, int lev, int flag)
640: {
641: int temp = ep[0];
642:
643: if(isvowel(temp) && isvowel(ep[-1])) {
644: switch(pair(ep[-1],ep[0])) {
645: case pair('a', 'a'):
646: case pair('a', 'e'):
647: case pair('a', 'i'):
648: case pair('e', 'a'):
649: case pair('e', 'e'):
650: case pair('e', 'i'):
651: case pair('i', 'i'):
652: case pair('o', 'a'):
653: return 0;
654: }
655: } else
656: if(temp==ep[-1]&&temp==ep[-2])
657: return 0;
658: return strip(ep,d,a,lev,flag);
659: }
660:
661: Bits
662: strip(char* ep, char* d, char* a, int lev, int flag)
663: {
664: #pragma ref d
665: Bits h = trypref(ep, a, lev, flag);
666:
667: if(Set(h,MONO) && isvowel(*ep) && isvowel(ep[-2]))
668: h = 0;
669: if(h)
670: return h;
671: if(isvowel(*ep) && !isvowel(ep[-1]) && ep[-1]==ep[-2]) {
672: h = trypref(ep-1,a,lev,flag|MONO);
673: if(h)
674: return h;
675: }
676: return trysuff(ep,lev,flag);
677: }
678:
679: Bits
680: s(char* ep, char* d, char* a, int lev, int flag)
681: {
682: if(lev > DLEV+1)
683: return 0;
684: if(*ep=='s') {
685: switch(ep[-1]) {
686: case 'y':
687: if(isvowel(ep[-2])||isupper(*word))
688: break; /*says Kennedys*/
689: case 'x':
690: case 'z':
691: case 's':
692: return 0;
693: case 'h':
694: switch(ep[-2]) {
695: case 'c':
696: case 's':
697: return 0;
698: }
699: }
700: }
701: return strip(ep,d,a,lev,flag);
702: }
703:
704: Bits
705: an(char* ep, char* d, char* a, int lev, int flag)
706: {
707: #pragma ref d
708: if(!isupper(*word)) /*must be proper name*/
709: return 0;
710: return trypref(ep,a,lev,flag);
711: }
712:
713: Bits
714: ize(char* ep, char* d, char* a, int lev, int flag)
715: {
716: #pragma ref a
717: int temp = ep[-1];
718: Bits h;
719:
720: ep[-1] = 'e';
721: h = strip(ep,"",d,lev,flag);
722: ep[-1] = temp;
723: return h;
724: }
725:
726: Bits
727: y_to_e(char* ep, char* d, char* a, int lev, int flag)
728: {
729: #pragma ref a
730: Bits h;
731: int temp;
732:
733: switch(ep[-1]) {
734: case 'a':
735: case 'e':
736: case 'i':
737: return 0;
738: }
739: temp = *ep;
740: *ep++ = 'e';
741: h = strip(ep,"",d,lev,flag);
742: *--ep = temp;
743: return h;
744: }
745:
746: Bits
747: ily(char* ep, char* d, char* a, int lev, int flag)
748: {
749: int temp = ep[0];
750: char *cp = ep;
751:
752: if(temp==ep[-1]&&temp==ep[-2]) /* sillly */
753: return 0;
754: if(*--cp=='y' && !isvowel(*--cp)) /* happyly */
755: while(cp>word)
756: if(isvowel(*--cp)) /* shyness */
757: return 0;
758: if(ep[-1]=='i')
759: return i_to_y(ep,d,a,lev,flag);
760: return cstrip(ep,d,a,lev,flag);
761: }
762:
763: Bits
764: bility(char* ep, char* d, char* a, int lev, int flag)
765: {
766: *ep++ = 'l';
767: return y_to_e(ep,d,a,lev,flag);
768: }
769:
770: Bits
771: i_to_y(char* ep, char* d, char* a, int lev, int flag)
772: {
773: Bits h;
774: int temp;
775:
776: if(isupper(*word))
777: return 0;
778: if((temp=ep[-1])=='i' && !isvowel(ep[-2])) {
779: ep[-1] = 'y';
780: a = d;
781: }
782: h = cstrip(ep,"",a,lev,flag);
783: ep[-1] = temp;
784: return h;
785: }
786:
787: Bits
788: es(char* ep, char* d, char* a, int lev, int flag)
789: {
790: if(lev>DLEV)
791: return 0;
792: switch(ep[-1]) {
793: default:
794: return 0;
795: case 'i':
796: return i_to_y(ep,d,a,lev,flag);
797: case 'h':
798: switch(ep[-2]) {
799: default:
800: return 0;
801: case 'c':
802: case 's':
803: break;
804: }
805: case 's':
806: case 'z':
807: case 'x':
808: return strip(ep,d,a,lev,flag);
809: }
810: }
811:
812: Bits
813: subst(char* ep, char* d, char* a, int lev, int flag)
814: {
815: #pragma ref a
816: char *u,*t;
817: Bits h;
818:
819: if(skipv(skipv(ep-1)) < word)
820: return 0;
821: for(t=d; *t!='+'; t++)
822: continue;
823: for(u=ep; *--t!='-';)
824: *--u = *t;
825: h = strip(ep,"",d,lev,flag);
826: while(*++t != '+')
827: continue;
828: while(*++t)
829: *u++ = *t;
830: return h;
831: }
832:
833: Bits
834: tion(char* ep, char* d, char* a, int lev, int flag)
835: {
836: switch(ep[-2]) {
837: default:
838: return trypref(ep,a,lev,flag);
839: case 'a':
840: case 'e':
841: case 'i':
842: case 'o':
843: case 'u':
844: return y_to_e(ep,d,a,lev,flag);
845: }
846: }
847:
848: /*
849: * possible consonant-consonant-e ending
850: */
851: Bits
852: CCe(char* ep, char* d, char* a, int lev, int flag)
853: {
854: Bits h;
855:
856: switch(ep[-1]) {
857: case 'l':
858: if(isvowel(ep[-2]))
859: break;
860: switch(ep[-2]) {
861: case 'l':
862: case 'r':
863: case 'w':
864: break;
865: default:
866: return y_to_e(ep,d,a,lev,flag);
867: }
868: break;
869: case 'c':
870: case 'g':
871: if(*ep == 'a') /* prevent -able for -eable */
872: return 0;
873: case 's':
874: case 'v':
875: case 'z':
876: if(ep[-2]==ep[-1])
877: break;
878: if(isvowel(ep[-2]))
879: break;
880: case 'u':
881: if(h = y_to_e(ep,d,a,lev,flag))
882: return h;
883: if(!(ep[-2]=='n' && ep[-1]=='g'))
884: return 0;
885: }
886: return VCe(ep,d,a,lev,flag);
887: }
888:
889: /*
890: * possible consonant-vowel-consonant-e ending
891: */
892: Bits
893: VCe(char* ep, char* d, char* a, int lev, int flag)
894: {
895: int c;
896: Bits h;
897:
898: c = ep[-1];
899: if(c=='e')
900: return 0;
901: if(!isvowel(c) && isvowel(ep[-2])) {
902: c = *ep;
903: *ep++ = 'e';
904: h = trypref(ep,d,lev,flag);
905: if(!h)
906: h = trysuff(ep,lev,flag);
907: if(h)
908: return h;
909: ep--;
910: *ep = c;
911: }
912: return cstrip(ep,d,a,lev,flag);
913: }
914:
915: Ptab*
916: lookuppref(char** wp, char* ep)
917: {
918: Ptab *sp;
919: char *bp,*cp;
920: int initchar = Tolower(**wp);
921:
922: if(!isalpha(initchar))
923: return 0;
924: for(sp=preftab[initchar-'a'];sp->s;sp++) {
925: bp = *wp;
926: for(cp= sp->s;*cp; )
927: if(*bp++!=*cp++)
928: goto next;
929: for(cp=bp;cp<ep;cp++)
930: if(isvowel(*cp)) {
931: *wp = bp;
932: return sp;
933: }
934: next:;
935: }
936: return 0;
937: }
938:
939: /* while word is not in dictionary try stripping
940: * prefixes. Fail if no more prefixes.
941: */
942: Bits
943: trypref(char* ep, char* a, int lev, int flag)
944: {
945: Ptab *tp;
946: char *bp, *cp;
947: char *pp;
948: Bits h;
949: char space[20];
950:
951: if(lev<DSIZ) {
952: deriv[lev].mesg = a;
953: deriv[lev].type = *a=='.'? NONE: SUFF;
954: }
955: if(h = tryword(word,ep,lev,flag)) {
956: if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
957: return h;
958: h = 0;
959: }
960: bp = word;
961: pp = space;
962: if(lev<DSIZ) {
963: deriv[lev+1].mesg = pp;
964: deriv[lev+1].type = 0;
965: }
966: while(tp=lookuppref(&bp,ep)) {
967: *pp++ = '+';
968: cp = tp->s;
969: while(pp<space+sizeof(space) && (*pp = *cp++))
970: pp++;
971: deriv[lev+1].type += PREF;
972: h = tryword(bp,ep,lev+1,flag);
973: if(Set(h,NOPREF) ||
974: ((tp->flag&IN) && inun(bp-2,h)==0)) {
975: h = 0;
976: break;
977: }
978: if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
979: break;
980: h = 0;
981: }
982: if(lev<DSIZ)
983: deriv[lev+1] = deriv[lev+2] = emptyderiv;
984: return h;
985: }
986:
987: Bits
988: tryword(char* bp, char* ep, int lev, int flag)
989: {
990: int j;
991: Bits h = 0;
992: char duple[3];
993:
994: if(ep-bp <= 1)
995: return h;
996: if(flag&MONO) {
997: if(lev<DSIZ) {
998: deriv[++lev].mesg = duple;
999: deriv[lev].type = SUFF;
1000: }
1001: duple[0] = '+';
1002: duple[1] = *ep;
1003: duple[2] = 0;
1004: }
1005: h = dict(bp, ep);
1006: if(vflag==0 || h==0)
1007: return h;
1008: /*
1009: * when derivations are wanted, collect them
1010: * for printing
1011: */
1012: j = lev;
1013: prefcount = suffcount = 0;
1014: do {
1015: if(j<DSIZ && deriv[j].type) {
1016: strcat(affix, deriv[j].mesg);
1017: if(deriv[j].type == SUFF)
1018: suffcount++;
1019: else if(deriv[j].type != NONE)
1020: prefcount = deriv[j].type/PREF;
1021: }
1022: } while(--j > 0);
1023: return h;
1024: }
1025:
1026: int
1027: inun(char* bp, Bits h)
1028: {
1029: if(*bp == 'u')
1030: return Set(h, IN) == 0;
1031: /* *bp == 'i' */
1032: if(Set(h, IN) == 0)
1033: return 0;
1034: switch(bp[2]) {
1035: case 'r':
1036: return bp[1] == 'r';
1037: case 'm':
1038: case 'p':
1039: return bp[1] == 'm';
1040: }
1041: return bp[1] == 'n';
1042: }
1043:
1044: char*
1045: skipv(char *s)
1046: {
1047: if(s >= word && isvowel(*s))
1048: s--;
1049: while(s >= word && !isvowel(*s))
1050: s--;
1051: return s;
1052: }
1053:
1054: /*
1055: * crummy way to Britishise
1056: */
1057: void
1058: ise(void)
1059: {
1060: Suftab *p;
1061: int i;
1062:
1063: for(i=0; i<26; i++)
1064: for(p = suftab[i]; p->suf; p++) {
1065: p->suf = ztos(p->suf);
1066: p->d1 = ztos(p->d1);
1067: p->a1 = ztos(p->a1);
1068: }
1069: }
1070:
/*
 * Return as with every 'z' replaced by 's'.  A string without 'z' is
 * returned unchanged; otherwise the result is a fresh copy obtained from
 * strdupl and the argument is left untouched.
 */
char*
ztos(char *as)
{
	char *copy, *p;

	if(strchr(as, 'z') == 0)
		return as;
	copy = strdupl(as);
	for(p = strchr(copy, 'z'); p != 0; p = strchr(p+1, 'z'))
		*p = 's';
	return copy;
}
1088:
1089: Bits
1090: dict(char* bp, char* ep)
1091: {
1092: char *cp, *cp1, *w, *wp, *we;
1093: int n, f;
1094:
1095: w = bp;
1096: we = ep;
1097: n = ep-bp;
1098: if(n <= 1)
1099: return NOUN;
1100:
1101: f = w[0] & 0x7f;
1102: f *= 128;
1103: f += w[1] & 0x7f;
1104: bp = spacep[f];
1105: ep = spacep[f+1];
1106:
1107: loop:
1108: if(bp >= ep) {
1109: if(xflag)
1110: fprintf(stderr,"=%.*s\n",n, w);
1111: return 0;
1112: }
1113: /*
1114: * find the beginning of some word in the middle
1115: */
1116: cp = bp + (ep-bp)/2;
1117:
1118: while(cp > bp && !(*cp & 0x80))
1119: cp--;
1120: while(cp > bp && (cp[-1] & 0x80))
1121: cp--;
1122:
1123: wp = w + 2; /* skip two letters */
1124: cp1 = cp + 2; /* skip affix code */
1125: for(;;) {
1126: if(wp >= we) {
1127: if(*cp1&0x80)
1128: goto found;
1129: else
1130: f = 1;
1131: break;
1132: }
1133: if(*cp1&0x80) {
1134: f = -1;
1135: break;
1136: }
1137: f = *cp1++ - *wp++;
1138: if(f != 0)
1139: break;
1140: }
1141:
1142: if(f < 0) {
1143: while(!(*cp1&0x80))
1144: cp1++;
1145: bp = cp1;
1146: goto loop;
1147: }
1148: ep = cp;
1149: goto loop;
1150: found:
1151: f = ((cp[0] & 0x7) << 8) |
1152: (cp[1] & 0xff);
1153: if(xflag) {
1154: fprintf(stderr,"=%.*s ",n,w);
1155: typeprint(encode[f]);
1156: }
1157: return encode[f];
1158: }
1159:
1160: void
1161: typeprint(Bits h)
1162: {
1163:
1164: pcomma("");
1165: if(h & NOUN)
1166: pcomma("n");
1167: if(h & PROP_COLLECT)
1168: pcomma("pc");
1169: if(h & VERB) {
1170: if((h & VERB) == VERB)
1171: pcomma("v");
1172: else
1173: if((h & VERB) == V_IRREG)
1174: pcomma("vi");
1175: else
1176: if(h & ED)
1177: pcomma("ed");
1178: }
1179: if(h & ADJ)
1180: pcomma("a");
1181: if(h & COMP) {
1182: if((h & COMP) == ACTOR)
1183: pcomma("er");
1184: else
1185: pcomma("comp");
1186: }
1187: if(h & DONT_TOUCH)
1188: pcomma("d");
1189: if(h & N_AFFIX)
1190: pcomma("na");
1191: if(h & ADV)
1192: pcomma("adv");
1193: if(h & ION)
1194: pcomma("ion");
1195: if(h & V_AFFIX)
1196: pcomma("va");
1197: if(h & MAN)
1198: pcomma("man");
1199: if(h & NOPREF)
1200: pcomma("nopref");
1201: if(h & MONO)
1202: pcomma("ms");
1203: if(h & IN)
1204: pcomma("in");
1205: if(h & _Y)
1206: pcomma("y");
1207: if(h & STOP)
1208: pcomma("s");
1209: fprintf(stderr, "\n");
1210: }
1211:
/*
 * Print s on stderr as the next item of a comma-separated list: the
 * first item after a reset is printed bare, later ones with a leading
 * comma.  Calling with an empty string resets the list and prints
 * nothing.
 */
void
pcomma(char *s)
{
	static int flag;	/* nonzero once an item has been printed */

	if(*s == 0) {
		flag = 0;
		return;
	}
	if(!flag) {
		fprintf(stderr, "%s", s);
		flag = 1;
	} else
		fprintf(stderr, ",%s", s);
}
1227:
1228: int
1229: ordinal(void)
1230: {
1231: char *cp = word;
1232: static char sp[4];
1233:
1234: while(isdigit(*cp))
1235: cp++;
1236: strncpy(sp,cp,3);
1237: if(isupper(cp[0]) && isupper(cp[1])) {
1238: sp[0] = Tolower(cp[0]);
1239: sp[1] = Tolower(cp[1]);
1240: }
1241: return 0 == strncmp(sp,
1242: cp[-2]=='1'? "th": /* out of bounds if 1 digit */
1243: *--cp=='1'? "st": /* harmless */
1244: *cp=='2'? "nd":
1245: *cp=='3'? "rd":
1246: "th", 3);
1247: }
1248:
1249: /* layout of file entry: first byte has bit 0x80 turned on.
1250: next 4 bits count number of characters common between this
1251: entry and previous one. last three bits concatenated with
1252: second byte are the affixing code, so arranged that the 0x80
1253: bit is zero in all bytes but the first. 3rd and following
1254: bytes are the remainder of the dictionary word.
1255:
1256: layout in memory: common prefixes are expanded, and the
1257: first two letters of each word are deleted and found
1258: instead by lookup in table spacep, which points to the
1259: first word for each two-letter prefix.
1260: */
1261:
1262: void
1263: readdict(char *file)
1264: {
1265: char *s, *is, *lasts, *ls;
1266: int c, i, sp, p;
1267: int f;
1268: long l;
1269:
1270: f = open(file, 0);
1271: if(f == -1) {
1272: fprintf(stderr, "spell: cannot open %s\n", file);
1273: exit(1);
1274: }
1275: if(read(f, space, 2) != 2)
1276: goto bad;
1277: nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
1278: if(nencode > sizeof(encode)/sizeof(*encode))
1279: goto noroom;
1280: if(read(f, space, nencode*sizeof(*encode))
1281: != nencode*sizeof(*encode))
1282: goto bad;
1283: s = space;
1284: for(i=0; i<nencode; i++) {
1285: l = (long)(s[0] & 0xff) << 24;
1286: l |= (s[1] & 0xff) << 16;
1287: l |= (s[2] & 0xff) << 8;
1288: l |= s[3] & 0xff;
1289: encode[i] = (Bits)l;
1290: s += 4;
1291: }
1292: l = read(f, space, sizeof(space));
1293: if(l >= sizeof(space))
1294: goto noroom;
1295: is = space + (sizeof(space) - l);
1296: memmove(is, space, l);
1297:
1298: s = space;
1299: c = *is++ & 0xff;
1300: sp = -1;
1301: i = 0;
1302: lasts = 0; /* to prevent diagnostics */
1303: loop:
1304: if(s > is)
1305: goto noroom;
1306: if(c < 0) {
1307: close(f);
1308: while(sp < 128*128)
1309: spacep[++sp] = s;
1310: *s++ = 0x80; /* fence */
1311: return;
1312: }
1313: p = (c>>3) & 0xf;
1314: *s++ = c;
1315: *s++ = *is++ & 0xff;
1316: if(p <= 0)
1317: i = (*is++ & 0xff)*128;
1318: if(p <= 1) {
1319: if(!(*is & 0x80))
1320: i = i/128*128 + (*is++ & 0xff);
1321: if(i <= sp) {
1322: fprintf(stderr, "spell: the dict isn't "
1323: "sorted or memmove didn't work\n");
1324: goto bad;
1325: }
1326: while(sp < i)
1327: spacep[++sp] = s-2;
1328: }
1329: ls = lasts;
1330: lasts = s;
1331: for(p-=2; p>0; p--)
1332: *s++ = *ls++;
1333: for(;;) {
1334: if(is >= space+sizeof(space)) {
1335: c = -1;
1336: break;
1337: }
1338: c = *is++ & 0xff;
1339: if(c & 0x80)
1340: break;
1341: *s++ = c;
1342: }
1343: *s = 0;
1344: goto loop;
1345:
1346: bad:
1347: fprintf(stderr, "spell: trouble reading %s\n", file);
1348: exit(1);
1349: noroom:
1350: fprintf(stderr,"spell: not enough space for dictionary\n");
1351: exit(1);
1352: }
1353:
/*
 * same as strdup; portability hack
 *
 * Returns a freshly allocated copy of s.  If memory cannot be obtained
 * the program prints a message and exits with status 1, so the result
 * is never null.
 */
char *
strdupl(char *s)
{
	char *t = malloc(strlen(s)+1);

	if(t == 0) {
		fprintf(stderr, "spell: out of memory\n");
		exit(1);
	}
	strcpy(t, s);
	return t;
}
1361:
1362: void
1363: runout(char *s)
1364: {
1365: int c;
1366: if(!cflag)
1367: printf("%s", s);
1368: else {
1369: putchar('-');
1370: fflush(stdout);
1371: }
1372: do {
1373: if((c=getchar()) == EOF)
1374: exit(0);
1375: if(!cflag)
1376: putchar(c);
1377: } while(c != '\n');
1378: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.