|
|
1.1 root 1:
2: #include "code.h"
3:
4: #ifndef _POSIX_SOURCE
5:
6: #include <stdio.h>
7: #include <ctype.h>
8: #include <libc.h>
9:
10: #else
11:
12: #include <fcntl.h>
13: #include <stdio.h>
14: #include <ctype.h>
15: #include <stdlib.h>
16: #include <unistd.h>
17: #include <string.h>
18:
19: #endif
20:
/* Nonzero when c is marked in voweltab; c is used directly as the array index. */
21: #define isvowel(c) voweltab[c]
/* Map an upper-case letter to lower case; note that c is evaluated more than once. */
22: #define Tolower(c) (isupper(c)? (c)-'A'+'a': (c))
/* Combine two character codes into one integer so a pair can be a switch label. */
23: #define pair(a,b) (((a)<<8) | (b))
/* Amount trysuff adds to the derivation level on every call. */
24: #define DLEV 2
/* Only levels below DSIZ get a derivation message recorded in deriv[]. */
25: #define DSIZ 40
26:
/* Bit mask of word classes; the bit names are presumably declared in "code.h". */
27: typedef long Bits;
28: typedef unsigned char uchar;
/* The bits of h selected by mask f (nonzero when any of them is set). */
29: #define Set(h, f) ((long)(h) & (f))
30:
/*
 * Suffix routines referenced from the suffix tables.  All share one
 * signature: (ep, d, a, lev, flag).
 */
31: Bits nop(char*, char*, char*, int, int);
32: Bits strip(char*, char*, char*, int, int);
33: Bits ize(char*, char*, char*, int, int);
34: Bits i_to_y(char*, char*, char*, int, int);
35: Bits ily(char*, char*, char*, int, int);
36: Bits subst(char*, char*, char*, int, int);
37: Bits CCe(char*, char*, char*, int, int);
38: Bits tion(char*, char*, char*, int, int);
39: Bits an(char*, char*, char*, int, int);
40: Bits s(char*, char*, char*, int, int);
41: Bits es(char*, char*, char*, int, int);
42: Bits bility(char*, char*, char*, int, int);
43: Bits y_to_e(char*, char*, char*, int, int);
44: Bits VCe(char*, char*, char*, int, int);
45:
/* Prefix/suffix drivers, dictionary lookup and trace output. */
46: Bits trypref(char*, char*, int, int);
47: Bits tryword(char*, char*, int, int);
48: Bits trysuff(char*, int, int);
49: Bits dict(char*, char*);
50: void typeprint(Bits);
51: void pcomma(char*);
52:
/* Input handling, table setup and small helpers. */
53: void runout(char*);
54: void ise(void);
55: int ordinal(void);
56: char* skipv(char*);
57: int inun(char*, Bits);
58: char* ztos(char*);
59: void readdict(char*);
60:
61: char *strdupl(char *);
62:
/* One prefix table entry. */
63: typedef struct Ptab Ptab;
64: struct Ptab
65: {
/* the prefix text; a null s ends a table */
66: char* s;
/* trypref tests this against IN before calling inun */
67: int flag;
68: };
69:
/*
 * One suffix table entry.  trysuff calls p1 with the word end moved
 * back by n1 characters; when p1 returns 0 and p2 is set, p2 is
 * tried with the word end moved back by n2 characters.
 */
70: typedef struct Suftab Suftab;
71: struct Suftab
72: {
/* the suffix spelled backwards (it is matched from the end of the word); null ends a table */
73: char *suf;
74: Bits (*p1)(char*, char*, char*, int, int);
75: int n1;
/* d1 and a1 are handed to p1 as its d and a arguments */
76: char *d1;
77: char *a1;
/* passed (ORed with STOP) as the flag argument of p1 and p2 */
78: int flag;
/* trysuff gives up unless this shares a bit with the caller's flag */
79: int affixable;
80: Bits (*p2)(char*, char*, char*, int, int);
81: int n2;
82: char *d2;
83: char *a2;
84: };
85:
/*
 * Suffix tables, one per final letter of the word.  Each entry is
 * { reversed suffix, p1, n1, d1, a1, flag, affixable [, p2, n2, d2, a2] }
 * and each table ends with an entry whose suf is 0.  trysuff uses the
 * first entry whose suffix matches, so longer suffixes must come
 * before the shorter suffixes they end with.
 */
86: Suftab staba[] = {
87: {"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
88: 0
89: };
90:
91: Suftab stabc[] =
92: {
93: {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
94: {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
95: {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
96: {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
97: {"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
98: {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
99: {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
100: {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
101: {"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
102: 0
103: };
104: Suftab stabd[] =
105: {
106: {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
107: {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
108: 0
109: };
110: Suftab stabe[] =
111: {
112: /*
113: * V_affix for comment ->commence->commentment??
114: */
115: {"ecn",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
116: {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
117: {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
118: {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
119: {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
120: {"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
121: 0
122: };
123: Suftab stabg[] =
124: {
125: {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
126: {"gnikam",strip,6,"","+making",NOUN,NOUN},
127: {"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
128: {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
129: 0
130: };
131: Suftab stabl[] =
132: {
133: {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
134: {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
135: {"latnem",strip,2,"","+al",N_AFFIX,ADJ},
136: {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
137: {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
138: 0
139: };
140: Suftab stabm[] =
141: {
142: /* congregational + ism */
/* NOTE(review): a1 is "ism" here while every other entry spells it with a leading '+' -- confirm */
143: {"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
/* n1 is -1: the routine is called with the word end moved one place forward */
144: {"margo",subst,-1,"-ph+m","",NOUN,NOUN},
145: 0
146: };
147: Suftab stabn[] =
148: {
149: {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
150: {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
151: {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
152: {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
153: {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
154: {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
155: {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
156: {"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
157: {"nem",strip,3,"","+man",MAN,PROP_COLLECT},
158: 0
159: };
160: Suftab stabp[] =
161: {
162: {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
163: 0
164: };
165: Suftab stabr[] =
166: {
167: {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
168: {"reyhparg",nop,0,"","",0,NOUN},
169: {"rekam",strip,5,"","+maker",NOUN,NOUN},
170: {"repeek",strip,6,"","+keeper",NOUN,NOUN},
171: {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"},
172: {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
173: {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
174: {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
175: 0
176: };
177: Suftab stabs[] =
178: {
179: {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
180: {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
181: {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"},
182: {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
183: {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
184: 0
185: };
186: Suftab stabt[] =
187: {
188: {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
189: {"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" },
190: {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
191: {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
192: 0
193: };
194: Suftab staby[] =
195: {
196: {"ytilb",nop,0,"","",0,NOUN},
197: {"ycn",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
198: {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
199: {"ytisuo",nop,0,"","",NOUN},
200: {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
201: {"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
202: {"ylc",nop,0,"","",0},
203: {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
204: {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
205: {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
206: 0
207: };
/* shared empty table for final letters that have no suffix entries */
208: Suftab stabz[] =
209: {
210: 0
211: };
/* indexed by (last letter of the word) - 'a'; 26 slots, a through z */
212: Suftab* suftab[] =
213: {
214: staba,
215: stabz,
216: stabc,
217: stabd,
218: stabe,
219: stabz,
220: stabg,
221: stabz,
222: stabz,
223: stabz,
224: stabz,
225: stabl,
226: stabm,
227: stabn,
228: stabz,
229: stabp,
230: stabz,
231: stabr,
232: stabs,
233: stabt,
234: stabz,
235: stabz,
236: stabz,
237: stabz,
238: staby,
239: stabz,
240: };
241:
/*
 * Prefix tables, one per initial letter.  Each entry is a prefix
 * string followed by its flag (0 or IN); a lone 0 ends a table.
 * lookuppref takes the first entry that matches, so a longer prefix
 * must precede a shorter one that it starts with.
 */
242: Ptab ptaba[] =
243: {
244: "anti", 0,
245: "auto", 0,
246: 0
247: };
248: Ptab ptabb[] =
249: {
250: "bio", 0,
251: 0
252: };
253: Ptab ptabc[] =
254: {
255: "counter", 0,
256: 0
257: };
258: Ptab ptabd[] =
259: {
260: "dis", 0,
261: 0
262: };
263: Ptab ptabe[] =
264: {
265: "electro", 0,
266: 0
267: };
268: Ptab ptabf[] =
269: {
270: "femto", 0,
271: 0
272: };
273: Ptab ptabg[] =
274: {
275: "geo", 0,
276: "giga", 0,
277: 0
278: };
279: Ptab ptabh[] =
280: {
281: "hyper", 0,
282: 0
283: };
284: Ptab ptabi[] =
285: {
286: "immuno", 0,
287: "im", IN,
288: "intra", 0,
289: "inter", 0,
290: "in", IN,
291: "ir", IN,
292: "iso", 0,
293: 0
294: };
295: Ptab ptabj[] =
296: {
297: 0
298: };
299: Ptab ptabk[] =
300: {
301: "kilo", 0,
302: 0
303: };
304: Ptab ptabl[] =
305: {
306: 0
307: };
308: Ptab ptabm[] =
309: {
310: "magneto", 0,
311: "mega", 0,
312: "meta", 0,
313: "micro", 0,
314: "mid", 0,
315: "milli", 0,
316: "mini", 0,
317: "mis", 0,
318: "mono", 0,
319: "multi", 0,
320: 0
321: };
322: Ptab ptabn[] =
323: {
324: "nano", 0,
325: "neuro", 0,
326: "non", 0,
327: 0
328: };
329: Ptab ptabo[] =
330: {
331: "out", 0,
332: "over", 0,
333: 0
334: };
335: Ptab ptabp[] =
336: {
337: "para", 0,
338: "photo", 0,
339: "pico", 0,
340: "poly", 0,
341: "pre", 0,
342: "pseudo", 0,
343: "psycho", 0,
344: 0
345: };
346: Ptab ptabq[] =
347: {
348: "quasi", 0,
349: 0
350: };
351: Ptab ptabr[] =
352: {
353: "radio", 0,
354: "re", 0,
355: 0
356: };
357: Ptab ptabs[] =
358: {
359: "semi", 0,
360: "stereo", 0,
361: "sub", 0,
362: "super", 0,
363: 0
364: };
365: Ptab ptabt[] =
366: {
367: "tele", 0,
368: "thermo", 0,
369: 0
370: };
371: Ptab ptabu[] =
372: {
373: "ultra", 0,
374: "under", 0, /*must precede un*/
375: "un", IN,
376: 0
377: };
378: Ptab ptabv[] =
379: {
380: 0
381: };
382: Ptab ptabw[] =
383: {
384: 0
385: };
386: Ptab ptabx[] =
387: {
388: 0
389: };
390: Ptab ptaby[] =
391: {
392: 0
393: };
394: Ptab ptabz[] =
395: {
396: 0
397: };
398:
/* indexed by (lower-cased first letter of the word) - 'a' */
399: Ptab* preftab[] =
400: {
401: ptaba,
402: ptabb,
403: ptabc,
404: ptabd,
405: ptabe,
406: ptabf,
407: ptabg,
408: ptabh,
409: ptabi,
410: ptabj,
411: ptabk,
412: ptabl,
413: ptabm,
414: ptabn,
415: ptabo,
416: ptabp,
417: ptabq,
418: ptabr,
419: ptabs,
420: ptabt,
421: ptabu,
422: ptabv,
423: ptabw,
424: ptabx,
425: ptaby,
426: ptabz,
427: };
428:
/*
 * One step of a derivation: the affix message and its kind.  trypref
 * adds PREF to type once per stripped prefix, so type may exceed PREF.
 */
429: typedef struct {
430: char *mesg;
431: enum { NONE, SUFF, PREF} type;
432: } Deriv;
433:
/* option counters set in main: -c/-C, -f, -v/-C and -x respectively */
434: int cflag;
435: int fflag;
436: int vflag;
437: int xflag;
/* word is the working copy that the suffix routines edit in place */
438: char word[500];
/* original is the input line exactly as read; it is what gets printed */
439: char original[500];
/* derivation steps indexed by level; only levels below DSIZ are recorded */
440: Deriv deriv[DSIZ+3];
/* never assigned, so all zero; copied over deriv[] slots to clear them */
441: Deriv emptyderiv;
442: char affix[DSIZ*10]; /* 10 is longest affix message */
/* set by tryword when -v is on and a lookup succeeds */
443: int prefcount;
444: int suffcount;
445: char space[300000]; /* must be as large as "words"+"space" in pcode run */
446: Bits encode[2048]; /* must be as long as "codes" in pcode run */
/* number of entries of encode[] loaded by readdict */
447: int nencode;
/* voweltab[c] is 1 for the letters aeiouy in either case (filled in by main) */
448: char voweltab[128];
449: char* spacep[128*128+1]; /* pointer to words starting with 'xx' */
450:
/* default dictionary, and the one -b selects unless -f was given first */
451: char* codefile = "/usr/lib/spell/amspell";
452: char* brfile = "/usr/lib/spell/brspell";
453:
454: main(int argc, char *argv[])
455: {
456: char *ep, *cp;
457: char *dp;
458: int j, i, c;
459: int low = 0;
460: Bits h;
461:
462: for(i=0; c = "aeiouyAEIOUY"[i]; i++)
463: voweltab[c] = 1;
464: while(argc > 1) {
465: if(argv[1][0] != '-')
466: break;
467: for(i=1; c = argv[1][i]; i++)
468: switch(c) {
469: default:
470: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n");
471: exit(1);
472:
473: case 'b':
474: ise();
475: if(!fflag)
476: codefile = brfile;
477: continue;
478:
479: case 'C':
480: vflag++;
481: case 'c':
482: setbuf(stdout,0);
483: cflag++;
484: continue;
485:
486: case 'v':
487: vflag++;
488: continue;
489:
490: case 'x':
491: xflag++;
492: continue;
493:
494: case 'f':
495: if(argc <= 2) {
496: fprintf(stderr, "spell: -f requires another argument\n");
497: exit(1);
498: }
499: argv++;
500: argc--;
501: fflag++;
502: codefile = argv[1];
503: goto brk;
504: }
505: brk:
506: argv++;
507: argc--;
508: }
509: readdict(codefile);
510: if(argc > 1) {
511: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n");
512: exit(1);
513: }
514: for(;;) {
515: loop:
516: affix[0] = 0;
517: for(ep=original; ; ep++) {
518: if(ep >= original + sizeof(original) - 1) {
519: *ep = 0;
520: runout(original);
521: goto loop;
522: }
523: j = getchar();
524: if(j == EOF)
525: exit(0);
526: if(j != '\n')
527: *ep = j;
528: else {
529: *ep = 0;
530: break;
531: }
532: }
533: low = 0;
534: for(ep=word,dp=original; j = *dp; ep++,dp++) {
535: if(islower(j))
536: low++;
537: if(ep >= word+sizeof(word)-1)
538: break;
539: *ep = j;
540: }
541: *ep = 0;
542:
543: h = ~STOP;
544: if(word[1] == 0 && isalnum(word[0]) ||
545: isdigit(word[0]) && ordinal())
546: goto check;
547:
548: h = 0;
549: if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
550: for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
551: *dp = Tolower(*cp);
552: if(!h)
553: for(;;) { /* at most twice */
554: if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
555: break;
556: if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
557: break;
558: if(!isupper(word[0]))
559: break;
560: cp = original;
561: dp = word;
562: while(*dp = *cp++) {
563: if(!low)
564: *dp = Tolower(*dp);
565: dp++;
566: }
567: word[0] = Tolower(word[0]);
568: }
569: check:
570: if(cflag) {
571: if(!h || Set(h,STOP))
572: putchar('-');
573: else if(!vflag)
574: putchar('+');
575: else
576: putchar('0' + (suffcount>0) +
577: (prefcount>4? 8: 2*prefcount));
578: } else if(!h || Set(h,STOP))
579: printf("%s\n", original);
580: else if(affix[0] != 0 && affix[0] != '.')
581: printf("%s\t%s\n", affix, original);
582: }
583: }
584:
585: /* strip exactly one suffix and do
586: * indicated routine(s), which may recursively
587: * strip suffixes
588: */
589: Bits
590: trysuff(char* ep, int lev, int flag)
591: {
592: Suftab *t;
593: char *cp, *sp;
594: Bits h = 0;
595: int initchar = ep[-1];
596:
597: flag &= ~MONO;
598: lev += DLEV;
599: if(lev<DSIZ)
600: deriv[lev] = deriv[lev-1] = emptyderiv;
601: if(!islower(initchar))
602: return h;
603: for(t=suftab[initchar-'a']; sp=t->suf; t++) {
604: cp = ep;
605: while(*sp)
606: if(*--cp != *sp++)
607: goto next;
608: for(sp=ep-t->n1; --sp >= word && !isvowel(*sp);)
609: ;
610: if(sp < word)
611: continue;
612: if(!(t->affixable & flag))
613: return 0;
614: h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
615: if(!h && t->p2!=0) {
616: if(lev<DSIZ)
617: deriv[lev] = deriv[lev+1] = emptyderiv;
618: h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
619: }
620: break;
621: next:;
622: }
623: return h;
624: }
625:
626: Bits
627: nop(char* ep, char* d, char* a, int lev, int flag)
628: {
629: #pragma ref ep
630: #pragma ref d
631: #pragma ref a
632: #pragma ref lev
633: #pragma ref flag
634: return 0;
635: }
636:
637: Bits
638: cstrip(char* ep, char* d, char* a, int lev, int flag)
639: {
640: int temp = ep[0];
641:
642: if(isvowel(temp) && isvowel(ep[-1])) {
643: switch(pair(ep[-1],ep[0])) {
644: case pair('a', 'a'):
645: case pair('a', 'e'):
646: case pair('a', 'i'):
647: case pair('e', 'a'):
648: case pair('e', 'e'):
649: case pair('e', 'i'):
650: case pair('i', 'i'):
651: case pair('o', 'a'):
652: return 0;
653: }
654: } else
655: if(temp==ep[-1]&&temp==ep[-2])
656: return 0;
657: return strip(ep,d,a,lev,flag);
658: }
659:
660: Bits
661: strip(char* ep, char* d, char* a, int lev, int flag)
662: {
663: #pragma ref d
664: Bits h = trypref(ep, a, lev, flag);
665:
666: if(Set(h,MONO) && isvowel(*ep) && isvowel(ep[-2]))
667: h = 0;
668: if(h)
669: return h;
670: if(isvowel(*ep) && !isvowel(ep[-1]) && ep[-1]==ep[-2]) {
671: h = trypref(ep-1,a,lev,flag|MONO);
672: if(h)
673: return h;
674: }
675: return trysuff(ep,lev,flag);
676: }
677:
678: Bits
679: s(char* ep, char* d, char* a, int lev, int flag)
680: {
681: if(lev > DLEV+1)
682: return 0;
683: if(*ep=='s') {
684: switch(ep[-1]) {
685: case 'y':
686: if(isvowel(ep[-2])||isupper(*word))
687: break; /*says Kennedys*/
688: case 'x':
689: case 'z':
690: case 's':
691: return 0;
692: case 'h':
693: switch(ep[-2]) {
694: case 'c':
695: case 's':
696: return 0;
697: }
698: }
699: }
700: return strip(ep,d,a,lev,flag);
701: }
702:
703: Bits
704: an(char* ep, char* d, char* a, int lev, int flag)
705: {
706: #pragma ref d
707: if(!isupper(*word)) /*must be proper name*/
708: return 0;
709: return trypref(ep,a,lev,flag);
710: }
711:
712: Bits
713: ize(char* ep, char* d, char* a, int lev, int flag)
714: {
715: #pragma ref a
716: int temp = ep[-1];
717: Bits h;
718:
719: ep[-1] = 'e';
720: h = strip(ep,"",d,lev,flag);
721: ep[-1] = temp;
722: return h;
723: }
724:
725: Bits
726: y_to_e(char* ep, char* d, char* a, int lev, int flag)
727: {
728: #pragma ref a
729: Bits h;
730: int temp;
731:
732: switch(ep[-1]) {
733: case 'a':
734: case 'e':
735: case 'i':
736: return 0;
737: }
738: temp = *ep;
739: *ep++ = 'e';
740: h = strip(ep,"",d,lev,flag);
741: *--ep = temp;
742: return h;
743: }
744:
745: Bits
746: ily(char* ep, char* d, char* a, int lev, int flag)
747: {
748: int temp = ep[0];
749: char *cp = ep;
750:
751: if(temp==ep[-1]&&temp==ep[-2]) /* sillly */
752: return 0;
753: if(*--cp=='y' && !isvowel(*--cp)) /* happyly */
754: while(cp>word)
755: if(isvowel(*--cp)) /* shyness */
756: return 0;
757: if(ep[-1]=='i')
758: return i_to_y(ep,d,a,lev,flag);
759: return cstrip(ep,d,a,lev,flag);
760: }
761:
762: Bits
763: bility(char* ep, char* d, char* a, int lev, int flag)
764: {
765: *ep++ = 'l';
766: return y_to_e(ep,d,a,lev,flag);
767: }
768:
769: Bits
770: i_to_y(char* ep, char* d, char* a, int lev, int flag)
771: {
772: Bits h;
773: int temp;
774:
775: if(isupper(*word))
776: return 0;
777: if((temp=ep[-1])=='i' && !isvowel(ep[-2])) {
778: ep[-1] = 'y';
779: a = d;
780: }
781: h = cstrip(ep,"",a,lev,flag);
782: ep[-1] = temp;
783: return h;
784: }
785:
786: Bits
787: es(char* ep, char* d, char* a, int lev, int flag)
788: {
789: if(lev>DLEV)
790: return 0;
791: switch(ep[-1]) {
792: default:
793: return 0;
794: case 'i':
795: return i_to_y(ep,d,a,lev,flag);
796: case 'h':
797: switch(ep[-2]) {
798: default:
799: return 0;
800: case 'c':
801: case 's':
802: break;
803: }
804: case 's':
805: case 'z':
806: case 'x':
807: return strip(ep,d,a,lev,flag);
808: }
809: }
810:
811: Bits
812: subst(char* ep, char* d, char* a, int lev, int flag)
813: {
814: #pragma ref a
815: char *u,*t;
816: Bits h;
817:
818: if(skipv(skipv(ep-1)) < word)
819: return 0;
820: for(t=d; *t!='+'; t++)
821: continue;
822: for(u=ep; *--t!='-';)
823: *--u = *t;
824: h = strip(ep,"",d,lev,flag);
825: while(*++t != '+')
826: continue;
827: while(*++t)
828: *u++ = *t;
829: return h;
830: }
831:
832: Bits
833: tion(char* ep, char* d, char* a, int lev, int flag)
834: {
835: switch(ep[-2]) {
836: default:
837: return trypref(ep,a,lev,flag);
838: case 'a':
839: case 'e':
840: case 'i':
841: case 'o':
842: case 'u':
843: return y_to_e(ep,d,a,lev,flag);
844: }
845: }
846:
847: /*
848: * possible consonant-consonant-e ending
849: */
850: Bits
851: CCe(char* ep, char* d, char* a, int lev, int flag)
852: {
853: Bits h;
854:
855: switch(ep[-1]) {
856: case 'l':
857: if(isvowel(ep[-2]))
858: break;
859: switch(ep[-2]) {
860: case 'l':
861: case 'r':
862: case 'w':
863: break;
864: default:
865: return y_to_e(ep,d,a,lev,flag);
866: }
867: break;
868: case 'c':
869: case 'g':
870: if(*ep == 'a') /* prevent -able for -eable */
871: return 0;
872: case 's':
873: case 'v':
874: case 'z':
875: if(ep[-2]==ep[-1])
876: break;
877: if(isvowel(ep[-2]))
878: break;
879: case 'u':
880: if(h = y_to_e(ep,d,a,lev,flag))
881: return h;
882: if(!(ep[-2]=='n' && ep[-1]=='g'))
883: return 0;
884: }
885: return VCe(ep,d,a,lev,flag);
886: }
887:
888: /*
889: * possible consonant-vowel-consonant-e ending
890: */
891: Bits
892: VCe(char* ep, char* d, char* a, int lev, int flag)
893: {
894: int c;
895: Bits h;
896:
897: c = ep[-1];
898: if(c=='e')
899: return 0;
900: if(!isvowel(c) && isvowel(ep[-2])) {
901: c = *ep;
902: *ep++ = 'e';
903: h = trypref(ep,d,lev,flag);
904: if(!h)
905: h = trysuff(ep,lev,flag);
906: if(h)
907: return h;
908: ep--;
909: *ep = c;
910: }
911: return cstrip(ep,d,a,lev,flag);
912: }
913:
914: Ptab*
915: lookuppref(char** wp, char* ep)
916: {
917: Ptab *sp;
918: char *bp,*cp;
919: int initchar = Tolower(**wp);
920:
921: if(!isalpha(initchar))
922: return 0;
923: for(sp=preftab[initchar-'a'];sp->s;sp++) {
924: bp = *wp;
925: for(cp= sp->s;*cp; )
926: if(*bp++!=*cp++)
927: goto next;
928: for(cp=bp;cp<ep;cp++)
929: if(isvowel(*cp)) {
930: *wp = bp;
931: return sp;
932: }
933: next:;
934: }
935: return 0;
936: }
937:
938: /* while word is not in dictionary try stripping
939: * prefixes. Fail if no more prefixes.
940: */
941: Bits
942: trypref(char* ep, char* a, int lev, int flag)
943: {
944: Ptab *tp;
945: char *bp, *cp;
946: char *pp;
947: Bits h;
948: char space[20];
949:
950: if(lev<DSIZ) {
951: deriv[lev].mesg = a;
952: deriv[lev].type = *a=='.'? NONE: SUFF;
953: }
954: if(h = tryword(word,ep,lev,flag)) {
955: if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
956: return h;
957: h = 0;
958: }
959: bp = word;
960: pp = space;
961: if(lev<DSIZ) {
962: deriv[lev+1].mesg = pp;
963: deriv[lev+1].type = 0;
964: }
965: while(tp=lookuppref(&bp,ep)) {
966: *pp++ = '+';
967: cp = tp->s;
968: while(pp<space+sizeof(space) && (*pp = *cp++))
969: pp++;
970: deriv[lev+1].type += PREF;
971: h = tryword(bp,ep,lev+1,flag);
972: if(Set(h,NOPREF) ||
973: ((tp->flag&IN) && inun(bp-2,h)==0)) {
974: h = 0;
975: break;
976: }
977: if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
978: break;
979: h = 0;
980: }
981: if(lev<DSIZ)
982: deriv[lev+1] = deriv[lev+2] = emptyderiv;
983: return h;
984: }
985:
986: Bits
987: tryword(char* bp, char* ep, int lev, int flag)
988: {
989: int j;
990: Bits h = 0;
991: char duple[3];
992:
993: if(ep-bp <= 1)
994: return h;
995: if(flag&MONO) {
996: if(lev<DSIZ) {
997: deriv[++lev].mesg = duple;
998: deriv[lev].type = SUFF;
999: }
1000: duple[0] = '+';
1001: duple[1] = *ep;
1002: duple[2] = 0;
1003: }
1004: h = dict(bp, ep);
1005: if(vflag==0 || h==0)
1006: return h;
1007: /*
1008: * when derivations are wanted, collect them
1009: * for printing
1010: */
1011: j = lev;
1012: prefcount = suffcount = 0;
1013: do {
1014: if(j<DSIZ && deriv[j].type) {
1015: strcat(affix, deriv[j].mesg);
1016: if(deriv[j].type == SUFF)
1017: suffcount++;
1018: else if(deriv[j].type != NONE)
1019: prefcount = deriv[j].type/PREF;
1020: }
1021: } while(--j > 0);
1022: return h;
1023: }
1024:
1025: int
1026: inun(char* bp, Bits h)
1027: {
1028: if(*bp == 'u')
1029: return Set(h, IN) == 0;
1030: /* *bp == 'i' */
1031: if(Set(h, IN) == 0)
1032: return 0;
1033: switch(bp[2]) {
1034: case 'r':
1035: return bp[1] == 'r';
1036: case 'm':
1037: case 'p':
1038: return bp[1] == 'm';
1039: }
1040: return bp[1] == 'n';
1041: }
1042:
1043: char*
1044: skipv(char *s)
1045: {
1046: if(s >= word && isvowel(*s))
1047: s--;
1048: while(s >= word && !isvowel(*s))
1049: s--;
1050: return s;
1051: }
1052:
1053: /*
1054: * crummy way to Britishise
1055: */
1056: void
1057: ise(void)
1058: {
1059: Suftab *p;
1060: int i;
1061:
1062: for(i=0; i<26; i++)
1063: for(p = suftab[i]; p->suf; p++) {
1064: p->suf = ztos(p->suf);
1065: p->d1 = ztos(p->d1);
1066: p->a1 = ztos(p->a1);
1067: }
1068: }
1069:
/*
 * Return as with every 'z' turned into 's'.
 * A string without 'z' is returned unchanged (same pointer);
 * otherwise the result is a fresh copy made with strdupl and the
 * argument itself is left alone.
 */
char*
ztos(char *as)
{
	char *p, *copy;

	/* nothing to do unless a 'z' occurs before the terminator */
	for(p = as; *p != 'z'; p++)
		if(*p == 0)
			return as;

	copy = strdupl(as);
	for(p = copy; *p; p++)
		if(*p == 'z')
			*p = 's';
	return copy;
}
1087:
/*
 * Look up the n = ep-bp characters starting at bp in the in-memory
 * dictionary built by readdict.
 *
 * The first two characters select, through spacep[], the region of
 * space[] to search; the remaining characters are found by binary
 * search inside that region.  Returns the encode[] entry named by
 * the matching dictionary entry's affix code, or 0 when the word is
 * absent.  With -x every probe is reported on stderr as "=word",
 * followed on a hit by the class names printed by typeprint.
 */
1088: Bits
1089: dict(char* bp, char* ep)
1090: {
1091: char *cp, *cp1, *w, *wp, *we;
1092: int n, f;
1093:
/* remember the word; bp and ep are reused below as the search bounds */
1094: w = bp;
1095: we = ep;
1096: n = ep-bp;
1097: if(n <= 1)
1098: return NOUN;
1099:
/* index into spacep[] built from the low 7 bits of the first two characters */
1100: f = w[0] & 0x7f;
1101: f *= 128;
1102: f += w[1] & 0x7f;
1103: bp = spacep[f];
1104: ep = spacep[f+1];
1105:
1106: loop:
/* empty interval: the word is not in the dictionary */
1107: if(bp >= ep) {
1108: if(xflag)
1109: fprintf(stderr,"=%.*s\n",n, w);
1110: return 0;
1111: }
1112: /*
1113: * find the beginning of some word in the middle
1114: */
1115: cp = bp + (ep-bp)/2;
1116:
/* back up to a byte with the 0x80 marker, then further while the byte before it is marked too */
1117: while(cp > bp && !(*cp & 0x80))
1118: cp--;
1119: while(cp > bp && (cp[-1] & 0x80))
1120: cp--;
1121:
1122: wp = w + 2; /* skip two letters */
1123: cp1 = cp + 2; /* skip affix code */
/*
 * compare; afterwards f < 0 means the entry sorts before the word,
 * f > 0 that it sorts after it
 */
1124: for(;;) {
1125: if(wp >= we) {
/* word used up: a match if the entry ends here as well */
1126: if(*cp1&0x80)
1127: goto found;
1128: else
1129: f = 1;
1130: break;
1131: }
/* entry ended before the word did */
1132: if(*cp1&0x80) {
1133: f = -1;
1134: break;
1135: }
1136: f = *cp1++ - *wp++;
1137: if(f != 0)
1138: break;
1139: }
1140:
/* entry too small: continue with the entries that follow it */
1141: if(f < 0) {
1142: while(!(*cp1&0x80))
1143: cp1++;
1144: bp = cp1;
1145: goto loop;
1146: }
/* entry too large: continue with the entries in front of it */
1147: ep = cp;
1148: goto loop;
1149: found:
/* affix code: low three bits of the first byte joined with the second byte */
1150: f = ((cp[0] & 0x7) << 8) |
1151: (cp[1] & 0xff);
1152: if(xflag) {
1153: fprintf(stderr,"=%.*s ",n,w);
1154: typeprint(encode[f]);
1155: }
1156: return encode[f];
1157: }
1158:
1159: void
1160: typeprint(Bits h)
1161: {
1162:
1163: pcomma("");
1164: if(h & NOUN)
1165: pcomma("n");
1166: if(h & PROP_COLLECT)
1167: pcomma("pc");
1168: if(h & VERB) {
1169: if((h & VERB) == VERB)
1170: pcomma("v");
1171: else
1172: if((h & VERB) == V_IRREG)
1173: pcomma("vi");
1174: else
1175: if(h & ED)
1176: pcomma("ed");
1177: }
1178: if(h & ADJ)
1179: pcomma("a");
1180: if(h & COMP) {
1181: if((h & COMP) == ACTOR)
1182: pcomma("er");
1183: else
1184: pcomma("comp");
1185: }
1186: if(h & DONT_TOUCH)
1187: pcomma("d");
1188: if(h & N_AFFIX)
1189: pcomma("na");
1190: if(h & ADV)
1191: pcomma("adv");
1192: if(h & ION)
1193: pcomma("ion");
1194: if(h & V_AFFIX)
1195: pcomma("va");
1196: if(h & MAN)
1197: pcomma("man");
1198: if(h & NOPREF)
1199: pcomma("nopref");
1200: if(h & MONO)
1201: pcomma("ms");
1202: if(h & IN)
1203: pcomma("in");
1204: if(h & _Y)
1205: pcomma("y");
1206: if(h & STOP)
1207: pcomma("s");
1208: fprintf(stderr, "\n");
1209: }
1210:
/*
 * Print s on stderr as the next item of a comma-separated list.
 * An empty string prints nothing and starts a new list, so that the
 * item that follows is printed without a leading comma.
 */
void
pcomma(char *s)
{
	static int flag;	/* nonzero once the current list has an item */

	if(*s == 0) {
		flag = 0;
		return;
	}
	if(!flag) {
		fprintf(stderr, "%s", s);
		flag = 1;
	} else
		fprintf(stderr, ",%s", s);
}
1226:
1227: int
1228: ordinal(void)
1229: {
1230: char *cp = word;
1231: static char sp[4];
1232:
1233: while(isdigit(*cp))
1234: cp++;
1235: strncpy(sp,cp,3);
1236: if(isupper(cp[0]) && isupper(cp[1])) {
1237: sp[0] = Tolower(cp[0]);
1238: sp[1] = Tolower(cp[1]);
1239: }
1240: return 0 == strncmp(sp,
1241: cp[-2]=='1'? "th": /* out of bounds if 1 digit */
1242: *--cp=='1'? "st": /* harmless */
1243: *cp=='2'? "nd":
1244: *cp=='3'? "rd":
1245: "th", 3);
1246: }
1247:
1248: /* layout of file entry: first byte has bit 0x80 turned on.
1249: next 4 bits count number of characters common between this
1250: entry and previous one. last three bits concatenated with
1251: second byte are the affixing code, so arranged that the 0x80
1252: bit is zero in all bytes but the first. 3rd and following
1253: bytes are the remainder of the dictionary word.
1254:
1255: layout in memory: common prefixes are expanded, and the
1256: first two letters of each word are deleted and found
1257: instead by lookup in table spacep, which points to the
1258: first word for each two-letter prefix.
1259: */
1260:
1261: void
1262: readdict(char *file)
1263: {
1264: char *s, *is, *lasts, *ls;
1265: int c, i, sp, p;
1266: int f;
1267: long l;
1268:
1269: f = open(file, 0);
1270: if(f == -1) {
1271: fprintf(stderr, "spell: cannot open %s\n", file);
1272: exit(1);
1273: }
1274: if(read(f, space, 2) != 2)
1275: goto bad;
1276: nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
1277: if(nencode > sizeof(encode)/sizeof(*encode))
1278: goto noroom;
1279: if(read(f, space, nencode*sizeof(*encode))
1280: != nencode*sizeof(*encode))
1281: goto bad;
1282: s = space;
1283: for(i=0; i<nencode; i++) {
1284: l = (long)(s[0] & 0xff) << 24;
1285: l |= (s[1] & 0xff) << 16;
1286: l |= (s[2] & 0xff) << 8;
1287: l |= s[3] & 0xff;
1288: encode[i] = (Bits)l;
1289: s += 4;
1290: }
1291: l = read(f, space, sizeof(space));
1292: if(l >= sizeof(space))
1293: goto noroom;
1294: is = space + (sizeof(space) - l);
1295: memmove(is, space, l);
1296:
1297: s = space;
1298: c = *is++ & 0xff;
1299: sp = -1;
1300: i = 0;
1301: lasts = 0; /* to prevent diagnostics */
1302: loop:
1303: if(s > is)
1304: goto noroom;
1305: if(c < 0) {
1306: close(f);
1307: while(sp < 128*128)
1308: spacep[++sp] = s;
1309: *s++ = 0x80; /* fence */
1310: return;
1311: }
1312: p = (c>>3) & 0xf;
1313: *s++ = c;
1314: *s++ = *is++ & 0xff;
1315: if(p <= 0)
1316: i = (*is++ & 0xff)*128;
1317: if(p <= 1) {
1318: if(!(*is & 0x80))
1319: i = i/128*128 + (*is++ & 0xff);
1320: if(i <= sp) {
1321: fprintf(stderr, "spell: the dict isn't "
1322: "sorted or memmove didn't work\n");
1323: goto bad;
1324: }
1325: while(sp < i)
1326: spacep[++sp] = s-2;
1327: }
1328: ls = lasts;
1329: lasts = s;
1330: for(p-=2; p>0; p--)
1331: *s++ = *ls++;
1332: for(;;) {
1333: if(is >= space+sizeof(space)) {
1334: c = -1;
1335: break;
1336: }
1337: c = *is++ & 0xff;
1338: if(c & 0x80)
1339: break;
1340: *s++ = c;
1341: }
1342: *s = 0;
1343: goto loop;
1344:
1345: bad:
1346: fprintf(stderr, "spell: trouble reading %s\n", file);
1347: exit(1);
1348: noroom:
1349: fprintf(stderr,"spell: not enough space for dictionary\n");
1350: exit(1);
1351: }
1352:
/*
 * same as strdup; portability hack
 *
 * Returns a newly allocated copy of s.  Running out of memory is
 * reported on stderr and ends the program with status 1, so the
 * result is never null.
 */
char *
strdupl(char *s)
{
	char *t;

	t = malloc(strlen(s)+1);
	if(t == 0) {
		fprintf(stderr, "spell: out of memory\n");
		exit(1);
	}
	strcpy(t, s);
	return t;
}
1360:
1361: void
1362: runout(char *s)
1363: {
1364: int c;
1365: if(!cflag)
1366: printf("%s", s);
1367: else {
1368: putchar('-');
1369: fflush(stdout);
1370: }
1371: do {
1372: if((c=getchar()) == EOF)
1373: exit(0);
1374: if(!cflag)
1375: putchar(c);
1376: } while(c != '\n');
1377: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.