|
|
# include "stdio.h"
# include "string.h"
# include "assert.h"
# define SAME 0
/*
 * Suffix table used by sufstr().
 *
 * sufstr() walks the table from the top and acts on the first entry whose
 * told matches the end of the word, so order matters: entries whose tnew
 * equals told leave the word unchanged and thereby keep those endings away
 * from the more general rules further down (for example "ss" before "s").
 * The table ends with an entry whose told is 0.
 */
# define S 1		/* rewritten word is returned as it stands */
# define EDING 2	/* if the suffix was removed outright, sufstr() may also
			   undouble a final consonant or append an "e" */
# define LY 3		/* rewritten word is returned as it stands */
struct suft {
	char *told;	/* suffix looked for at the end of the word */
	int tlen;	/* number of characters in told */
	char *tnew;	/* text copied over the suffix */
	int type;	/* S, EDING or LY */
} suftab[] = {
	{"is", 2, "is", S},
	{"ies", 3, "y", S},
	{"us", 2, "us", S},
	{"sses", 4, "ss", S},
	{"xes", 3, "x", S},
	{"jones", 5, "jones", S},
	{"ss", 2, "ss", S},
	{"ings", 4, "", EDING},
	{"namics", 6, "namics", S},
	{"hysics", 6, "hysics", S},
	{"hes", 3, "h", S},
	{"autics", 6, "autics", S},
	{"matics", 6, "matics", S},
	{"stics", 5, "stics", S},
	{"news", 4, "news", S},
	{"setts", 5, "setts", S},
	{"denes", 5, "denes", S},
	{"'s", 2, "", S},
	{"s", 1, "", S},
	{"speed", 5, "speed", EDING},
	{"ied", 3, "y", EDING},
	{"ed", 2, "", EDING},
	{"ssing", 5, "ss", EDING},
	{"ing", 3, "", EDING},
	{"ply", 3, "ply", LY},
	{"ily", 3, "ily", LY},
	{"ly", 2, "", LY},
	{0, 0, 0, 0}	/* end marker */
};
/*
 * Final-"e" table used by sufstr() once an EDING suffix has been removed.
 *
 * Each esuf is a pattern compared by ecomp() against the last elen
 * characters of the stem: 'C' stands for any non-vowel, 'V' for any vowel
 * (see vow[]), and every other character, including the lower-case 'v' in
 * "VVv" and "Vlv", stands for itself.  The first entry that matches decides
 * the outcome, so order matters.  The table ends with an entry whose esuf
 * is 0.
 */
# define OK 0		/* stem is complete as it stands */
# define ADD 1		/* append "e" to the stem */
struct et {
	char *esuf;	/* ending pattern */
	int elen;	/* number of characters in esuf */
	int evow;	/* entry is skipped when the word has fewer vowels than this */
	int eadd;	/* OK or ADD */
} etab[] = {
	{"x", 1, 0, OK},
	{"sponsor", 7, 0, OK},
	{"monitor", 7, 0, OK},
	{"og", 2, 2, OK},
	{"Vg", 2, 0, ADD},
	{"ow", 2, 0, OK},
	{"dit", 3, 0, OK},
	{"er", 2, 0, OK},
	{"el", 2, 0, OK},
	{"ss", 2, 0, OK},
	{"et", 2, 0, OK},
	{"el", 2, 0, OK},	/* repeats the "el" entry above; never reached */
	{"air", 3, 0, OK},
	{"en", 2, 2, OK},
	{"on", 2, 2, OK},
	{"ong", 3, 0, OK},
	{"CVC", 3, 0, ADD},
	{"VVv", 3, 0, ADD},
	{"Vlv", 3, 0, ADD},
	{"nc", 2, 0, ADD},
	{"pl", 2, 0, ADD},
	{"bl", 2, 0, ADD},
	{"rg", 2, 0, ADD},
	{"lg", 2, 0, ADD},
	{"dg", 2, 0, ADD},
	{"ng", 2, 0, ADD},
	{"iat", 3, 0, ADD},
	{"u", 1, 0, ADD},
	{"uir", 3, 0, ADD},
	{"Cs", 2, 0, ADD},
	{"us", 2, 0, ADD},
	{0, 0, 0, 0}	/* end marker */
};
/*
 * Vowel table indexed by 7-bit ASCII code: 1 for a, e, i, o, u and y in
 * either case, 0 for everything else.  It has exactly 128 entries, so it
 * must not be indexed with a value outside 0..127.
 */
char vow[] = {
/*nul soh stx etx eot enq ack bel */
	0, 0, 0, 0, 0, 0, 0, 0,
/*bs ht nl vt np cr so si */
	0, 0, 0, 0, 0, 0, 0, 0,
/*dle dc1 dc2 dc3 dc4 nak syn etb */
	0, 0, 0, 0, 0, 0, 0, 0,
/*can em sub esc fs gs rs us */
	0, 0, 0, 0, 0, 0, 0, 0,
/*sp ! " # $ % & ' */
	0, 0, 0, 0, 0, 0, 0, 0,
/* ( ) * + , - . / */
	0, 0, 0, 0, 0, 0, 0, 0,
/* 0 1 2 3 4 5 6 7 */
	0, 0, 0, 0, 0, 0, 0, 0,
/* 8 9 : ; < = > ? */
	0, 0, 0, 0, 0, 0, 0, 0,
/* @ A B C D E F G */
	0, 1, 0, 0, 0, 1, 0, 0,
/* H I J K L M N O */
	0, 1, 0, 0, 0, 0, 0, 1,
/* P Q R S T U V W */
	0, 0, 0, 0, 0, 1, 0, 0,
/* X Y Z [ \ ] ^ _ */
	0, 1, 0, 0, 0, 0, 0, 0,
/* ` a b c d e f g */
	0, 1, 0, 0, 0, 1, 0, 0,
/* h i j k l m n o */
	0, 1, 0, 0, 0, 0, 0, 1,
/* p q r s t u v w */
	0, 0, 0, 0, 0, 1, 0, 0,
/* x y z { | } ~ del */
	0, 1, 0, 0, 0, 0, 0, 0 };
115: char *
116: sufstr(p)
117: char *p;
118: { /* removes a few common suffixes */
119: static char wd[50];
120: struct suft *x;
121: struct et *xe;
122: register char *s; register int c;
123: int nl, nv;
124: strcpy(wd, p);
125: nl = strlen(wd);
126: for(s=wd; *s; s++);
127: for(x=suftab; x->told; x++)
128: {
129: if (nl < x->tlen) continue;
130: if (strcmp(x->told, s-x->tlen)==SAME)
131: {
132: s -= x->tlen;
133: strcpy(s, x->tnew);
134: nv = novow(wd);
135: if (strlen(wd)<3 || nv==0)
136: return(p);
137: switch(x->type)
138: {
139: case S:
140: return(wd);
141: case EDING:
142: c = s[-1];
143: if (s[0]!=0) return(wd);
144: /* undouble final consonant? */
145: if (!vow[c] && c!= 's' && c==s[-2] && (c!='l'||nv>1))
146: {
147: s[-1]=0;
148: return(wd);
149: }
150: /* add final e decision */
151: for( xe = etab; xe->esuf; xe++)
152: {
153: if (xe->evow > nv)
154: continue;
155: if (ecomp(xe->esuf, s-xe->elen))
156: {
157: if (xe->eadd)
158: strcpy(s, "e");
159: return(wd);
160: }
161: }
162: return(wd);
163: case LY:
164: return(wd);
165: }
166: return(wd);
167: }
168: }
169: return(p);
170: }
171: ecomp (etent, wd)
172: char *etent, *wd;
173: {
174: register int c, d;
175: while ( c= *etent++)
176: {
177: d = *wd++;
178: switch(c)
179: {
180: case 'C':
181: if (!vow[d])
182: continue;
183: else
184: return(0);
185: case 'V':
186: if (vow[d])
187: continue;
188: else
189: return(0);
190: default:
191: if (c==d)
192: continue;
193: else
194: return(0);
195: }
196: }
197: return(1);
198: }
199: novow(s)
200: char *s;
201: {
202: int c, k=0;
203: while (c = *s++)
204: if (vow[c])
205: k++;
206: return(k);
207: }
/*
 * Storage for the common-word list loaded by comstart() and searched by
 * common().
 */
char *comwords[420];	/* comwords[i] points at the text of word number i */
# define NCHASH 401	/* number of slots in hashc[] */
int hashc[NCHASH];	/* hash table: slot holds a word number, 0 means empty */
char comspace[2000];	/* the words themselves, one NUL-terminated string after another */
char *combuff=comspace;	/* next free byte in comspace */
char *comname = "/usr/lib/refer/function";	/* file the list is read from */
214: common (s)
215: char *s;
216: {
217: char *p;
218: static int cgate=0;
219: register int k, c, i;
220: if (cgate==0)
221: {
222: cgate=1;
223: comstart();
224: }
225: c = s[0];
226: for(k = hash(s); i = hashc[k]; k=(k+1)%NCHASH)
227: {
228: p = comwords[i];
229: if (p==NULL) continue;
230: if (c!= *p) continue;
231: if (strcmp(p, s)==SAME)
232: return(1);
233: }
234: return(0);
235: }
236: hash(s)
237: register char *s;
238: {
239: register int n=0, c;
240: while ( c = *s++)
241: {
242: n += (c<<n%4);
243: }
244: return ( ( n & 077777) %NCHASH);
245: }
246: comstart()
247: {
248: FILE *comf;
249: char line[300];
250: int nw=1, j, i;
251: /* to permit fast test, we skip one byte at start of combuff */
252: comf= fopen(comname, "r");
253: assert(comf!=NULL);
254: while (fgets(line, 300, comf))
255: {
256: trimnl(line);
257: comwords[i=nw++]=combuff;
258: strcpy(combuff, line);
259: while (*combuff++);
260: assert(nw<300);
261: assert(combuff<comspace+2000);
262: j = hash (comwords[i]);
263: while (hashc[j]) j++;
264: hashc[j]=i;
265: assert(j<1500);
266: }
267: fclose(comf);
268: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.