|
|
1.1 root 1: #include <stdlib.h>
2: #include <string.h>
3: #include <stdio.h>
4: #include <ctype.h>
5: #include <signal.h>
6:
7: #ifndef WORD_LIST
8: #define WORD_LIST "/usr/lib/spell/amspell"
9: #endif
10: #ifndef WORD_LISTB
11: #define WORD_LISTB "/usr/lib/spell/brspell"
12: #endif
13: /* the next string should be the first word in spell's stop list */
14: #define STOPWORD "Argentinan"
15: #define VOWELS "aeiouy"
16: #define ALPHAS 26
17: #define LENLIMIT 64
18: #define talloc(t) salloc(sizeof (t))
19: #define MAP(c) ((c) - 'a')
20:
21: typedef long Bits; /* must agree with spell's pcode */
22:
23: typedef struct word word;
24: typedef unsigned long ulong;
25:
26: struct word
27: {
28: char *text;
29: int length;
30: ulong mask;
31: word *next;
32: };
33:
34: typedef word *set[LENLIMIT];
35:
36: typedef struct
37: {
38: int count[ALPHAS];
39: int length;
40: ulong mask;
41: }
42: target;
43:
44: char *myname;
45: char *dict_name = WORD_LIST;
46: int interrupt = 0;
47: int allwords = 0;
48: int fixed;
49: int maxwords;
50: set words;
51: word *list[LENLIMIT];
52: FILE *f_dict;
53:
54: void
55: error(char *s)
56: {
57: fprintf(stderr, "%s: %s\n", myname, s);
58: exit(1);
59: }
60:
61: void *
62: salloc(ulong z)
63: {
64: void *p;
65:
66: if ((p = malloc(z)) == NULL)
67: error("ran out of memory");
68:
69: return p;
70: }
71:
72: int
73: word_ok(char *s)
74: {
75: int vowel = 0;
76:
77: while (*s != '\0')
78: {
79: if (!islower(*s))
80: if (allwords) *s = tolower(*s);
81: else return 0;
82:
83: switch (*s)
84: {
85: case 'a':
86: case 'e':
87: case 'i':
88: case 'o':
89: case 'u':
90: case 'y':
91: vowel = 1;
92: }
93:
94: s++;
95: }
96:
97: return vowel;
98: }
99:
100: ulong
101: str_to_mask(char *s)
102: {
103: ulong m;
104:
105: m = 0;
106:
107: while (*s != '\0')
108: m |= 1 << MAP(*s++);
109:
110: return m;
111: }
112:
113: char *
114: get_word(char *bp)
115: {
116: int c, i, affix;
117: static int n = 0, stop = 0, naffix = 0;
118:
119: retry:
120: if (n == -1) return NULL;
121: c = getc(f_dict); /* affix encoding */
122: affix = naffix | c;
123: for (i = n; i < LENLIMIT && !((c = getc(f_dict)) & 0x80); i++)
124: bp[i] = c;
125: if (i >= LENLIMIT || i == 0)
126: error("Internal dictionary format bad");
127: if (c == EOF) n = -1;
128: else {
129: n = (c>>3)&017; /* prefix count, next word */
130: naffix = (c&07)<<8;
131: }
132: bp[i] = '\0';
133: /* the dictionary includes "stop" (illegal) words, flagged
134: by a magic affix that will be different each time pcode is
135: run. Determine the flag dynamically by looking for a
136: known word in the stop list, STOPWORD
137: */
138: if (stop == 0 && strcmp(bp, STOPWORD) == 0)
139: stop = affix; /* magic affix! */
140: if ((stop && (affix == stop)) || !word_ok(bp)) goto retry;
141: return bp;
142: }
143:
144: void
145: make_word(char *s)
146: {
147: word *w;
148: word **p;
149: int i;
150:
151: w = talloc(word);
152: w->length = i = strlen(s);
153: w->text = strcpy(salloc(i+1), s);
154: w->mask = str_to_mask(s);
155: p = &words[w->length];
156: w->next = *p;
157: *p = w;
158: }
159:
160: void
161: build_wordlist(void)
162: {
163: char bp[LENLIMIT];
164: short n;
165:
166: if ( (f_dict = fopen(dict_name, "r")) == NULL )
167: error("can't open word list");
168:
169: n = (getc(f_dict)&0xFF) << 8;
170: n |= getc(f_dict)&0xFF;
171:
172: if (fseek(f_dict, n*sizeof(Bits)+1, 1) != 0) {
173: perror(WORD_LIST);
174: exit(2);
175: }
176:
177: if (allwords) {
178: make_word("a");
179: make_word("i");
180: make_word("o");
181: }
182: while (get_word(bp) != NULL)
183: make_word(bp);
184: }
185:
186: void
187: count_letters(target *t, char *s)
188: {
189: int i;
190:
191: for (i = 0; i < ALPHAS; i++)
192: t->count[i] = 0;
193:
194: t->length = 0;
195:
196: while (*s != '\0')
197: {
198: t->count[MAP(*s++)]++;
199: t->length++;
200: }
201: }
202:
203: int
204: contained(word *i, target *t)
205: {
206: int n;
207: char *v;
208: target it;
209:
210: if ((i->mask & t->mask) != i->mask)
211: return 0;
212:
213: count_letters(&it, i->text);
214:
215: for (n = 0; n < ALPHAS; n++)
216: {
217: if (it.count[n] > t->count[n])
218: return 0;
219: }
220:
221: if (it.length == t->length)
222: return 1;
223:
224: for (v = VOWELS; *v != '\0'; v++)
225: {
226: if (t->count[MAP(*v)] > it.count[MAP(*v)])
227: return 1;
228: }
229:
230: return 0;
231: }
232:
233: int
234: prune(set in, int m, target *filter, set out)
235: {
236: word *i, *o, *t;
237: int n;
238: int nz;
239:
240: nz = 0;
241:
242: for (n = 1; n < LENLIMIT; n++)
243: {
244: if (n > m)
245: {
246: out[n] = NULL;
247: continue;
248: }
249:
250: o = NULL;
251:
252: for (i = in[n]; i != NULL; i = i->next)
253: {
254: if (contained(i, filter))
255: {
256: t = talloc(word);
257: *t = *i;
258: t->next = o;
259: o = t;
260: nz = 1;
261: }
262: }
263:
264: out[n] = o;
265: }
266:
267: return nz;
268: }
269:
270: ulong
271: target_mask(int c[])
272: {
273: ulong m;
274: int i;
275:
276: m = 0;
277:
278: for (i = 0; i < ALPHAS; i++)
279: {
280: if (c[i] != 0)
281: m |= 1 << i;
282: }
283:
284: return m;
285: }
286:
287: void
288: dump_list(word **e)
289: {
290: word **p;
291:
292: /* fprintf(stdout, "%d", (int)(e - list + 1));
293: */
294: for (p = list; p < e; p++) {
295: fputs((*p)->text, stdout);
296: putchar(' ');
297: }
298: puts((*e)->text);
299: }
300:
301: void
302: free_set(set s)
303: {
304: int i;
305: word *p, *q;
306:
307: for (i = 1; i < LENLIMIT; i++)
308: {
309: for (p = s[i]; p != NULL; p = q)
310: {
311: q = p->next;
312: free(p);
313: }
314: }
315: }
316:
317: void
318: enumerate(word **p, target *i, set c)
319: {
320: target t;
321: set o;
322: word *w, *h;
323: char *s;
324: int l, m, b;
325:
326: l = p - list;
327: b = (i->length + (maxwords - l - 1)) / (maxwords - l);
328:
329: for (m = i->length; m >= b; m--)
330: {
331: h = c[m];
332:
333: for (w = h; w != NULL; w = w->next)
334: {
335: if (interrupt) return;
336: if (i->length == m)
337: {
338: *p = w;
339: dump_list(p);
340: continue;
341: }
342:
343: if (l == maxwords - 1)
344: continue;
345:
346: t = *i;
347: t.length -= m;
348:
349: for (s = w->text; *s != '\0'; s++)
350: t.count[MAP(*s)]--;
351:
352: t.mask = target_mask(t.count);
353: c[m] = w->next;
354:
355: if (prune(c, m, &t, o))
356: {
357: *p = w;
358: enumerate(p + 1, &t, o);
359: free_set(o);
360: }
361: }
362:
363: c[m] = h;
364: }
365: }
366:
367: void
368: clean(char *s)
369: {
370: char *p, *q;
371:
372: for (p = s, q = s; *p != '\0'; p++)
373: {
374: if (islower(*p))
375: *q++ = *p;
376: else if (isupper(*p))
377: *q++ = tolower(*p);
378: }
379:
380: *q = '\0';
381: }
382:
383: void
384: anagramulate(char *s)
385: {
386: target t;
387: set subjects;
388:
389: clean(s);
390:
391: if (fixed)
392: maxwords = fixed;
393: else if ((maxwords = strlen(s) / 4) < 3)
394: maxwords = 3;
395:
396: fprintf(stdout, "%s:\n", s);
397: t.mask = str_to_mask(s);
398: count_letters(&t, s);
399:
400: if (!prune(words,t.length, &t, subjects))
401: return;
402:
403: enumerate(&list[0], &t, subjects);
404: }
405:
406: void
407: set_fixed(char *s)
408: {
409: if ((fixed = atoi(s)) < 1)
410: fixed = 1;
411: }
412:
413: void
414: read_words(void)
415: {
416: char *s;
417: char b[LENLIMIT];
418: char b2[LENLIMIT];
419:
420:
421: while ((s = fgets(b, LENLIMIT, stdin)) != NULL) {
422: if (b[0] == 'q' && b[1] == '\n') exit(0);
423: if (isdigit(b[0]))
424: {
425: set_fixed(s);
426: continue;
427: }
428: if ( b[0] == '\n' )
429: s = &b2[0];
430: else s = strcpy(b2, b);
431:
432: anagramulate(s);
433: if (interrupt) {
434: interrupt = 0;
435: puts("Interrupt");
436: }
437: else puts("Done.");
438: }
439: }
440:
441: void
442: fbreak(int dummy)
443: {
444: dummy = dummy; /* avert use/set diagnostics */
445: interrupt += 1;
446: signal(SIGINT, fbreak);
447: return;
448: }
449: void
450: main(int argc, char **argv)
451: {
452: myname = argv[0];
453: if (argc > 1)
454: if (argv[1][0] == '-') {
455: if (argv[1][1] == 'b')
456: dict_name = WORD_LISTB;
457: else if (argv[1][1] == 'a')
458: allwords = 1;
459: else error("invalid command argument");
460: if (argc > 2) set_fixed(argv[2]);
461: }
462: else set_fixed(argv[1]);
463:
464: build_wordlist();
465: signal(SIGINT, fbreak);
466: read_words();
467: exit(0);
468: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.