|
|
1.1 root 1: /* @(#)spell.h 4.1 12/18/82 */
2:
3: #include <stdio.h>
4: #include <ctype.h>
5:
6: #ifndef unix
7: #define SHIFT 5
8: #define TABSIZE (int)(400000/(1<<SHIFT))
9: int *tab; /*honeywell loader deficiency*/
10: #else
11: #define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */
12: #define SHIFT 4
13: #define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/
14: short tab[TABSIZE];
15: #endif
16: long p[] = {
17: 399871,
18: 399887,
19: 399899,
20: 399911,
21: 399913,
22: 399937,
23: 399941,
24: 399953,
25: 399979,
26: 399983,
27: 399989,
28: };
29: #define NP (sizeof(p)/sizeof(p[0]))
30: #define NW 30
31:
32: /*
33: * Hash table for spelling checker has n bits.
34: * Each word w is hashed by k different (modular) hash functions, hi.
35: * The bits hi(w), i=1..k, are set for words in the dictionary.
36: * Assuming independence, the probability that no word of a d-word
37: * dictionary sets a particular bit is given by the Poisson formula
38: * P = exp(-y)*y**0/0!, where y=d*k/n.
39: * The probability that a random string is recognized as a word is then
40: * (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2,
41: * whence one finds, for example, that a 25000-word dictionary in a
42: * 400000-bit table works best with k=11.
43: */
44:
45: long pow2[NP][NW];
46:
47: prime(argc, argv) register char **argv;
48: {
49: int i, j;
50: long h;
51: register long *lp;
52:
53: #ifndef unix
54: if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL)
55: return(0);
56: #endif
57: if (argc > 1) {
58: FILE *f;
59: if ((f = fopen(argv[1], "ri")) == NULL)
60: return(0);
61: if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE)
62: return(0);
63: fclose(f);
64: }
65: for (i=0; i<NP; i++) {
66: h = *(lp = pow2[i]) = 1<<14;
67: for (j=1; j<NW; j++)
68: h = *++lp = (h<<7) % p[i];
69: }
70: return(1);
71: }
72:
73: #define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1))))
74: #define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.