Annotation of 43BSD/usr.bin/spell/spell.h, revision 1.1.1.1

1.1       root        1: /*     @(#)spell.h     4.1     12/18/82        */
                      2: 
                      3: #include <stdio.h>
                      4: #include <ctype.h>
                      5: 
                      6: #ifndef unix
                      7: #define SHIFT  5
                      8: #define TABSIZE (int)(400000/(1<<SHIFT))
                      9: int    *tab;   /*honeywell loader deficiency*/
                     10: #else
                     11: #define Tolower(c)     (isupper(c)?tolower(c):c) /* ugh!!! */
                     12: #define SHIFT  4
                     13: #define TABSIZE 25000  /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/
                     14: short  tab[TABSIZE];
                     15: #endif
                     16: long   p[] = {
                     17:        399871,
                     18:        399887,
                     19:        399899,
                     20:        399911,
                     21:        399913,
                     22:        399937,
                     23:        399941,
                     24:        399953,
                     25:        399979,
                     26:        399983,
                     27:        399989,
                     28: };
                     29: #define        NP      (sizeof(p)/sizeof(p[0]))
                     30: #define        NW      30
                     31: 
                     32: /*
                     33: * Hash table for spelling checker has n bits.
                     34: * Each word w is hashed by k different (modular) hash functions, hi.
                     35: * The bits hi(w), i=1..k, are set for words in the dictionary.
                     36: * Assuming independence, the probability that no word of a d-word
                     37: * dictionary sets a particular bit is given by the Poisson formula
                     38: * P = exp(-y)*y**0/0!, where y=d*k/n.
                     39: * The probability that a random string is recognized as a word is then
                     40: * (1-P)**k.  For given n and d this is minimum when y=log(2), P=1/2,
                     41: * whence one finds, for example, that a 25000-word dictionary in a
                     42: * 400000-bit table works best with k=11.
                     43: */
                     44: 
                     45: long   pow2[NP][NW];
                     46: 
                     47: prime(argc, argv) register char **argv;
                     48: {
                     49:        int i, j;
                     50:        long h;
                     51:        register long *lp;
                     52: 
                     53: #ifndef unix
                     54:        if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL)
                     55:                return(0);
                     56: #endif
                     57:        if (argc > 1) {
                     58:                FILE *f;
                     59:                if ((f = fopen(argv[1], "ri")) == NULL)
                     60:                        return(0);
                     61:                if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE)
                     62:                        return(0);
                     63:                fclose(f);
                     64:        }
                     65:        for (i=0; i<NP; i++) {
                     66:                h = *(lp = pow2[i]) = 1<<14;
                     67:                for (j=1; j<NW; j++)
                     68:                        h = *++lp = (h<<7) % p[i];
                     69:        }
                     70:        return(1);
                     71: }
                     72: 
                     73: #define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1))))
                     74: #define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.