|
|
1.1 ! root 1: #ifndef lint ! 2: static char *sccsid = "@(#)what4.c 4.1 (Berkeley) 5/6/83"; ! 3: #endif ! 4: ! 5: #include "what..c" ! 6: #define NW 5 ! 7: #define ZIPF 10 ! 8: #define HASHF 3 ! 9: #define WLEN 10 ! 10: #define SAME 0 ! 11: #define TSIZE HASHF*ZIPF*NW ! 12: #define NF 10 ! 13: ! 14: struct wst { ! 15: char *tx; ! 16: int ct; ! 17: } ! 18: ; ! 19: int HSIZE; ! 20: static struct wst word[TSIZE]; ! 21: static char tbuf[NW*ZIPF*WLEN], *tp tbuf; ! 22: ! 23: freqwd ( fn, wd, nin ) ! 24: char *fn[], *wd[]; ! 25: { ! 26: FILE *fi[NF]; ! 27: int nw 0, i, any, nf, j, wexch(), wcomp(); ! 28: char tw[20]; ! 29: for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); ! 30: for(nf=0; fn[nf] && nf<NF; nf++) ! 31: fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; ! 32: do { ! 33: any=0; ! 34: for(i=0; i<nf; i++) ! 35: { ! 36: if (fi[i]==NULL) continue; ! 37: if (gw(fi[i], tw)==0) ! 38: { ! 39: fclose(fi[i]); ! 40: fi[i]==NULL; ! 41: continue; ! 42: } ! 43: any=1; ! 44: if (common(tw)) continue; ! 45: if (strlen(tw)<3) continue; ! 46: j = lookup (tw); ! 47: if (j<0 && nw < ZIPF*NW) ! 48: { ! 49: j = -j; ! 50: strcpy (tp, tw); ! 51: word[j].tx = tp; ! 52: while (*tp++); ! 53: _assert (tp < tbuf+NW*ZIPF*WLEN); ! 54: word[j].ct = 1; ! 55: nw++; ! 56: } ! 57: else if (j>0) ! 58: word[j].ct++; ! 59: } ! 60: } ! 61: while (any>0); ! 62: shell ( TSIZE, wcomp, wexch ); ! 63: for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) ! 64: if (nw>=nin*2 && word[nw].ct != word[0].ct) ! 65: break; ! 66: for(i=0; i<nw; i++) ! 67: wd[i] = word[i].tx; ! 68: return(nw); ! 69: } ! 70: ! 71: lookup (wt) ! 72: char *wt; ! 73: { ! 74: int h; ! 75: h = hash(wt); ! 76: for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) ! 77: { ! 78: if (h==0) continue; ! 79: if (strcmp(wt, word[h].tx) == SAME) ! 80: return (h); ! 81: } ! 82: return ( -h ); ! 83: } ! 84: ! 85: hash (s) ! 86: char *s; ! 87: { ! 88: int k 0, c 0, i 0; ! 89: while ( c = *s++ ) ! 90: k ^= (c << (i++%5) ); ! 91: return (k>0 ? k : -k); ! 92: } ! 93: ! 94: gw (f, t) ! 95: char *t; ! 96: FILE *f; ! 97: { ! 98: int start 1, oldc ' ', c; ! 99: if (f==NULL) return (0); ! 100: while ( (c=getc(f)) != EOF) ! 101: { ! 102: if (isupper(c)) c= tolower(c); ! 103: if (start==1) ! 104: if (!alphanum(c, oldc)) ! 105: continue; ! 106: else ! 107: start=0; ! 108: if (start==0) ! 109: if (alphanum(c, oldc)) ! 110: *t++ = c; ! 111: else ! 112: { ! 113: *t=0; ! 114: return(1); ! 115: } ! 116: oldc=c; ! 117: } ! 118: return(0); ! 119: } ! 120: ! 121: alphanum( c, oldc ) ! 122: { ! 123: if (isalpha(c) || isdigit(c)) return(1); ! 124: if (isalpha(oldc)) ! 125: if (c== '\'' || c == '-') return(1); ! 126: return(0); ! 127: } ! 128: ! 129: wcomp (n1, n2) ! 130: { ! 131: return (word[n1].ct >= word[n2].ct); ! 132: } ! 133: ! 134: wexch (n1, n2) ! 135: { ! 136: struct wst tt; ! 137: tt.tx = word[n1].tx; ! 138: tt.ct = word[n1].ct; ! 139: word[n1].tx = word[n2].tx; ! 140: word[n1].ct = word[n2].ct; ! 141: word[n2].tx = tt.tx; ! 142: word[n2].ct = tt.ct; ! 143: } ! 144: ! 145: prime(n) ! 146: { ! 147: /* only executed once- slow is ok */ ! 148: int i; ! 149: if (n%2==0) return(0); ! 150: for(i=3; i*i<=n; i+= 2) ! 151: if (n%i ==0 ) return(0); ! 152: return(1); ! 153: } ! 154: ! 155: trimnl(s) ! 156: char *s; ! 157: { ! 158: while (*s)s++; ! 159: if (*--s=='\n') *s=0; ! 160: } ! 161: ! 162: /* this is the test for what4.c as a standalone prog ... */ ! 163: # ifdef 0 ! 164: main (argc, argv) ! 165: char *argv[]; ! 166: { ! 167: char *ff[10], *wd[20], **ffp ff; ! 168: int n, i; ! 169: ! 170: while (--argc) ! 171: *ffp++ = *++argv; ! 172: *ffp=0; ! 173: n=freqwd(ff,wd); ! 174: for(i=0; i<n; i++) ! 175: printf("%s\n",wd[i]); ! 176: printf("total of %d items\n",n); ! 177: } ! 178: # endif 0
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.