|
|
1.1 ! root 1: # include "what..c" ! 2: struct wst { char *tx; int ct; } ; ! 3: # define NW 5 ! 4: # define ZIPF 10 ! 5: # define HASHF 3 ! 6: # define WLEN 10 ! 7: # define SAME 0 ! 8: # define TSIZE HASHF*ZIPF*NW ! 9: int HSIZE; ! 10: static struct wst word[TSIZE]; ! 11: static char tbuf[NW*ZIPF*WLEN], *tp tbuf; ! 12: # define NF 10 ! 13: ! 14: freqwd ( fn, wd, nin ) ! 15: char *fn[], *wd[]; ! 16: { ! 17: FILE *fi[NF]; ! 18: int nw 0, i, any, nf, j, wexch(), wcomp(); ! 19: char tw[20]; ! 20: for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); ! 21: for(nf=0; fn[nf] && nf<NF; nf++) ! 22: fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; ! 23: do { ! 24: any=0; ! 25: for(i=0; i<nf; i++) ! 26: { ! 27: if (fi[i]==NULL) continue; ! 28: if (gw(fi[i], tw)==0) ! 29: { ! 30: fclose(fi[i]); ! 31: fi[i]==NULL; ! 32: continue; ! 33: } ! 34: any=1; ! 35: if (common(tw)) continue; ! 36: if (strlen(tw)<3) continue; ! 37: j = lookup (tw); ! 38: if (j<0 && nw < ZIPF*NW) ! 39: { ! 40: j = -j; ! 41: strcpy (tp, tw); ! 42: word[j].tx = tp; ! 43: while (*tp++); ! 44: _assert (tp < tbuf+NW*ZIPF*WLEN); ! 45: word[j].ct = 1; ! 46: nw++; ! 47: } ! 48: else if (j>0) ! 49: word[j].ct++; ! 50: } ! 51: } while (any>0); ! 52: shell ( TSIZE, wcomp, wexch ); ! 53: for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) ! 54: if (nw>=nin*2 && word[nw].ct != word[0].ct) ! 55: break; ! 56: for(i=0; i<nw; i++) ! 57: wd[i] = word[i].tx; ! 58: return(nw); ! 59: } ! 60: ! 61: lookup (wt) ! 62: char *wt; ! 63: { ! 64: int h; ! 65: h = hash(wt); ! 66: for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) ! 67: { ! 68: if (h==0) continue; ! 69: if (strcmp(wt, word[h].tx) == SAME) ! 70: return (h); ! 71: } ! 72: return ( -h ); ! 73: } ! 74: ! 75: hash (s) ! 76: char *s; ! 77: { ! 78: int k 0, c 0, i 0; ! 79: while ( c = *s++ ) ! 80: k ^= (c << (i++%5) ); ! 81: return (k>0 ? k : -k); ! 82: } ! 83: ! 84: gw (f, t) ! 85: char *t; ! 86: FILE *f; ! 87: { ! 88: int start 1, oldc ' ', c; ! 89: if (f==NULL) return (0); ! 90: while ( (c=getc(f)) != EOF) ! 91: { ! 92: if (isupper(c)) c= tolower(c); ! 93: if (start==1) ! 94: if (!alphanum(c, oldc)) ! 95: continue; ! 96: else ! 97: start=0; ! 98: if (start==0) ! 99: if (alphanum(c, oldc)) ! 100: *t++ = c; ! 101: else ! 102: { ! 103: *t=0; ! 104: return(1); ! 105: } ! 106: oldc=c; ! 107: } ! 108: return(0); ! 109: } ! 110: ! 111: alphanum( c, oldc ) ! 112: { ! 113: if (isalpha(c) || isdigit(c)) return(1); ! 114: if (isalpha(oldc)) ! 115: if (c== '\'' || c == '-') return(1); ! 116: return(0); ! 117: } ! 118: ! 119: wcomp (n1, n2) ! 120: { ! 121: return (word[n1].ct >= word[n2].ct); ! 122: } ! 123: ! 124: wexch (n1, n2) ! 125: { ! 126: struct wst tt; ! 127: tt.tx = word[n1].tx; tt.ct = word[n1].ct; ! 128: word[n1].tx = word[n2].tx; word[n1].ct = word[n2].ct; ! 129: word[n2].tx = tt.tx; word[n2].ct = tt.ct; ! 130: } ! 131: ! 132: prime(n) ! 133: { ! 134: /* only executed once- slow is ok */ ! 135: int i; ! 136: if (n%2==0) return(0); ! 137: for(i=3; i*i<=n; i+= 2) ! 138: if (n%i ==0 ) return(0); ! 139: return(1); ! 140: } ! 141: trimnl(s) ! 142: char *s; ! 143: { ! 144: while (*s)s++; ! 145: if (*--s=='\n') *s=0; ! 146: } ! 147: ! 148: ! 149: /* this is the test for what4.c as a standalone prog ... ! 150: main (argc, argv) ! 151: char *argv[]; ! 152: { ! 153: char *ff[10], *wd[20], **ffp ff; ! 154: int n, i; ! 155: while (--argc) ! 156: *ffp++ = *++argv; ! 157: *ffp=0; ! 158: n=freqwd(ff,wd); ! 159: for(i=0; i<n; i++) ! 160: printf("%s\n",wd[i]); ! 161: printf("total of %d items\n",n); ! 162: } ! 163: /* .... */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.