|
|
1.1 ! root 1: # include "stdio.h" ! 2: # include "ctype.h" ! 3: # include "bib.h" ! 4: ! 5: char commlist[MAXCOMM]= /* list of strings of common words */ ! 6: ""; ! 7: int firsttime = 1; ! 8: ! 9: /* makekey(p,max_klen,common): compresses *p into a key ! 10: folds upper to lower case. ignores non-alphanumeric ! 11: drops keys of length <= 2. ! 12: drops words in common (name of file of words, one per line) ! 13: (first call determines common for all later calls) ! 14: */ ! 15: makekey(p,max_klen,common) ! 16: char *p; ! 17: int max_klen; /* max key length */ ! 18: char *common; ! 19: { ! 20: register char *from, *to, *stop; ! 21: ! 22: if (firsttime) { ! 23: firsttime= 0; ! 24: load_comm(common); ! 25: } ! 26: ! 27: from= p; ! 28: to= p; ! 29: stop= max_klen+p; ! 30: while (*from != NULL && to < stop) { ! 31: if (islower(*from)) *to++ = *from++; ! 32: else if (isdigit(*from)) *to++ = *from++; ! 33: else if (isupper(*from)) { ! 34: *to++ = tolower(*from); ! 35: from++; ! 36: } ! 37: else from++; ! 38: } ! 39: *to= NULL; ! 40: if(isdigit(p[0])) { ! 41: if ((p[0] != '1') || ((p[1] != '9') && (p[1] != '8')) || (to-p != 4)) { ! 42: *p = NULL; ! 43: return; ! 44: } ! 45: } ! 46: ! 47: if (to<=p+2 || c_look(p,1) ) { ! 48: *p= NULL; ! 49: } ! 50: } ! 51: ! 52: ! 53: char * ! 54: trimnl(ln) ! 55: char *ln; ! 56: { ! 57: register char *p= ln; ! 58: while (*p) p++; ! 59: p--; ! 60: if (*p == '\n') *p=0; ! 61: return(ln); ! 62: } ! 63: ! 64: ! 65: ! 66: #define COMNUM 500 ! 67: #define COMTSIZE 997 ! 68: int comcount = 500; ! 69: static char cbuf[COMNUM*9]; ! 70: static char *cwds[COMTSIZE]; ! 71: ! 72: ! 73: /* read file common into common hashtable ! 74: */ ! 75: load_comm(common) ! 76: char *common; ! 77: { ! 78: FILE *commfile; /* stream of common words */ ! 79: char *p; ! 80: int i; ! 81: ! 82: commfile= fopen(common,"r"); ! 83: if (commfile==NULL) { ! 84: fprintf(stderr, "cannot open '%s'\n", common); ! 85: return; ! 86: } ! 87: p = cbuf; ! 88: for(i=0; i <comcount; i++) { ! 89: if(fgets(p,15,commfile) == NULL) ! 90: break; ! 91: trimnl(p); ! 92: if(strlen(p) > 6) ! 93: p[6] = 0; ! 94: c_look(p, 0); ! 95: while(*p++); ! 96: } ! 97: fclose(commfile); ! 98: } ! 99: ! 100: ! 101: c_look(s,fl) ! 102: char *s; ! 103: { ! 104: int h; ! 105: h = hash(s) % (COMTSIZE); ! 106: while(cwds[h] != 0) { ! 107: if(strcmp(s,cwds[h]) ==0) ! 108: return(1); ! 109: h = (h+1) % (COMTSIZE); ! 110: } ! 111: if(fl == 0) ! 112: cwds[h] = s; ! 113: return(0); ! 114: } ! 115: ! 116: ! 117: hash (s) ! 118: char *s; ! 119: { ! 120: int c, n; ! 121: for(n=0; c= *s; s++) ! 122: n += (c*n+ c << (n%4)); ! 123: return(n>0 ? n : -n); ! 124: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.