|
|
# include "stdio.h"
# include "ctype.h"
# include <limits.h>
# include <string.h>
# include "bib.h"
4:
5: char commlist[MAXCOMM]= /* list of strings of common words */
6: "";
7: int firsttime = 1;
8:
9: /* makekey(p,max_klen,common): compresses *p into a key
10: folds upper to lower case. ignores non-alphanumeric
11: drops keys of length <= 2.
12: drops words in common (name of file of words, one per line)
13: (first call determines common for all later calls)
14: */
15: makekey(p,max_klen,common)
16: char *p;
17: int max_klen; /* max key length */
18: char *common;
19: {
20: register char *from, *to, *stop;
21:
22: if (firsttime) {
23: firsttime= 0;
24: load_comm(common);
25: }
26:
27: from= p;
28: to= p;
29: stop= max_klen+p;
30: while (*from != NULL && to < stop) {
31: if (islower(*from)) *to++ = *from++;
32: else if (isdigit(*from)) *to++ = *from++;
33: else if (isupper(*from)) {
34: *to++ = tolower(*from);
35: from++;
36: }
37: else from++;
38: }
39: *to= NULL;
40: if(isdigit(p[0])) {
41: if ((p[0] != '1') || ((p[1] != '9') && (p[1] != '8')) || (to-p != 4)) {
42: *p = NULL;
43: return;
44: }
45: }
46:
47: if (to<=p+2 || c_look(p,1) ) {
48: *p= NULL;
49: }
50: }
51:
52:
/* trimnl(ln): removes one trailing newline from ln, in place.
 *	ln must be a NUL-terminated string.  An empty string is left
 *	untouched (the byte before ln is never examined).
 *	Returns ln.
 */
char *
trimnl(char *ln)
{
	char *p = ln;

	while (*p != '\0')
		p++;
	/* p is on the terminator; look back only if a character precedes it */
	if (p > ln && p[-1] == '\n')
		p[-1] = '\0';
	return ln;
}
63:
64:
65:
#define COMNUM 500		/* number of common words cbuf is sized for */
#define COMTSIZE 997		/* number of slots in the common-word hash table */

/* upper limit on the number of lines load_comm() reads */
int comcount = COMNUM;

/* storage for the common words, packed one after another, each NUL-terminated */
static char cbuf[COMNUM*9];

/* hash table of pointers to common words; an empty slot is a null pointer */
static char *cwds[COMTSIZE];
71:
72:
73: /* read file common into common hashtable
74: */
75: load_comm(common)
76: char *common;
77: {
78: FILE *commfile; /* stream of common words */
79: char *p;
80: int i;
81:
82: commfile= fopen(common,"r");
83: if (commfile==NULL) {
84: fprintf(stderr, "cannot open '%s'\n", common);
85: return;
86: }
87: p = cbuf;
88: for(i=0; i <comcount; i++) {
89: if(fgets(p,15,commfile) == NULL)
90: break;
91: trimnl(p);
92: if(strlen(p) > 6)
93: p[6] = 0;
94: c_look(p, 0);
95: while(*p++);
96: }
97: fclose(commfile);
98: }
99:
100:
101: c_look(s,fl)
102: char *s;
103: {
104: int h;
105: h = hash(s) % (COMTSIZE);
106: while(cwds[h] != 0) {
107: if(strcmp(s,cwds[h]) ==0)
108: return(1);
109: h = (h+1) % (COMTSIZE);
110: }
111: if(fl == 0)
112: cwds[h] = s;
113: return(0);
114: }
115:
116:
/* hash(s): hash value of the NUL-terminated string s
 *
 *	Returns a value in 0 .. INT_MAX.
 *
 *	For each character c, in order:  n += (c*n + c) << (n % 4).
 *	The sum is kept in an unsigned int so that wrap-around and the
 *	shift are well defined; the result is the same as with plain int
 *	arithmetic whenever that arithmetic does not overflow.
 */
int
hash(char *s)
{
	unsigned int n = 0;
	int c;

	for (; (c = *s) != 0; s++)
		n += ((unsigned int)c * n + (unsigned int)c) << (n % 4);

	/* a value above INT_MAX corresponds to a negative int: take its
	 * magnitude, then mask so the result can never be negative */
	if (n > (unsigned int)INT_MAX)
		n = 0u - n;
	return (int)(n & (unsigned int)INT_MAX);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.