|
|
1.1 root 1: #ifndef lint
2: static char *sccsid = "@(#)hunt2.c 4.5 (Berkeley) 9/28/87";
3: #endif
4:
5: #include "refer..c"
6:
7: static int *coord = 0;
8: int hh[50];
9: extern int *hfreq, hfrflg, hcomp(), hexch();
10: extern int prfreqs;
11:
12: doquery(hpt, nhash, fb, nitem, qitem, master)
13: long *hpt;
14: FILE *fb;
15: char *qitem[];
16: unsigned *master;
17: {
18: union ptr {
19: unsigned *a;
20: long *b;
21: } umaster;
22: long k;
23: union ptr prevdrop;
24: int nf = 0, best = 0, nterm = 0, i, g, j;
25: int *prevcoord;
26: long lp;
27: extern int lmaster, colevel, reached;
28: long getl();
29: extern int iflong;
30:
31: # if D1
32: fprintf(stderr, "entering doquery nitem %d\n",nitem);
33: fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n", hpt[0],hpt[1],hpt[2],hpt[3],hpt[4]);
34: fprintf(stderr, "and frequencies are %d %d %d %d %d\n",hfreq[0],hfreq[1],hfreq[2],hfreq[3],hfreq[4]);
35: # endif
36: if (iflong)
37: umaster.b = (long *) master;
38: else
39: umaster.a = master;
40: _assert (lmaster>0);
41: if (coord==0)
42: coord = (int *) zalloc(lmaster, sizeof(lmaster));
43: if (colevel>0)
44: {
45: if (iflong)
46: prevdrop.b = (long *) zalloc(lmaster, sizeof(long));
47: else
48: prevdrop.a = (unsigned *) zalloc(lmaster, sizeof(int));
49: prevcoord = (int *) zalloc(lmaster, sizeof(lmaster));
50: }
51: else
52: {
53: prevdrop.a=umaster.a;
54: prevcoord=coord;
55: }
56: # if D1
57: fprintf(stderr, "nitem %d\n",nitem);
58: # endif
59: for(i=0; i<nitem; i++)
60: {
61: hh[i] = hash(qitem[i])%nhash;
62: # if D1
63: fprintf(stderr,"query wd X%sX has hash %d\n", qitem[i], hh[i]);
64: # endif
65: }
66: # if D1
67: fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
68: # endif
69: if (prfreqs)
70: for(i=0; i<nitem; i++)
71: fprintf(stderr,"item %s hash %d hfreq %d\n",qitem[i], hh[i], hfreq[hh[i]]);
72: /* if possible, sort query into decreasing frequency of hashes */
73: if (hfrflg)
74: shell (nitem, hcomp, hexch);
75: # if D1
76: for(i=0; i<nitem; i++)
77: fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
78: # endif
79: lp = hpt [hh[0]];
80: # if D1
81: fprintf(stderr,"first item hash %d lp %ld 0%lo\n", hh[0],lp,lp);
82: # endif
83: _assert (fb!=NULL);
84: _assert (fseek(fb, lp, 0) != -1);
85: for(i=0; i<lmaster; i++)
86: {
87: if (iflong)
88: umaster.b[i] = getl(fb);
89: else
90: umaster.a[i] = getw(fb);
91: coord[i]=1;
92: # if D2
93: if (iflong)
94: fprintf(stderr,"umaster has %ld\n",(umaster.b[i]));
95: else
96: fprintf(stderr,"umaster has %d\n",(umaster.a[i]));
97: # endif
98: _assert (i<lmaster);
99: if (iflong)
100: {
101: if (umaster.b[i] == -1L) break;
102: }
103: else
104: {
105: if (umaster.a[i] == -1) break;
106: }
107: }
108: nf= i;
109: for(nterm=1; nterm<nitem; nterm++)
110: {
111: # ifdef D1
112: fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
113: # endif
114: if (colevel>0)
115: {
116: for(j=0; j<nf; j++)
117: {
118: if (iflong)
119: prevdrop.b[j] = umaster.b[j];
120: else
121: prevdrop.a[j] = umaster.a[j];
122: prevcoord[j] = coord[j];
123: }
124: }
125: lp = hpt[hh[nterm]];
126: _assert (fseek(fb, lp, 0) != -1);
127: # if D1
128: fprintf(stderr,"item %d hash %d seek to %ld\n",nterm,hh[nterm],lp);
129: # endif
130: g=j=0;
131: while (1)
132: {
133: if (iflong)
134: k = getl(fb);
135: else
136: k = getw(fb);
137: if (k== -1) break;
138: # if D2
139: fprintf(stderr,"next term finds %ld\n",k);
140: # endif
141: # if D3
142: if (iflong)
143: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.b[j],(long)(k));
144: else
145: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.a[j],(long)(k));
146: # endif
147: while (j<nf && (iflong?prevdrop.b[j]:prevdrop.a[j])<k)
148: {
149: # if D3
150: if (iflong)
151: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
152: j,nf,prevdrop.b[j], prevcoord[j], colevel, nterm, (long)(k));
153: else
154: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
155: j,nf,prevdrop.a[j], prevcoord[j], colevel, nterm, (long)(k));
156: # endif
157: if (prevcoord[j] + colevel <= nterm)
158: j++;
159: else
160: {
161: _assert (g<lmaster);
162: if (iflong)
163: umaster.b[g] = prevdrop.b[j];
164: else
165: umaster.a[g] = prevdrop.a[j];
166: coord[g++] = prevcoord[j++];
167: # if D1
168: if (iflong)
169: fprintf(stderr, " not skip g %d doc %d coord %d note %d\n",g,umaster.b[g-1], coord[g-1],umaster.b[j-1]);
170: else
171: fprintf(stderr, " not skip g %d doc %ld coord %d nterm %d\n",g,umaster.a[g-1], coord[g-1],nterm);
172: # endif
173: continue;
174: }
175: }
176: if (colevel==0 && j>=nf) break;
177: if (j<nf && (iflong? prevdrop.b[j]: prevdrop.a[j]) == k)
178: {
179: if (iflong)
180: umaster.b[g]=k;
181: else
182: umaster.a[g]=k;
183: coord[g++] = prevcoord[j++]+1;
184: # if D1
185: if (iflong)
186: fprintf(stderr, " at g %d item %ld coord %d note %ld\n",g,umaster.b[g-1],coord[g-1],umaster.b[j-1]);
187: else
188: fprintf(stderr, " at g %d item %d coord %d note %d\n",g,umaster.a[g-1],coord[g-1],umaster.a[j-1]);
189: # endif
190: }
191: else
192: if (colevel >= nterm)
193: {
194: if (iflong)
195: umaster.b[g]=k;
196: else
197: umaster.a[g]=k;
198: coord[g++] = 1;
199: }
200: }
201: # if D1
202: fprintf(stderr,"now have %d items\n",g);
203: # endif
204: if (colevel>0)
205: for ( ; j<nf; j++)
206: if (prevcoord[j]+colevel > nterm)
207: {
208: _assert(g<lmaster);
209: if (iflong)
210: umaster.b[g] = prevdrop.b[j];
211: else
212: umaster.a[g] = prevdrop.a[j];
213: coord[g++] = prevcoord[j];
214: # if D3
215: if(iflong)
216: fprintf(stderr, "copied over %ld coord %d\n",umaster.b[g-1], coord[g-1]);
217: else
218: fprintf(stderr, "copied over %d coord %d\n",umaster.a[g-1], coord[g-1]);
219: # endif
220: }
221: nf = g;
222: }
223: if (colevel>0)
224: {
225: best=0;
226: for(j=0; j<nf; j++)
227: if (coord[j]>best) best = coord[j];
228: # if D1
229: fprintf(stderr, "colevel %d best %d\n", colevel, best);
230: # endif
231: reached = best;
232: for(g=j=0; j<nf; j++)
233: if (coord[j]==best)
234: {
235: if (iflong)
236: umaster.b[g++] = umaster.b[j];
237: else
238: umaster.a[g++] = umaster.a[j];
239: }
240: nf=g;
241: # if D1
242: fprintf(stderr, "yet got %d\n",nf);
243: # endif
244: }
245: # ifdef D1
246: fprintf(stderr, " returning with %d\n",nf);
247: # endif
248: if (colevel)
249: {
250: free(prevdrop, lmaster, iflong?sizeof(long): sizeof(int));
251: free(prevcoord, lmaster, sizeof (lmaster));
252: }
253: # if D3
254: for(g=0;g<nf;g++)
255: if(iflong)
256: fprintf(stderr,":%ld\n",umaster.b[g]);
257: else
258: fprintf(stderr,":%d\n",umaster.a[g]);
259: # endif
260: return(nf);
261: }
262:
263: long
264: getl(fb)
265: FILE *fb;
266: {
267: return(getw(fb));
268: }
269:
270: putl(ll, f)
271: long ll;
272: FILE *f;
273: {
274: putw(ll, f);
275: }
276:
277: hcomp( n1, n2)
278: {
279: return (hfreq[hh[n1]]<=hfreq[hh[n2]]);
280: }
281:
282: hexch( n1, n2 )
283: {
284: int t;
285: t = hh[n1];
286: hh[n1] = hh[n2];
287: hh[n2] = t;
288: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.