|
|
1.1 root 1: #ifndef lint
2: static char *sccsid = "@(#)hunt2.c 4.1 (Berkeley) 5/6/83";
3: #endif
4:
5: #include "refer..c"
6:
7: static int *coord = 0;
8: int hh[50];
9: extern int *hfreq, hfrflg, hcomp(), hexch();
10: extern int prfreqs;
11:
12: doquery(hpt, nhash, fb, nitem, qitem, master)
13: long *hpt;
14: FILE *fb;
15: char *qitem[];
16: union ptr {
17: unsigned *a;
18: long *b;
19: } master;
20: {
21: long k;
22: union ptr prevdrop;
23: int nf = 0, best = 0, nterm = 0, i, g, j;
24: int *prevcoord;
25: long lp;
26: extern int lmaster, colevel, reached;
27: long getl();
28: unsigned getw();
29: extern int iflong;
30:
31: # if D1
32: fprintf(stderr, "entering doquery nitem %d\n",nitem);
33: fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n", hpt[0],hpt[1],hpt[2],hpt[3],hpt[4]);
34: fprintf(stderr, "and frequencies are %d %d %d %d %d\n",hfreq[0],hfreq[1],hfreq[2],hfreq[3],hfreq[4]);
35: # endif
36: _assert (lmaster>0);
37: if (coord==0)
38: coord = zalloc(lmaster, sizeof(lmaster));
39: if (colevel>0)
40: {
41: prevdrop.a=zalloc(lmaster,iflong?sizeof(long): sizeof(int));
42: prevcoord = zalloc(lmaster, sizeof(lmaster));
43: }
44: else
45: {
46: prevdrop.a=master.a;
47: prevcoord=coord;
48: }
49: # if D1
50: fprintf(stderr, "nitem %d\n",nitem);
51: # endif
52: for(i=0; i<nitem; i++)
53: {
54: hh[i] = hash(qitem[i])%nhash;
55: # if D1
56: fprintf(stderr,"query wd X%sX has hash %d\n", qitem[i], hh[i]);
57: # endif
58: }
59: # if D1
60: fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
61: # endif
62: if (prfreqs)
63: for(i=0; i<nitem; i++)
64: fprintf(stderr,"item %s hash %d hfreq %d\n",qitem[i], hh[i], hfreq[hh[i]]);
65: /* if possible, sort query into decreasing frequency of hashes */
66: if (hfrflg)
67: shell (nitem, hcomp, hexch);
68: # if D1
69: for(i=0; i<nitem; i++)
70: fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
71: # endif
72: lp = hpt [hh[0]];
73: # if D1
74: fprintf(stderr,"first item hash %d lp %ld 0%lo\n", hh[0],lp,lp);
75: # endif
76: _assert (fb!=NULL);
77: _assert (fseek(fb,lp,0)==NULL);
78: for(i=0; i<lmaster; i++)
79: {
80: if (iflong)
81: master.b[i] = getl(fb);
82: else
83: master.a[i] = getw(fb);
84: coord[i]=1;
85: # if D2
86: if (iflong)
87: fprintf(stderr,"master has %ld\n",(master.b[i]));
88: else
89: fprintf(stderr,"master has %d\n",(master.a[i]));
90: # endif
91: _assert (i<lmaster);
92: if (iflong)
93: {
94: if (master.b[i] == -1L) break;
95: }
96: else
97: {
98: if (master.a[i] == -1) break;
99: }
100: }
101: nf= i;
102: for(nterm=1; nterm<nitem; nterm++)
103: {
104: # ifdef D1
105: fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
106: # endif
107: if (colevel>0)
108: {
109: for(j=0; j<nf; j++)
110: {
111: if (iflong)
112: prevdrop.b[j] = master.b[j];
113: else
114: prevdrop.a[j] = master.a[j];
115: prevcoord[j] = coord[j];
116: }
117: }
118: lp = hpt[hh[nterm]];
119: _assert (fseek(fb, lp, 0)==0);
120: # if D1
121: fprintf(stderr,"item %d hash %d seek to %ld\n",nterm,hh[nterm],lp);
122: # endif
123: g=j=0;
124: while (1)
125: {
126: if (iflong)
127: k = getl(fb);
128: else
129: k = getw(fb);
130: if (k== -1) break;
131: # if D2
132: fprintf(stderr,"next term finds %ld\n",k);
133: # endif
134: # if D3
135: if (iflong)
136: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.b[j],(long)(k));
137: else
138: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.a[j],(long)(k));
139: # endif
140: while (j<nf && (iflong?prevdrop.b[j]:prevdrop.a[j])<k)
141: {
142: # if D3
143: if (iflong)
144: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
145: j,nf,prevdrop.b[j], prevcoord[j], colevel, nterm, (long)(k));
146: else
147: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
148: j,nf,prevdrop.a[j], prevcoord[j], colevel, nterm, (long)(k));
149: # endif
150: if (prevcoord[j] + colevel <= nterm)
151: j++;
152: else
153: {
154: _assert (g<lmaster);
155: if (iflong)
156: master.b[g] = prevdrop.b[j];
157: else
158: master.a[g] = prevdrop.a[j];
159: coord[g++] = prevcoord[j++];
160: # if D1
161: if (iflong)
162: fprintf(stderr, " not skip g %d doc %d coord %d note %d\n",g,master.b[g-1], coord[g-1],master.b[j-1]);
163: else
164: fprintf(stderr, " not skip g %d doc %ld coord %d nterm %d\n",g,master.a[g-1], coord[g-1],nterm);
165: # endif
166: continue;
167: }
168: }
169: if (colevel==0 && j>=nf) break;
170: if (j<nf && (iflong? prevdrop.b[j]: prevdrop.a[j]) == k)
171: {
172: if (iflong)
173: master.b[g]=k;
174: else
175: master.a[g]=k;
176: coord[g++] = prevcoord[j++]+1;
177: # if D1
178: if (iflong)
179: fprintf(stderr, " at g %d item %ld coord %d note %ld\n",g,master.b[g-1],coord[g-1],master.b[j-1]);
180: else
181: fprintf(stderr, " at g %d item %d coord %d note %d\n",g,master.a[g-1],coord[g-1],master.a[j-1]);
182: # endif
183: }
184: else
185: if (colevel >= nterm)
186: {
187: if (iflong)
188: master.b[g]=k;
189: else
190: master.a[g]=k;
191: coord[g++] = 1;
192: }
193: }
194: # if D1
195: fprintf(stderr,"now have %d items\n",g);
196: # endif
197: if (colevel>0)
198: for ( ; j<nf; j++)
199: if ((iflong?prevcoord.b[j]:prevcoord.a[j])+colevel > nterm)
200: {
201: _assert(g<lmaster);
202: if (iflong)
203: master.b[g] = prevdrop.b[j];
204: else
205: master.a[g] = prevdrop.a[j];
206: coord[g++] = prevcoord[j];
207: # if D3
208: if(iflong)
209: fprintf(stderr, "copied over %ld coord %d\n",master.b[g-1], coord[g-1]);
210: else
211: fprintf(stderr, "copied over %d coord %d\n",master.a[g-1], coord[g-1]);
212: # endif
213: }
214: nf = g;
215: }
216: if (colevel>0)
217: {
218: best=0;
219: for(j=0; j<nf; j++)
220: if (coord[j]>best) best = coord[j];
221: # if D1
222: fprintf(stderr, "colevel %d best %d\n", colevel, best);
223: # endif
224: reached = best;
225: for(g=j=0; j<nf; j++)
226: if (coord[j]==best)
227: {
228: if (iflong)
229: master.b[g++] = master.b[j];
230: else
231: master.a[g++] = master.a[j];
232: }
233: nf=g;
234: # if D1
235: fprintf(stderr, "yet got %d\n",nf);
236: # endif
237: }
238: # ifdef D1
239: fprintf(stderr, " returning with %d\n",nf);
240: # endif
241: if (colevel)
242: {
243: free(prevdrop, lmaster, iflong?sizeof(long): sizeof(int));
244: free(prevcoord, lmaster, sizeof (lmaster));
245: }
246: # if D3
247: for(g=0;g<nf;g++)
248: if(iflong)
249: fprintf(stderr,":%ld\n",master.b[g]);
250: else
251: fprintf(stderr,":%d\n",master.a[g]);
252: # endif
253: return(nf);
254: }
255:
256: long
257: getl(fb)
258: FILE *fb;
259: {
260: return(getw(fb));
261: }
262:
263: putl(ll, f)
264: long ll;
265: FILE *f;
266: {
267: putw(ll, f);
268: }
269:
270: hcomp( n1, n2)
271: {
272: return (hfreq[hh[n1]]<=hfreq[hh[n2]]);
273: }
274:
275: hexch( n1, n2 )
276: {
277: int t;
278: t = hh[n1];
279: hh[n1] = hh[n2];
280: hh[n2] = t;
281: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.