|
|
1.1 root 1: # include "refer..c"
2: static int *coord = 0;
3: int hh[50]; extern int *hfreq, hfrflg, hcomp(), hexch();
4: extern int prfreqs;
5:
6: doquery(hpt, nhash, fb, nitem, qitem, master)
7: union ptr {unsigned *a; long *b;} master;
8: long *hpt;
9: FILE *fb;
10: char *qitem[];
11: {
12: long k;
13: union ptr prevdrop;
14: int nf = 0, best = 0, nterm = 0, i, g, j;
15: int *prevcoord;
16: long lp;
17: extern int lmaster, colevel, reached;
18: long getl(); unsigned getw(); extern int iflong;
19:
20: # if D1
21: fprintf(stderr, "entering doquery nitem %d\n",nitem);
22: fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n", hpt[0],hpt[1],hpt[2],hpt[3],hpt[4]);
23: fprintf(stderr, "and frequencies are %d %d %d %d %d\n",hfreq[0],hfreq[1],hfreq[2],hfreq[3],hfreq[4]);
24: # endif
25: _assert (lmaster>0);
26: if (coord==0)
27: coord = zalloc(lmaster, sizeof(lmaster));
28: if (colevel>0)
29: {
30: prevdrop.a=zalloc(lmaster,iflong?sizeof(long): sizeof(int));
31: prevcoord = zalloc(lmaster, sizeof(lmaster));
32: }
33: else
34: {
35: prevdrop.a=master.a;
36: prevcoord=coord;
37: }
38: # if D1
39: fprintf(stderr, "nitem %d\n",nitem);
40: # endif
41: for(i=0; i<nitem; i++)
42: {
43: hh[i] = hash(qitem[i])%nhash;
44: # if D1
45: fprintf(stderr,"query wd X%sX has hash %d\n", qitem[i], hh[i]);
46: # endif
47: }
48: # if D1
49: fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
50: # endif
51: if (prfreqs)
52: for(i=0; i<nitem; i++)
53: fprintf(stderr,"item %s hash %d hfreq %d\n",qitem[i], hh[i], hfreq[hh[i]]);
54: /* if possible, sort query into decreasing frequency of hashes */
55: if (hfrflg)
56: shell (nitem, hcomp, hexch);
57: # if D1
58: for(i=0; i<nitem; i++)
59: fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
60: # endif
61: lp = hpt [hh[0]];
62: # if D1
63: fprintf(stderr,"first item hash %d lp %ld 0%lo\n", hh[0],lp,lp);
64: # endif
65: _assert (fb!=NULL);
66: _assert (fseek(fb,lp,0)==NULL);
67: for(i=0; i<lmaster; i++)
68: {
69: if (iflong)
70: master.b[i] = getl(fb);
71: else
72: master.a[i] = getw(fb);
73: coord[i]=1;
74: # if D2
75: if (iflong)
76: fprintf(stderr,"master has %ld\n",(master.b[i]));
77: else
78: fprintf(stderr,"master has %d\n",(master.a[i]));
79: # endif
80: _assert (i<lmaster);
81: if (iflong)
82: {
83: if (master.b[i] == -1L) break;
84: }
85: else
86: {
87: if (master.a[i] == -1) break;
88: }
89: }
90: nf= i;
91: for(nterm=1; nterm<nitem; nterm++)
92: {
93: # ifdef D1
94: fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
95: # endif
96: if (colevel>0)
97: {
98: for(j=0; j<nf; j++)
99: {
100: if (iflong)
101: prevdrop.b[j] = master.b[j];
102: else
103: prevdrop.a[j] = master.a[j];
104: prevcoord[j] = coord[j];
105: }
106: }
107: lp = hpt[hh[nterm]];
108: _assert (fseek(fb, lp, 0)==0);
109: # if D1
110: fprintf(stderr,"item %d hash %d seek to %ld\n",nterm,hh[nterm],lp);
111: # endif
112: g=j=0;
113: while (1)
114: {
115: if (iflong)
116: k = getl(fb);
117: else
118: k = getw(fb);
119: if (k== -1) break;
120: # if D2
121: fprintf(stderr,"next term finds %ld\n",k);
122: # endif
123: # if D3
124: if (iflong)
125: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.b[j],(long)(k));
126: else
127: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.a[j],(long)(k));
128: # endif
129: while (j<nf && (iflong?prevdrop.b[j]:prevdrop.a[j])<k)
130: {
131: # if D3
132: if (iflong)
133: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
134: j,nf,prevdrop.b[j], prevcoord[j], colevel, nterm, (long)(k));
135: else
136: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
137: j,nf,prevdrop.a[j], prevcoord[j], colevel, nterm, (long)(k));
138: # endif
139: if (prevcoord[j] + colevel <= nterm)
140: j++;
141: else
142: {
143: _assert (g<lmaster);
144: if (iflong)
145: master.b[g] = prevdrop.b[j];
146: else
147: master.a[g] = prevdrop.a[j];
148: coord[g++] = prevcoord[j++];
149: # if D1
150: if (iflong)
151: fprintf(stderr, " not skip g %d doc %d coord %d note %d\n",g,master.b[g-1], coord[g-1],master.b[j-1]);
152: else
153: fprintf(stderr, " not skip g %d doc %ld coord %d nterm %d\n",g,master.a[g-1], coord[g-1],nterm);
154: # endif
155: continue;
156: }
157: }
158: if (colevel==0 && j>=nf) break;
159: if (j<nf && (iflong? prevdrop.b[j]: prevdrop.a[j]) == k)
160: {
161: if (iflong)
162: master.b[g]=k;
163: else
164: master.a[g]=k;
165: coord[g++] = prevcoord[j++]+1;
166: # if D1
167: if (iflong)
168: fprintf(stderr, " at g %d item %ld coord %d note %ld\n",g,master.b[g-1],coord[g-1],master.b[j-1]);
169: else
170: fprintf(stderr, " at g %d item %d coord %d note %d\n",g,master.a[g-1],coord[g-1],master.a[j-1]);
171: # endif
172: }
173: else
174: if (colevel >= nterm)
175: {
176: if (iflong)
177: master.b[g]=k;
178: else
179: master.a[g]=k;
180: coord[g++] = 1;
181: }
182: }
183: # if D1
184: fprintf(stderr,"now have %d items\n",g);
185: # endif
186: if (colevel>0)
187: for ( ; j<nf; j++)
188: if ((iflong?prevcoord.b[j]:prevcoord.a[j])+colevel > nterm)
189: {
190: _assert(g<lmaster);
191: if (iflong)
192: master.b[g] = prevdrop.b[j];
193: else
194: master.a[g] = prevdrop.a[j];
195: coord[g++] = prevcoord[j];
196: # if D3
197: if(iflong)
198: fprintf(stderr, "copied over %ld coord %d\n",master.b[g-1], coord[g-1]);
199: else
200: fprintf(stderr, "copied over %d coord %d\n",master.a[g-1], coord[g-1]);
201: # endif
202: }
203: nf = g;
204: }
205: if (colevel>0)
206: {
207: best=0;
208: for(j=0; j<nf; j++)
209: if (coord[j]>best) best = coord[j];
210: # if D1
211: fprintf(stderr, "colevel %d best %d\n", colevel, best);
212: # endif
213: reached = best;
214: for(g=j=0; j<nf; j++)
215: if (coord[j]==best)
216: {
217: if (iflong)
218: master.b[g++] = master.b[j];
219: else
220: master.a[g++] = master.a[j];
221: }
222: nf=g;
223: # if D1
224: fprintf(stderr, "yet got %d\n",nf);
225: # endif
226: }
227: # ifdef D1
228: fprintf(stderr, " returning with %d\n",nf);
229: # endif
230: if (colevel)
231: {
232: free(prevdrop, lmaster, iflong?sizeof(long): sizeof(int));
233: free(prevcoord, lmaster, sizeof (lmaster));
234: }
235: # if D3
236: for(g=0;g<nf;g++)
237: if(iflong)
238: fprintf(stderr,":%ld\n",master.b[g]);
239: else
240: fprintf(stderr,":%d\n",master.a[g]);
241: # endif
242: return(nf);
243: }
244: long
245: getl(fb)
246: FILE *fb;
247: {
248: return(getw(fb));
249: }
250:
251: putl(ll, f)
252: long ll;
253: FILE *f;
254: {
255: putw(ll, f);
256: }
257: hcomp( n1, n2)
258: {
259: return (hfreq[hh[n1]]<=hfreq[hh[n2]]);
260: }
261: hexch( n1, n2 )
262: {
263: int t;
264: t = hh[n1];
265: hh[n1] = hh[n2];
266: hh[n2] = t;
267: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.