|
|
1.1 root 1: # include "refer..c"
2: static int *coord = 0;
3: int *zalloc();
4: int hh[50]; extern int *hfreq, hfrflg, hcomp(), hexch();
5: extern int prfreqs;
6:
7: doquery(hpt, nhash, fb, nitem, qitem, master)
8: union ptr {unsigned *a; long *b;} master;
9: long *hpt;
10: FILE *fb;
11: char *qitem[];
12: {
13: long k;
14: union ptr prevdrop;
15: int nf = 0, best = 0, nterm = 0, i, g, j;
16: int * prevcoord;
17: long lp;
18: extern int lmaster, colevel, reached;
19: long getl(); unsigned getw(); extern int iflong;
20:
21: # if D1
22: fprintf(stderr, "entering doquery nitem %d\n",nitem);
23: fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n", hpt[0],hpt[1],hpt[2],hpt[3],hpt[4]);
24: fprintf(stderr, "and frequencies are %d %d %d %d %d\n",hfreq[0],hfreq[1],hfreq[2],hfreq[3],hfreq[4]);
25: # endif
26: assert (lmaster>0);
27: if (coord==0)
28: coord = zalloc(lmaster, sizeof(lmaster));
29: if (colevel>0)
30: {
31: prevdrop.a=(unsigned *) zalloc(lmaster,iflong?sizeof(long):sizeof(int));
32: prevcoord = zalloc(lmaster, sizeof(lmaster));
33: }
34: else
35: {
36: prevdrop.a=master.a;
37: prevcoord=coord;
38: }
39: # if D1
40: fprintf(stderr, "nitem %d\n",nitem);
41: # endif
42: for(i=0; i<nitem; i++)
43: {
44: hh[i] = hash(qitem[i])%nhash;
45: # if D1
46: fprintf(stderr,"query wd X%sX has hash %d\n", qitem[i], hh[i]);
47: # endif
48: }
49: # if D1
50: fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
51: # endif
52: if (prfreqs)
53: for(i=0; i<nitem; i++)
54: fprintf(stderr,"item %s hash %d hfreq %d\n",qitem[i], hh[i], hfreq[hh[i]]);
55: /* if possible, sort query into decreasing frequency of hashes */
56: if (hfrflg)
57: shell (nitem, hcomp, hexch);
58: # if D1
59: for(i=0; i<nitem; i++)
60: fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
61: # endif
62: lp = hpt [hh[0]];
63: # if D1
64: fprintf(stderr,"first item hash %d lp %ld 0%lo\n", hh[0],lp,lp);
65: # endif
66: assert (fb!=NULL);
67: assert (fseek(fb,lp,0)==NULL);
68: for(i=0; i<lmaster; i++)
69: {
70: if (iflong)
71: master.b[i] = getl(fb);
72: else
73: master.a[i] = getw(fb);
74: coord[i]=1;
75: # if D2
76: if (iflong)
77: fprintf(stderr,"master has %ld\n",(master.b[i]));
78: else
79: fprintf(stderr,"master has %d\n",(master.a[i]));
80: # endif
81: assert (i<lmaster);
82: if (iflong)
83: {
84: if (master.b[i] == -1L) break;
85: }
86: else
87: {
88: if (master.a[i] == -1) break;
89: }
90: }
91: nf= i;
92: for(nterm=1; nterm<nitem; nterm++)
93: {
94: # ifdef D1
95: fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
96: # endif
97: if (colevel>0)
98: {
99: for(j=0; j<nf; j++)
100: {
101: if (iflong)
102: prevdrop.b[j] = master.b[j];
103: else
104: prevdrop.a[j] = master.a[j];
105: prevcoord[j] = coord[j];
106: }
107: }
108: lp = hpt[hh[nterm]];
109: assert (fseek(fb, lp, 0)==0);
110: # if D1
111: fprintf(stderr,"item %d hash %d seek to %ld\n",nterm,hh[nterm],lp);
112: # endif
113: g=j=0;
114: while (1)
115: {
116: if (iflong)
117: k = getl(fb);
118: else
119: k = getw(fb);
120: if (k== -1) break;
121: # if D2
122: fprintf(stderr,"next term finds %ld\n",k);
123: # endif
124: # if D3
125: if (iflong)
126: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.b[j],(long)(k));
127: else
128: fprintf(stderr, "bfwh j %d nf %d master %ld k %ld\n",j,nf,prevdrop.a[j],(long)(k));
129: # endif
130: while (j<nf && (iflong?prevdrop.b[j]:prevdrop.a[j])<k)
131: {
132: # if D3
133: if (iflong)
134: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
135: j,nf,prevdrop.b[j], prevcoord[j], colevel, nterm, (long)(k));
136: else
137: fprintf(stderr, "j %d nf %d prevdrop %ld prevcoord %d colevel %d nterm %d k %ld\n",
138: j,nf,prevdrop.a[j], prevcoord[j], colevel, nterm, (long)(k));
139: # endif
140: if (prevcoord[j] + colevel <= nterm)
141: j++;
142: else
143: {
144: assert (g<lmaster);
145: if (iflong)
146: master.b[g] = prevdrop.b[j];
147: else
148: master.a[g] = prevdrop.a[j];
149: coord[g++] = prevcoord[j++];
150: # if D1
151: if (iflong)
152: fprintf(stderr, " not skip g %d doc %d coord %d note %d\n",g,master.b[g-1], coord[g-1],master.b[j-1]);
153: else
154: fprintf(stderr, " not skip g %d doc %ld coord %d nterm %d\n",g,master.a[g-1], coord[g-1],nterm);
155: # endif
156: continue;
157: }
158: }
159: if (colevel==0 && j>=nf) break;
160: if (j<nf && (iflong? prevdrop.b[j]: prevdrop.a[j]) == k)
161: {
162: if (iflong)
163: master.b[g]=k;
164: else
165: master.a[g]=k;
166: coord[g++] = prevcoord[j++]+1;
167: # if D1
168: if (iflong)
169: fprintf(stderr, " at g %d item %ld coord %d note %ld\n",g,master.b[g-1],coord[g-1],master.b[j-1]);
170: else
171: fprintf(stderr, " at g %d item %d coord %d note %d\n",g,master.a[g-1],coord[g-1],master.a[j-1]);
172: # endif
173: }
174: else
175: if (colevel >= nterm)
176: {
177: if (iflong)
178: master.b[g]=k;
179: else
180: master.a[g]=k;
181: coord[g++] = 1;
182: }
183: }
184: # if D1
185: fprintf(stderr,"now have %d items\n",g);
186: # endif
187: if (colevel>0)
188: for ( ; j<nf; j++)
189: if ((iflong ? ((long *)prevcoord)[j] :
190: ((unsigned *)prevcoord)[j])+colevel > nterm)
191: {
192: assert(g<lmaster);
193: if (iflong)
194: master.b[g] = prevdrop.b[j];
195: else
196: master.a[g] = prevdrop.a[j];
197: coord[g++] = prevcoord[j];
198: # if D3
199: if(iflong)
200: fprintf(stderr, "copied over %ld coord %d\n",master.b[g-1], coord[g-1]);
201: else
202: fprintf(stderr, "copied over %d coord %d\n",master.a[g-1], coord[g-1]);
203: # endif
204: }
205: nf = g;
206: }
207: if (colevel>0)
208: {
209: best=0;
210: for(j=0; j<nf; j++)
211: if (coord[j]>best) best = coord[j];
212: # if D1
213: fprintf(stderr, "colevel %d best %d\n", colevel, best);
214: # endif
215: reached = best;
216: for(g=j=0; j<nf; j++)
217: if (coord[j]==best)
218: {
219: if (iflong)
220: master.b[g++] = master.b[j];
221: else
222: master.a[g++] = master.a[j];
223: }
224: nf=g;
225: # if D1
226: fprintf(stderr, "yet got %d\n",nf);
227: # endif
228: }
229: # ifdef D1
230: fprintf(stderr, " returning with %d\n",nf);
231: # endif
232: if (colevel)
233: {
234: free(prevdrop.a, lmaster, iflong?sizeof(long):sizeof(short));
235: free(prevcoord, lmaster, sizeof (lmaster));
236: }
237: # if D3
238: for(g=0;g<nf;g++)
239: if(iflong)
240: fprintf(stderr,":%ld\n",master.b[g]);
241: else
242: fprintf(stderr,":%d\n",master.a[g]);
243: # endif
244: return(nf);
245: }
246: long
247: getl(fb)
248: FILE *fb;
249: {
250: int x[2];
251: long *lp;
252: if (sizeof(int)==sizeof(long))
253: return(getw(fb));
254: x[0] = getw(fb);
255: x[1] = getw(fb);
256: lp= x;
257: return(*lp);
258: }
259: putl(ll, f)
260: long ll;
261: FILE *f;
262: {
263: int *x;
264: x = ≪
265: putw(x[0], f);
266: putw(x[1], f);
267: }
268: hcomp( n1, n2)
269: {
270: return (hfreq[hh[n1]]<=hfreq[hh[n2]]);
271: }
272: hexch( n1, n2 )
273: {
274: int t;
275: t = hh[n1];
276: hh[n1] = hh[n2];
277: hh[n2] = t;
278: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.