|
|
1.1 root 1: #define AMBIGUOUS 1
2: #define STDERR stdout
3: #include <stdio.h>
4: #include <ctype.h>
5: #include "./regexp.h"
6: regexp *regcomp(),*rs;
7: int nwanted,wantps;
8: int accept();
9: char *malloc();
10: #define freecol(x) (col[x].next=colfree,colfree=x)
11: #define getcol(x) ((x=colfree)?(colfree=col[colfree].next):(x=morecol()))
12: #define NHS 114723 /* number of headwords and senses */
13: #define N 400 /* max line length */
14: #define INITTEXT (516673+1) /* bytes of headword text */
15: #define INITSQ 1980 /* bytes of sense qualifier text */
16: #define INITSQTAB 1063 /* #qual sense addr with text addr */
17: #define NWINDEX 514104 /* number of 17 bit references */
18: #define X 1 /* extra bit for ref qualified */
19: #define WADDR 17 /* bits in headword or sense addr */
20: #define WMASK ((1<<WADDR)-1)
21: #define WQ (1<<WADDR)
22: #define WBITS (WADDR+X)
23: #define WQTEXT 2518 /* bytes of ref qualifier text */
24: #define NWQ 11582 /* #qual ref addr with text addr */
25: #define WQT 207 /* #tree entries for ref qual sort */
26: #define NCOL 1024 /* intermediate senses in path search */
27: struct col {
28: unsigned int entry:17;
29: unsigned int next:15;
30: } *col;
31: int colfree;
32: FILE *f;
33: FILE *wpackfile;
34: union x {
35: struct headword {
36: unsigned int type:1;
37: unsigned int tt:19;
38: unsigned int v:12;
39: } h;
40: struct sense {
41: unsigned int type:1;
42: unsigned int st:19;
43: unsigned int p:4;
44: unsigned int q:1;
45: unsigned int v:7;
46: } s;
47: } p[NHS+2];
48: char tbase[INITTEXT];
49: int text;
50: int z;
51: char in[N];
52: struct squal {
53: unsigned int saddr:17;
54: unsigned int qaddr:11;
55: unsigned int v:4;
56: } sqtab[INITSQTAB];
57: int nsq;
58: char sqbase[INITSQ];
59: int sqtext;
60: int windex;
61: unsigned char arena[(NWINDEX*WBITS)/8+1];
62: struct wqual {
63: unsigned int waddr:19;
64: unsigned int qaddr:12;
65: } wqtab[NWQ];
66: int nwq;
67: char wqbase[WQTEXT];
68: int wqtext;
69: struct wqt {
70: unsigned left:8,right:8,qaddr:12;
71: } wqtree[WQT];
72: int wqt;
73: #define NSET 32
74: int set[NSET];
75: int ncol;
76: char *parts[] = {
77: "","n","v","adj","adv","pron","conj","prep","interj"
78: };
79: #define NP 9
80: inset(t)
81: char *t;
82: {
83: char y[128];
84: int i,j,k;
85: if(*t!='&') {
86: i=headw(t);
87: if(i==0)return 0;
88: for(j=0; j<NSET; j++)if(p[i+j+1].s.type==0||i+j+1>=z)break;
89: else set[j]=i+j+1;
90: return j;
91: }
92: if(isdigit(*++t)) {
93: for(k=0; isdigit(*t); t++)k=k*10+*t-'0';
94: i=headw(t);
95: if(i==0)return 0;
96: for(j=0; j<NSET; j++)if(p[i+j+1].s.type==0||i+j+1>=z)break;
97: else if(j+1==k) {
98: set[0]=i+j+1;
99: return 1;
100: }
101: return 0;
102: }
103: if(strlen(t)>126)err("string to long in inset()");
104: strcpy(y,t);
105: for(j=0; y[j]&&y[j]!='.'; j++);
106: if(y[j]==0)return 0;
107: y[j]=0;
108: for(k=1; k<NP; k++)if(strcmp(y,parts[k])==0)break;
109: if(k>=NP)return 0;
110: i=headw(y+j+1);
111: if(i++==0)return 0;
112: for(j=0; j<NSET&&i<=z&&p[i].s.type==1; i++) {
113: if(p[i].s.p==k)set[j++]=i;
114: }
115: return j;
116: }
117: readnum()
118: {
119: int j,k;
120: for(k=0; isdigit(j=getc(f)); k=k*10+j-'0');
121: if(j!='\n')err("expect number");
122: fprintf(STDERR,"readnum()=%d\n",k);
123: return k;
124: }
125: sensequal(zz,t)
126: char *t;
127: {
128: int i;
129: if(p[zz].h.type!=1)err("squal on headword");
130: if(*t==0)return;
131: p[zz].s.q=1;
132: sqtab[nsq].saddr=zz;
133: for(i=0; i<nsq; i++)if(strcmp(t,sqbase+sqtab[i].qaddr)==0)break;
134: if(i<nsq) {
135: sqtab[nsq].qaddr=sqtab[i].qaddr;
136: }
137: else {
138: sqtab[nsq].qaddr=sqtext;
139: strcpy(sqbase+sqtext,t);
140: sqtext+=strlen(t)+1;
141: if(sqtext>=INITSQ)err("sqtext overflow");
142: }
143: if(++nsq>INITSQTAB)err("nsq overflow");
144: }
145: err(s)
146: char *s;
147: {
148: int i;
149: fprintf(STDERR,"err: %s\n",s);
150: fprintf(STDERR,"type return:"); getchar();
151: fprintf(STDERR,"input buffer segments:\n");
152: for(i=0; i<N; i++) {
153: if(in[i]==0)continue;
154: fprintf(STDERR,"'%s'\n",in+i);
155: while(i<N&&in[i])i++;
156: }
157: exit(1);
158: }
159: char *squal(zz)
160: {
161: int il,im,iu;
162: if(p[zz].h.type==0) {
163: err("ask for sensqual on headword");
164: }
165: if(p[zz].s.q==0)return "";
166: il=0; iu=nsq;
167: while(il<iu-1) {
168: im=(il+iu)/2;
169: if(sqtab[im].saddr>zz) {
170: iu=im;
171: }
172: else il=im;
173: }
174: if(sqtab[il].saddr!=zz)err("binary botch in squal()");
175: return sqbase+sqtab[il].qaddr;
176: }
177: char *wqual(wi)
178: {
179: int il,im,iu;
180: if((unpackw(wi)&WQ)==0)return "";
181: il=0; iu=nwq;
182: while(il<iu-1) {
183: im=(il+iu)/2;
184: if(wqtab[im].waddr>wi) {
185: iu=im;
186: }
187: else il=im;
188: }
189: if(wqtab[il].waddr!=wi)err("binary botch wqual");
190: return wqbase+wqtab[il].qaddr;
191: }
192: opackw(index,value)
193: register value;
194: {
195: int i,j;
196: register int mask=(1<<WBITS)-1;
197: i=index*WBITS;
198: value<<=i&7;
199: mask<<=i&7;
200: j=i>>3;
201: while(mask&0xff) {
202: arena[j]&=~mask;
203: arena[j]|=value;
204: value>>=8;
205: mask>>=8;
206: j++;
207: }
208: }
209: packw(index,value)
210: register value;
211: {
212: register i;
213: register unsigned char *p;
214: i=index*18;
215: value<<=i&7;
216: p=arena+(i>>3);
217: *p++|=value; value>>=8; *p++|=value; value>>=8;
218: *p++|=value;
219: }
220: unpackw(index)
221: {
222: register i,k;
223: register unsigned char *p;
224: i=index*18;
225: p=arena+(i>>3)+2;
226: k=*p; k=(k<<8)|*--p; k=(k<<8)|*--p;
227: return (k>>(i&7))&0x3ffff;
228: }
229: ounpackw(index)
230: {
231: int i,j,k;
232: int x;
233: register int mask=(1<<WBITS)-1;
234: i=index*WBITS;
235: j=i>>3;
236: mask<<=i&7;
237: x=k=0;
238: while(mask&0xff) {
239: k|=(arena[j]&mask)<<x;
240: mask>>=8;
241: j++;
242: x+=8;
243: }
244: return k>>(i&7);
245: }
246: dumps(x)
247: {
248: int i,j,k,l,m,n;
249: if(x==0) {
250: fprintf(STDERR,"not found\n");
251: return;
252: }
253: for(i=x; i>0&&p[i].h.type!=0; i--);
254: if(i==0) {
255: fprintf(STDERR,"dump of %d no headword\n",x);
256: return;
257: }
258: fprintf(STDERR,"\n&%d%s ",x-i,p[i].h.tt+tbase);
259: if(p[x].h.type==0) {
260: fprintf(STDERR,"\n");
261: while(++x<z&&p[x].h.type!=0)dumps(x);
262: return;
263: }
264: fprintf(STDERR," (%s.)",parts[p[x].s.p]);
265: if(*squal(x))fprintf(STDERR," [%s]",squal(x));
266: fprintf(STDERR,":\n");
267: for(i=p[x].s.st; l=unpackw(i++); ) {
268: for(j=l&WMASK; j>0&&p[j].h.type!=0; j--);
269: fprintf(STDERR," &%d%s",(l&WMASK)-j,p[j].h.tt+tbase);
270: if(l&WQ)fprintf(STDERR," [%s]",wqual(i-1));
271: fprintf(STDERR,",");
272: }
273: fprintf(STDERR,"\n");
274: }
275: char *ma(x)
276: {
277: char *p;
278: p=(char*)calloc(x,1);
279: if(p==0) {
280: fprintf(STDERR,"can't calloc %d bytes\n",x);
281: err("quit");
282: }
283: return p;
284: }
285: wsym(q,node)
286: char *q;
287: {
288: int i;
289: if(wqt==0) {
290: strcpy(wqbase,q);
291: wqtext=strlen(q)+1;
292: wqt++;
293: return 0;
294: }
295: i=strcmp(q,wqbase+wqtree[node].qaddr);
296: if(i==0)return wqtree[node].qaddr;
297: if(i<0) {
298: if(wqtree[node].left)return wsym(q,wqtree[node].left);
299: wqtree[node].left=wqt;
300: }
301: else {
302: if(wqtree[node].right)return wsym(q,wqtree[node].right);
303: wqtree[node].right=wqt;
304: }
305: strcpy(wqbase+wqtext,q);
306: i=wqtext;
307: wqtext+=strlen(q)+1;
308: if(wqtext>=WQTEXT)err("wqtext ovfl");
309: wqtree[wqt++].qaddr=i;
310: if(wqt>=WQT)err("wqt ovfl");
311: return i;
312: }
313: headw(text)
314: char *text;
315: {
316: int il,iu,im;
317: il=1; iu=z;
318: while(il<iu+1) {
319: im=(il+iu)/2;
320: while(im>il&&p[im].h.type!=0)im--;
321: if(im<=il) {
322: while(++im<iu&&p[im].h.type!=0);
323: if(im>=iu)break;
324: }
325: if(strcmp(p[im].h.tt+tbase,text)>0) {
326: iu=im;
327: }
328: else {
329: il=im;
330: }
331: }
332: if(strcmp(p[il].h.tt+tbase,text))return 0;
333: return il;
334: }
335: morecol()
336: {
337: int i;
338: if(col==0)col=(struct col*)ma(NCOL*sizeof*col);
339: else col=(struct col*)realloc(col,(ncol+NCOL)*sizeof*col);
340: for(i=NCOL+ncol-1; i>ncol; i--)col[i].next=i-1;
341: col[ncol].next=0;
342: ncol+=NCOL;
343: colfree=ncol-2;
344: return ncol-1;
345: }
346: path(a,b)
347: char *a,*b;
348: {
349: int i,j,k;
350: int old,new;
351: int e;
352: unsigned char psa[NP],psb[NP];
353: int it;
354: int pf;
355: for(i=0; b[i]; i++)if(b[i]=='/') {
356: nwanted=10;
357: rsparse(b);
358: qpath(accept,a);
359: fprintf(STDERR,"\n");
360: return;
361: }
362: #ifdef AMBIGUOUS
363: if(strncmp(a,"ambiguous ",10)==0) {
364: ambpath(a+10,b);
365: fprintf(STDERR,"\n");
366: return;
367: }
368: #endif
369: old=new=0;
370: it=1;
371: pf=0;
372: e=inset(a);
373: if(e==0) {
374: fprintf(STDERR,"can't find %s\n",a);
375: return;
376: }
377: for(i=0; i<NP; i++)psa[i]=psb[i]=0;
378: for(i=0; i<e; i++)psa[p[set[i]].s.p]++;
379: k=inset(b);
380: if(k==0) {
381: fprintf(STDERR,"%s not found\n",b);
382: return;
383: }
384: for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0;
385: for(i=0; i<k; i++) {
386: if(psa[p[set[i]].s.p]==0)continue;
387: psb[p[set[i]].s.p]++;
388: getcol(j);
389: col[j].entry=set[i];
390: col[j].next=old;
391: old=j;
392: p[set[i]].s.v=it;
393: }
394: if(old==0) {
395: fprintf(STDERR,"%s and %s: no part of speech in common\n",
396: a,b);
397: return;
398: }
399: e=inset(a);
400: for(i=j=0; i<e; i++) {
401: set[j]=set[i];
402: if(psb[p[set[i]].s.p])j++;
403: }
404: e=j;
405: if(e==0)err("path 1");
406: loop:
407: if(old==0) {
408: if(pf)fprintf(STDERR,"\n");
409: else fprintf(STDERR," no path from %s to %s\n",a,b);
410: goto pathx;
411: }
412: it++;
413: while(old) {
414: i=p[col[old].entry].s.st;
415: j=old;
416: old=col[old].next;
417: freecol(j);
418: while(j=(unpackw(i++)&WMASK)) {
419: if(p[j].s.type&&p[j].s.v==0) {
420: getcol(k);
421: col[k].next=new;
422: col[k].entry=j;
423: new=k;
424: p[j].s.v=it;
425: }
426: }
427: }
428: old=new;
429: new=0;
430: for(i=j=0; i<e; i++) {
431: set[j]=set[i];
432: if(p[set[i]].s.v) {
433: pfound(set[i]);
434: pf++;
435: }
436: else j++;
437: }
438: if(e=j)goto loop;
439: fprintf(STDERR,"\n");
440: pathx:
441: freeall(old);
442: freeall(new);
443: }
444: freeall(x)
445: {
446: int i;
447: while(x) {
448: i=col[x].next;
449: freecol(x);
450: x=i;
451: }
452: }
453: pfound(x)
454: {
455: int i,j;
456: do {
457: for(i=x; i>0&&p[i].h.type; i--);
458: if(i==0)err("no hw");
459: fprintf(STDERR," &%d%s",x-i,p[i].h.tt+tbase);
460: i=p[x].s.st;
461: while(j=(WMASK&unpackw(i++))) {
462: if(p[j].s.type&&p[j].s.v==p[x].s.v-1)break;
463: }
464: } while(x=j);
465: fprintf(STDERR,"\n");
466: }
467: char *avdummy[2];
468: main(ac,av)
469: char *av[];
470: {
471: int i,j,k,l,m,n;
472: int nhw,nhs;
473: int backref;
474: char *q;
475: int zz;
476: int w;
477: int nw;
478: char y[128];
479: if(ac<2) {
480: fprintf(stderr,"word_clout: need data file");
481: return 1;
482: }
483: f=fopen(av[1],"r");
484: for(i=0; (j=getc(f))!=EOF&&j!='\n'&&i<N-1; i++)in[i]=j;
485: if(j==EOF)err("immediate eof");
486: in[i]=0;
487: if(strcmp(in,"-huffman")==0) {
488: goto unpackh;
489: }
490: else if(strcmp(in,"-pack")) {
491: err("expect -huffman or -pack");
492: }
493: nhw=readnum();
494: nhs=readnum();
495: /* now preallocated:
496: * p=(struct x*)ma((nhs+2)*sizeof*p);
497: * tbase=(char*)ma(INITTEXT);
498: * sqbase=(char*)ma(INITSQ);
499: * sqtab=(struct squal*)ma(INITSQTAB*sizeof*sqtab);
500: * arena=(unsigned char*)ma(i=(NWINDEX*sizeof*arena*WBITS)/8+1);
501: * wqtab=(struct wqual*)ma(NWQ*sizeof*wqtab);
502: * wqbase=(char*)ma(WQTEXT*sizeof*wqbase);
503: * wqtree=(struct wqt*)ma(WQT*sizeof*wqtree);
504: */
505: readhead:
506: for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j;
507: if(i>=N)err("line too long");
508: if(j==EOF)err("eof in headwords");
509: in[i]=0;
510: if(i==0)goto hwdone;
511: for(j=i-1; j>0&&in[j]!=' '; j--);
512: in[j++]=0;
513: p[++z].h.tt=text;
514: zz=z;
515: strcpy(text+tbase,in);
516: text+=strlen(in)+1;
517: if(text>=INITTEXT)err("inittext too small");
518: p[z].h.type=0;
519: for( ; in[j]; j++) {
520: p[++z].s.type=1;
521: p[z].s.q=0;
522: p[z].s.st=0;
523: k=in[j];
524: if(k<='z'&&k>='a')p[z].s.p=k-'a';
525: else p[z].s.p=k-'A';
526: }
527: if((j=getc(f))==':') {
528: for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j;
529: if(j==EOF)err("eof in sense qual");
530: if(i>=N)err("squal too long");
531: in[i]=0;
532: for(i=0; in[i]; i++)if(in[i]==':'&&in[i+1]==' ')in[i]='-';
533: for(i=0; in[i]; i++) {
534: if(++zz>z)err("too many squal fields");
535: for(j=i; in[j]&&in[j]!=':'; j++);
536: if(in[j]==0)err("squal not ending :");
537: in[j]=0;
538: sensequal(zz,in+i);
539: i=j;
540: }
541: if(zz<z)err("missing squal field");
542: goto readhead;
543: }
544: ungetc(j,f);
545: goto readhead;
546: hwdone:
547: fprintf(STDERR,"text=%d\n",text);
548: z++;
549: p[z].h.type=0; /* extra slot on end */
550: for(i=1,j=0; i<z; i++)if(p[i].h.type==1&&p[i].s.q)j++;
551: fprintf(STDERR,"%d entries %d qual senses\n",i,j);
552: fprintf(STDERR,"sqtext=%d, nsq=%d\n",sqtext,nsq);
553: zz=1;
554: readwords:
555: for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j;
556: if(j==EOF)goto done;
557: if(i>=N)err("line too long");
558: in[i]=0;
559: while(zz<z&&p[zz].h.type==0)zz++;
560: for(i=nw=0; in[i]<='z'&&in[i]>='a'; i++)nw=nw*26+in[i]-'a';
561: w=windex;
562: p[zz].s.st=w;
563: windex+=nw+1;
564: if(windex>NWINDEX)err("windex too big");
565: while(in[i]==':') {
566: q=0;
567: if(in[++i]=='{') {
568: for(j=i+1; in[j]&&in[j]!='}'; j++);
569: if(in[j]==0)err("missing }");
570: in[j]=0;
571: q=in+i+1;
572: i=j+1;
573: }
574: for(k=0; in[i]>='a'&&in[i]<='z'; i++)k=k*26+in[i]-'a';
575: ++k; /* offset 1 so preserve 0=0 */
576: if(k!=(k&WMASK))err("exceeds wmask");
577: if(p[k].h.type==1&&p[k].s.p!=p[zz].s.p) {
578: dumps(k);
579: dumps(zz);
580: err("part bad");
581: }
582: if(q) {
583: k|=WQ;
584: wqtab[nwq].waddr=w;
585: wqtab[nwq].qaddr=wsym(q,0);
586: nwq++;
587: if(nwq>=NWQ)err("nwq ovfl");
588: }
589: packw(w++,k);
590: }
591: zz++;
592: goto readwords;
593: done:
594: fprintf(STDERR,"wqtext=%d nwq=%d windex=%d\n",wqtext,nwq,windex);
595: fprintf(STDERR,"wqt=%d\n",wqt);
596: fprintf(STDERR,"zz=%d z=%d\n",zz,z);
597: m=0;
598: if(ac>2&&*av[2]=='c') {
599: for(w=0; w<windex; w++)if(unpackw(w))m++;
600: fprintf(STDERR,"%d words set\n",m);
601: }
602: m=0;
603: for(zz=1; zz<z; zz++) {
604: if(p[zz].h.type==0)continue;
605: i=p[zz].s.st;
606: while(j=unpackw(i++)) {
607: j&=WMASK;
608: if(p[j].h.type==0)continue;
609: k=p[j].s.st;
610: while(l=unpackw(k++)) {
611: l&=WMASK;
612: if(l==zz)goto backed;
613: }
614: if(unpackw(k))err("oversubscribed");
615: packw(k-1,zz);
616: m++;
617: backed:;
618: }
619: }
620: fprintf(STDERR,"backrefs added %d\n",m);
621: if(ac>2&&*av[2]=='c')for(zz=1; zz<z; zz++) {
622: if(p[zz].h.type==0) {
623: j=zz;
624: continue;
625: }
626: for(k=p[zz].s.st; unpackw(k)&WMASK; k++);
627: if(k+1<z&&(unpackw(k+1)&WMASK)==0) {
628: fprintf(STDERR,"%s is missing words\n",
629: tbase+p[j].h.tt);
630: }
631: }
632: if(ac>3&&(wpackfile=fopen(av[3],"w")))packit();
633: goto ahem;
634: unpackh:
635: unpackit();
636: ahem: fprintf(STDERR,"ready.\n");
637: loop:
638: for(i=0; i<N&&(j=getchar())!=EOF&&j!='\n'; i++)in[i]=j;
639: if(i<N)in[i]=0;
640: if(j==EOF||strcmp(in,"q")==0) {
641: exit(0);
642: }
643: for(i=0; in[i]&&in[i]!='/'; i++)if(in[i]=='^') {
644: in[i]=0;
645: path(in,in+i+1);
646: goto loop;
647: }
648: if(in[i]=='/') {
649: strcpy(y,in+i+1);
650: in[i]=0;
651: if(i>0&&in[i-1]=='.')in[i-1]=0;
652: for(i=0; y[i]&&y[i]!='/'; i++);
653: y[i]=0;
654: k=0;
655: if(*in=='&')for(k=0; k<NP&&strcmp(in+1,parts[k]); k++);
656: if(rs)free(rs);
657: rs=regcomp(y);
658: for(zz=1; zz<z; zz++) {
659: if(p[zz].h.type)continue;
660: if(k) {
661: for(i=1; p[zz+i].s.type; i++)
662: if(p[zz+i].s.p==k)goto ok;
663: continue;
664: }
665: ok:
666: if(regexec(rs,p[zz].h.tt+tbase,0,0)) {
667: fprintf(stderr," %s,",p[zz].h.tt+tbase);
668: }
669: }
670: fprintf(stderr,"\n");
671: goto loop;
672: }
673: i=inset(in);
674: for(j=0; j<i; j++)dumps(set[j]);
675: if(i==0)fprintf(STDERR,"%s not found\n",in);
676: goto loop;
677: }
678: /* funct == 0 reject
679: funct == -1 stop
680: funct == otherwise, continue
681: */
682: qpath(funct,b)
683: char *b;
684: int (*funct)();
685: {
686: int i,j,k;
687: int old,new;
688: int it;
689: old=new=0;
690: it=1;
691: k=inset(b);
692: if(k==0) {
693: fprintf(STDERR,"%s not found\n",b);
694: return;
695: }
696: for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0;
697: for(i=0; i<k; i++) {
698: getcol(j);
699: col[j].entry=set[i];
700: col[j].next=old;
701: old=j;
702: p[set[i]].s.v=it;
703: }
704: loop:
705: if(old==0) {
706: goto pathx;
707: }
708: it++;
709: while(old) {
710: i=p[col[old].entry].s.st;
711: j=old;
712: old=col[old].next;
713: freecol(j);
714: while(j=(unpackw(i++)&WMASK)) {
715: if(p[j].s.type&&p[j].s.v==0) {
716: p[j].s.v=it;
717: switch((*funct)(j)) {
718: case 0:
719: p[j].s.v=0;
720: continue;
721: case -1: goto pathx;
722: }
723: getcol(k);
724: col[k].next=new;
725: col[k].entry=j;
726: new=k;
727: }
728: }
729: }
730: old=new;
731: new=0;
732: if(old)goto loop;
733: pathx:
734: freeall(old);
735: freeall(new);
736: }
737: rsparse(b)
738: char *b;
739: {
740: int i,j;
741: wantps=0;
742: for(i=0; b[i]&&b[i]!='/'; i++);
743: if(i&&b[i-1]=='.') {
744: b[i-1]=0;
745: for(j=0; j<NP; j++)if(strcmp(parts[j],b+1)==0)wantps=j;
746: }
747: if(b[i])i++;
748: for(j=i; b[j]; j++);
749: while(j&&b[j-1]!='/')j--;
750: if(j)b[j-1]=0;
751: if(atoi(b+j))nwanted=atoi(b+j);
752: if(rs) {
753: free(rs);
754: }
755: rs=regcomp(b+i);
756: }
757: accept(zz)
758: {
759: int i;
760: for(i=zz; i&&p[i].h.type; i--);
761: if(regexec(rs,p[i].h.tt+tbase,0,0)) {
762: fprintf(stderr," &%d%s,",zz-i,p[i].h.tt+tbase);
763: if(--nwanted<=0)return -1;
764: return 1;
765: }
766: return 1;
767: }
768: #define NCOMM 100
769: #define NHASCII 128
770: #define NNSEN 50
771: #define NNWDS 100
772: #define NSQR 256
773: #define NWQR 256
774: #define ALLOC(base,num) (base.freq=(long*)ma(num*sizeof(long)),\
775: base.code=(long*)ma(num*sizeof(long)),\
776: base.tree=(short*)ma(2*num*sizeof(short)))
777: #define FREE(base) (free(base.freq),free(base.code),free(base.tree))
778: #define GEN(base,n) (gencodes(base.freq,n,base.code),flipcodes(n,base.code),\
779: entree(base.code,n,base.tree))
780: #define INBIT() (ibuffc?(ibuff>>--ibuffc)&1:(ibuffc=7,((ibuff=getc(f))>>7)&1))
781: struct huff {
782: long *freq,*code;
783: short *tree;
784: };
785: struct alpha {
786: unsigned int k,wqtab;
787: };
788: alphacmp(a,b)
789: struct alpha *a,*b;
790: {
791: if(p[a->k&WMASK].h.type) {
792: if(p[b->k&WMASK].h.type)return (a->k&WMASK)-(b->k&WMASK);
793: return 1;
794: }
795: if(p[b->k&WMASK].h.type==0)return (a->k&WMASK)-(b->k&WMASK);
796: return -1;
797: }
798: wqcmp(a,b)
799: struct wqual *a,*b;
800: {
801: return a->waddr-b->waddr;
802: }
803: int ibuffc,ibuff;
804: packit()
805: {
806: struct huff comm,qascii,hascii,nsen,parsp,nwds,nwdsback;
807: int i,j,k,l,m,n;
808: int ii,jj,il,iu,im;
809: int zz;
810: int sqref[NSQR],wqref[NWQR];
811: int medians,medianw;
812: struct alpha wtemp[NNWDS];
813: /* s & w qualifier text addr from s & w qualifier index */
814: for(i=j=0; i<sqtext; i++) {
815: if(i==0||sqbase[i-1]==0) {
816: sqref[j++]=i;
817: if(j>=NSQR)err("NSQR too small");
818: }
819: }
820: for(i=j=0; i<wqtext; i++) {
821: if(i==0||wqbase[i-1]==0) {
822: wqref[j++]=i;
823: if(j>=NWQR)err("NWQR too small");
824: }
825: }
826: ALLOC(comm,NCOMM);
827: ALLOC(qascii,NHASCII);
828: ALLOC(hascii,NHASCII);
829: ALLOC(nsen,NNSEN);
830: ALLOC(parsp,NP);
831: ALLOC(nwds,NNWDS);
832: ALLOC(nwdsback,NNWDS);
833: /* common substring, number of senses, parts of speech */
834: for(zz=1; zz<z; zz++) {
835: if(p[zz].h.type)continue;
836: j=k,k=p[zz].h.tt;
837: if(k==0)l=0;
838: else for(l=0; tbase[k+l]==tbase[j+l]; l++);
839: if(l>=NCOMM)err("NCOMM too small");
840: comm.freq[l]++;
841: for(i=l; tbase[i+k]; i++)hascii.freq[tbase[i+k]]++;
842: hascii.freq[0]++;
843: for(n=zz+1; p[n].s.type; n++);
844: if(n-zz>=NNSEN)err("NNSEN too small");
845: nsen.freq[n-zz]++;
846: for(n=zz+1; p[n].s.type; n++)parsp.freq[p[n].s.p]++;
847: }
848: /* number of words */
849: for(zz=1; zz<z; zz++) {
850: if(p[zz].h.type) {
851: for(i=p[zz].s.st,j=0; k=unpackw(i); i++)if((k&WMASK)<zz)j++;
852: i-=p[zz].s.st;
853: if(i>=NNWDS)err("NNWDS too small");
854: nwds.freq[i]++;
855: nwdsback.freq[j]++;
856: }
857: }
858: /* ascii in qualifiers */
859: for(i=0; i<sqtext; i++)qascii.freq[sqbase[i]]++;
860: for(i=0; i<wqtext; i++)qascii.freq[wqbase[i]]++;
861:
862: GEN(qascii,NHASCII);
863: GEN(hascii,NHASCII);
864: GEN(comm,NCOMM);
865: GEN(nsen,NNSEN);
866: GEN(parsp,NP);
867: GEN(nwds,NNWDS);
868: GEN(nwdsback,NNWDS);
869:
870: /* median sense qualifier */
871: for(i=j=0,zz=1; zz<z; zz++) {
872: if(p[zz].s.type) {
873: i++;
874: if(p[zz].s.q)j++;
875: }
876: }
877: medians=(7*i)/(10*j);
878: /* median word qualifier runs */
879: for(i=j=0,zz=1; zz<z; zz++) {
880: if(p[zz].h.type) {
881: for(ii=p[zz].s.st; jj=unpackw(ii); ii++) {
882: i++;
883: if(jj&WQ)j++;
884: }
885: }
886: }
887: medianw=(7*i)/(10*j);
888:
889: /* Start packed output */
890: fprintf(wpackfile,"-huffman\n");
891:
892: /* OUTPUT trees */
893: outtree(qascii);
894: outtree(hascii);
895: outtree(comm);
896: outtree(nsen);
897: outtree(parsp);
898: outtree(nwds);
899: outtree(nwdsback);
900:
901: /* OUTPUT headers */
902: writesim(500000,text);
903: writesim(100000,z);
904: writesim(1000,nsq);
905: writesim(500000,windex);
906: writesim(10000,nwq);
907: writesim(2000,wqtext);
908: writesim(2000,sqtext);
909: writesim(100,medians);
910: writesim(100,medianw);
911:
912: /* OUTPUT sense qualifier text */
913: for(i=0; i<sqtext; i++)outbit(qascii.code[sqbase[i]]);
914: /* OUTPUT word qualifier text */
915: for(i=0; i<wqtext; i++)outbit(qascii.code[wqbase[i]]);
916: /* OUTPUT hw ncommon, text \0, number of senses, part of speech */
917: for(zz=1; zz<z; zz++) {
918: if(p[zz].h.type)continue;
919: j=k,k=p[zz].h.tt;
920: if(k==0)l=0;
921: else for(l=0; tbase[k+l]==tbase[j+l]; l++);
922: outbit(comm.code[l]);
923: for(i=l; tbase[i+k]; i++)outbit(hascii.code[tbase[i+k]]);
924: outbit(hascii.code[0]);
925: for(n=zz+1; p[n].s.type; n++);
926: outbit(nsen.code[n-zz]);
927: for(n=zz+1; p[n].s.type; n++)outbit(parsp.code[p[n].s.p]);
928: }
929: /* OUTPUT qualified sense run, qualifier reference number */
930: m=n=0;
931: for(zz=1; zz<z; zz++) {
932: if(p[zz].h.type) {
933: n++;
934: if(p[zz].s.q) {
935: writesim(medians,n-m-1);
936: m=n;
937: j=squal(zz)-sqbase;
938: for(i=0; i<NSQR; i++)if(sqref[i]==j)break;
939: if(sqref[i]!=j)err("NSQR search botch");
940: outbitf(NSQR+i);
941: }
942: }
943: }
944: writesim(medians,n-m);
945: /* sort words into alphabetical order */
946: for(zz=1; zz<z; zz++) {
947: if(p[zz].h.type==0)continue;
948: for(i=p[zz].s.st,j=0; k=unpackw(i); i++) {
949: wtemp[j].wqtab=0;
950: if(k&WQ) {
951: il=0; iu=nwq;
952: while(il<iu-1) {
953: im=(il+iu)/2;
954: if(wqtab[im].waddr>i)iu=im;
955: else il=im;
956: }
957: if(wqtab[il].waddr!=i)err("bin botch");
958: wtemp[j].wqtab=il;
959: }
960: wtemp[j++].k=k;
961: }
962: qsort(wtemp,j,sizeof*wtemp,alphacmp);
963: for(i=p[zz].s.st,j=0; unpackw(i); i++) {
964: if(wtemp[j].k&WQ) {
965: wqtab[wtemp[j].wqtab].waddr=i;
966: }
967: opackw(i,wtemp[j++].k);
968: }
969: }
970: qsort(wqtab,nwq,sizeof*wqtab,wqcmp);
971: /* OUTPUT # words, # words back, back references */
972: for(zz=1; zz<z; zz++) {
973: if(p[zz].h.type==0)continue;
974: for(i=p[zz].s.st,j=0; k=unpackw(i)&WMASK; i++) {
975: if(k<zz||p[k].s.type==0)j++;
976: }
977: outbit(nwds.code[i-p[zz].s.st]);
978: outbit(nwdsback.code[j]);
979: for(i=p[zz].s.st; k=unpackw(i)&WMASK; i++) {
980: if(k<zz||p[k].s.type==0)outbitf(WQ|k);
981: }
982: }
983: /* OUTPUT word run, word qualifier index */
984: m=n=0;
985: for(zz=1; zz<z; zz++) {
986: if(p[zz].h.type) {
987: for(ii=p[zz].s.st; jj=unpackw(ii); ii++) {
988: n++;
989: if(jj&WQ) {
990: writesim(medianw,n-m-1);
991: m=n;
992: j=wqual(ii)-wqbase;
993: for(i=0; i<NWQR; i++)if(wqref[i]==j)break;
994: if(wqref[i]!=j)err("NWQR search botch");
995: outbitf(NWQR+i);
996: }
997: }
998: }
999: }
1000: writesim(medianw,n-m);
1001: FREE(qascii);
1002: FREE(hascii);
1003: FREE(comm);
1004: FREE(nsen);
1005: FREE(parsp);
1006: FREE(nwds);
1007: FREE(nwdsback);
1008: outbitf(0xff);
1009: fclose(wpackfile);
1010: }
1011: outbit(x)
1012: register x;
1013: {
1014: static obuff,b;
1015: while(x>1) {
1016: obuff=(obuff<<1)+(x&1);
1017: x>>=1;
1018: if(++b>=8) {
1019: b=0;
1020: putc(obuff,wpackfile);
1021: }
1022: }
1023: }
1024: outbitf(x)
1025: register x;
1026: {
1027: register y;
1028: for(y=1; x>1; x>>=1)y=(y<<1)+(x&1);
1029: outbit(y);
1030: }
1031: inbit() { return INBIT(); }
1032: char *rhalloc(x) { return malloc(x); }
1033: char *rhfree(x) char *x; { free(x); }
1034: wbytes(n) {
1035: putc(n>>8,wpackfile);
1036: putc(n,wpackfile);
1037: }
1038: rbytes() {
1039: int i;
1040: i=getc(f);
1041: return (i<<8)+getc(f);
1042: }
1043: decode(tree)
1044: register short *tree;
1045: {
1046: register int i;
1047: i=0;
1048: if(*tree++<2)return *tree&077777;
1049: do {
1050: if(INBIT())i=tree[i+1];
1051: else i=tree[i];
1052: } while((i&0100000)==0);
1053: return i&077777;
1054: }
1055: outtree(x)
1056: struct huff x;
1057: {
1058: int i;
1059: for(i=0; i<=x.tree[0]; i++)wbytes(x.tree[i]);
1060: }
1061: struct huff intree()
1062: {
1063: struct huff x;
1064: int i;
1065: x.code=0;
1066: x.freq=0;
1067: i=rbytes();
1068: x.tree=(short*)ma((i+1)*sizeof(short));
1069: x.tree[0]=i;
1070: for(i=1; i<=x.tree[0]; i++)x.tree[i]=rbytes();
1071: return x;
1072: }
1073: rfixlen(code)
1074: register code;
1075: {
1076: register i=0;
1077: if(code<=1)return 0;
1078: do {
1079: i=(i<<1)+INBIT();
1080: } while((code>>=1)>1);
1081: return i;
1082: }
1083: unpackit()
1084: {
1085: int i,j,k,l,m,n;
1086: int sqref[NSQR],wqref[NWQR];
1087: struct huff qascii,hascii,comm,nsen,parsp,nwds,nwdsback;
1088: int medians,medianw;
1089: int zz;
1090: int ii,jj;
1091: fprintf(STDERR,"wait for 'ready.'\n");
1092: fflush(STDERR);
1093: qascii=intree();
1094: hascii=intree();
1095: comm=intree();
1096: nsen=intree();
1097: parsp=intree();
1098: nwds=intree();
1099: nwdsback=intree();
1100: text=readsim(500000);
1101: z=readsim(100000);
1102: nsq=readsim(1000);
1103: windex=readsim(500000);
1104: nwq=readsim(10000);
1105: wqtext=readsim(2000);
1106: sqtext=readsim(2000);
1107: medians=readsim(100);
1108: medianw=readsim(100);
1109:
1110: for(i=0; i<sqtext; i++)sqbase[i]=decode(qascii.tree);
1111: for(i=0; i<wqtext; i++)wqbase[i]=decode(qascii.tree);
1112:
1113: for(i=j=0; i<sqtext; i++) {
1114: if(i==0||sqbase[i-1]==0) {
1115: sqref[j++]=i;
1116: }
1117: }
1118: for(i=j=0; i<wqtext; i++) {
1119: if(i==0||wqbase[i-1]==0) {
1120: wqref[j++]=i;
1121: }
1122: }
1123: for(k=0,zz=1; zz<z; zz=n) {
1124: l=decode(comm.tree);
1125: for(i=0; i<l; i++)tbase[i+k]=tbase[i+j];
1126: while(tbase[k+l++]=decode(hascii.tree));
1127: p[zz].h.tt=k;
1128: j=k;
1129: k+=l;
1130: l=decode(nsen.tree);
1131: for(n=zz+1; n<zz+l; n++) {
1132: p[n].s.p=decode(parsp.tree);
1133: p[n].s.type=1;
1134: }
1135: }
1136: if(zz!=z)fprintf(STDERR,"botch zz!=z %d %d\n",zz,z);
1137: if(k!=text)fprintf(STDERR,"botch k!=text %d %d\n",k,text);
1138: p[z].h.type=0;
1139: m=n=0;
1140: l=readsim(medians);
1141: k=0;
1142: for(zz=1; zz<z; zz++) {
1143: if(p[zz].h.type) {
1144: n++;
1145: if(l==n-m-1) {
1146: p[zz].s.q=1;
1147: m=n;
1148: sqtab[k].saddr=zz;
1149: sqtab[k++].qaddr=sqref[rfixlen(NSQR)];
1150: l=readsim(medians);
1151: }
1152: }
1153: }
1154: if(k!=nsq)fprintf(STDERR,"k!=nsq %d %d\n",k,nsq);
1155: j=0;
1156: for(zz=1; zz<z; zz++) {
1157: if(p[zz].h.type==0)continue;
1158: l=decode(nwds.tree);
1159: p[zz].s.st=j;
1160: m=decode(nwdsback.tree);
1161: for(i=0; i<m; i++) {
1162: packw(j+i,n=rfixlen(WQ));
1163: if(p[n].s.type) {
1164: packw(p[n].s.st+p[n].s.v++,zz);
1165: }
1166: }
1167: p[zz].s.v=m;
1168: j+=l+1;
1169: }
1170: if(j!=windex)fprintf(STDERR,"j!=windex %d %d\n",j,windex);
1171: m=n=0;
1172: l=readsim(medianw);
1173: j=0;
1174: for(zz=1; zz<z; zz++) {
1175: if(p[zz].h.type) {
1176: for(ii=p[zz].s.st; jj=unpackw(ii); ii++) {
1177: n++;
1178: if(n-m-1==l) {
1179: m=n;
1180: opackw(ii,WQ+jj);
1181: i=wqref[rfixlen(NWQR)];
1182: wqtab[j].waddr=ii;
1183: wqtab[j++].qaddr=i;
1184: l=readsim(medianw);
1185: }
1186: }
1187: }
1188: }
1189: if(j!=nwq)fprintf(STDERR,"j!=nwq %d %d\n",j,nwq);
1190: free(qascii.tree);
1191: free(hascii.tree);
1192: free(comm.tree);
1193: free(nsen.tree);
1194: free(parsp.tree);
1195: free(nwds.tree);
1196: free(nwdsback.tree);
1197: }
1198: char *rhalloc();
1199: typedef long huffreq;
1200: typedef long huffcode;
1201: typedef unsigned short hufftree;
1202: typedef int huffnum;
1203:
1204: /* gencodes((huffreq*)freqtab, (huffnum)n, (huffcode*)output);
1205: flipcodes((huffnum)n,(huffcode*)output);
1206: entree((huffcodes*)output.flipped,(huffnum)n,(hufftree*)treearray)
1207: */
1208: #define ENDFLAG 0100000
1209: /* decoding tree is (hufftree) array; start at 0, add next bit to index;
1210: if ENDFLAG is set in entry, everything else is the wanted code;
1211: if not, take the next offset from the entry;
1212:
1213: first entry in tree is number of entries in rest of tree;
1214: offset 0 corresponds to tr[1].
1215: */
1216: entree(cd,ncd,tr)
1217: huffcode *cd;
1218: huffnum ncd;
1219: hufftree *tr;
1220: {
1221: long i,j,k;
1222: long x;
1223: for(i=0; i<2*ncd; i++)tr[i]=0;
1224: tr++;
1225: k=2;
1226:
1227: for(i=0; i<ncd; i++) {
1228: j=0;
1229: for(x=cd[i]; x>1; x>>=1) {
1230: j+=x&1;
1231: if(tr[j]==0) {
1232: if(x>3) {
1233: tr[j]=k;
1234: k+=2;
1235: }
1236: else {
1237: tr[j]=i|ENDFLAG;
1238: }
1239: }
1240: j=tr[j];
1241: if(x>3&&(j&ENDFLAG)) {
1242: err("botched huffman code");
1243: }
1244: }
1245: }
1246: *--tr=k;
1247: }
1248: /* Huffman codes from counts */
1249:
1250: /* gencodes(huffreq freq table, huffnum length, huffcode output code table)
1251: output code words have leading 1-bit set, rest is huffman code.
1252: program aborts if a code is more than 30 bits -- try increasing
1253: the minimum counts.
1254: zero frequency counts produce zero code-words (are ignored).
1255:
1256: flipcodes(huffnum length, huffcode code table) flips the bit order of
1257: the huffman code bits.
1258: */
1259:
1260: struct code {
1261: struct code *left,*right;
1262: huffreq count;
1263: huffnum val;
1264: };
1265:
1266: struct code *getcode()
1267: {
1268: struct code *h;
1269: h=(struct code*)rhalloc(sizeof*h);
1270: if(h==0)err("getcode: can't rhalloc code");
1271: h->left=h->right=0;
1272: h->val=h->count=0;
1273: return h;
1274: }
1275:
1276: tcompare(a,b)
1277: register struct code **a,**b;
1278: {
1279: if(a[0]->count<b[0]->count)return -1;
1280: if(a[0]->count>b[0]->count)return 1;
1281: if(a[0]->val<b[0]->val)return -1;
1282: if(a[0]->val>b[0]->val)return 1;
1283: return 0;
1284: }
1285:
1286: gencodes(infreq,nel,outcodes)
1287: huffreq *infreq;
1288: huffnum nel;
1289: huffcode *outcodes;
1290: {
1291: struct code **tp,**a,*p,*s;
1292: long i,j,k;
1293: long na;
1294: tp=(struct code**)rhalloc(nel*sizeof*tp);
1295: if(tp==0)err("gencodes: can't rhalloc tp");
1296: for(i=0; i<nel; i++) {
1297: outcodes[i]=0;
1298: tp[i]=getcode();
1299: tp[i]->count=infreq[i];
1300: tp[i]->val=i;
1301: }
1302: qsort(tp,nel,sizeof*tp,tcompare);
1303: for(k=0; k<nel-1; k++) {
1304: if(tp[k]->count==0)rhfree(tp[k]);
1305: else break;
1306: }
1307: a=&tp[k];
1308: na=nel-k;
1309: /* add smallest two and replace in heap till heap gone */
1310: for(k=na; k>2; ) {
1311: j=a[1]->count<a[2]->count?1:2;
1312: p=getcode();
1313: p->count=a[0]->count+a[j]->count;
1314: p->left=a[0];
1315: p->right=a[j];
1316: a[j]=p;
1317: for(i=j; (i+=i+1)<k; j=i) {
1318: if(i+1<k&&a[i]->count>a[i+1]->count)i++;
1319: if(a[j]->count<=a[i]->count)break;
1320: p=a[j];
1321: a[j]=a[i];
1322: a[i]=p;
1323: }
1324: a[0]=a[--k];
1325: for(i=j=0; (i+=i+1)<k; j=i) {
1326: if(i+1<k&&a[i]->count>a[i+1]->count)i++;
1327: if(a[j]->count<=a[i]->count)break;
1328: p=a[j];
1329: a[j]=a[i];
1330: a[i]=p;
1331: }
1332: }
1333: if(k==2) {
1334: p=getcode();
1335: p->count=a[0]->count+a[1]->count;
1336: p->left=a[0];
1337: p->right=a[1];
1338: }
1339: else p=a[0];
1340: rhfree(tp);
1341: ascodes(p,1L,outcodes);
1342: }
1343:
1344: ascodes(p,hcode,outcodes)
1345: struct code *p;
1346: long *outcodes;
1347: long hcode;
1348: {
1349: if(hcode<=0)err("ascodes: bitcode overflow");
1350: if(p->left==0) {
1351: if(p->right!=0)err("ascodes: tree ends wrong");
1352: outcodes[p->val]=hcode;
1353: }
1354: else {
1355: if(p->right==0)err("ascodes: tree ends oddly");
1356: ascodes(p->right,(hcode<<1)|1,outcodes);
1357: ascodes(p->left,(hcode<<1)|0,outcodes);
1358: }
1359: rhfree(p);
1360: }
1361:
1362: flipcodes(nel,codes)
1363: huffnum nel;
1364: huffcode *codes;
1365: {
1366: long i,j,k;
1367: long x;
1368: for(i=0; i<nel; i++) {
1369: if(codes[i]==0)continue;
1370: x=codes[i];
1371: codes[i]=1;
1372: while(x>1) {
1373: codes[i]=codes[i]*2|(x&1);
1374: x>>=1;
1375: }
1376: }
1377: }
1378: /* self-similar Huffman codes for neg exp distr.
1379: calls outbitf(1+forward code)
1380: calls inbit()
1381: */
1382: writesim(median,intv)
1383: {
1384: int i;
1385: static nc,dc,ec,omedian;
1386: if(median!=omedian) {
1387: omedian=median;
1388: for(i=1; (1L<<i)<=median; i++);
1389: nc=1L<<--i;
1390: dc=2*nc-median;
1391: ec=4*nc-median;
1392: }
1393: while(intv>=median) {
1394: outbitf(03L);
1395: intv-=median;
1396: }
1397: outbitf(02L);
1398: if(intv<dc)outbitf(intv+nc);
1399: else outbitf(ec+intv);
1400: }
1401: readsim(median) {
1402: int i,j,k;
1403: static nc,oi,dc,omedian;
1404: if(median!=omedian) {
1405: omedian=median;
1406: for(i=1; (1L<<i)<=median; i++);
1407: nc=1L<<--i;
1408: oi=i;
1409: dc=2*nc-median;
1410: }
1411: k=0; while(INBIT())k+=median;
1412: for(i=j=0; i<oi; i++)j+=j+INBIT();
1413: if(j<dc)return k+j;
1414: return k-dc+j+j+INBIT();
1415: }
1416: #ifdef AMBIGUOUS
1417: prsense(i)
1418: {
1419: fprintf(STDERR," &%d%s",i-hwof(i),p[hwof(i)].h.tt+tbase);
1420: }
1421: ambpath(a,b)
1422: char *a,*b;
1423: {
1424: int i,j,k;
1425: int froma,fromb,nfroma,nfromb;
1426: int e;
1427: int it;
1428: int pf;
1429: pf=0;
1430: it=1;
1431: e=inset(a);
1432: if(e==0) {
1433: fprintf(STDERR,"can't find %s\n",a);
1434: return;
1435: }
1436: k=inset(b);
1437: if(k==0) {
1438: fprintf(STDERR,"%s not found\n",b);
1439: return;
1440: }
1441: for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0; else p[i].h.v=0;
1442: fromb=0;
1443: for(i=0; i<k; i++) {
1444: getcol(j);
1445: col[j].entry=set[i];
1446: col[j].next=fromb;
1447: fromb=j;
1448: p[set[i]].s.v=it;
1449: }
1450: e=inset(a);
1451: froma=0;
1452: for(i=0; i<e; i++) {
1453: getcol(j);
1454: col[j].entry=set[i];
1455: col[j].next=froma;
1456: froma=j;
1457: p[set[i]].s.v=it;
1458: }
1459: loop:
1460: pf=0;
1461: for(k=froma; k; k=col[k].next) {
1462: j=hwof(col[k].entry);
1463: p[j].h.v|=1;
1464: if(p[j].h.v==3)pf++;
1465: }
1466: for(k=fromb; k; k=col[k].next) {
1467: j=hwof(col[k].entry);
1468: p[j].h.v|=2;
1469: if(p[j].h.v==3)pf++;
1470: }
1471: if(pf>0) {
1472: ambiguous(froma,fromb);
1473: goto pathx;
1474: }
1475: if(froma==0&&fromb==0) {
1476: fprintf(STDERR,"no link found\n");
1477: return;
1478: }
1479: it++;
1480: nfroma=0;
1481: while(froma) {
1482: i=p[col[froma].entry].s.st;
1483: j=froma;
1484: froma=col[j].next;
1485: freecol(j);
1486: while(j=(unpackw(i++)&WMASK)) {
1487: if(p[j].s.type&&p[j].s.v==0) {
1488: getcol(k);
1489: col[k].next=nfroma;
1490: col[k].entry=j;
1491: nfroma=k;
1492: p[j].s.v=it;
1493: }
1494: }
1495: }
1496: froma=nfroma;
1497: nfromb=0;
1498: while(fromb) {
1499: i=p[col[fromb].entry].s.st;
1500: j=fromb;
1501: fromb=col[j].next;
1502: freecol(j);
1503: while(j=(unpackw(i++)&WMASK)) {
1504: if(p[j].s.type&&p[j].s.v==0) {
1505: getcol(k);
1506: col[k].next=nfromb;
1507: col[k].entry=j;
1508: nfromb=k;
1509: p[j].s.v=it;
1510: }
1511: }
1512: }
1513: fromb=nfromb;
1514: goto loop;
1515: pathx:
1516: freeall(froma);
1517: freeall(fromb);
1518: }
1519: hwof(i)
1520: {
1521: while(i>0&&p[i].h.type!=0)i--;
1522: if(i==0)err("hwof()");
1523: return i;
1524: }
1525: ambiguous(fa,fb)
1526: {
1527: int f,g,i,j,k;
1528: int dbest;
1529: int enda,endb;
1530: int la,lb;
1531: int length;
1532: int best,a;
1533: dbest=9999;
1534: for(best=0; best<2; best++)
1535: for(f=fa; f; f=(f==fb)?0:fb)for(g=f; g; g=col[g].next) {
1536: enda=endb=0;
1537: la=lb=9999;
1538: i=hwof(col[g].entry);
1539: if(p[i].h.v!=3)continue;
1540: for(j=1; p[i+j].s.type; j++) {
1541: if(p[i+j].s.v==0)continue;
1542: a=pathtoend(i+j);
1543: if(enda==0)enda=a,la=p[i+j].s.v;
1544: else
1545: if(hwof(col[a].entry)==hwof(col[enda].entry)) {
1546: if(p[i+j].s.v<la) {
1547: freeall(enda);
1548: enda=a;
1549: la=p[i+j].s.v;
1550: }
1551: else freeall(a);
1552: }
1553: else if(endb==0)endb=a,lb=p[i+j].s.v;
1554: else
1555: if(hwof(col[a].entry)==hwof(col[endb].entry)) {
1556: if(p[i+j].s.v<lb) {
1557: freeall(endb);
1558: endb=a;
1559: lb=p[i+j].s.v;
1560: }
1561: else freeall(a);
1562: }
1563: }
1564: length=la+lb;
1565: if(best==0&&length<dbest)dbest=length;
1566: else if(best==1&&length==dbest) {
1567: prrev(enda);
1568: fprintf(STDERR,"\n");
1569: prrev(endb);
1570: fprintf(STDERR,"\n\n");
1571: p[i].h.v=0;
1572: }
1573: freeall(enda);
1574: freeall(endb);
1575: }
1576: }
1577: pathtoend(x)
1578: {
1579: int i,j,k;
1580: for(k=0;;) {
1581: getcol(j);
1582: col[j].entry=x;
1583: col[j].next=k;
1584: k=j;
1585: if(p[x].s.v<=1)return k;
1586: i=p[x].s.st;
1587: while(j=(WMASK&unpackw(i++))) {
1588: if(p[j].s.type&&p[j].s.v==p[x].s.v-1)break;
1589: }
1590: x=j;
1591: }
1592: }
1593: prrev(i)
1594: {
1595: if(i==0)return;
1596: prrev(col[i].next);
1597: prsense(col[i].entry);
1598: }
1599: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.