|
|
1.1 ! root 1: #define AMBIGUOUS 1 ! 2: #define STDERR stdout ! 3: #include <stdio.h> ! 4: #include <ctype.h> ! 5: #include "./regexp.h" ! 6: regexp *regcomp(),*rs; ! 7: int nwanted,wantps; ! 8: int accept(); ! 9: char *malloc(); ! 10: #define freecol(x) (col[x].next=colfree,colfree=x) ! 11: #define getcol(x) ((x=colfree)?(colfree=col[colfree].next):(x=morecol())) ! 12: #define NHS 114723 /* number of headwords and senses */ ! 13: #define N 400 /* max line length */ ! 14: #define INITTEXT (516673+1) /* bytes of headword text */ ! 15: #define INITSQ 1980 /* bytes of sense qualifier text */ ! 16: #define INITSQTAB 1063 /* #qual sense addr with text addr */ ! 17: #define NWINDEX 514104 /* number of 17 bit references */ ! 18: #define X 1 /* extra bit for ref qualified */ ! 19: #define WADDR 17 /* bits in headword or sense addr */ ! 20: #define WMASK ((1<<WADDR)-1) ! 21: #define WQ (1<<WADDR) ! 22: #define WBITS (WADDR+X) ! 23: #define WQTEXT 2518 /* bytes of ref qualifier text */ ! 24: #define NWQ 11582 /* #qual ref addr with text addr */ ! 25: #define WQT 207 /* #tree entries for ref qual sort */ ! 26: #define NCOL 1024 /* intermediate senses in path search */ ! 27: struct col { ! 28: unsigned int entry:17; ! 29: unsigned int next:15; ! 30: } *col; ! 31: int colfree; ! 32: FILE *f; ! 33: FILE *wpackfile; ! 34: union x { ! 35: struct headword { ! 36: unsigned int type:1; ! 37: unsigned int tt:19; ! 38: unsigned int v:12; ! 39: } h; ! 40: struct sense { ! 41: unsigned int type:1; ! 42: unsigned int st:19; ! 43: unsigned int p:4; ! 44: unsigned int q:1; ! 45: unsigned int v:7; ! 46: } s; ! 47: } p[NHS+2]; ! 48: char tbase[INITTEXT]; ! 49: int text; ! 50: int z; ! 51: char in[N]; ! 52: struct squal { ! 53: unsigned int saddr:17; ! 54: unsigned int qaddr:11; ! 55: unsigned int v:4; ! 56: } sqtab[INITSQTAB]; ! 57: int nsq; ! 58: char sqbase[INITSQ]; ! 59: int sqtext; ! 60: int windex; ! 61: unsigned char arena[(NWINDEX*WBITS)/8+1]; ! 62: struct wqual { ! 63: unsigned int waddr:19; ! 64: unsigned int qaddr:12; ! 65: } wqtab[NWQ]; ! 66: int nwq; ! 67: char wqbase[WQTEXT]; ! 68: int wqtext; ! 69: struct wqt { ! 70: unsigned left:8,right:8,qaddr:12; ! 71: } wqtree[WQT]; ! 72: int wqt; ! 73: #define NSET 32 ! 74: int set[NSET]; ! 75: int ncol; ! 76: char *parts[] = { ! 77: "","n","v","adj","adv","pron","conj","prep","interj" ! 78: }; ! 79: #define NP 9 ! 80: inset(t) ! 81: char *t; ! 82: { ! 83: char y[128]; ! 84: int i,j,k; ! 85: if(*t!='&') { ! 86: i=headw(t); ! 87: if(i==0)return 0; ! 88: for(j=0; j<NSET; j++)if(p[i+j+1].s.type==0||i+j+1>=z)break; ! 89: else set[j]=i+j+1; ! 90: return j; ! 91: } ! 92: if(isdigit(*++t)) { ! 93: for(k=0; isdigit(*t); t++)k=k*10+*t-'0'; ! 94: i=headw(t); ! 95: if(i==0)return 0; ! 96: for(j=0; j<NSET; j++)if(p[i+j+1].s.type==0||i+j+1>=z)break; ! 97: else if(j+1==k) { ! 98: set[0]=i+j+1; ! 99: return 1; ! 100: } ! 101: return 0; ! 102: } ! 103: if(strlen(t)>126)err("string to long in inset()"); ! 104: strcpy(y,t); ! 105: for(j=0; y[j]&&y[j]!='.'; j++); ! 106: if(y[j]==0)return 0; ! 107: y[j]=0; ! 108: for(k=1; k<NP; k++)if(strcmp(y,parts[k])==0)break; ! 109: if(k>=NP)return 0; ! 110: i=headw(y+j+1); ! 111: if(i++==0)return 0; ! 112: for(j=0; j<NSET&&i<=z&&p[i].s.type==1; i++) { ! 113: if(p[i].s.p==k)set[j++]=i; ! 114: } ! 115: return j; ! 116: } ! 117: readnum() ! 118: { ! 119: int j,k; ! 120: for(k=0; isdigit(j=getc(f)); k=k*10+j-'0'); ! 121: if(j!='\n')err("expect number"); ! 122: fprintf(STDERR,"readnum()=%d\n",k); ! 123: return k; ! 124: } ! 125: sensequal(zz,t) ! 126: char *t; ! 127: { ! 128: int i; ! 129: if(p[zz].h.type!=1)err("squal on headword"); ! 130: if(*t==0)return; ! 131: p[zz].s.q=1; ! 132: sqtab[nsq].saddr=zz; ! 133: for(i=0; i<nsq; i++)if(strcmp(t,sqbase+sqtab[i].qaddr)==0)break; ! 134: if(i<nsq) { ! 135: sqtab[nsq].qaddr=sqtab[i].qaddr; ! 136: } ! 137: else { ! 138: sqtab[nsq].qaddr=sqtext; ! 139: strcpy(sqbase+sqtext,t); ! 140: sqtext+=strlen(t)+1; ! 141: if(sqtext>=INITSQ)err("sqtext overflow"); ! 142: } ! 143: if(++nsq>INITSQTAB)err("nsq overflow"); ! 144: } ! 145: err(s) ! 146: char *s; ! 147: { ! 148: int i; ! 149: fprintf(STDERR,"err: %s\n",s); ! 150: fprintf(STDERR,"type return:"); getchar(); ! 151: fprintf(STDERR,"input buffer segments:\n"); ! 152: for(i=0; i<N; i++) { ! 153: if(in[i]==0)continue; ! 154: fprintf(STDERR,"'%s'\n",in+i); ! 155: while(i<N&&in[i])i++; ! 156: } ! 157: exit(1); ! 158: } ! 159: char *squal(zz) ! 160: { ! 161: int il,im,iu; ! 162: if(p[zz].h.type==0) { ! 163: err("ask for sensqual on headword"); ! 164: } ! 165: if(p[zz].s.q==0)return ""; ! 166: il=0; iu=nsq; ! 167: while(il<iu-1) { ! 168: im=(il+iu)/2; ! 169: if(sqtab[im].saddr>zz) { ! 170: iu=im; ! 171: } ! 172: else il=im; ! 173: } ! 174: if(sqtab[il].saddr!=zz)err("binary botch in squal()"); ! 175: return sqbase+sqtab[il].qaddr; ! 176: } ! 177: char *wqual(wi) ! 178: { ! 179: int il,im,iu; ! 180: if((unpackw(wi)&WQ)==0)return ""; ! 181: il=0; iu=nwq; ! 182: while(il<iu-1) { ! 183: im=(il+iu)/2; ! 184: if(wqtab[im].waddr>wi) { ! 185: iu=im; ! 186: } ! 187: else il=im; ! 188: } ! 189: if(wqtab[il].waddr!=wi)err("binary botch wqual"); ! 190: return wqbase+wqtab[il].qaddr; ! 191: } ! 192: opackw(index,value) ! 193: register value; ! 194: { ! 195: int i,j; ! 196: register int mask=(1<<WBITS)-1; ! 197: i=index*WBITS; ! 198: value<<=i&7; ! 199: mask<<=i&7; ! 200: j=i>>3; ! 201: while(mask&0xff) { ! 202: arena[j]&=~mask; ! 203: arena[j]|=value; ! 204: value>>=8; ! 205: mask>>=8; ! 206: j++; ! 207: } ! 208: } ! 209: packw(index,value) ! 210: register value; ! 211: { ! 212: register i; ! 213: register unsigned char *p; ! 214: i=index*18; ! 215: value<<=i&7; ! 216: p=arena+(i>>3); ! 217: *p++|=value; value>>=8; *p++|=value; value>>=8; ! 218: *p++|=value; ! 219: } ! 220: unpackw(index) ! 221: { ! 222: register i,k; ! 223: register unsigned char *p; ! 224: i=index*18; ! 225: p=arena+(i>>3)+2; ! 226: k=*p; k=(k<<8)|*--p; k=(k<<8)|*--p; ! 227: return (k>>(i&7))&0x3ffff; ! 228: } ! 229: ounpackw(index) ! 230: { ! 231: int i,j,k; ! 232: int x; ! 233: register int mask=(1<<WBITS)-1; ! 234: i=index*WBITS; ! 235: j=i>>3; ! 236: mask<<=i&7; ! 237: x=k=0; ! 238: while(mask&0xff) { ! 239: k|=(arena[j]&mask)<<x; ! 240: mask>>=8; ! 241: j++; ! 242: x+=8; ! 243: } ! 244: return k>>(i&7); ! 245: } ! 246: dumps(x) ! 247: { ! 248: int i,j,k,l,m,n; ! 249: if(x==0) { ! 250: fprintf(STDERR,"not found\n"); ! 251: return; ! 252: } ! 253: for(i=x; i>0&&p[i].h.type!=0; i--); ! 254: if(i==0) { ! 255: fprintf(STDERR,"dump of %d no headword\n",x); ! 256: return; ! 257: } ! 258: fprintf(STDERR,"\n&%d%s ",x-i,p[i].h.tt+tbase); ! 259: if(p[x].h.type==0) { ! 260: fprintf(STDERR,"\n"); ! 261: while(++x<z&&p[x].h.type!=0)dumps(x); ! 262: return; ! 263: } ! 264: fprintf(STDERR," (%s.)",parts[p[x].s.p]); ! 265: if(*squal(x))fprintf(STDERR," [%s]",squal(x)); ! 266: fprintf(STDERR,":\n"); ! 267: for(i=p[x].s.st; l=unpackw(i++); ) { ! 268: for(j=l&WMASK; j>0&&p[j].h.type!=0; j--); ! 269: fprintf(STDERR," &%d%s",(l&WMASK)-j,p[j].h.tt+tbase); ! 270: if(l&WQ)fprintf(STDERR," [%s]",wqual(i-1)); ! 271: fprintf(STDERR,","); ! 272: } ! 273: fprintf(STDERR,"\n"); ! 274: } ! 275: char *ma(x) ! 276: { ! 277: char *p; ! 278: p=(char*)calloc(x,1); ! 279: if(p==0) { ! 280: fprintf(STDERR,"can't calloc %d bytes\n",x); ! 281: err("quit"); ! 282: } ! 283: return p; ! 284: } ! 285: wsym(q,node) ! 286: char *q; ! 287: { ! 288: int i; ! 289: if(wqt==0) { ! 290: strcpy(wqbase,q); ! 291: wqtext=strlen(q)+1; ! 292: wqt++; ! 293: return 0; ! 294: } ! 295: i=strcmp(q,wqbase+wqtree[node].qaddr); ! 296: if(i==0)return wqtree[node].qaddr; ! 297: if(i<0) { ! 298: if(wqtree[node].left)return wsym(q,wqtree[node].left); ! 299: wqtree[node].left=wqt; ! 300: } ! 301: else { ! 302: if(wqtree[node].right)return wsym(q,wqtree[node].right); ! 303: wqtree[node].right=wqt; ! 304: } ! 305: strcpy(wqbase+wqtext,q); ! 306: i=wqtext; ! 307: wqtext+=strlen(q)+1; ! 308: if(wqtext>=WQTEXT)err("wqtext ovfl"); ! 309: wqtree[wqt++].qaddr=i; ! 310: if(wqt>=WQT)err("wqt ovfl"); ! 311: return i; ! 312: } ! 313: headw(text) ! 314: char *text; ! 315: { ! 316: int il,iu,im; ! 317: il=1; iu=z; ! 318: while(il<iu+1) { ! 319: im=(il+iu)/2; ! 320: while(im>il&&p[im].h.type!=0)im--; ! 321: if(im<=il) { ! 322: while(++im<iu&&p[im].h.type!=0); ! 323: if(im>=iu)break; ! 324: } ! 325: if(strcmp(p[im].h.tt+tbase,text)>0) { ! 326: iu=im; ! 327: } ! 328: else { ! 329: il=im; ! 330: } ! 331: } ! 332: if(strcmp(p[il].h.tt+tbase,text))return 0; ! 333: return il; ! 334: } ! 335: morecol() ! 336: { ! 337: int i; ! 338: if(col==0)col=(struct col*)ma(NCOL*sizeof*col); ! 339: else col=(struct col*)realloc(col,(ncol+NCOL)*sizeof*col); ! 340: for(i=NCOL+ncol-1; i>ncol; i--)col[i].next=i-1; ! 341: col[ncol].next=0; ! 342: ncol+=NCOL; ! 343: colfree=ncol-2; ! 344: return ncol-1; ! 345: } ! 346: path(a,b) ! 347: char *a,*b; ! 348: { ! 349: int i,j,k; ! 350: int old,new; ! 351: int e; ! 352: unsigned char psa[NP],psb[NP]; ! 353: int it; ! 354: int pf; ! 355: for(i=0; b[i]; i++)if(b[i]=='/') { ! 356: nwanted=10; ! 357: rsparse(b); ! 358: qpath(accept,a); ! 359: fprintf(STDERR,"\n"); ! 360: return; ! 361: } ! 362: #ifdef AMBIGUOUS ! 363: if(strncmp(a,"ambiguous ",10)==0) { ! 364: ambpath(a+10,b); ! 365: fprintf(STDERR,"\n"); ! 366: return; ! 367: } ! 368: #endif ! 369: old=new=0; ! 370: it=1; ! 371: pf=0; ! 372: e=inset(a); ! 373: if(e==0) { ! 374: fprintf(STDERR,"can't find %s\n",a); ! 375: return; ! 376: } ! 377: for(i=0; i<NP; i++)psa[i]=psb[i]=0; ! 378: for(i=0; i<e; i++)psa[p[set[i]].s.p]++; ! 379: k=inset(b); ! 380: if(k==0) { ! 381: fprintf(STDERR,"%s not found\n",b); ! 382: return; ! 383: } ! 384: for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0; ! 385: for(i=0; i<k; i++) { ! 386: if(psa[p[set[i]].s.p]==0)continue; ! 387: psb[p[set[i]].s.p]++; ! 388: getcol(j); ! 389: col[j].entry=set[i]; ! 390: col[j].next=old; ! 391: old=j; ! 392: p[set[i]].s.v=it; ! 393: } ! 394: if(old==0) { ! 395: fprintf(STDERR,"%s and %s: no part of speech in common\n", ! 396: a,b); ! 397: return; ! 398: } ! 399: e=inset(a); ! 400: for(i=j=0; i<e; i++) { ! 401: set[j]=set[i]; ! 402: if(psb[p[set[i]].s.p])j++; ! 403: } ! 404: e=j; ! 405: if(e==0)err("path 1"); ! 406: loop: ! 407: if(old==0) { ! 408: if(pf)fprintf(STDERR,"\n"); ! 409: else fprintf(STDERR," no path from %s to %s\n",a,b); ! 410: goto pathx; ! 411: } ! 412: it++; ! 413: while(old) { ! 414: i=p[col[old].entry].s.st; ! 415: j=old; ! 416: old=col[old].next; ! 417: freecol(j); ! 418: while(j=(unpackw(i++)&WMASK)) { ! 419: if(p[j].s.type&&p[j].s.v==0) { ! 420: getcol(k); ! 421: col[k].next=new; ! 422: col[k].entry=j; ! 423: new=k; ! 424: p[j].s.v=it; ! 425: } ! 426: } ! 427: } ! 428: old=new; ! 429: new=0; ! 430: for(i=j=0; i<e; i++) { ! 431: set[j]=set[i]; ! 432: if(p[set[i]].s.v) { ! 433: pfound(set[i]); ! 434: pf++; ! 435: } ! 436: else j++; ! 437: } ! 438: if(e=j)goto loop; ! 439: fprintf(STDERR,"\n"); ! 440: pathx: ! 441: freeall(old); ! 442: freeall(new); ! 443: } ! 444: freeall(x) ! 445: { ! 446: int i; ! 447: while(x) { ! 448: i=col[x].next; ! 449: freecol(x); ! 450: x=i; ! 451: } ! 452: } ! 453: pfound(x) ! 454: { ! 455: int i,j; ! 456: do { ! 457: for(i=x; i>0&&p[i].h.type; i--); ! 458: if(i==0)err("no hw"); ! 459: fprintf(STDERR," &%d%s",x-i,p[i].h.tt+tbase); ! 460: i=p[x].s.st; ! 461: while(j=(WMASK&unpackw(i++))) { ! 462: if(p[j].s.type&&p[j].s.v==p[x].s.v-1)break; ! 463: } ! 464: } while(x=j); ! 465: fprintf(STDERR,"\n"); ! 466: } ! 467: char *avdummy[2]; ! 468: main(ac,av) ! 469: char *av[]; ! 470: { ! 471: int i,j,k,l,m,n; ! 472: int nhw,nhs; ! 473: int backref; ! 474: char *q; ! 475: int zz; ! 476: int w; ! 477: int nw; ! 478: char y[128]; ! 479: if(ac<2) { ! 480: fprintf(stderr,"word_clout: need data file"); ! 481: return 1; ! 482: } ! 483: f=fopen(av[1],"r"); ! 484: for(i=0; (j=getc(f))!=EOF&&j!='\n'&&i<N-1; i++)in[i]=j; ! 485: if(j==EOF)err("immediate eof"); ! 486: in[i]=0; ! 487: if(strcmp(in,"-huffman")==0) { ! 488: goto unpackh; ! 489: } ! 490: else if(strcmp(in,"-pack")) { ! 491: err("expect -huffman or -pack"); ! 492: } ! 493: nhw=readnum(); ! 494: nhs=readnum(); ! 495: /* now preallocated: ! 496: * p=(struct x*)ma((nhs+2)*sizeof*p); ! 497: * tbase=(char*)ma(INITTEXT); ! 498: * sqbase=(char*)ma(INITSQ); ! 499: * sqtab=(struct squal*)ma(INITSQTAB*sizeof*sqtab); ! 500: * arena=(unsigned char*)ma(i=(NWINDEX*sizeof*arena*WBITS)/8+1); ! 501: * wqtab=(struct wqual*)ma(NWQ*sizeof*wqtab); ! 502: * wqbase=(char*)ma(WQTEXT*sizeof*wqbase); ! 503: * wqtree=(struct wqt*)ma(WQT*sizeof*wqtree); ! 504: */ ! 505: readhead: ! 506: for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j; ! 507: if(i>=N)err("line too long"); ! 508: if(j==EOF)err("eof in headwords"); ! 509: in[i]=0; ! 510: if(i==0)goto hwdone; ! 511: for(j=i-1; j>0&&in[j]!=' '; j--); ! 512: in[j++]=0; ! 513: p[++z].h.tt=text; ! 514: zz=z; ! 515: strcpy(text+tbase,in); ! 516: text+=strlen(in)+1; ! 517: if(text>=INITTEXT)err("inittext too small"); ! 518: p[z].h.type=0; ! 519: for( ; in[j]; j++) { ! 520: p[++z].s.type=1; ! 521: p[z].s.q=0; ! 522: p[z].s.st=0; ! 523: k=in[j]; ! 524: if(k<='z'&&k>='a')p[z].s.p=k-'a'; ! 525: else p[z].s.p=k-'A'; ! 526: } ! 527: if((j=getc(f))==':') { ! 528: for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j; ! 529: if(j==EOF)err("eof in sense qual"); ! 530: if(i>=N)err("squal too long"); ! 531: in[i]=0; ! 532: for(i=0; in[i]; i++)if(in[i]==':'&&in[i+1]==' ')in[i]='-'; ! 533: for(i=0; in[i]; i++) { ! 534: if(++zz>z)err("too many squal fields"); ! 535: for(j=i; in[j]&&in[j]!=':'; j++); ! 536: if(in[j]==0)err("squal not ending :"); ! 537: in[j]=0; ! 538: sensequal(zz,in+i); ! 539: i=j; ! 540: } ! 541: if(zz<z)err("missing squal field"); ! 542: goto readhead; ! 543: } ! 544: ungetc(j,f); ! 545: goto readhead; ! 546: hwdone: ! 547: fprintf(STDERR,"text=%d\n",text); ! 548: z++; ! 549: p[z].h.type=0; /* extra slot on end */ ! 550: for(i=1,j=0; i<z; i++)if(p[i].h.type==1&&p[i].s.q)j++; ! 551: fprintf(STDERR,"%d entries %d qual senses\n",i,j); ! 552: fprintf(STDERR,"sqtext=%d, nsq=%d\n",sqtext,nsq); ! 553: zz=1; ! 554: readwords: ! 555: for(i=0; i<N&&(j=getc(f))!=EOF&&j!='\n'; i++)in[i]=j; ! 556: if(j==EOF)goto done; ! 557: if(i>=N)err("line too long"); ! 558: in[i]=0; ! 559: while(zz<z&&p[zz].h.type==0)zz++; ! 560: for(i=nw=0; in[i]<='z'&&in[i]>='a'; i++)nw=nw*26+in[i]-'a'; ! 561: w=windex; ! 562: p[zz].s.st=w; ! 563: windex+=nw+1; ! 564: if(windex>NWINDEX)err("windex too big"); ! 565: while(in[i]==':') { ! 566: q=0; ! 567: if(in[++i]=='{') { ! 568: for(j=i+1; in[j]&&in[j]!='}'; j++); ! 569: if(in[j]==0)err("missing }"); ! 570: in[j]=0; ! 571: q=in+i+1; ! 572: i=j+1; ! 573: } ! 574: for(k=0; in[i]>='a'&&in[i]<='z'; i++)k=k*26+in[i]-'a'; ! 575: ++k; /* offset 1 so preserve 0=0 */ ! 576: if(k!=(k&WMASK))err("exceeds wmask"); ! 577: if(p[k].h.type==1&&p[k].s.p!=p[zz].s.p) { ! 578: dumps(k); ! 579: dumps(zz); ! 580: err("part bad"); ! 581: } ! 582: if(q) { ! 583: k|=WQ; ! 584: wqtab[nwq].waddr=w; ! 585: wqtab[nwq].qaddr=wsym(q,0); ! 586: nwq++; ! 587: if(nwq>=NWQ)err("nwq ovfl"); ! 588: } ! 589: packw(w++,k); ! 590: } ! 591: zz++; ! 592: goto readwords; ! 593: done: ! 594: fprintf(STDERR,"wqtext=%d nwq=%d windex=%d\n",wqtext,nwq,windex); ! 595: fprintf(STDERR,"wqt=%d\n",wqt); ! 596: fprintf(STDERR,"zz=%d z=%d\n",zz,z); ! 597: m=0; ! 598: if(ac>2&&*av[2]=='c') { ! 599: for(w=0; w<windex; w++)if(unpackw(w))m++; ! 600: fprintf(STDERR,"%d words set\n",m); ! 601: } ! 602: m=0; ! 603: for(zz=1; zz<z; zz++) { ! 604: if(p[zz].h.type==0)continue; ! 605: i=p[zz].s.st; ! 606: while(j=unpackw(i++)) { ! 607: j&=WMASK; ! 608: if(p[j].h.type==0)continue; ! 609: k=p[j].s.st; ! 610: while(l=unpackw(k++)) { ! 611: l&=WMASK; ! 612: if(l==zz)goto backed; ! 613: } ! 614: if(unpackw(k))err("oversubscribed"); ! 615: packw(k-1,zz); ! 616: m++; ! 617: backed:; ! 618: } ! 619: } ! 620: fprintf(STDERR,"backrefs added %d\n",m); ! 621: if(ac>2&&*av[2]=='c')for(zz=1; zz<z; zz++) { ! 622: if(p[zz].h.type==0) { ! 623: j=zz; ! 624: continue; ! 625: } ! 626: for(k=p[zz].s.st; unpackw(k)&WMASK; k++); ! 627: if(k+1<z&&(unpackw(k+1)&WMASK)==0) { ! 628: fprintf(STDERR,"%s is missing words\n", ! 629: tbase+p[j].h.tt); ! 630: } ! 631: } ! 632: if(ac>3&&(wpackfile=fopen(av[3],"w")))packit(); ! 633: goto ahem; ! 634: unpackh: ! 635: unpackit(); ! 636: ahem: fprintf(STDERR,"ready.\n"); ! 637: loop: ! 638: for(i=0; i<N&&(j=getchar())!=EOF&&j!='\n'; i++)in[i]=j; ! 639: if(i<N)in[i]=0; ! 640: if(j==EOF||strcmp(in,"q")==0) { ! 641: exit(0); ! 642: } ! 643: for(i=0; in[i]&&in[i]!='/'; i++)if(in[i]=='^') { ! 644: in[i]=0; ! 645: path(in,in+i+1); ! 646: goto loop; ! 647: } ! 648: if(in[i]=='/') { ! 649: strcpy(y,in+i+1); ! 650: in[i]=0; ! 651: if(i>0&&in[i-1]=='.')in[i-1]=0; ! 652: for(i=0; y[i]&&y[i]!='/'; i++); ! 653: y[i]=0; ! 654: k=0; ! 655: if(*in=='&')for(k=0; k<NP&&strcmp(in+1,parts[k]); k++); ! 656: if(rs)free(rs); ! 657: rs=regcomp(y); ! 658: for(zz=1; zz<z; zz++) { ! 659: if(p[zz].h.type)continue; ! 660: if(k) { ! 661: for(i=1; p[zz+i].s.type; i++) ! 662: if(p[zz+i].s.p==k)goto ok; ! 663: continue; ! 664: } ! 665: ok: ! 666: if(regexec(rs,p[zz].h.tt+tbase,0,0)) { ! 667: fprintf(stderr," %s,",p[zz].h.tt+tbase); ! 668: } ! 669: } ! 670: fprintf(stderr,"\n"); ! 671: goto loop; ! 672: } ! 673: i=inset(in); ! 674: for(j=0; j<i; j++)dumps(set[j]); ! 675: if(i==0)fprintf(STDERR,"%s not found\n",in); ! 676: goto loop; ! 677: } ! 678: /* funct == 0 reject ! 679: funct == -1 stop ! 680: funct == otherwise, continue ! 681: */ ! 682: qpath(funct,b) ! 683: char *b; ! 684: int (*funct)(); ! 685: { ! 686: int i,j,k; ! 687: int old,new; ! 688: int it; ! 689: old=new=0; ! 690: it=1; ! 691: k=inset(b); ! 692: if(k==0) { ! 693: fprintf(STDERR,"%s not found\n",b); ! 694: return; ! 695: } ! 696: for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0; ! 697: for(i=0; i<k; i++) { ! 698: getcol(j); ! 699: col[j].entry=set[i]; ! 700: col[j].next=old; ! 701: old=j; ! 702: p[set[i]].s.v=it; ! 703: } ! 704: loop: ! 705: if(old==0) { ! 706: goto pathx; ! 707: } ! 708: it++; ! 709: while(old) { ! 710: i=p[col[old].entry].s.st; ! 711: j=old; ! 712: old=col[old].next; ! 713: freecol(j); ! 714: while(j=(unpackw(i++)&WMASK)) { ! 715: if(p[j].s.type&&p[j].s.v==0) { ! 716: p[j].s.v=it; ! 717: switch((*funct)(j)) { ! 718: case 0: ! 719: p[j].s.v=0; ! 720: continue; ! 721: case -1: goto pathx; ! 722: } ! 723: getcol(k); ! 724: col[k].next=new; ! 725: col[k].entry=j; ! 726: new=k; ! 727: } ! 728: } ! 729: } ! 730: old=new; ! 731: new=0; ! 732: if(old)goto loop; ! 733: pathx: ! 734: freeall(old); ! 735: freeall(new); ! 736: } ! 737: rsparse(b) ! 738: char *b; ! 739: { ! 740: int i,j; ! 741: wantps=0; ! 742: for(i=0; b[i]&&b[i]!='/'; i++); ! 743: if(i&&b[i-1]=='.') { ! 744: b[i-1]=0; ! 745: for(j=0; j<NP; j++)if(strcmp(parts[j],b+1)==0)wantps=j; ! 746: } ! 747: if(b[i])i++; ! 748: for(j=i; b[j]; j++); ! 749: while(j&&b[j-1]!='/')j--; ! 750: if(j)b[j-1]=0; ! 751: if(atoi(b+j))nwanted=atoi(b+j); ! 752: if(rs) { ! 753: free(rs); ! 754: } ! 755: rs=regcomp(b+i); ! 756: } ! 757: accept(zz) ! 758: { ! 759: int i; ! 760: for(i=zz; i&&p[i].h.type; i--); ! 761: if(regexec(rs,p[i].h.tt+tbase,0,0)) { ! 762: fprintf(stderr," &%d%s,",zz-i,p[i].h.tt+tbase); ! 763: if(--nwanted<=0)return -1; ! 764: return 1; ! 765: } ! 766: return 1; ! 767: } ! 768: #define NCOMM 100 ! 769: #define NHASCII 128 ! 770: #define NNSEN 50 ! 771: #define NNWDS 100 ! 772: #define NSQR 256 ! 773: #define NWQR 256 ! 774: #define ALLOC(base,num) (base.freq=(long*)ma(num*sizeof(long)),\ ! 775: base.code=(long*)ma(num*sizeof(long)),\ ! 776: base.tree=(short*)ma(2*num*sizeof(short))) ! 777: #define FREE(base) (free(base.freq),free(base.code),free(base.tree)) ! 778: #define GEN(base,n) (gencodes(base.freq,n,base.code),flipcodes(n,base.code),\ ! 779: entree(base.code,n,base.tree)) ! 780: #define INBIT() (ibuffc?(ibuff>>--ibuffc)&1:(ibuffc=7,((ibuff=getc(f))>>7)&1)) ! 781: struct huff { ! 782: long *freq,*code; ! 783: short *tree; ! 784: }; ! 785: struct alpha { ! 786: unsigned int k,wqtab; ! 787: }; ! 788: alphacmp(a,b) ! 789: struct alpha *a,*b; ! 790: { ! 791: if(p[a->k&WMASK].h.type) { ! 792: if(p[b->k&WMASK].h.type)return (a->k&WMASK)-(b->k&WMASK); ! 793: return 1; ! 794: } ! 795: if(p[b->k&WMASK].h.type==0)return (a->k&WMASK)-(b->k&WMASK); ! 796: return -1; ! 797: } ! 798: wqcmp(a,b) ! 799: struct wqual *a,*b; ! 800: { ! 801: return a->waddr-b->waddr; ! 802: } ! 803: int ibuffc,ibuff; ! 804: packit() ! 805: { ! 806: struct huff comm,qascii,hascii,nsen,parsp,nwds,nwdsback; ! 807: int i,j,k,l,m,n; ! 808: int ii,jj,il,iu,im; ! 809: int zz; ! 810: int sqref[NSQR],wqref[NWQR]; ! 811: int medians,medianw; ! 812: struct alpha wtemp[NNWDS]; ! 813: /* s & w qualifier text addr from s & w qualifier index */ ! 814: for(i=j=0; i<sqtext; i++) { ! 815: if(i==0||sqbase[i-1]==0) { ! 816: sqref[j++]=i; ! 817: if(j>=NSQR)err("NSQR too small"); ! 818: } ! 819: } ! 820: for(i=j=0; i<wqtext; i++) { ! 821: if(i==0||wqbase[i-1]==0) { ! 822: wqref[j++]=i; ! 823: if(j>=NWQR)err("NWQR too small"); ! 824: } ! 825: } ! 826: ALLOC(comm,NCOMM); ! 827: ALLOC(qascii,NHASCII); ! 828: ALLOC(hascii,NHASCII); ! 829: ALLOC(nsen,NNSEN); ! 830: ALLOC(parsp,NP); ! 831: ALLOC(nwds,NNWDS); ! 832: ALLOC(nwdsback,NNWDS); ! 833: /* common substring, number of senses, parts of speech */ ! 834: for(zz=1; zz<z; zz++) { ! 835: if(p[zz].h.type)continue; ! 836: j=k,k=p[zz].h.tt; ! 837: if(k==0)l=0; ! 838: else for(l=0; tbase[k+l]==tbase[j+l]; l++); ! 839: if(l>=NCOMM)err("NCOMM too small"); ! 840: comm.freq[l]++; ! 841: for(i=l; tbase[i+k]; i++)hascii.freq[tbase[i+k]]++; ! 842: hascii.freq[0]++; ! 843: for(n=zz+1; p[n].s.type; n++); ! 844: if(n-zz>=NNSEN)err("NNSEN too small"); ! 845: nsen.freq[n-zz]++; ! 846: for(n=zz+1; p[n].s.type; n++)parsp.freq[p[n].s.p]++; ! 847: } ! 848: /* number of words */ ! 849: for(zz=1; zz<z; zz++) { ! 850: if(p[zz].h.type) { ! 851: for(i=p[zz].s.st,j=0; k=unpackw(i); i++)if((k&WMASK)<zz)j++; ! 852: i-=p[zz].s.st; ! 853: if(i>=NNWDS)err("NNWDS too small"); ! 854: nwds.freq[i]++; ! 855: nwdsback.freq[j]++; ! 856: } ! 857: } ! 858: /* ascii in qualifiers */ ! 859: for(i=0; i<sqtext; i++)qascii.freq[sqbase[i]]++; ! 860: for(i=0; i<wqtext; i++)qascii.freq[wqbase[i]]++; ! 861: ! 862: GEN(qascii,NHASCII); ! 863: GEN(hascii,NHASCII); ! 864: GEN(comm,NCOMM); ! 865: GEN(nsen,NNSEN); ! 866: GEN(parsp,NP); ! 867: GEN(nwds,NNWDS); ! 868: GEN(nwdsback,NNWDS); ! 869: ! 870: /* median sense qualifier */ ! 871: for(i=j=0,zz=1; zz<z; zz++) { ! 872: if(p[zz].s.type) { ! 873: i++; ! 874: if(p[zz].s.q)j++; ! 875: } ! 876: } ! 877: medians=(7*i)/(10*j); ! 878: /* median word qualifier runs */ ! 879: for(i=j=0,zz=1; zz<z; zz++) { ! 880: if(p[zz].h.type) { ! 881: for(ii=p[zz].s.st; jj=unpackw(ii); ii++) { ! 882: i++; ! 883: if(jj&WQ)j++; ! 884: } ! 885: } ! 886: } ! 887: medianw=(7*i)/(10*j); ! 888: ! 889: /* Start packed output */ ! 890: fprintf(wpackfile,"-huffman\n"); ! 891: ! 892: /* OUTPUT trees */ ! 893: outtree(qascii); ! 894: outtree(hascii); ! 895: outtree(comm); ! 896: outtree(nsen); ! 897: outtree(parsp); ! 898: outtree(nwds); ! 899: outtree(nwdsback); ! 900: ! 901: /* OUTPUT headers */ ! 902: writesim(500000,text); ! 903: writesim(100000,z); ! 904: writesim(1000,nsq); ! 905: writesim(500000,windex); ! 906: writesim(10000,nwq); ! 907: writesim(2000,wqtext); ! 908: writesim(2000,sqtext); ! 909: writesim(100,medians); ! 910: writesim(100,medianw); ! 911: ! 912: /* OUTPUT sense qualifier text */ ! 913: for(i=0; i<sqtext; i++)outbit(qascii.code[sqbase[i]]); ! 914: /* OUTPUT word qualifier text */ ! 915: for(i=0; i<wqtext; i++)outbit(qascii.code[wqbase[i]]); ! 916: /* OUTPUT hw ncommon, text \0, number of senses, part of speech */ ! 917: for(zz=1; zz<z; zz++) { ! 918: if(p[zz].h.type)continue; ! 919: j=k,k=p[zz].h.tt; ! 920: if(k==0)l=0; ! 921: else for(l=0; tbase[k+l]==tbase[j+l]; l++); ! 922: outbit(comm.code[l]); ! 923: for(i=l; tbase[i+k]; i++)outbit(hascii.code[tbase[i+k]]); ! 924: outbit(hascii.code[0]); ! 925: for(n=zz+1; p[n].s.type; n++); ! 926: outbit(nsen.code[n-zz]); ! 927: for(n=zz+1; p[n].s.type; n++)outbit(parsp.code[p[n].s.p]); ! 928: } ! 929: /* OUTPUT qualified sense run, qualifier reference number */ ! 930: m=n=0; ! 931: for(zz=1; zz<z; zz++) { ! 932: if(p[zz].h.type) { ! 933: n++; ! 934: if(p[zz].s.q) { ! 935: writesim(medians,n-m-1); ! 936: m=n; ! 937: j=squal(zz)-sqbase; ! 938: for(i=0; i<NSQR; i++)if(sqref[i]==j)break; ! 939: if(sqref[i]!=j)err("NSQR search botch"); ! 940: outbitf(NSQR+i); ! 941: } ! 942: } ! 943: } ! 944: writesim(medians,n-m); ! 945: /* sort words into alphabetical order */ ! 946: for(zz=1; zz<z; zz++) { ! 947: if(p[zz].h.type==0)continue; ! 948: for(i=p[zz].s.st,j=0; k=unpackw(i); i++) { ! 949: wtemp[j].wqtab=0; ! 950: if(k&WQ) { ! 951: il=0; iu=nwq; ! 952: while(il<iu-1) { ! 953: im=(il+iu)/2; ! 954: if(wqtab[im].waddr>i)iu=im; ! 955: else il=im; ! 956: } ! 957: if(wqtab[il].waddr!=i)err("bin botch"); ! 958: wtemp[j].wqtab=il; ! 959: } ! 960: wtemp[j++].k=k; ! 961: } ! 962: qsort(wtemp,j,sizeof*wtemp,alphacmp); ! 963: for(i=p[zz].s.st,j=0; unpackw(i); i++) { ! 964: if(wtemp[j].k&WQ) { ! 965: wqtab[wtemp[j].wqtab].waddr=i; ! 966: } ! 967: opackw(i,wtemp[j++].k); ! 968: } ! 969: } ! 970: qsort(wqtab,nwq,sizeof*wqtab,wqcmp); ! 971: /* OUTPUT # words, # words back, back references */ ! 972: for(zz=1; zz<z; zz++) { ! 973: if(p[zz].h.type==0)continue; ! 974: for(i=p[zz].s.st,j=0; k=unpackw(i)&WMASK; i++) { ! 975: if(k<zz||p[k].s.type==0)j++; ! 976: } ! 977: outbit(nwds.code[i-p[zz].s.st]); ! 978: outbit(nwdsback.code[j]); ! 979: for(i=p[zz].s.st; k=unpackw(i)&WMASK; i++) { ! 980: if(k<zz||p[k].s.type==0)outbitf(WQ|k); ! 981: } ! 982: } ! 983: /* OUTPUT word run, word qualifier index */ ! 984: m=n=0; ! 985: for(zz=1; zz<z; zz++) { ! 986: if(p[zz].h.type) { ! 987: for(ii=p[zz].s.st; jj=unpackw(ii); ii++) { ! 988: n++; ! 989: if(jj&WQ) { ! 990: writesim(medianw,n-m-1); ! 991: m=n; ! 992: j=wqual(ii)-wqbase; ! 993: for(i=0; i<NWQR; i++)if(wqref[i]==j)break; ! 994: if(wqref[i]!=j)err("NWQR search botch"); ! 995: outbitf(NWQR+i); ! 996: } ! 997: } ! 998: } ! 999: } ! 1000: writesim(medianw,n-m); ! 1001: FREE(qascii); ! 1002: FREE(hascii); ! 1003: FREE(comm); ! 1004: FREE(nsen); ! 1005: FREE(parsp); ! 1006: FREE(nwds); ! 1007: FREE(nwdsback); ! 1008: outbitf(0xff); ! 1009: fclose(wpackfile); ! 1010: } ! 1011: outbit(x) ! 1012: register x; ! 1013: { ! 1014: static obuff,b; ! 1015: while(x>1) { ! 1016: obuff=(obuff<<1)+(x&1); ! 1017: x>>=1; ! 1018: if(++b>=8) { ! 1019: b=0; ! 1020: putc(obuff,wpackfile); ! 1021: } ! 1022: } ! 1023: } ! 1024: outbitf(x) ! 1025: register x; ! 1026: { ! 1027: register y; ! 1028: for(y=1; x>1; x>>=1)y=(y<<1)+(x&1); ! 1029: outbit(y); ! 1030: } ! 1031: inbit() { return INBIT(); } ! 1032: char *rhalloc(x) { return malloc(x); } ! 1033: char *rhfree(x) char *x; { free(x); } ! 1034: wbytes(n) { ! 1035: putc(n>>8,wpackfile); ! 1036: putc(n,wpackfile); ! 1037: } ! 1038: rbytes() { ! 1039: int i; ! 1040: i=getc(f); ! 1041: return (i<<8)+getc(f); ! 1042: } ! 1043: decode(tree) ! 1044: register short *tree; ! 1045: { ! 1046: register int i; ! 1047: i=0; ! 1048: if(*tree++<2)return *tree&077777; ! 1049: do { ! 1050: if(INBIT())i=tree[i+1]; ! 1051: else i=tree[i]; ! 1052: } while((i&0100000)==0); ! 1053: return i&077777; ! 1054: } ! 1055: outtree(x) ! 1056: struct huff x; ! 1057: { ! 1058: int i; ! 1059: for(i=0; i<=x.tree[0]; i++)wbytes(x.tree[i]); ! 1060: } ! 1061: struct huff intree() ! 1062: { ! 1063: struct huff x; ! 1064: int i; ! 1065: x.code=0; ! 1066: x.freq=0; ! 1067: i=rbytes(); ! 1068: x.tree=(short*)ma((i+1)*sizeof(short)); ! 1069: x.tree[0]=i; ! 1070: for(i=1; i<=x.tree[0]; i++)x.tree[i]=rbytes(); ! 1071: return x; ! 1072: } ! 1073: rfixlen(code) ! 1074: register code; ! 1075: { ! 1076: register i=0; ! 1077: if(code<=1)return 0; ! 1078: do { ! 1079: i=(i<<1)+INBIT(); ! 1080: } while((code>>=1)>1); ! 1081: return i; ! 1082: } ! 1083: unpackit() ! 1084: { ! 1085: int i,j,k,l,m,n; ! 1086: int sqref[NSQR],wqref[NWQR]; ! 1087: struct huff qascii,hascii,comm,nsen,parsp,nwds,nwdsback; ! 1088: int medians,medianw; ! 1089: int zz; ! 1090: int ii,jj; ! 1091: fprintf(STDERR,"wait for 'ready.'\n"); ! 1092: fflush(STDERR); ! 1093: qascii=intree(); ! 1094: hascii=intree(); ! 1095: comm=intree(); ! 1096: nsen=intree(); ! 1097: parsp=intree(); ! 1098: nwds=intree(); ! 1099: nwdsback=intree(); ! 1100: text=readsim(500000); ! 1101: z=readsim(100000); ! 1102: nsq=readsim(1000); ! 1103: windex=readsim(500000); ! 1104: nwq=readsim(10000); ! 1105: wqtext=readsim(2000); ! 1106: sqtext=readsim(2000); ! 1107: medians=readsim(100); ! 1108: medianw=readsim(100); ! 1109: ! 1110: for(i=0; i<sqtext; i++)sqbase[i]=decode(qascii.tree); ! 1111: for(i=0; i<wqtext; i++)wqbase[i]=decode(qascii.tree); ! 1112: ! 1113: for(i=j=0; i<sqtext; i++) { ! 1114: if(i==0||sqbase[i-1]==0) { ! 1115: sqref[j++]=i; ! 1116: } ! 1117: } ! 1118: for(i=j=0; i<wqtext; i++) { ! 1119: if(i==0||wqbase[i-1]==0) { ! 1120: wqref[j++]=i; ! 1121: } ! 1122: } ! 1123: for(k=0,zz=1; zz<z; zz=n) { ! 1124: l=decode(comm.tree); ! 1125: for(i=0; i<l; i++)tbase[i+k]=tbase[i+j]; ! 1126: while(tbase[k+l++]=decode(hascii.tree)); ! 1127: p[zz].h.tt=k; ! 1128: j=k; ! 1129: k+=l; ! 1130: l=decode(nsen.tree); ! 1131: for(n=zz+1; n<zz+l; n++) { ! 1132: p[n].s.p=decode(parsp.tree); ! 1133: p[n].s.type=1; ! 1134: } ! 1135: } ! 1136: if(zz!=z)fprintf(STDERR,"botch zz!=z %d %d\n",zz,z); ! 1137: if(k!=text)fprintf(STDERR,"botch k!=text %d %d\n",k,text); ! 1138: p[z].h.type=0; ! 1139: m=n=0; ! 1140: l=readsim(medians); ! 1141: k=0; ! 1142: for(zz=1; zz<z; zz++) { ! 1143: if(p[zz].h.type) { ! 1144: n++; ! 1145: if(l==n-m-1) { ! 1146: p[zz].s.q=1; ! 1147: m=n; ! 1148: sqtab[k].saddr=zz; ! 1149: sqtab[k++].qaddr=sqref[rfixlen(NSQR)]; ! 1150: l=readsim(medians); ! 1151: } ! 1152: } ! 1153: } ! 1154: if(k!=nsq)fprintf(STDERR,"k!=nsq %d %d\n",k,nsq); ! 1155: j=0; ! 1156: for(zz=1; zz<z; zz++) { ! 1157: if(p[zz].h.type==0)continue; ! 1158: l=decode(nwds.tree); ! 1159: p[zz].s.st=j; ! 1160: m=decode(nwdsback.tree); ! 1161: for(i=0; i<m; i++) { ! 1162: packw(j+i,n=rfixlen(WQ)); ! 1163: if(p[n].s.type) { ! 1164: packw(p[n].s.st+p[n].s.v++,zz); ! 1165: } ! 1166: } ! 1167: p[zz].s.v=m; ! 1168: j+=l+1; ! 1169: } ! 1170: if(j!=windex)fprintf(STDERR,"j!=windex %d %d\n",j,windex); ! 1171: m=n=0; ! 1172: l=readsim(medianw); ! 1173: j=0; ! 1174: for(zz=1; zz<z; zz++) { ! 1175: if(p[zz].h.type) { ! 1176: for(ii=p[zz].s.st; jj=unpackw(ii); ii++) { ! 1177: n++; ! 1178: if(n-m-1==l) { ! 1179: m=n; ! 1180: opackw(ii,WQ+jj); ! 1181: i=wqref[rfixlen(NWQR)]; ! 1182: wqtab[j].waddr=ii; ! 1183: wqtab[j++].qaddr=i; ! 1184: l=readsim(medianw); ! 1185: } ! 1186: } ! 1187: } ! 1188: } ! 1189: if(j!=nwq)fprintf(STDERR,"j!=nwq %d %d\n",j,nwq); ! 1190: free(qascii.tree); ! 1191: free(hascii.tree); ! 1192: free(comm.tree); ! 1193: free(nsen.tree); ! 1194: free(parsp.tree); ! 1195: free(nwds.tree); ! 1196: free(nwdsback.tree); ! 1197: } ! 1198: char *rhalloc(); ! 1199: typedef long huffreq; ! 1200: typedef long huffcode; ! 1201: typedef unsigned short hufftree; ! 1202: typedef int huffnum; ! 1203: ! 1204: /* gencodes((huffreq*)freqtab, (huffnum)n, (huffcode*)output); ! 1205: flipcodes((huffnum)n,(huffcode*)output); ! 1206: entree((huffcodes*)output.flipped,(huffnum)n,(hufftree*)treearray) ! 1207: */ ! 1208: #define ENDFLAG 0100000 ! 1209: /* decoding tree is (hufftree) array; start at 0, add next bit to index; ! 1210: if ENDFLAG is set in entry, everything else is the wanted code; ! 1211: if not, take the next offset from the entry; ! 1212: ! 1213: first entry in tree is number of entries in rest of tree; ! 1214: offset 0 corresponds to tr[1]. ! 1215: */ ! 1216: entree(cd,ncd,tr) ! 1217: huffcode *cd; ! 1218: huffnum ncd; ! 1219: hufftree *tr; ! 1220: { ! 1221: long i,j,k; ! 1222: long x; ! 1223: for(i=0; i<2*ncd; i++)tr[i]=0; ! 1224: tr++; ! 1225: k=2; ! 1226: ! 1227: for(i=0; i<ncd; i++) { ! 1228: j=0; ! 1229: for(x=cd[i]; x>1; x>>=1) { ! 1230: j+=x&1; ! 1231: if(tr[j]==0) { ! 1232: if(x>3) { ! 1233: tr[j]=k; ! 1234: k+=2; ! 1235: } ! 1236: else { ! 1237: tr[j]=i|ENDFLAG; ! 1238: } ! 1239: } ! 1240: j=tr[j]; ! 1241: if(x>3&&(j&ENDFLAG)) { ! 1242: err("botched huffman code"); ! 1243: } ! 1244: } ! 1245: } ! 1246: *--tr=k; ! 1247: } ! 1248: /* Huffman codes from counts */ ! 1249: ! 1250: /* gencodes(huffreq freq table, huffnum length, huffcode output code table) ! 1251: output code words have leading 1-bit set, rest is huffman code. ! 1252: program aborts if a code is more than 30 bits -- try increasing ! 1253: the minimum counts. ! 1254: zero frequency counts produce zero code-words (are ignored). ! 1255: ! 1256: flipcodes(huffnum length, huffcode code table) flips the bit order of ! 1257: the huffman code bits. ! 1258: */ ! 1259: ! 1260: struct code { ! 1261: struct code *left,*right; ! 1262: huffreq count; ! 1263: huffnum val; ! 1264: }; ! 1265: ! 1266: struct code *getcode() ! 1267: { ! 1268: struct code *h; ! 1269: h=(struct code*)rhalloc(sizeof*h); ! 1270: if(h==0)err("getcode: can't rhalloc code"); ! 1271: h->left=h->right=0; ! 1272: h->val=h->count=0; ! 1273: return h; ! 1274: } ! 1275: ! 1276: tcompare(a,b) ! 1277: register struct code **a,**b; ! 1278: { ! 1279: if(a[0]->count<b[0]->count)return -1; ! 1280: if(a[0]->count>b[0]->count)return 1; ! 1281: if(a[0]->val<b[0]->val)return -1; ! 1282: if(a[0]->val>b[0]->val)return 1; ! 1283: return 0; ! 1284: } ! 1285: ! 1286: gencodes(infreq,nel,outcodes) ! 1287: huffreq *infreq; ! 1288: huffnum nel; ! 1289: huffcode *outcodes; ! 1290: { ! 1291: struct code **tp,**a,*p,*s; ! 1292: long i,j,k; ! 1293: long na; ! 1294: tp=(struct code**)rhalloc(nel*sizeof*tp); ! 1295: if(tp==0)err("gencodes: can't rhalloc tp"); ! 1296: for(i=0; i<nel; i++) { ! 1297: outcodes[i]=0; ! 1298: tp[i]=getcode(); ! 1299: tp[i]->count=infreq[i]; ! 1300: tp[i]->val=i; ! 1301: } ! 1302: qsort(tp,nel,sizeof*tp,tcompare); ! 1303: for(k=0; k<nel-1; k++) { ! 1304: if(tp[k]->count==0)rhfree(tp[k]); ! 1305: else break; ! 1306: } ! 1307: a=&tp[k]; ! 1308: na=nel-k; ! 1309: /* add smallest two and replace in heap till heap gone */ ! 1310: for(k=na; k>2; ) { ! 1311: j=a[1]->count<a[2]->count?1:2; ! 1312: p=getcode(); ! 1313: p->count=a[0]->count+a[j]->count; ! 1314: p->left=a[0]; ! 1315: p->right=a[j]; ! 1316: a[j]=p; ! 1317: for(i=j; (i+=i+1)<k; j=i) { ! 1318: if(i+1<k&&a[i]->count>a[i+1]->count)i++; ! 1319: if(a[j]->count<=a[i]->count)break; ! 1320: p=a[j]; ! 1321: a[j]=a[i]; ! 1322: a[i]=p; ! 1323: } ! 1324: a[0]=a[--k]; ! 1325: for(i=j=0; (i+=i+1)<k; j=i) { ! 1326: if(i+1<k&&a[i]->count>a[i+1]->count)i++; ! 1327: if(a[j]->count<=a[i]->count)break; ! 1328: p=a[j]; ! 1329: a[j]=a[i]; ! 1330: a[i]=p; ! 1331: } ! 1332: } ! 1333: if(k==2) { ! 1334: p=getcode(); ! 1335: p->count=a[0]->count+a[1]->count; ! 1336: p->left=a[0]; ! 1337: p->right=a[1]; ! 1338: } ! 1339: else p=a[0]; ! 1340: rhfree(tp); ! 1341: ascodes(p,1L,outcodes); ! 1342: } ! 1343: ! 1344: ascodes(p,hcode,outcodes) ! 1345: struct code *p; ! 1346: long *outcodes; ! 1347: long hcode; ! 1348: { ! 1349: if(hcode<=0)err("ascodes: bitcode overflow"); ! 1350: if(p->left==0) { ! 1351: if(p->right!=0)err("ascodes: tree ends wrong"); ! 1352: outcodes[p->val]=hcode; ! 1353: } ! 1354: else { ! 1355: if(p->right==0)err("ascodes: tree ends oddly"); ! 1356: ascodes(p->right,(hcode<<1)|1,outcodes); ! 1357: ascodes(p->left,(hcode<<1)|0,outcodes); ! 1358: } ! 1359: rhfree(p); ! 1360: } ! 1361: ! 1362: flipcodes(nel,codes) ! 1363: huffnum nel; ! 1364: huffcode *codes; ! 1365: { ! 1366: long i,j,k; ! 1367: long x; ! 1368: for(i=0; i<nel; i++) { ! 1369: if(codes[i]==0)continue; ! 1370: x=codes[i]; ! 1371: codes[i]=1; ! 1372: while(x>1) { ! 1373: codes[i]=codes[i]*2|(x&1); ! 1374: x>>=1; ! 1375: } ! 1376: } ! 1377: } ! 1378: /* self-similar Huffman codes for neg exp distr. ! 1379: calls outbitf(1+forward code) ! 1380: calls inbit() ! 1381: */ ! 1382: writesim(median,intv) ! 1383: { ! 1384: int i; ! 1385: static nc,dc,ec,omedian; ! 1386: if(median!=omedian) { ! 1387: omedian=median; ! 1388: for(i=1; (1L<<i)<=median; i++); ! 1389: nc=1L<<--i; ! 1390: dc=2*nc-median; ! 1391: ec=4*nc-median; ! 1392: } ! 1393: while(intv>=median) { ! 1394: outbitf(03L); ! 1395: intv-=median; ! 1396: } ! 1397: outbitf(02L); ! 1398: if(intv<dc)outbitf(intv+nc); ! 1399: else outbitf(ec+intv); ! 1400: } ! 1401: readsim(median) { ! 1402: int i,j,k; ! 1403: static nc,oi,dc,omedian; ! 1404: if(median!=omedian) { ! 1405: omedian=median; ! 1406: for(i=1; (1L<<i)<=median; i++); ! 1407: nc=1L<<--i; ! 1408: oi=i; ! 1409: dc=2*nc-median; ! 1410: } ! 1411: k=0; while(INBIT())k+=median; ! 1412: for(i=j=0; i<oi; i++)j+=j+INBIT(); ! 1413: if(j<dc)return k+j; ! 1414: return k-dc+j+j+INBIT(); ! 1415: } ! 1416: #ifdef AMBIGUOUS ! 1417: prsense(i) ! 1418: { ! 1419: fprintf(STDERR," &%d%s",i-hwof(i),p[hwof(i)].h.tt+tbase); ! 1420: } ! 1421: ambpath(a,b) ! 1422: char *a,*b; ! 1423: { ! 1424: int i,j,k; ! 1425: int froma,fromb,nfroma,nfromb; ! 1426: int e; ! 1427: int it; ! 1428: int pf; ! 1429: pf=0; ! 1430: it=1; ! 1431: e=inset(a); ! 1432: if(e==0) { ! 1433: fprintf(STDERR,"can't find %s\n",a); ! 1434: return; ! 1435: } ! 1436: k=inset(b); ! 1437: if(k==0) { ! 1438: fprintf(STDERR,"%s not found\n",b); ! 1439: return; ! 1440: } ! 1441: for(i=0; i<z; i++)if(p[i].s.type)p[i].s.v=0; else p[i].h.v=0; ! 1442: fromb=0; ! 1443: for(i=0; i<k; i++) { ! 1444: getcol(j); ! 1445: col[j].entry=set[i]; ! 1446: col[j].next=fromb; ! 1447: fromb=j; ! 1448: p[set[i]].s.v=it; ! 1449: } ! 1450: e=inset(a); ! 1451: froma=0; ! 1452: for(i=0; i<e; i++) { ! 1453: getcol(j); ! 1454: col[j].entry=set[i]; ! 1455: col[j].next=froma; ! 1456: froma=j; ! 1457: p[set[i]].s.v=it; ! 1458: } ! 1459: loop: ! 1460: pf=0; ! 1461: for(k=froma; k; k=col[k].next) { ! 1462: j=hwof(col[k].entry); ! 1463: p[j].h.v|=1; ! 1464: if(p[j].h.v==3)pf++; ! 1465: } ! 1466: for(k=fromb; k; k=col[k].next) { ! 1467: j=hwof(col[k].entry); ! 1468: p[j].h.v|=2; ! 1469: if(p[j].h.v==3)pf++; ! 1470: } ! 1471: if(pf>0) { ! 1472: ambiguous(froma,fromb); ! 1473: goto pathx; ! 1474: } ! 1475: if(froma==0&&fromb==0) { ! 1476: fprintf(STDERR,"no link found\n"); ! 1477: return; ! 1478: } ! 1479: it++; ! 1480: nfroma=0; ! 1481: while(froma) { ! 1482: i=p[col[froma].entry].s.st; ! 1483: j=froma; ! 1484: froma=col[j].next; ! 1485: freecol(j); ! 1486: while(j=(unpackw(i++)&WMASK)) { ! 1487: if(p[j].s.type&&p[j].s.v==0) { ! 1488: getcol(k); ! 1489: col[k].next=nfroma; ! 1490: col[k].entry=j; ! 1491: nfroma=k; ! 1492: p[j].s.v=it; ! 1493: } ! 1494: } ! 1495: } ! 1496: froma=nfroma; ! 1497: nfromb=0; ! 1498: while(fromb) { ! 1499: i=p[col[fromb].entry].s.st; ! 1500: j=fromb; ! 1501: fromb=col[j].next; ! 1502: freecol(j); ! 1503: while(j=(unpackw(i++)&WMASK)) { ! 1504: if(p[j].s.type&&p[j].s.v==0) { ! 1505: getcol(k); ! 1506: col[k].next=nfromb; ! 1507: col[k].entry=j; ! 1508: nfromb=k; ! 1509: p[j].s.v=it; ! 1510: } ! 1511: } ! 1512: } ! 1513: fromb=nfromb; ! 1514: goto loop; ! 1515: pathx: ! 1516: freeall(froma); ! 1517: freeall(fromb); ! 1518: } ! 1519: hwof(i) ! 1520: { ! 1521: while(i>0&&p[i].h.type!=0)i--; ! 1522: if(i==0)err("hwof()"); ! 1523: return i; ! 1524: } ! 1525: ambiguous(fa,fb) ! 1526: { ! 1527: int f,g,i,j,k; ! 1528: int dbest; ! 1529: int enda,endb; ! 1530: int la,lb; ! 1531: int length; ! 1532: int best,a; ! 1533: dbest=9999; ! 1534: for(best=0; best<2; best++) ! 1535: for(f=fa; f; f=(f==fb)?0:fb)for(g=f; g; g=col[g].next) { ! 1536: enda=endb=0; ! 1537: la=lb=9999; ! 1538: i=hwof(col[g].entry); ! 1539: if(p[i].h.v!=3)continue; ! 1540: for(j=1; p[i+j].s.type; j++) { ! 1541: if(p[i+j].s.v==0)continue; ! 1542: a=pathtoend(i+j); ! 1543: if(enda==0)enda=a,la=p[i+j].s.v; ! 1544: else ! 1545: if(hwof(col[a].entry)==hwof(col[enda].entry)) { ! 1546: if(p[i+j].s.v<la) { ! 1547: freeall(enda); ! 1548: enda=a; ! 1549: la=p[i+j].s.v; ! 1550: } ! 1551: else freeall(a); ! 1552: } ! 1553: else if(endb==0)endb=a,lb=p[i+j].s.v; ! 1554: else ! 1555: if(hwof(col[a].entry)==hwof(col[endb].entry)) { ! 1556: if(p[i+j].s.v<lb) { ! 1557: freeall(endb); ! 1558: endb=a; ! 1559: lb=p[i+j].s.v; ! 1560: } ! 1561: else freeall(a); ! 1562: } ! 1563: } ! 1564: length=la+lb; ! 1565: if(best==0&&length<dbest)dbest=length; ! 1566: else if(best==1&&length==dbest) { ! 1567: prrev(enda); ! 1568: fprintf(STDERR,"\n"); ! 1569: prrev(endb); ! 1570: fprintf(STDERR,"\n\n"); ! 1571: p[i].h.v=0; ! 1572: } ! 1573: freeall(enda); ! 1574: freeall(endb); ! 1575: } ! 1576: } ! 1577: pathtoend(x) ! 1578: { ! 1579: int i,j,k; ! 1580: for(k=0;;) { ! 1581: getcol(j); ! 1582: col[j].entry=x; ! 1583: col[j].next=k; ! 1584: k=j; ! 1585: if(p[x].s.v<=1)return k; ! 1586: i=p[x].s.st; ! 1587: while(j=(WMASK&unpackw(i++))) { ! 1588: if(p[j].s.type&&p[j].s.v==p[x].s.v-1)break; ! 1589: } ! 1590: x=j; ! 1591: } ! 1592: } ! 1593: prrev(i) ! 1594: { ! 1595: if(i==0)return; ! 1596: prrev(col[i].next); ! 1597: prsense(col[i].entry); ! 1598: } ! 1599: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.