|
|
1.1 ! root 1: 0707070000000000011006440044230044230000010000000467126123100002400000000211Makefile Ugsf Ggsf /* ! 2: * old kpv delta/update/malloc library ! 3: */ ! 4: ! 5: SYSV == 1 ! 6: ! 7: odelta :LIBRARY: update.h suftree.h \ ! 8: delta.c mtchstring.c suftree.c update.c ! 9: 0707070000000000021006440044230044230000010000000425126261600002300000022032delta.c Ugsf Ggsf #include "update.h" ! 10: #include "suftree.h" ! 11: ! 12: /* ! 13: ** Compute delta commands to transform the source string 'src' ! 14: ** to the target string 'tar'. Small blockmoves are transformed ! 15: ** to blockadds for space efficiency. ! 16: ** Return -1 in case of error. ! 17: ** ! 18: ** For details on computing blockmoves, see: ! 19: ** "The String-to-String Correction Problem with Block Moves" ! 20: ** W. Tichy, ACM TOCS, v.2, No.4, 1984, pp.309-321. ! 21: ** ! 22: ** Written by Kiem-Phong Vo, 5/18/88 ! 23: */ ! 24: ! 25: extern char *malloc(); ! 26: ! 27: #define M_MAX 9 /* max size of a block move instruction */ ! 28: #define A_MAX 5 /* max size of the header of an add instruction */ ! 29: ! 30: /* structure of a delta instruction */ ! 31: typedef struct _m_ ! 32: { ! 33: int type; /* DELTA_MOVE or DELTA_ADD */ ! 34: long size; /* size of the block being moved or added */ ! 35: long addr; /* offset where the block starts */ ! 36: struct _m_ *last; /* doubly linked list for easy insert/delete */ ! 37: struct _m_ *next; ! 38: } Move; ! 39: ! 40: /* bases of the source and target strings */ ! 41: static char *Bsrc, *Btar; ! 42: ! 43: /* Data buffer area */ ! 44: static char *Ddata, *Dend, *Dnext; ! 45: static int Dfd; ! 46: ! 47: #define delinit(buf,fd) (Ddata=Dnext=buf, Dend=buf+BUFSIZE, Dfd=fd) ! 48: #define delflush() (write(Dfd,Ddata,Dnext-Ddata) >= 0 ? (Dnext=Ddata,0) : -1) ! 49: ! 50: static int delputc(byte) ! 51: char byte; ! 52: { ! 53: if(Dnext == Dend) ! 54: if(delflush() < 0) ! 55: return -1; ! 56: *Dnext++ = byte; ! 57: return 0; ! 58: } ! 59: ! 60: static int delputl(n,v) ! 61: register int n; ! 62: register long v; ! 63: { ! 64: register int i; ! 65: unsigned char c[4]; ! 66: ! 67: for(i = 0; i < n; ++i) ! 68: { ! 69: c[i] = (unsigned char)(v%BASE); ! 70: v /= BASE; ! 71: } ! 72: for(i = n-1; i >= 0; --i) ! 73: if(delputc((char)c[i]) < 0) ! 74: return -1; ! 75: return 0; ! 76: } ! 77: ! 78: static int delputs(n,addr) ! 79: register long n; ! 80: register long addr; ! 81: { ! 82: if(n < (Dend-Dnext)) ! 83: { ! 84: memcopy(Dnext,Btar+addr,n); ! 85: Dnext += n; ! 86: } ! 87: else ! 88: { ! 89: if(delflush() < 0) ! 90: return -1; ! 91: if(write(Dfd,Btar+addr,n) != n) ! 92: return -1; ! 93: } ! 94: return 0; ! 95: } ! 96: ! 97: /* write an instruction */ ! 98: static int putMove(this) ! 99: Move *this; ! 100: { ! 101: register char inst; ! 102: ! 103: inst = this->type; ! 104: inst |= (NBYTE(this->size)&07) << 3; ! 105: if(this->type == DELTA_MOVE) ! 106: { ! 107: inst |= NBYTE(this->addr)&07; ! 108: if(delputc(inst) < 0 || ! 109: delputl(NBYTE(this->size),this->size) < 0 || ! 110: delputl(NBYTE(this->addr),this->addr) < 0) ! 111: return -1; ! 112: } ! 113: else ! 114: { ! 115: if(delputc(inst) < 0 || ! 116: delputl(NBYTE(this->size),this->size) < 0 || ! 117: delputs(this->size,this->addr) < 0) ! 118: return -1; ! 119: } ! 120: return 0; ! 121: } ! 122: ! 123: /* constructor for Move */ ! 124: static Move *newMove(type,size,addr,last) ! 125: int type; ! 126: long size; ! 127: long addr; ! 128: Move *last; ! 129: { ! 130: register Move *this = (Move*) malloc(sizeof(Move)); ! 131: if(this == NIL(Move)) ! 132: return NIL(Move); ! 133: this->type = type; ! 134: this->size = size; ! 135: this->addr = addr; ! 136: if(last) ! 137: { ! 138: last->next = this; ! 139: this->last = last; ! 140: } ! 141: else this->last = NIL(Move); ! 142: this->next = NIL(Move); ! 143: return this; ! 144: } ! 145: ! 146: /* destructor for Move, return the elt after move */ ! 147: static Move *delMove(this) ! 148: Move *this; ! 149: { ! 150: register Move *next = this->next; ! 151: register Move *last = this->last; ! 152: if(last) ! 153: last->next = next; ! 154: if(next) ! 155: next->last = last; ! 156: free((char*)this); ! 157: return next ? next : last; ! 158: } ! 159: ! 160: /* make a new add command */ ! 161: static Move *makeAdd(beg,end,last) ! 162: char *beg, *end; ! 163: Move *last; ! 164: { ! 165: register Move *this; ! 166: ! 167: this = newMove(DELTA_ADD,(long)(end-beg),(long)(beg-Btar),NIL(Move)); ! 168: if(!this) ! 169: return NIL(Move); ! 170: ! 171: /* remove small previous adjacent moves */ ! 172: while(last) ! 173: { ! 174: register int a_size, cost_m, cost_a; ! 175: ! 176: if(last->type == DELTA_ADD) ! 177: break; ! 178: ! 179: cost_m = NBYTE(last->size) + NBYTE(last->addr) + ! 180: NBYTE(this->size) + this->size; ! 181: a_size = this->size + last->size; ! 182: cost_a = NBYTE(a_size) + a_size; ! 183: if(cost_m < cost_a) ! 184: break; ! 185: ! 186: this->size = a_size; ! 187: this->addr -= last->size; ! 188: last = delMove(last); ! 189: } ! 190: ! 191: /* merge with adjacent adds */ ! 192: if(last && last->type == DELTA_ADD) ! 193: { ! 194: this->size += last->size; ! 195: this->addr -= last->size; ! 196: last = delMove(last); ! 197: } ! 198: ! 199: if(last) ! 200: { ! 201: last->next = this; ! 202: this->last = last; ! 203: } ! 204: return this; ! 205: } ! 206: ! 207: /* check to see if a move is appropriate */ ! 208: static int chkMove(m_size,m_addr,a_size) ! 209: long m_size, m_addr, a_size; ! 210: { ! 211: register int cost_m, cost_a; ! 212: ! 213: cost_m = NBYTE(m_size) + NBYTE(m_addr); ! 214: cost_a = NBYTE(m_size) + m_size; ! 215: if(cost_m >= cost_a) ! 216: return 0; ! 217: ! 218: /* it's good but it may be better to merge it to an add */ ! 219: if(a_size > 0) ! 220: { ! 221: register int m_cost, a_cost; ! 222: ! 223: m_cost = cost_m + NBYTE(a_size) + a_size; ! 224: a_size += m_size; ! 225: a_cost = NBYTE(a_size) + a_size; ! 226: ! 227: /* it is better to merge! */ ! 228: if(m_cost >= a_cost) ! 229: return 0; ! 230: } ! 231: ! 232: return m_size; ! 233: } ! 234: ! 235: /* optimize a sequence of moves */ ! 236: static Move *optMove(s) ! 237: register Move *s; ! 238: { ! 239: register long add, m_cost, a_cost; ! 240: register Move *this, *last; ! 241: ! 242: add = (s->last && s->last->type == DELTA_ADD) ? s->last->size : 0; ! 243: ! 244: m_cost = 0; ! 245: a_cost = 0; ! 246: for(this = s; this != NIL(Move); this = this->next) ! 247: { ! 248: register long cost_m, cost_a; ! 249: ! 250: if(this->type == DELTA_ADD || this->size > (M_MAX+A_MAX)) ! 251: break; ! 252: ! 253: m_cost += 1+NBYTE(this->size)+NBYTE(this->addr); ! 254: a_cost += this->size; ! 255: ! 256: /* costs of alternatives */ ! 257: cost_m = m_cost; ! 258: cost_a = a_cost; ! 259: if(add > 0) ! 260: { ! 261: cost_m += 1 + add + NBYTE(add); ! 262: cost_a += add; ! 263: } ! 264: if(this->next && this->next->type == DELTA_ADD) ! 265: { ! 266: cost_m += 1 + this->next->size + NBYTE(this->next->size); ! 267: cost_a += this->next->size; ! 268: } ! 269: cost_a += 1 + NBYTE(cost_a); ! 270: ! 271: /* conversion is bad */ ! 272: if(cost_m < cost_a) ! 273: continue; ! 274: ! 275: /* convert the entire sequence to an add */ ! 276: s->type = DELTA_ADD; ! 277: while(this != s) ! 278: { ! 279: last = this->last; ! 280: s->size += this->size; ! 281: (void)delMove(this); ! 282: this = last; ! 283: } ! 284: ! 285: /* merge adjacent adds */ ! 286: if((last = s->last) && last->type == DELTA_ADD) ! 287: { ! 288: last->size += s->size; ! 289: (void)delMove(s); ! 290: s = last; ! 291: } ! 292: if(s->next && s->next->type == DELTA_ADD) ! 293: { ! 294: s->size += s->next->size; ! 295: (void)delMove(s->next); ! 296: } ! 297: /* done */ ! 298: break; ! 299: } ! 300: return s; ! 301: } ! 302: ! 303: /* the real thing */ ! 304: delta(src,n_src,tar,n_tar,delfd) ! 305: char *src; ! 306: long n_src; ! 307: char *tar; ! 308: long n_tar; ! 309: int delfd; ! 310: { ! 311: register char *sp, *tp, *esrc, *etar; ! 312: register long size, addr; ! 313: Suftree *tree; ! 314: Move *moves, *last; ! 315: char inst, buf[BUFSIZE]; ! 316: long mtchstring(); ! 317: ! 318: /* initialize the output area */ ! 319: delinit(buf,delfd); ! 320: ! 321: /* output file sizes */ ! 322: inst = DELTA_TYPE | ((NBYTE(n_src)&07) << 3) | (NBYTE(n_tar)&07); ! 323: if(delputc(inst) < 0) ! 324: return -1; ! 325: if(delputl(NBYTE(n_src),n_src) < 0 || delputl(NBYTE(n_tar),n_tar) < 0) ! 326: return -1; ! 327: ! 328: /* bases */ ! 329: Bsrc = src; ! 330: Btar = tar; ! 331: esrc = src + n_src - 1; ! 332: etar = tar + n_tar - 1; ! 333: ! 334: /* initialize list and last block */ ! 335: moves = NIL(Move); ! 336: last = NIL(Move); ! 337: ! 338: /* try making suffix tree */ ! 339: if(!(tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree))) ! 340: { ! 341: /* not enough space for tree, remove matching prefix and suffix */ ! 342: for(; src <= esrc && tar <= etar; ++src, ++tar) ! 343: if(*src != *tar) ! 344: break; ! 345: if((size = src-Bsrc) > 0) ! 346: { ! 347: register int cost_m, cost_a; ! 348: ! 349: cost_m = NBYTE(size) + NBYTE(0); ! 350: cost_a = NBYTE(size) + size; ! 351: if(cost_m < cost_a) ! 352: { ! 353: moves = newMove(DELTA_MOVE,size,0L,NIL(Move)); ! 354: if(!moves) ! 355: return -1; ! 356: n_src -= src-Bsrc; ! 357: n_tar -= tar-Btar; ! 358: } ! 359: else ! 360: { ! 361: src = Bsrc; ! 362: tar = Btar; ! 363: } ! 364: } ! 365: ! 366: for(sp = esrc, tp = etar; sp >= src && tp >= tar; --sp, --tp) ! 367: if(*sp != *tp) ! 368: break; ! 369: if((size = esrc-sp) > 0) ! 370: { ! 371: addr = sp+1-Bsrc; ! 372: if(chkMove(size,addr,0L) > 0) ! 373: { ! 374: last = newMove(DELTA_MOVE,size,addr,NIL(Move)); ! 375: if(!last) ! 376: return -1; ! 377: esrc = sp; ! 378: etar = tp; ! 379: n_tar = etar-tar+1; ! 380: n_src = esrc-src+1; ! 381: } ! 382: } ! 383: ! 384: /* try making the tree again */ ! 385: tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree); ! 386: } ! 387: ! 388: /* compute block moves */ ! 389: tp = NIL(char); ! 390: while(n_tar > 0) ! 391: { ! 392: char *match; ! 393: ! 394: if(tree) ! 395: size = mtchsuftree(tree,tar,n_tar,&match); ! 396: else size = mtchstring(src,n_src,tar,n_tar,&match); ! 397: if(size < 0) ! 398: return -1; ! 399: if(size > 0) ! 400: size = chkMove(size,(long)(match-Bsrc),(long)(tp ? tp-tar : 0)); ! 401: ! 402: /* output a block move */ ! 403: if(size > 0) ! 404: { ! 405: if(tp) ! 406: { ! 407: moves = makeAdd(tp,tar,moves); ! 408: if(!moves) ! 409: return -1; ! 410: tp = NIL(char); ! 411: } ! 412: moves = newMove(DELTA_MOVE,size,(long)(match-Bsrc),moves); ! 413: if(!moves) ! 414: return -1; ! 415: tar += size; ! 416: n_tar -= size; ! 417: } ! 418: else ! 419: { ! 420: if(!tp) ! 421: tp = tar; ! 422: tar += 1; ! 423: n_tar -= 1; ! 424: } ! 425: } ! 426: ! 427: /* add any remaining blocks */ ! 428: if(tp) ! 429: { ! 430: if(last && chkMove(last->size,last->addr,(long)(tar-tp)) <= 0) ! 431: { ! 432: tar += last->size; ! 433: last = delMove(last); ! 434: } ! 435: moves = makeAdd(tp,tar,moves); ! 436: if(!moves) ! 437: return -1; ! 438: } ! 439: if(last) ! 440: { ! 441: moves->next = last; ! 442: last->last = moves; ! 443: } ! 444: ! 445: /* release space use for string matching */ ! 446: if(tree) ! 447: delsuftree(tree); ! 448: else mtchstring(NIL(char),0L,NIL(char),0L,NIL(char*)); ! 449: ! 450: /* optimize move instructions */ ! 451: if(moves) ! 452: { ! 453: register Move *this; ! 454: ! 455: this = moves; ! 456: while(this->last) ! 457: this = this->last; ! 458: for(; this != NIL(Move); this = this->next) ! 459: if(this->type == DELTA_MOVE && this->size <= (M_MAX+A_MAX)) ! 460: moves = this = optMove(this); ! 461: ! 462: while(moves->last) ! 463: moves = moves->last; ! 464: } ! 465: ! 466: /* write out the move instructions */ ! 467: addr = 0L; ! 468: while(moves) ! 469: { ! 470: if(moves->type == DELTA_ADD) ! 471: moves->addr = addr; ! 472: addr += moves->size; ! 473: if(putMove(moves) < 0) ! 474: return -1; ! 475: moves = delMove(moves); ! 476: } ! 477: ! 478: /* write ending token */ ! 479: delputc((char)DELTA_TYPE); ! 480: ! 481: /* flush buffer */ ! 482: return delflush(); ! 483: } ! 484: 0707070000000000031006440044230044230000010000000425126261600003000000006234mtchstring.c Ugsf Ggsf #include "update.h" ! 485: ! 486: ! 487: /* ! 488: ** Find the longest prefix of tar that match some substring of src ! 489: ** This uses an inverted index for speed. ! 490: */ ! 491: #define ALPHA (256) /* alphabet size */ ! 492: ! 493: static void freemem(n_index,index) ! 494: long *n_index; ! 495: char ***index; ! 496: { ! 497: register int i; ! 498: if(n_index && index) ! 499: { ! 500: for(i = 0; i < ALPHA; ++i) ! 501: if(n_index[i] > 0) ! 502: free(index[i]); ! 503: free(index); ! 504: free(n_index); ! 505: } ! 506: } ! 507: ! 508: /* initial assumptions: src[0] == tar[0] && src+n_match <= endsrc */ ! 509: static long domatch(src,endsrc,tar,endtar,n_match) ! 510: char *src, *endsrc, *tar, *endtar; ! 511: long n_match; ! 512: { ! 513: register char *sp, *tp; ! 514: ! 515: /* see if this really improves on the current match */ ! 516: for(sp = src+n_match, tp = tar+n_match; sp > src; --sp, --tp) ! 517: if(*sp != *tp) ! 518: break; ! 519: ! 520: /* got an improvement, match as many more as we can */ ! 521: if(sp == src) ! 522: { ! 523: sp = src+n_match+1; ! 524: tp = tar+n_match+1; ! 525: for(; sp < endsrc && tp < endtar; ++sp, ++tp) ! 526: if(*sp != *tp) ! 527: break; ! 528: } ! 529: return tp-tar; ! 530: } ! 531: ! 532: /* the real thing */ ! 533: long mtchstring(src,n_src,tar,n_tar,match) ! 534: char *src; /* the source string to match from */ ! 535: long n_src; /* the length of src */ ! 536: char *tar; /* the string a prefix of which is matched */ ! 537: long n_tar; /* the length of tar */ ! 538: char **match; /* to return the matched address in src */ ! 539: { ! 540: char *endsrc, *endtar; ! 541: long n_match; ! 542: register int i; ! 543: register long n_ind; ! 544: register char **ind; ! 545: static long *N_index = NIL(long); ! 546: static char *Cursrc = NIL(char), ***Index = NIL(char**); ! 547: static int Alloced = 0; ! 548: char **malloc(), **realloc(); ! 549: ! 550: /* free old inverted indices if necessary */ ! 551: if(src != Cursrc) ! 552: { ! 553: if(Alloced) ! 554: freemem(N_index,Index); ! 555: Alloced = 0; ! 556: Cursrc = NIL(char); ! 557: } ! 558: ! 559: /* nothing to do */ ! 560: if(!src || n_src <= 0 || !tar || n_tar <= 0) ! 561: return 0; ! 562: ! 563: endsrc = src + n_src; ! 564: endtar = tar + n_tar; ! 565: ! 566: /* build new index structure */ ! 567: if(src != Cursrc) ! 568: { ! 569: Cursrc = src; ! 570: Alloced = 1; ! 571: if(N_index = (long*) malloc(ALPHA*sizeof(long))) ! 572: { ! 573: register char *sp; ! 574: ! 575: memzero((char*)N_index,ALPHA*sizeof(long)); ! 576: if(!(Index = (char ***) malloc(ALPHA*sizeof(char**)))) ! 577: { ! 578: free(N_index); ! 579: N_index = NIL(long); ! 580: Alloced = 0; ! 581: } ! 582: else for(sp = src; sp < endsrc; ++sp) ! 583: { ! 584: i = (int)(*((unsigned char *)(sp))); ! 585: ind = Index[i]; ! 586: n_ind = N_index[i]; ! 587: ! 588: /* make sure we have space */ ! 589: if((n_ind%ALPHA) == 0) ! 590: { ! 591: ind = n_ind == 0 ? ! 592: malloc(ALPHA*sizeof(char *)) : ! 593: realloc(ind,(n_ind+ALPHA)*sizeof(char*)); ! 594: Index[i] = ind; ! 595: } ! 596: if(ind) ! 597: { ! 598: ind[n_ind] = sp; ! 599: N_index[i] += 1; ! 600: } ! 601: else ! 602: { ! 603: freemem(N_index,Index); ! 604: N_index = NIL(long); ! 605: Index = NIL(char**); ! 606: Alloced = 0; ! 607: break; ! 608: } ! 609: } ! 610: } ! 611: } ! 612: ! 613: /* find longest matching prefix */ ! 614: i = (int)(*((unsigned char *)(tar))); ! 615: ind = Alloced ? Index[i] : NULL; ! 616: n_ind = Alloced ? N_index[i] : -1; ! 617: n_match = 0; ! 618: while(1) ! 619: { ! 620: register long m; ! 621: ! 622: if(ind) ! 623: { ! 624: src = n_ind > 0 ? *ind++ : endsrc; ! 625: n_ind -= 1; ! 626: } ! 627: else for(; src+n_match < endsrc; ++src) ! 628: if(*src == *tar) ! 629: break; ! 630: if(src+n_match >= endsrc) ! 631: break; ! 632: ! 633: if((m = domatch(src,endsrc,tar,endtar,n_match)) > n_match) ! 634: { ! 635: n_match = m; ! 636: *match = src; ! 637: if(tar+n_match >= endtar) ! 638: break; ! 639: } ! 640: } ! 641: ! 642: return n_match; ! 643: } ! 644: 0707070000000000041006440044230044230000010000000425126261700002500000017763suftree.c Ugsf Ggsf #define _IN_SUF_TREE ! 645: #include "suftree.h" ! 646: ! 647: /* ! 648: ** Construct a suffix tree for a source string ! 649: ** and perform string matching of various input strings. ! 650: ** This is based on the suffix tree algorithm of E. McCreight. ! 651: ** I extended the algorithm to remove the restriction that the ! 652: ** last element of the string has to be different from the rest ! 653: ** of the string. Note also that since the alphabet is large (256), ! 654: ** instead of being stored in an array, the children of a node ! 655: ** are kept in a linked list which is managed by a move-to-front ! 656: ** heuristic. ! 657: ** ! 658: ** For details, see the paper: ! 659: ** "A Space-Economical Suffix Tree Construction Algorithm" ! 660: ** E.M. McCreight, JACM, v.23, No.2, 1976, pp.262-272. ! 661: ** ! 662: ** BldSuftree returns either NULL or the pointer to the root of the ! 663: ** tree. In the latter case, the tree can be destroyed by free()-ing ! 664: ** the root. ! 665: ** ! 666: ** Written by Kiem-Phong Vo, 5/20/88. ! 667: */ ! 668: ! 669: /* Delete a suffix tree */ ! 670: void delsuftree(root) ! 671: Suftree *root; ! 672: { ! 673: if(!root) ! 674: return; ! 675: root -= 1; ! 676: while(root) ! 677: { ! 678: register Suftree *next; ! 679: next = NEXT(root); ! 680: free(root); ! 681: root = next; ! 682: } ! 683: } ! 684: ! 685: /* Find a child whose label string starts with a given character */ ! 686: static Suftree *child_find(node,c) ! 687: Suftree *node; /* the node whose children will be searched */ ! 688: Element c; /* the element being looked for */ ! 689: { ! 690: register Suftree *np, *last; ! 691: ! 692: last = NIL(Suftree); ! 693: for(np = CHILD(node); np != NIL(Suftree); np = SIBLING(np)) ! 694: if(LENGTH(np) > 0 && *LABEL(np) == c) ! 695: break; ! 696: else last = np; ! 697: ! 698: if(np && last) ! 699: { ! 700: /* move-to-front heuristic */ ! 701: SIBLING(last) = SIBLING(np); ! 702: SIBLING(np) = CHILD(node); ! 703: CHILD(node) = np; ! 704: } ! 705: return np; ! 706: } ! 707: ! 708: /* Get space for tree nodes. */ ! 709: static Suftree *getmem(root,n) ! 710: Suftree *root; ! 711: int n; ! 712: { ! 713: Suftree *list, *malloc(), **realloc(); ! 714: ! 715: if((list = malloc((n+1)*sizeof(Suftree))) == NIL(Suftree)) ! 716: { ! 717: if(root) ! 718: delsuftree(root); ! 719: return NIL(Suftree); ! 720: } ! 721: ! 722: /* store where this list is for later deletion */ ! 723: NEXT(list) = NIL(Suftree); ! 724: if(root) ! 725: { ! 726: for(root -= 1; NEXT(root) != NIL(Suftree); root = NEXT(root)) ! 727: ; ! 728: NEXT(root) = list; ! 729: } ! 730: ! 731: return list+1; ! 732: } ! 733: ! 734: /* Build the tree. ! 735: ** Following are the meaning of a few important variables: ! 736: ** clocus: contracted locus, this variable defines the ! 737: ** tree node that points to the longest prefix ! 738: ** that terminates at a node in the current tree. ! 739: ** locus: defines a tree node to be constructed that ! 740: ** points to the longest prefix that can be defined. ! 741: ** Unless both clocus and locus equal the root, ! 742: ** we maintain the invariance that clocus is the ! 743: ** parent of locus. ! 744: ** link: defines the sublink of the locus of the prefix ! 745: ** of the previously inserted suffix. ! 746: */ ! 747: Suftree *bldsuftree(src,len) ! 748: Element *src; /* the string to build the tree from */ ! 749: long int len; /* the length of the string */ ! 750: { ! 751: register Element *sp, *mp, *rescan, *endmatch, *endsrc; ! 752: register Suftree *match, *clocus, *locus, *link; ! 753: register long int mtlen, relen; ! 754: register int n; ! 755: Suftree *root, *list, *endlist; ! 756: ! 757: if(len == 0) ! 758: return NIL(Suftree); ! 759: ! 760: /* get initial space for the tree nodes */ ! 761: root = NIL(Suftree); ! 762: ! 763: /* 2*len+1 is the maximum number of nodes that can be created */ ! 764: n = 2*len+1; ! 765: if(n > ALLOCSIZE) ! 766: n = ALLOCSIZE; ! 767: if(!(list = getmem(root,n))) ! 768: return NIL(Suftree); ! 769: endlist = list + n; ! 770: ! 771: /* make root node */ ! 772: root = list++; ! 773: LABEL(root) = NIL(Element); ! 774: CHILD(root) = NIL(Suftree); ! 775: LINK(root) = root; ! 776: ! 777: /* locus and contracted locus of the empty substring */ ! 778: locus = clocus = root; ! 779: ! 780: /* the current match length */ ! 781: mtlen = 0; ! 782: ! 783: /* the end of the source string */ ! 784: endsrc = src+len; ! 785: ! 786: /* now build the tree */ ! 787: for(; src < endsrc; ++src) ! 788: { ! 789: /* prepare for scanning the current suffix */ ! 790: if(locus != root) ! 791: { ! 792: /* define the string to be rescanned */ ! 793: rescan = LABEL(locus); ! 794: relen = LENGTH(locus); ! 795: ! 796: /* minus the first character of the previous prefix */ ! 797: if(clocus == root) ! 798: { ! 799: rescan++; ! 800: if(relen > 0) ! 801: --relen; ! 802: } ! 803: } ! 804: else mtlen = relen = 0; ! 805: ! 806: /* the length of the known-to-be-matched part */ ! 807: if(mtlen > 0) ! 808: --mtlen; ! 809: /**/ ASSERT(relen <= mtlen) ! 810: ! 811: /* use sublink to rescan */ ! 812: link = LINK(clocus); ! 813: ! 814: /* rescan */ ! 815: while(relen > 0) ! 816: { ! 817: /* find a child of link that starts with the ! 818: first character of rescan. We then know that ! 819: rescan must match a prefix of that child. ! 820: */ ! 821: match = child_find(link,*rescan); ! 822: /**/ ASSERT(match != NULL) ! 823: ! 824: /* clocus will be the parent of the new link */ ! 825: clocus = link; ! 826: ! 827: /* rescan contains LABEL(match) */ ! 828: if(relen >= LENGTH(match)) ! 829: { ! 830: link = match; ! 831: relen -= LENGTH(match); ! 832: rescan += LENGTH(match); ! 833: } ! 834: /* rescan is a proper prefix of LABEL(match) */ ! 835: else ! 836: { ! 837: if(list >= endlist) ! 838: { ! 839: if(!(list = getmem(root,ALLOCSIZE))) ! 840: return NIL(Suftree); ! 841: else endlist = list+ALLOCSIZE; ! 842: } ! 843: ! 844: /* make an internal node labeled with rescan */ ! 845: LABEL(list) = rescan; ! 846: LENGTH(list) = relen; ! 847: CHILD(list) = match; ! 848: SIBLING(list) = SIBLING(match); ! 849: LINK(list) = root; ! 850: ! 851: /* adjust label and pointer of old node */ ! 852: SIBLING(match) = NIL(Suftree); ! 853: LABEL(match) += relen; ! 854: LENGTH(match) -= relen; ! 855: ! 856: CHILD(link) = list; ! 857: link = list++; ! 858: break; ! 859: } ! 860: } ! 861: ! 862: /* define sublink for the prefix of the last suffix */ ! 863: if(locus != root) ! 864: LINK(locus) = link; ! 865: ! 866: /* scan to match as much as possible */ ! 867: locus = link; ! 868: sp = src + mtlen; ! 869: while(sp < endsrc) ! 870: { ! 871: /* see if it matches some child of clocus */ ! 872: if(!(match = child_find(locus,*sp))) ! 873: break; ! 874: ! 875: /* clocus will be the parent of the new locus */ ! 876: clocus = locus; ! 877: ! 878: /* find the extend of the match */ ! 879: mp = LABEL(match); ! 880: endmatch = mp + LENGTH(match); ! 881: for(; sp < endsrc && mp < endmatch; ++sp, ++mp) ! 882: if(*sp != *mp) ! 883: break; ! 884: ! 885: /* the whole node is matched */ ! 886: if(mp >= endmatch) ! 887: { ! 888: locus = match; ! 889: mtlen += LENGTH(match); ! 890: } ! 891: /* found the extended locus of this suffix */ ! 892: else ! 893: { ! 894: if(list >= endlist) ! 895: { ! 896: if(!(list = getmem(root,ALLOCSIZE))) ! 897: return NIL(Suftree); ! 898: else endlist = list+ALLOCSIZE; ! 899: } ! 900: ! 901: /* make a new internal node */ ! 902: LABEL(list) = LABEL(match); ! 903: LENGTH(list) = mp - LABEL(match); ! 904: CHILD(list) = match; ! 905: SIBLING(list) = SIBLING(match); ! 906: LINK(list) = root; ! 907: ! 908: SIBLING(match) = NIL(Suftree); ! 909: LABEL(match) += LENGTH(list); ! 910: LENGTH(match) -= LENGTH(list); ! 911: mtlen += LENGTH(list); ! 912: ! 913: /* the new node is the locus for this suffix */ ! 914: CHILD(locus) = list; ! 915: locus = list++; ! 916: break; ! 917: } ! 918: } ! 919: ! 920: if(list >= endlist) ! 921: { ! 922: if(!(list = getmem(root,ALLOCSIZE))) ! 923: return NIL(Suftree); ! 924: else endlist = list+ALLOCSIZE; ! 925: } ! 926: ! 927: /* make a new external node for the suffix */ ! 928: SUFFIX(list) = src; ! 929: LABEL(list) = sp; ! 930: LENGTH(list) = endsrc-sp; ! 931: CHILD(list) = NIL(Suftree); ! 932: ! 933: /* hook it in as the first child of locus */ ! 934: SIBLING(list) = CHILD(locus); ! 935: CHILD(locus) = list++; ! 936: } ! 937: ! 938: return root; ! 939: } ! 940: ! 941: /* Given a raw string and a string represented in a suffix tree, ! 942: match the string against the tree to find a longest matching ! 943: prefix of the string. ! 944: Return the length of the match and where it occurs in the ! 945: string represented by the tree. ! 946: */ ! 947: long mtchsuftree(tree,str,len,mtchp) ! 948: Suftree *tree; /* the root of the suffix tree */ ! 949: Element *str; /* the string to be matched */ ! 950: long len; /* the length of the string */ ! 951: Element **mtchp; /* to return the address of the match */ ! 952: { ! 953: register Suftree *match; ! 954: register Element *sp, *mp, *endmp, *endstr; ! 955: register long mlen; ! 956: ! 957: mlen = 0; ! 958: endstr = str + len; ! 959: while(1) ! 960: { ! 961: if(!(match = child_find(tree,*str))) ! 962: break; ! 963: ! 964: /* find the extent of the match */ ! 965: mp = LABEL(match); ! 966: endmp = mp + LENGTH(match); ! 967: for(sp = str; sp < endstr && mp < endmp; ++sp, ++mp) ! 968: if(*sp != *mp) ! 969: break; ! 970: ! 971: /* update the length of the match */ ! 972: mlen += sp-str; ! 973: ! 974: /* prepare for next iteration */ ! 975: tree = match; ! 976: str = sp; ! 977: ! 978: /* see if we have to work any more */ ! 979: if(mp < endmp || str >= endstr) ! 980: break; ! 981: } ! 982: ! 983: if(mlen == 0) /* no match */ ! 984: *mtchp = NIL(Element); ! 985: else ! 986: { ! 987: /* find where the match starts */ ! 988: while(CHILD(tree)) ! 989: tree = CHILD(tree); ! 990: *mtchp = SUFFIX(tree); ! 991: } ! 992: ! 993: return mlen; ! 994: } ! 995: 0707070000000000051006440044230044230000010000000425126261700002500000002273suftree.h Ugsf Ggsf /* the type of list elements we play with */ ! 996: typedef char Element; ! 997: ! 998: /* for suffix trees, a tree node looks like this */ ! 999: typedef struct _ts_ ! 1000: { ! 1001: Element *_label; /* substring labeling the edge */ ! 1002: long int _length; /* the length of the string */ ! 1003: struct _ts_ *_child; /* list of children */ ! 1004: struct _ts_ *_sibling; /* link for the child list */ ! 1005: union ! 1006: { /* these two fields are mutual exclusive */ ! 1007: struct _ts_ *_link; /* sub-link */ ! 1008: Element *_suffix; /* suffix */ ! 1009: } _uls_; ! 1010: } Suftree; ! 1011: ! 1012: /* short hand for various fields in a tree node */ ! 1013: #define LABEL(n) ((n)->_label) ! 1014: #define LENGTH(n) ((n)->_length) ! 1015: #define CHILD(n) ((n)->_child) ! 1016: #define SIBLING(n) ((n)->_sibling) ! 1017: #define LINK(n) ((n)->_uls_._link) ! 1018: #define SUFFIX(n) ((n)->_uls_._suffix) ! 1019: ! 1020: extern Suftree *bldsuftree(); ! 1021: extern long mtchsuftree(); ! 1022: ! 1023: ! 1024: /* the following definitions are not to be seen by users */ ! 1025: #ifdef _IN_SUF_TREE ! 1026: #ifdef DEBUG ! 1027: #define ASSERT(p) if(!(p)) abort(); ! 1028: #else ! 1029: #define ASSERT(p) ! 1030: #endif /*DEBUG*/ ! 1031: ! 1032: #ifndef NULL ! 1033: #define NULL (0L) ! 1034: #endif /*NULL*/ ! 1035: ! 1036: #ifndef NIL ! 1037: #define NIL(type) ((type*)NULL) ! 1038: #endif /*NIL*/ ! 1039: ! 1040: #define ALLOCSIZE 256 /* amount of nodes to allocate each time */ ! 1041: #define NEXT(n) ((n)->_sibling) ! 1042: ! 1043: #endif /*_IN_SUF_TREE*/ ! 1044: 0707070000000000061006440044230044230000010000000425126261700002400000007231update.c Ugsf Ggsf #include "update.h" ! 1045: ! 1046: /* ! 1047: ** Reconstruct a target file from a source file and a delta file. ! 1048: ** The delta file contain block move instructions computed by delta(). ! 1049: ** ! 1050: ** Written by Kiem-Phong Vo, 5/20/88 ! 1051: */ ! 1052: ! 1053: /* buffers for delta and target files */ ! 1054: static unsigned char *Ddata, *Dnext, *Dend, ! 1055: *Tdata, *Tnext, *Tend; ! 1056: static int Dfd, Tfd; ! 1057: ! 1058: #define delinit(buf,size,fd) (Ddata=Dnext=Dend=buf, Dfd=fd) ! 1059: #define tarinit(buf,size,fd) (Tdata=Tnext=buf, Tend=buf+size, Tfd=fd) ! 1060: #define tarflush() (write(Tfd,Tdata,Tnext-Tdata) >= 0 ? (Tnext=Tdata,0) : -1) ! 1061: ! 1062: /* read a byte from delta file */ ! 1063: static int delgetc() ! 1064: { ! 1065: if(Dnext >= Dend) ! 1066: { ! 1067: register int n; ! 1068: if((n = read(Dfd,Ddata,BUFSIZE)) <= 0) ! 1069: return -1; ! 1070: Dnext = Ddata, Dend = Ddata+n; ! 1071: } ! 1072: return (int)(*Dnext++); ! 1073: } ! 1074: ! 1075: /* read a long value from delta file */ ! 1076: static long delgetl(n) ! 1077: register int n; ! 1078: { ! 1079: register long lv; ! 1080: ! 1081: lv = 0; ! 1082: for(; n > 0; --n) ! 1083: { ! 1084: register int v; ! 1085: if((v = delgetc()) < 0) ! 1086: return -1; ! 1087: lv = lv*256 + v; ! 1088: } ! 1089: return lv; ! 1090: } ! 1091: ! 1092: /* transfer a number of bytes from a file to the target file */ ! 1093: static int filetransfer(fd,n) ! 1094: int fd; ! 1095: long n; ! 1096: { ! 1097: while(n > 0) ! 1098: { ! 1099: register int r; ! 1100: ! 1101: if(Tnext >= Tend) ! 1102: if(tarflush() < 0) ! 1103: return -1; ! 1104: r = n > (Tend-Tnext) ? (Tend-Tnext) : n; ! 1105: if(read(fd,Tnext,r) != r) ! 1106: return -1; ! 1107: Tnext += r; ! 1108: n -= r; ! 1109: } ! 1110: return 0; ! 1111: } ! 1112: ! 1113: /* transfer a number of bytes from a memory area to the target file */ ! 1114: static int memtransfer(addr,n) ! 1115: unsigned char *addr; ! 1116: register long n; ! 1117: { ! 1118: while(n > 0) ! 1119: { ! 1120: register int r; ! 1121: ! 1122: if(Tnext >= Tend) ! 1123: if(tarflush() < 0) ! 1124: return -1; ! 1125: r = n > (Tend-Tnext) ? (Tend-Tnext) : n; ! 1126: memcopy(Tnext,addr,r); ! 1127: Tnext += r; ! 1128: addr += r; ! 1129: n -= r; ! 1130: } ! 1131: return 0; ! 1132: } ! 1133: ! 1134: /* transfer a number of bytes from delta to target */ ! 1135: static int deltransfer(n) ! 1136: long n; ! 1137: { ! 1138: register int d; ! 1139: ! 1140: /* transfer what's already in core */ ! 1141: if((d = Dend-Dnext) > 0) ! 1142: { ! 1143: register int w = n <= d ? n : d; ! 1144: ! 1145: if(w > (Tend-Tnext)) ! 1146: if(tarflush() < 0) ! 1147: return -1; ! 1148: ! 1149: /* copy from the delta buffer */ ! 1150: memcopy(Tnext,Dnext,w); ! 1151: Dnext += w; ! 1152: Tnext += w; ! 1153: n -= w; ! 1154: } ! 1155: ! 1156: return n > 0 ? filetransfer(Dfd,n) : 0; ! 1157: } ! 1158: ! 1159: update(srcfd,offset,delfd,tarfd) ! 1160: int srcfd; ! 1161: long offset; ! 1162: int delfd; ! 1163: int tarfd; ! 1164: { ! 1165: register int i; ! 1166: register long n_src, n_tar; ! 1167: unsigned char delbuf[BUFSIZE], tarbuf[BUFSIZE]; ! 1168: unsigned char *src, *tar, *malloc(); ! 1169: ! 1170: /* start buffering system for delta file */ ! 1171: delinit(delbuf,BUFSIZE,delfd); ! 1172: ! 1173: /* read the file sizes */ ! 1174: if((i = delgetc()) < 0 || (i&DELTA_TYPE) != DELTA_TYPE) ! 1175: return -1; ! 1176: if((n_src = delgetl((i>>3)&07)) < 0 || (n_tar = delgetl(i&07)) < 0) ! 1177: return -1; ! 1178: ! 1179: /* make data area for target file */ ! 1180: if(tar = malloc(n_tar)) /* assignment = */ ! 1181: tarinit(tar,n_tar,tarfd); ! 1182: else tarinit(tarbuf,BUFSIZE,tarfd); ! 1183: ! 1184: /* read in source file if possible to avoid lseek */ ! 1185: if(src = malloc(n_src)) /* assignment = */ ! 1186: { ! 1187: lseek(srcfd,offset,0); ! 1188: if(read(srcfd,src,n_src) != n_src) ! 1189: return -1; ! 1190: } ! 1191: ! 1192: while((i = delgetc()) >= 0) ! 1193: { ! 1194: register long size, addr; ! 1195: ! 1196: if((size = delgetl((i>>3)&07)) < 0) ! 1197: return -1; ! 1198: switch(i&DELTA_TYPE) ! 1199: { ! 1200: default : ! 1201: return -1; ! 1202: case DELTA_TYPE : ! 1203: /* sync delta file pointer */ ! 1204: if((addr = Dend-Dnext) > 0) ! 1205: lseek(Dfd,-addr,1); ! 1206: /* flush output buffer */ ! 1207: if(tarflush() < 0) ! 1208: return -1; ! 1209: /* free space used */ ! 1210: if(src) ! 1211: free(src); ! 1212: if(tar) ! 1213: free(tar); ! 1214: return 0; ! 1215: case DELTA_MOVE : ! 1216: if((addr = delgetl(i&07)) < 0) ! 1217: return -1; ! 1218: if(src) ! 1219: { ! 1220: if(memtransfer(src+addr,size) < 0) ! 1221: return -1; ! 1222: } ! 1223: else ! 1224: { ! 1225: if(lseek(srcfd,offset+addr,0) < 0) ! 1226: return -1; ! 1227: if(filetransfer(srcfd,size) < 0) ! 1228: return -1; ! 1229: } ! 1230: break; ! 1231: case DELTA_ADD : ! 1232: if(deltransfer(size) < 0) ! 1233: return -1; ! 1234: break; ! 1235: } ! 1236: } ! 1237: ! 1238: /* should never get here */ ! 1239: return -1; ! 1240: } ! 1241: 0707070000000000071006440044230044230000010000000425126261700002400000001253update.h Ugsf Ggsf /* values for the instruction field */ ! 1242: #define DELTA_TYPE 0300 ! 1243: #define DELTA_MOVE 0200 ! 1244: #define DELTA_ADD 0100 ! 1245: ! 1246: /* number of bytes required to code a value */ ! 1247: #define BASE 256 ! 1248: #define ONE (BASE) ! 1249: #define TWO (BASE*BASE) ! 1250: #define THREE (BASE*BASE*BASE) ! 1251: #define NBYTE(v) ((v) < ONE ? 1 : ((v) < TWO ? 2 : ((v) < THREE ? 3 : 4))) ! 1252: ! 1253: #define BUFSIZE 2048 ! 1254: ! 1255: #ifndef NULL ! 1256: #define NULL (0L) ! 1257: #endif ! 1258: ! 1259: #ifndef NIL ! 1260: #define NIL(type) ((type*)NULL) ! 1261: #endif ! 1262: ! 1263: /* function to copy data from one area to another */ ! 1264: #ifdef SYSV ! 1265: #define memcopy(to,fr,n) memcpy(to,fr,n) ! 1266: #define memzero(s,n) memset(s,0,n) ! 1267: #endif ! 1268: ! 1269: #ifdef BSD ! 1270: #define memcopy(to,fr,n) bcopy(fr,to,n) ! 1271: #define memzero(s,n) bzero(s,n) ! 1272: #endif ! 1273: 0707070000000000101006440044230044230000010000000475460055300002300000003501Mamfile Ugsf Ggsf note # # make abstract machine file generated from Makefile # # ! 1274: setv AS as ! 1275: setv ASFLAGS ! 1276: setv AR ar ! 1277: setv ARFLAGS cr ! 1278: setv CC cc ! 1279: setv CCFLAGS "-O" ! 1280: setv CPP "$CC -E" ! 1281: setv CPIO cpio ! 1282: setv CPIOFLAGS ! 1283: setv F77 f77 ! 1284: setv INSTALLROOT $HOME ! 1285: setv LD ld ! 1286: setv LDFLAGS ! 1287: setv LEX lex ! 1288: setv LEXFLAGS ! 1289: setv LPR lpr ! 1290: setv LPRFLAGS ! 1291: setv M4FLAGS ! 1292: setv MAKE nmake ! 1293: setv MAKEFLAGS ! 1294: setv PR pr ! 1295: setv PRFLAGS ! 1296: setv TAR tar ! 1297: setv YACC yacc ! 1298: setv YACCFLAGS -d ! 1299: make install ! 1300: make all ! 1301: make libodelta.a ! 1302: attr arch ! 1303: make delta.o ! 1304: make delta.c ! 1305: attr perm ! 1306: attr scan ! 1307: make suftree.h ! 1308: attr perm ! 1309: attr scan ! 1310: attr impl ! 1311: done suftree.h ! 1312: make update.h ! 1313: attr perm ! 1314: attr scan ! 1315: attr impl ! 1316: done update.h ! 1317: done delta.c ! 1318: prev delta.c ! 1319: setv SYSV -DSYSV ! 1320: exec $CC $CCFLAGS -I. "$SYSV" -c delta.c ! 1321: done delta.o ! 1322: make mtchstring.o ! 1323: make mtchstring.c ! 1324: attr perm ! 1325: attr scan ! 1326: prev update.h ! 1327: done mtchstring.c ! 1328: prev mtchstring.c ! 1329: exec $CC $CCFLAGS -I. "$SYSV" -c mtchstring.c ! 1330: done mtchstring.o ! 1331: make suftree.o ! 1332: make suftree.c ! 1333: attr perm ! 1334: attr scan ! 1335: prev suftree.h ! 1336: done suftree.c ! 1337: prev suftree.c ! 1338: exec $CC $CCFLAGS -I. -c suftree.c ! 1339: done suftree.o ! 1340: make update.o ! 1341: make update.c ! 1342: attr perm ! 1343: attr scan ! 1344: prev update.h ! 1345: done update.c ! 1346: prev update.c ! 1347: exec $CC $CCFLAGS -I. "$SYSV" -c update.c ! 1348: done update.o ! 1349: exec $AR cr libodelta.a delta.o mtchstring.o suftree.o update.o ! 1350: exec (ranlib libodelta.a) >/dev/null 2>&1 || true ! 1351: done libodelta.a ! 1352: done all ! 1353: make $INSTALLROOT/lib ! 1354: exec if test ! -d $INSTALLROOT/lib ! 1355: .... then rm -rf $INSTALLROOT/lib && mkdir $INSTALLROOT/lib || { rm -rf $INSTALLROOT && mkdir $INSTALLROOT && mkdir $INSTALLROOT/lib ;} || true ! 1356: .... fi ! 1357: done $INSTALLROOT/lib ! 1358: make $INSTALLROOT/lib/libodelta.a ! 1359: attr arch ! 1360: prev libodelta.a ! 1361: exec { cp libodelta.a $INSTALLROOT/lib/libodelta.a 2>/dev/null ;} || true ! 1362: exec (ranlib $INSTALLROOT/lib/libodelta.a) >/dev/null 2>&1 || true ! 1363: done $INSTALLROOT/lib/libodelta.a ! 1364: done install ! 1365: 0707070000000000000000000000000000000000010000000000000000000001300000000000TRAILER!!!
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.