|
|
1.1 ! root 1: #include "update.h" ! 2: #include "suftree.h" ! 3: ! 4: /* ! 5: ** Compute delta commands to transform the source string 'src' ! 6: ** to the target string 'tar'. Small blockmoves are transformed ! 7: ** to blockadds for space efficiency. ! 8: ** Return -1 in case of error. ! 9: ** ! 10: ** For details on computing blockmoves, see: ! 11: ** "The String-to-String Correction Problem with Block Moves" ! 12: ** W. Tichy, ACM TOCS, v.2, No.4, 1984, pp.309-321. ! 13: ** ! 14: ** Written by Kiem-Phong Vo, 5/18/88 ! 15: */ ! 16: ! 17: extern char *malloc(); ! 18: ! 19: #define M_MAX 9 /* max size of a block move instruction */ ! 20: #define A_MAX 5 /* max size of the header of an add instruction */ ! 21: ! 22: /* structure of a delta instruction */ ! 23: typedef struct _m_ ! 24: { ! 25: int type; /* DELTA_MOVE or DELTA_ADD */ ! 26: long size; /* size of the block being moved or added */ ! 27: long addr; /* offset where the block starts */ ! 28: struct _m_ *last; /* doubly linked list for easy insert/delete */ ! 29: struct _m_ *next; ! 30: } Move; ! 31: ! 32: /* bases of the source and target strings */ ! 33: static char *Bsrc, *Btar; ! 34: ! 35: /* Data buffer area */ ! 36: static char *Ddata, *Dend, *Dnext; ! 37: static int Dfd; ! 38: ! 39: #define delinit(buf,fd) (Ddata=Dnext=buf, Dend=buf+BUFSIZE, Dfd=fd) ! 40: #define delflush() (write(Dfd,Ddata,Dnext-Ddata) >= 0 ? (Dnext=Ddata,0) : -1) ! 41: ! 42: static int delputc(byte) ! 43: char byte; ! 44: { ! 45: if(Dnext == Dend) ! 46: if(delflush() < 0) ! 47: return -1; ! 48: *Dnext++ = byte; ! 49: return 0; ! 50: } ! 51: ! 52: static int delputl(n,v) ! 53: register int n; ! 54: register long v; ! 55: { ! 56: register int i; ! 57: unsigned char c[4]; ! 58: ! 59: for(i = 0; i < n; ++i) ! 60: { ! 61: c[i] = (unsigned char)(v%BASE); ! 62: v /= BASE; ! 63: } ! 64: for(i = n-1; i >= 0; --i) ! 65: if(delputc((char)c[i]) < 0) ! 66: return -1; ! 67: return 0; ! 68: } ! 69: ! 70: static int delputs(n,addr) ! 71: register long n; ! 72: register long addr; ! 73: { ! 74: if(n < (Dend-Dnext)) ! 75: { ! 76: memcopy(Dnext,Btar+addr,n); ! 77: Dnext += n; ! 78: } ! 79: else ! 80: { ! 81: if(delflush() < 0) ! 82: return -1; ! 83: if(write(Dfd,Btar+addr,n) != n) ! 84: return -1; ! 85: } ! 86: return 0; ! 87: } ! 88: ! 89: /* write an instruction */ ! 90: static int putMove(this) ! 91: Move *this; ! 92: { ! 93: register char inst; ! 94: ! 95: inst = this->type; ! 96: inst |= (NBYTE(this->size)&07) << 3; ! 97: if(this->type == DELTA_MOVE) ! 98: { ! 99: inst |= NBYTE(this->addr)&07; ! 100: if(delputc(inst) < 0 || ! 101: delputl(NBYTE(this->size),this->size) < 0 || ! 102: delputl(NBYTE(this->addr),this->addr) < 0) ! 103: return -1; ! 104: } ! 105: else ! 106: { ! 107: if(delputc(inst) < 0 || ! 108: delputl(NBYTE(this->size),this->size) < 0 || ! 109: delputs(this->size,this->addr) < 0) ! 110: return -1; ! 111: } ! 112: return 0; ! 113: } ! 114: ! 115: /* constructor for Move */ ! 116: static Move *newMove(type,size,addr,last) ! 117: int type; ! 118: long size; ! 119: long addr; ! 120: Move *last; ! 121: { ! 122: register Move *this = (Move*) malloc(sizeof(Move)); ! 123: if(this == NIL(Move)) ! 124: return NIL(Move); ! 125: this->type = type; ! 126: this->size = size; ! 127: this->addr = addr; ! 128: if(last) ! 129: { ! 130: last->next = this; ! 131: this->last = last; ! 132: } ! 133: else this->last = NIL(Move); ! 134: this->next = NIL(Move); ! 135: return this; ! 136: } ! 137: ! 138: /* destructor for Move, return the elt after move */ ! 139: static Move *delMove(this) ! 140: Move *this; ! 141: { ! 142: register Move *next = this->next; ! 143: register Move *last = this->last; ! 144: if(last) ! 145: last->next = next; ! 146: if(next) ! 147: next->last = last; ! 148: free((char*)this); ! 149: return next ? next : last; ! 150: } ! 151: ! 152: /* make a new add command */ ! 153: static Move *makeAdd(beg,end,last) ! 154: char *beg, *end; ! 155: Move *last; ! 156: { ! 157: register Move *this; ! 158: ! 159: this = newMove(DELTA_ADD,(long)(end-beg),(long)(beg-Btar),NIL(Move)); ! 160: if(!this) ! 161: return NIL(Move); ! 162: ! 163: /* remove small previous adjacent moves */ ! 164: while(last) ! 165: { ! 166: register int a_size, cost_m, cost_a; ! 167: ! 168: if(last->type == DELTA_ADD) ! 169: break; ! 170: ! 171: cost_m = NBYTE(last->size) + NBYTE(last->addr) + ! 172: NBYTE(this->size) + this->size; ! 173: a_size = this->size + last->size; ! 174: cost_a = NBYTE(a_size) + a_size; ! 175: if(cost_m < cost_a) ! 176: break; ! 177: ! 178: this->size = a_size; ! 179: this->addr -= last->size; ! 180: last = delMove(last); ! 181: } ! 182: ! 183: /* merge with adjacent adds */ ! 184: if(last && last->type == DELTA_ADD) ! 185: { ! 186: this->size += last->size; ! 187: this->addr -= last->size; ! 188: last = delMove(last); ! 189: } ! 190: ! 191: if(last) ! 192: { ! 193: last->next = this; ! 194: this->last = last; ! 195: } ! 196: return this; ! 197: } ! 198: ! 199: /* check to see if a move is appropriate */ ! 200: static int chkMove(m_size,m_addr,a_size) ! 201: long m_size, m_addr, a_size; ! 202: { ! 203: register int cost_m, cost_a; ! 204: ! 205: cost_m = NBYTE(m_size) + NBYTE(m_addr); ! 206: cost_a = NBYTE(m_size) + m_size; ! 207: if(cost_m >= cost_a) ! 208: return 0; ! 209: ! 210: /* it's good but it may be better to merge it to an add */ ! 211: if(a_size > 0) ! 212: { ! 213: register int m_cost, a_cost; ! 214: ! 215: m_cost = cost_m + NBYTE(a_size) + a_size; ! 216: a_size += m_size; ! 217: a_cost = NBYTE(a_size) + a_size; ! 218: ! 219: /* it is better to merge! */ ! 220: if(m_cost >= a_cost) ! 221: return 0; ! 222: } ! 223: ! 224: return m_size; ! 225: } ! 226: ! 227: /* optimize a sequence of moves */ ! 228: static Move *optMove(s) ! 229: register Move *s; ! 230: { ! 231: register long add, m_cost, a_cost; ! 232: register Move *this, *last; ! 233: ! 234: add = (s->last && s->last->type == DELTA_ADD) ? s->last->size : 0; ! 235: ! 236: m_cost = 0; ! 237: a_cost = 0; ! 238: for(this = s; this != NIL(Move); this = this->next) ! 239: { ! 240: register long cost_m, cost_a; ! 241: ! 242: if(this->type == DELTA_ADD || this->size > (M_MAX+A_MAX)) ! 243: break; ! 244: ! 245: m_cost += 1+NBYTE(this->size)+NBYTE(this->addr); ! 246: a_cost += this->size; ! 247: ! 248: /* costs of alternatives */ ! 249: cost_m = m_cost; ! 250: cost_a = a_cost; ! 251: if(add > 0) ! 252: { ! 253: cost_m += 1 + add + NBYTE(add); ! 254: cost_a += add; ! 255: } ! 256: if(this->next && this->next->type == DELTA_ADD) ! 257: { ! 258: cost_m += 1 + this->next->size + NBYTE(this->next->size); ! 259: cost_a += this->next->size; ! 260: } ! 261: cost_a += 1 + NBYTE(cost_a); ! 262: ! 263: /* conversion is bad */ ! 264: if(cost_m < cost_a) ! 265: continue; ! 266: ! 267: /* convert the entire sequence to an add */ ! 268: s->type = DELTA_ADD; ! 269: while(this != s) ! 270: { ! 271: last = this->last; ! 272: s->size += this->size; ! 273: (void)delMove(this); ! 274: this = last; ! 275: } ! 276: ! 277: /* merge adjacent adds */ ! 278: if((last = s->last) && last->type == DELTA_ADD) ! 279: { ! 280: last->size += s->size; ! 281: (void)delMove(s); ! 282: s = last; ! 283: } ! 284: if(s->next && s->next->type == DELTA_ADD) ! 285: { ! 286: s->size += s->next->size; ! 287: (void)delMove(s->next); ! 288: } ! 289: /* done */ ! 290: break; ! 291: } ! 292: return s; ! 293: } ! 294: ! 295: /* the real thing */ ! 296: delta(src,n_src,tar,n_tar,delfd) ! 297: char *src; ! 298: long n_src; ! 299: char *tar; ! 300: long n_tar; ! 301: int delfd; ! 302: { ! 303: register char *sp, *tp, *esrc, *etar; ! 304: register long size, addr; ! 305: Suftree *tree; ! 306: Move *moves, *last; ! 307: char inst, buf[BUFSIZE]; ! 308: long mtchstring(); ! 309: ! 310: /* initialize the output area */ ! 311: delinit(buf,delfd); ! 312: ! 313: /* output file sizes */ ! 314: inst = DELTA_TYPE | ((NBYTE(n_src)&07) << 3) | (NBYTE(n_tar)&07); ! 315: if(delputc(inst) < 0) ! 316: return -1; ! 317: if(delputl(NBYTE(n_src),n_src) < 0 || delputl(NBYTE(n_tar),n_tar) < 0) ! 318: return -1; ! 319: ! 320: /* bases */ ! 321: Bsrc = src; ! 322: Btar = tar; ! 323: esrc = src + n_src - 1; ! 324: etar = tar + n_tar - 1; ! 325: ! 326: /* initialize list and last block */ ! 327: moves = NIL(Move); ! 328: last = NIL(Move); ! 329: ! 330: /* try making suffix tree */ ! 331: if(!(tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree))) ! 332: { ! 333: /* not enough space for tree, remove matching prefix and suffix */ ! 334: for(; src <= esrc && tar <= etar; ++src, ++tar) ! 335: if(*src != *tar) ! 336: break; ! 337: if((size = src-Bsrc) > 0) ! 338: { ! 339: register int cost_m, cost_a; ! 340: ! 341: cost_m = NBYTE(size) + NBYTE(0); ! 342: cost_a = NBYTE(size) + size; ! 343: if(cost_m < cost_a) ! 344: { ! 345: moves = newMove(DELTA_MOVE,size,0L,NIL(Move)); ! 346: if(!moves) ! 347: return -1; ! 348: n_src -= src-Bsrc; ! 349: n_tar -= tar-Btar; ! 350: } ! 351: else ! 352: { ! 353: src = Bsrc; ! 354: tar = Btar; ! 355: } ! 356: } ! 357: ! 358: for(sp = esrc, tp = etar; sp >= src && tp >= tar; --sp, --tp) ! 359: if(*sp != *tp) ! 360: break; ! 361: if((size = esrc-sp) > 0) ! 362: { ! 363: addr = sp+1-Bsrc; ! 364: if(chkMove(size,addr,0L) > 0) ! 365: { ! 366: last = newMove(DELTA_MOVE,size,addr,NIL(Move)); ! 367: if(!last) ! 368: return -1; ! 369: esrc = sp; ! 370: etar = tp; ! 371: n_tar = etar-tar+1; ! 372: n_src = esrc-src+1; ! 373: } ! 374: } ! 375: ! 376: /* try making the tree again */ ! 377: tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree); ! 378: } ! 379: ! 380: /* compute block moves */ ! 381: tp = NIL(char); ! 382: while(n_tar > 0) ! 383: { ! 384: char *match; ! 385: ! 386: if(tree) ! 387: size = mtchsuftree(tree,tar,n_tar,&match); ! 388: else size = mtchstring(src,n_src,tar,n_tar,&match); ! 389: if(size < 0) ! 390: return -1; ! 391: if(size > 0) ! 392: size = chkMove(size,(long)(match-Bsrc),(long)(tp ? tp-tar : 0)); ! 393: ! 394: /* output a block move */ ! 395: if(size > 0) ! 396: { ! 397: if(tp) ! 398: { ! 399: moves = makeAdd(tp,tar,moves); ! 400: if(!moves) ! 401: return -1; ! 402: tp = NIL(char); ! 403: } ! 404: moves = newMove(DELTA_MOVE,size,(long)(match-Bsrc),moves); ! 405: if(!moves) ! 406: return -1; ! 407: tar += size; ! 408: n_tar -= size; ! 409: } ! 410: else ! 411: { ! 412: if(!tp) ! 413: tp = tar; ! 414: tar += 1; ! 415: n_tar -= 1; ! 416: } ! 417: } ! 418: ! 419: /* add any remaining blocks */ ! 420: if(tp) ! 421: { ! 422: if(last && chkMove(last->size,last->addr,(long)(tar-tp)) <= 0) ! 423: { ! 424: tar += last->size; ! 425: last = delMove(last); ! 426: } ! 427: moves = makeAdd(tp,tar,moves); ! 428: if(!moves) ! 429: return -1; ! 430: } ! 431: if(last) ! 432: { ! 433: moves->next = last; ! 434: last->last = moves; ! 435: } ! 436: ! 437: /* release space use for string matching */ ! 438: if(tree) ! 439: delsuftree(tree); ! 440: else mtchstring(NIL(char),0L,NIL(char),0L,NIL(char*)); ! 441: ! 442: /* optimize move instructions */ ! 443: if(moves) ! 444: { ! 445: register Move *this; ! 446: ! 447: this = moves; ! 448: while(this->last) ! 449: this = this->last; ! 450: for(; this != NIL(Move); this = this->next) ! 451: if(this->type == DELTA_MOVE && this->size <= (M_MAX+A_MAX)) ! 452: moves = this = optMove(this); ! 453: ! 454: while(moves->last) ! 455: moves = moves->last; ! 456: } ! 457: ! 458: /* write out the move instructions */ ! 459: addr = 0L; ! 460: while(moves) ! 461: { ! 462: if(moves->type == DELTA_ADD) ! 463: moves->addr = addr; ! 464: addr += moves->size; ! 465: if(putMove(moves) < 0) ! 466: return -1; ! 467: moves = delMove(moves); ! 468: } ! 469: ! 470: /* write ending token */ ! 471: delputc((char)DELTA_TYPE); ! 472: ! 473: /* flush buffer */ ! 474: return delflush(); ! 475: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.