|
|
1.1 root 1: #include "update.h"
2: #include "suftree.h"
3:
4: /*
5: ** Compute delta commands to transform the source string 'src'
6: ** to the target string 'tar'. Small blockmoves are transformed
7: ** to blockadds for space efficiency.
8: ** Return -1 in case of error.
9: **
10: ** For details on computing blockmoves, see:
11: ** "The String-to-String Correction Problem with Block Moves"
12: ** W. Tichy, ACM TOCS, v.2, No.4, 1984, pp.309-321.
13: **
14: ** Written by Kiem-Phong Vo, 5/18/88
15: */
16:
17: extern char *malloc();
18:
19: #define M_MAX 9 /* max size of a block move instruction */
20: #define A_MAX 5 /* max size of the header of an add instruction */
21:
22: /* structure of a delta instruction */
23: typedef struct _m_
24: {
25: int type; /* DELTA_MOVE or DELTA_ADD */
26: long size; /* size of the block being moved or added */
27: long addr; /* offset where the block starts */
28: struct _m_ *last; /* doubly linked list for easy insert/delete */
29: struct _m_ *next;
30: } Move;
31:
32: /* bases of the source and target strings */
33: static char *Bsrc, *Btar;
34:
35: /* Data buffer area */
36: static char *Ddata, *Dend, *Dnext;
37: static int Dfd;
38:
39: #define delinit(buf,fd) (Ddata=Dnext=buf, Dend=buf+BUFSIZE, Dfd=fd)
40: #define delflush() (write(Dfd,Ddata,Dnext-Ddata) >= 0 ? (Dnext=Ddata,0) : -1)
41:
42: static int delputc(byte)
43: char byte;
44: {
45: if(Dnext == Dend)
46: if(delflush() < 0)
47: return -1;
48: *Dnext++ = byte;
49: return 0;
50: }
51:
52: static int delputl(n,v)
53: register int n;
54: register long v;
55: {
56: register int i;
57: unsigned char c[4];
58:
59: for(i = 0; i < n; ++i)
60: {
61: c[i] = (unsigned char)(v%BASE);
62: v /= BASE;
63: }
64: for(i = n-1; i >= 0; --i)
65: if(delputc((char)c[i]) < 0)
66: return -1;
67: return 0;
68: }
69:
70: static int delputs(n,addr)
71: register long n;
72: register long addr;
73: {
74: if(n < (Dend-Dnext))
75: {
76: memcopy(Dnext,Btar+addr,n);
77: Dnext += n;
78: }
79: else
80: {
81: if(delflush() < 0)
82: return -1;
83: if(write(Dfd,Btar+addr,n) != n)
84: return -1;
85: }
86: return 0;
87: }
88:
89: /* write an instruction */
90: static int putMove(this)
91: Move *this;
92: {
93: register char inst;
94:
95: inst = this->type;
96: inst |= (NBYTE(this->size)&07) << 3;
97: if(this->type == DELTA_MOVE)
98: {
99: inst |= NBYTE(this->addr)&07;
100: if(delputc(inst) < 0 ||
101: delputl(NBYTE(this->size),this->size) < 0 ||
102: delputl(NBYTE(this->addr),this->addr) < 0)
103: return -1;
104: }
105: else
106: {
107: if(delputc(inst) < 0 ||
108: delputl(NBYTE(this->size),this->size) < 0 ||
109: delputs(this->size,this->addr) < 0)
110: return -1;
111: }
112: return 0;
113: }
114:
115: /* constructor for Move */
116: static Move *newMove(type,size,addr,last)
117: int type;
118: long size;
119: long addr;
120: Move *last;
121: {
122: register Move *this = (Move*) malloc(sizeof(Move));
123: if(this == NIL(Move))
124: return NIL(Move);
125: this->type = type;
126: this->size = size;
127: this->addr = addr;
128: if(last)
129: {
130: last->next = this;
131: this->last = last;
132: }
133: else this->last = NIL(Move);
134: this->next = NIL(Move);
135: return this;
136: }
137:
138: /* destructor for Move, return the elt after move */
139: static Move *delMove(this)
140: Move *this;
141: {
142: register Move *next = this->next;
143: register Move *last = this->last;
144: if(last)
145: last->next = next;
146: if(next)
147: next->last = last;
148: free((char*)this);
149: return next ? next : last;
150: }
151:
152: /* make a new add command */
153: static Move *makeAdd(beg,end,last)
154: char *beg, *end;
155: Move *last;
156: {
157: register Move *this;
158:
159: this = newMove(DELTA_ADD,(long)(end-beg),(long)(beg-Btar),NIL(Move));
160: if(!this)
161: return NIL(Move);
162:
163: /* remove small previous adjacent moves */
164: while(last)
165: {
166: register int a_size, cost_m, cost_a;
167:
168: if(last->type == DELTA_ADD)
169: break;
170:
171: cost_m = NBYTE(last->size) + NBYTE(last->addr) +
172: NBYTE(this->size) + this->size;
173: a_size = this->size + last->size;
174: cost_a = NBYTE(a_size) + a_size;
175: if(cost_m < cost_a)
176: break;
177:
178: this->size = a_size;
179: this->addr -= last->size;
180: last = delMove(last);
181: }
182:
183: /* merge with adjacent adds */
184: if(last && last->type == DELTA_ADD)
185: {
186: this->size += last->size;
187: this->addr -= last->size;
188: last = delMove(last);
189: }
190:
191: if(last)
192: {
193: last->next = this;
194: this->last = last;
195: }
196: return this;
197: }
198:
199: /* check to see if a move is appropriate */
200: static int chkMove(m_size,m_addr,a_size)
201: long m_size, m_addr, a_size;
202: {
203: register int cost_m, cost_a;
204:
205: cost_m = NBYTE(m_size) + NBYTE(m_addr);
206: cost_a = NBYTE(m_size) + m_size;
207: if(cost_m >= cost_a)
208: return 0;
209:
210: /* it's good but it may be better to merge it to an add */
211: if(a_size > 0)
212: {
213: register int m_cost, a_cost;
214:
215: m_cost = cost_m + NBYTE(a_size) + a_size;
216: a_size += m_size;
217: a_cost = NBYTE(a_size) + a_size;
218:
219: /* it is better to merge! */
220: if(m_cost >= a_cost)
221: return 0;
222: }
223:
224: return m_size;
225: }
226:
227: /* optimize a sequence of moves */
228: static Move *optMove(s)
229: register Move *s;
230: {
231: register long add, m_cost, a_cost;
232: register Move *this, *last;
233:
234: add = (s->last && s->last->type == DELTA_ADD) ? s->last->size : 0;
235:
236: m_cost = 0;
237: a_cost = 0;
238: for(this = s; this != NIL(Move); this = this->next)
239: {
240: register long cost_m, cost_a;
241:
242: if(this->type == DELTA_ADD || this->size > (M_MAX+A_MAX))
243: break;
244:
245: m_cost += 1+NBYTE(this->size)+NBYTE(this->addr);
246: a_cost += this->size;
247:
248: /* costs of alternatives */
249: cost_m = m_cost;
250: cost_a = a_cost;
251: if(add > 0)
252: {
253: cost_m += 1 + add + NBYTE(add);
254: cost_a += add;
255: }
256: if(this->next && this->next->type == DELTA_ADD)
257: {
258: cost_m += 1 + this->next->size + NBYTE(this->next->size);
259: cost_a += this->next->size;
260: }
261: cost_a += 1 + NBYTE(cost_a);
262:
263: /* conversion is bad */
264: if(cost_m < cost_a)
265: continue;
266:
267: /* convert the entire sequence to an add */
268: s->type = DELTA_ADD;
269: while(this != s)
270: {
271: last = this->last;
272: s->size += this->size;
273: (void)delMove(this);
274: this = last;
275: }
276:
277: /* merge adjacent adds */
278: if((last = s->last) && last->type == DELTA_ADD)
279: {
280: last->size += s->size;
281: (void)delMove(s);
282: s = last;
283: }
284: if(s->next && s->next->type == DELTA_ADD)
285: {
286: s->size += s->next->size;
287: (void)delMove(s->next);
288: }
289: /* done */
290: break;
291: }
292: return s;
293: }
294:
295: /* the real thing */
296: delta(src,n_src,tar,n_tar,delfd)
297: char *src;
298: long n_src;
299: char *tar;
300: long n_tar;
301: int delfd;
302: {
303: register char *sp, *tp, *esrc, *etar;
304: register long size, addr;
305: Suftree *tree;
306: Move *moves, *last;
307: char inst, buf[BUFSIZE];
308: long mtchstring();
309:
310: /* initialize the output area */
311: delinit(buf,delfd);
312:
313: /* output file sizes */
314: inst = DELTA_TYPE | ((NBYTE(n_src)&07) << 3) | (NBYTE(n_tar)&07);
315: if(delputc(inst) < 0)
316: return -1;
317: if(delputl(NBYTE(n_src),n_src) < 0 || delputl(NBYTE(n_tar),n_tar) < 0)
318: return -1;
319:
320: /* bases */
321: Bsrc = src;
322: Btar = tar;
323: esrc = src + n_src - 1;
324: etar = tar + n_tar - 1;
325:
326: /* initialize list and last block */
327: moves = NIL(Move);
328: last = NIL(Move);
329:
330: /* try making suffix tree */
331: if(!(tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree)))
332: {
333: /* not enough space for tree, remove matching prefix and suffix */
334: for(; src <= esrc && tar <= etar; ++src, ++tar)
335: if(*src != *tar)
336: break;
337: if((size = src-Bsrc) > 0)
338: {
339: register int cost_m, cost_a;
340:
341: cost_m = NBYTE(size) + NBYTE(0);
342: cost_a = NBYTE(size) + size;
343: if(cost_m < cost_a)
344: {
345: moves = newMove(DELTA_MOVE,size,0L,NIL(Move));
346: if(!moves)
347: return -1;
348: n_src -= src-Bsrc;
349: n_tar -= tar-Btar;
350: }
351: else
352: {
353: src = Bsrc;
354: tar = Btar;
355: }
356: }
357:
358: for(sp = esrc, tp = etar; sp >= src && tp >= tar; --sp, --tp)
359: if(*sp != *tp)
360: break;
361: if((size = esrc-sp) > 0)
362: {
363: addr = sp+1-Bsrc;
364: if(chkMove(size,addr,0L) > 0)
365: {
366: last = newMove(DELTA_MOVE,size,addr,NIL(Move));
367: if(!last)
368: return -1;
369: esrc = sp;
370: etar = tp;
371: n_tar = etar-tar+1;
372: n_src = esrc-src+1;
373: }
374: }
375:
376: /* try making the tree again */
377: tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree);
378: }
379:
380: /* compute block moves */
381: tp = NIL(char);
382: while(n_tar > 0)
383: {
384: char *match;
385:
386: if(tree)
387: size = mtchsuftree(tree,tar,n_tar,&match);
388: else size = mtchstring(src,n_src,tar,n_tar,&match);
389: if(size < 0)
390: return -1;
391: if(size > 0)
392: size = chkMove(size,(long)(match-Bsrc),(long)(tp ? tp-tar : 0));
393:
394: /* output a block move */
395: if(size > 0)
396: {
397: if(tp)
398: {
399: moves = makeAdd(tp,tar,moves);
400: if(!moves)
401: return -1;
402: tp = NIL(char);
403: }
404: moves = newMove(DELTA_MOVE,size,(long)(match-Bsrc),moves);
405: if(!moves)
406: return -1;
407: tar += size;
408: n_tar -= size;
409: }
410: else
411: {
412: if(!tp)
413: tp = tar;
414: tar += 1;
415: n_tar -= 1;
416: }
417: }
418:
419: /* add any remaining blocks */
420: if(tp)
421: {
422: if(last && chkMove(last->size,last->addr,(long)(tar-tp)) <= 0)
423: {
424: tar += last->size;
425: last = delMove(last);
426: }
427: moves = makeAdd(tp,tar,moves);
428: if(!moves)
429: return -1;
430: }
431: if(last)
432: {
433: moves->next = last;
434: last->last = moves;
435: }
436:
437: /* release space use for string matching */
438: if(tree)
439: delsuftree(tree);
440: else mtchstring(NIL(char),0L,NIL(char),0L,NIL(char*));
441:
442: /* optimize move instructions */
443: if(moves)
444: {
445: register Move *this;
446:
447: this = moves;
448: while(this->last)
449: this = this->last;
450: for(; this != NIL(Move); this = this->next)
451: if(this->type == DELTA_MOVE && this->size <= (M_MAX+A_MAX))
452: moves = this = optMove(this);
453:
454: while(moves->last)
455: moves = moves->last;
456: }
457:
458: /* write out the move instructions */
459: addr = 0L;
460: while(moves)
461: {
462: if(moves->type == DELTA_ADD)
463: moves->addr = addr;
464: addr += moves->size;
465: if(putMove(moves) < 0)
466: return -1;
467: moves = delMove(moves);
468: }
469:
470: /* write ending token */
471: delputc((char)DELTA_TYPE);
472:
473: /* flush buffer */
474: return delflush();
475: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.