|
|
1.1 root 1: 0707070000000000011006440044230044230000010000000467126123100002400000000211Makefile Ugsf Ggsf /*
2: * old kpv delta/update/malloc library
3: */
4:
5: SYSV == 1
6:
7: odelta :LIBRARY: update.h suftree.h \
8: delta.c mtchstring.c suftree.c update.c
9: 0707070000000000021006440044230044230000010000000425126261600002300000022032delta.c Ugsf Ggsf #include "update.h"
10: #include "suftree.h"
11:
12: /*
13: ** Compute delta commands to transform the source string 'src'
14: ** to the target string 'tar'. Small blockmoves are transformed
15: ** to blockadds for space efficiency.
16: ** Return -1 in case of error.
17: **
18: ** For details on computing blockmoves, see:
19: ** "The String-to-String Correction Problem with Block Moves"
20: ** W. Tichy, ACM TOCS, v.2, No.4, 1984, pp.309-321.
21: **
22: ** Written by Kiem-Phong Vo, 5/18/88
23: */
24:
25: extern char *malloc();
26:
27: #define M_MAX 9 /* max size of a block move instruction */
28: #define A_MAX 5 /* max size of the header of an add instruction */
29:
30: /* structure of a delta instruction */
31: typedef struct _m_
32: {
33: int type; /* DELTA_MOVE or DELTA_ADD */
34: long size; /* size of the block being moved or added */
35: long addr; /* offset where the block starts */
36: struct _m_ *last; /* doubly linked list for easy insert/delete */
37: struct _m_ *next;
38: } Move;
39:
40: /* bases of the source and target strings */
41: static char *Bsrc, *Btar;
42:
43: /* Data buffer area */
44: static char *Ddata, *Dend, *Dnext;
45: static int Dfd;
46:
47: #define delinit(buf,fd) (Ddata=Dnext=buf, Dend=buf+BUFSIZE, Dfd=fd)
48: #define delflush() (write(Dfd,Ddata,Dnext-Ddata) >= 0 ? (Dnext=Ddata,0) : -1)
49:
50: static int delputc(byte)
51: char byte;
52: {
53: if(Dnext == Dend)
54: if(delflush() < 0)
55: return -1;
56: *Dnext++ = byte;
57: return 0;
58: }
59:
60: static int delputl(n,v)
61: register int n;
62: register long v;
63: {
64: register int i;
65: unsigned char c[4];
66:
67: for(i = 0; i < n; ++i)
68: {
69: c[i] = (unsigned char)(v%BASE);
70: v /= BASE;
71: }
72: for(i = n-1; i >= 0; --i)
73: if(delputc((char)c[i]) < 0)
74: return -1;
75: return 0;
76: }
77:
78: static int delputs(n,addr)
79: register long n;
80: register long addr;
81: {
82: if(n < (Dend-Dnext))
83: {
84: memcopy(Dnext,Btar+addr,n);
85: Dnext += n;
86: }
87: else
88: {
89: if(delflush() < 0)
90: return -1;
91: if(write(Dfd,Btar+addr,n) != n)
92: return -1;
93: }
94: return 0;
95: }
96:
97: /* write an instruction */
98: static int putMove(this)
99: Move *this;
100: {
101: register char inst;
102:
103: inst = this->type;
104: inst |= (NBYTE(this->size)&07) << 3;
105: if(this->type == DELTA_MOVE)
106: {
107: inst |= NBYTE(this->addr)&07;
108: if(delputc(inst) < 0 ||
109: delputl(NBYTE(this->size),this->size) < 0 ||
110: delputl(NBYTE(this->addr),this->addr) < 0)
111: return -1;
112: }
113: else
114: {
115: if(delputc(inst) < 0 ||
116: delputl(NBYTE(this->size),this->size) < 0 ||
117: delputs(this->size,this->addr) < 0)
118: return -1;
119: }
120: return 0;
121: }
122:
123: /* constructor for Move */
124: static Move *newMove(type,size,addr,last)
125: int type;
126: long size;
127: long addr;
128: Move *last;
129: {
130: register Move *this = (Move*) malloc(sizeof(Move));
131: if(this == NIL(Move))
132: return NIL(Move);
133: this->type = type;
134: this->size = size;
135: this->addr = addr;
136: if(last)
137: {
138: last->next = this;
139: this->last = last;
140: }
141: else this->last = NIL(Move);
142: this->next = NIL(Move);
143: return this;
144: }
145:
146: /* destructor for Move, return the elt after move */
147: static Move *delMove(this)
148: Move *this;
149: {
150: register Move *next = this->next;
151: register Move *last = this->last;
152: if(last)
153: last->next = next;
154: if(next)
155: next->last = last;
156: free((char*)this);
157: return next ? next : last;
158: }
159:
160: /* make a new add command */
161: static Move *makeAdd(beg,end,last)
162: char *beg, *end;
163: Move *last;
164: {
165: register Move *this;
166:
167: this = newMove(DELTA_ADD,(long)(end-beg),(long)(beg-Btar),NIL(Move));
168: if(!this)
169: return NIL(Move);
170:
171: /* remove small previous adjacent moves */
172: while(last)
173: {
174: register int a_size, cost_m, cost_a;
175:
176: if(last->type == DELTA_ADD)
177: break;
178:
179: cost_m = NBYTE(last->size) + NBYTE(last->addr) +
180: NBYTE(this->size) + this->size;
181: a_size = this->size + last->size;
182: cost_a = NBYTE(a_size) + a_size;
183: if(cost_m < cost_a)
184: break;
185:
186: this->size = a_size;
187: this->addr -= last->size;
188: last = delMove(last);
189: }
190:
191: /* merge with adjacent adds */
192: if(last && last->type == DELTA_ADD)
193: {
194: this->size += last->size;
195: this->addr -= last->size;
196: last = delMove(last);
197: }
198:
199: if(last)
200: {
201: last->next = this;
202: this->last = last;
203: }
204: return this;
205: }
206:
207: /* check to see if a move is appropriate */
208: static int chkMove(m_size,m_addr,a_size)
209: long m_size, m_addr, a_size;
210: {
211: register int cost_m, cost_a;
212:
213: cost_m = NBYTE(m_size) + NBYTE(m_addr);
214: cost_a = NBYTE(m_size) + m_size;
215: if(cost_m >= cost_a)
216: return 0;
217:
218: /* it's good but it may be better to merge it to an add */
219: if(a_size > 0)
220: {
221: register int m_cost, a_cost;
222:
223: m_cost = cost_m + NBYTE(a_size) + a_size;
224: a_size += m_size;
225: a_cost = NBYTE(a_size) + a_size;
226:
227: /* it is better to merge! */
228: if(m_cost >= a_cost)
229: return 0;
230: }
231:
232: return m_size;
233: }
234:
235: /* optimize a sequence of moves */
236: static Move *optMove(s)
237: register Move *s;
238: {
239: register long add, m_cost, a_cost;
240: register Move *this, *last;
241:
242: add = (s->last && s->last->type == DELTA_ADD) ? s->last->size : 0;
243:
244: m_cost = 0;
245: a_cost = 0;
246: for(this = s; this != NIL(Move); this = this->next)
247: {
248: register long cost_m, cost_a;
249:
250: if(this->type == DELTA_ADD || this->size > (M_MAX+A_MAX))
251: break;
252:
253: m_cost += 1+NBYTE(this->size)+NBYTE(this->addr);
254: a_cost += this->size;
255:
256: /* costs of alternatives */
257: cost_m = m_cost;
258: cost_a = a_cost;
259: if(add > 0)
260: {
261: cost_m += 1 + add + NBYTE(add);
262: cost_a += add;
263: }
264: if(this->next && this->next->type == DELTA_ADD)
265: {
266: cost_m += 1 + this->next->size + NBYTE(this->next->size);
267: cost_a += this->next->size;
268: }
269: cost_a += 1 + NBYTE(cost_a);
270:
271: /* conversion is bad */
272: if(cost_m < cost_a)
273: continue;
274:
275: /* convert the entire sequence to an add */
276: s->type = DELTA_ADD;
277: while(this != s)
278: {
279: last = this->last;
280: s->size += this->size;
281: (void)delMove(this);
282: this = last;
283: }
284:
285: /* merge adjacent adds */
286: if((last = s->last) && last->type == DELTA_ADD)
287: {
288: last->size += s->size;
289: (void)delMove(s);
290: s = last;
291: }
292: if(s->next && s->next->type == DELTA_ADD)
293: {
294: s->size += s->next->size;
295: (void)delMove(s->next);
296: }
297: /* done */
298: break;
299: }
300: return s;
301: }
302:
303: /* the real thing */
304: delta(src,n_src,tar,n_tar,delfd)
305: char *src;
306: long n_src;
307: char *tar;
308: long n_tar;
309: int delfd;
310: {
311: register char *sp, *tp, *esrc, *etar;
312: register long size, addr;
313: Suftree *tree;
314: Move *moves, *last;
315: char inst, buf[BUFSIZE];
316: long mtchstring();
317:
318: /* initialize the output area */
319: delinit(buf,delfd);
320:
321: /* output file sizes */
322: inst = DELTA_TYPE | ((NBYTE(n_src)&07) << 3) | (NBYTE(n_tar)&07);
323: if(delputc(inst) < 0)
324: return -1;
325: if(delputl(NBYTE(n_src),n_src) < 0 || delputl(NBYTE(n_tar),n_tar) < 0)
326: return -1;
327:
328: /* bases */
329: Bsrc = src;
330: Btar = tar;
331: esrc = src + n_src - 1;
332: etar = tar + n_tar - 1;
333:
334: /* initialize list and last block */
335: moves = NIL(Move);
336: last = NIL(Move);
337:
338: /* try making suffix tree */
339: if(!(tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree)))
340: {
341: /* not enough space for tree, remove matching prefix and suffix */
342: for(; src <= esrc && tar <= etar; ++src, ++tar)
343: if(*src != *tar)
344: break;
345: if((size = src-Bsrc) > 0)
346: {
347: register int cost_m, cost_a;
348:
349: cost_m = NBYTE(size) + NBYTE(0);
350: cost_a = NBYTE(size) + size;
351: if(cost_m < cost_a)
352: {
353: moves = newMove(DELTA_MOVE,size,0L,NIL(Move));
354: if(!moves)
355: return -1;
356: n_src -= src-Bsrc;
357: n_tar -= tar-Btar;
358: }
359: else
360: {
361: src = Bsrc;
362: tar = Btar;
363: }
364: }
365:
366: for(sp = esrc, tp = etar; sp >= src && tp >= tar; --sp, --tp)
367: if(*sp != *tp)
368: break;
369: if((size = esrc-sp) > 0)
370: {
371: addr = sp+1-Bsrc;
372: if(chkMove(size,addr,0L) > 0)
373: {
374: last = newMove(DELTA_MOVE,size,addr,NIL(Move));
375: if(!last)
376: return -1;
377: esrc = sp;
378: etar = tp;
379: n_tar = etar-tar+1;
380: n_src = esrc-src+1;
381: }
382: }
383:
384: /* try making the tree again */
385: tree = n_tar > 0 ? bldsuftree(src,n_src) : NIL(Suftree);
386: }
387:
388: /* compute block moves */
389: tp = NIL(char);
390: while(n_tar > 0)
391: {
392: char *match;
393:
394: if(tree)
395: size = mtchsuftree(tree,tar,n_tar,&match);
396: else size = mtchstring(src,n_src,tar,n_tar,&match);
397: if(size < 0)
398: return -1;
399: if(size > 0)
400: size = chkMove(size,(long)(match-Bsrc),(long)(tp ? tp-tar : 0));
401:
402: /* output a block move */
403: if(size > 0)
404: {
405: if(tp)
406: {
407: moves = makeAdd(tp,tar,moves);
408: if(!moves)
409: return -1;
410: tp = NIL(char);
411: }
412: moves = newMove(DELTA_MOVE,size,(long)(match-Bsrc),moves);
413: if(!moves)
414: return -1;
415: tar += size;
416: n_tar -= size;
417: }
418: else
419: {
420: if(!tp)
421: tp = tar;
422: tar += 1;
423: n_tar -= 1;
424: }
425: }
426:
427: /* add any remaining blocks */
428: if(tp)
429: {
430: if(last && chkMove(last->size,last->addr,(long)(tar-tp)) <= 0)
431: {
432: tar += last->size;
433: last = delMove(last);
434: }
435: moves = makeAdd(tp,tar,moves);
436: if(!moves)
437: return -1;
438: }
439: if(last)
440: {
441: moves->next = last;
442: last->last = moves;
443: }
444:
445: /* release space use for string matching */
446: if(tree)
447: delsuftree(tree);
448: else mtchstring(NIL(char),0L,NIL(char),0L,NIL(char*));
449:
450: /* optimize move instructions */
451: if(moves)
452: {
453: register Move *this;
454:
455: this = moves;
456: while(this->last)
457: this = this->last;
458: for(; this != NIL(Move); this = this->next)
459: if(this->type == DELTA_MOVE && this->size <= (M_MAX+A_MAX))
460: moves = this = optMove(this);
461:
462: while(moves->last)
463: moves = moves->last;
464: }
465:
466: /* write out the move instructions */
467: addr = 0L;
468: while(moves)
469: {
470: if(moves->type == DELTA_ADD)
471: moves->addr = addr;
472: addr += moves->size;
473: if(putMove(moves) < 0)
474: return -1;
475: moves = delMove(moves);
476: }
477:
478: /* write ending token */
479: delputc((char)DELTA_TYPE);
480:
481: /* flush buffer */
482: return delflush();
483: }
484: 0707070000000000031006440044230044230000010000000425126261600003000000006234mtchstring.c Ugsf Ggsf #include "update.h"
485:
486:
487: /*
488: ** Find the longest prefix of tar that match some substring of src
489: ** This uses an inverted index for speed.
490: */
491: #define ALPHA (256) /* alphabet size */
492:
493: static void freemem(n_index,index)
494: long *n_index;
495: char ***index;
496: {
497: register int i;
498: if(n_index && index)
499: {
500: for(i = 0; i < ALPHA; ++i)
501: if(n_index[i] > 0)
502: free(index[i]);
503: free(index);
504: free(n_index);
505: }
506: }
507:
508: /* initial assumptions: src[0] == tar[0] && src+n_match <= endsrc */
509: static long domatch(src,endsrc,tar,endtar,n_match)
510: char *src, *endsrc, *tar, *endtar;
511: long n_match;
512: {
513: register char *sp, *tp;
514:
515: /* see if this really improves on the current match */
516: for(sp = src+n_match, tp = tar+n_match; sp > src; --sp, --tp)
517: if(*sp != *tp)
518: break;
519:
520: /* got an improvement, match as many more as we can */
521: if(sp == src)
522: {
523: sp = src+n_match+1;
524: tp = tar+n_match+1;
525: for(; sp < endsrc && tp < endtar; ++sp, ++tp)
526: if(*sp != *tp)
527: break;
528: }
529: return tp-tar;
530: }
531:
532: /* the real thing */
533: long mtchstring(src,n_src,tar,n_tar,match)
534: char *src; /* the source string to match from */
535: long n_src; /* the length of src */
536: char *tar; /* the string a prefix of which is matched */
537: long n_tar; /* the length of tar */
538: char **match; /* to return the matched address in src */
539: {
540: char *endsrc, *endtar;
541: long n_match;
542: register int i;
543: register long n_ind;
544: register char **ind;
545: static long *N_index = NIL(long);
546: static char *Cursrc = NIL(char), ***Index = NIL(char**);
547: static int Alloced = 0;
548: char **malloc(), **realloc();
549:
550: /* free old inverted indices if necessary */
551: if(src != Cursrc)
552: {
553: if(Alloced)
554: freemem(N_index,Index);
555: Alloced = 0;
556: Cursrc = NIL(char);
557: }
558:
559: /* nothing to do */
560: if(!src || n_src <= 0 || !tar || n_tar <= 0)
561: return 0;
562:
563: endsrc = src + n_src;
564: endtar = tar + n_tar;
565:
566: /* build new index structure */
567: if(src != Cursrc)
568: {
569: Cursrc = src;
570: Alloced = 1;
571: if(N_index = (long*) malloc(ALPHA*sizeof(long)))
572: {
573: register char *sp;
574:
575: memzero((char*)N_index,ALPHA*sizeof(long));
576: if(!(Index = (char ***) malloc(ALPHA*sizeof(char**))))
577: {
578: free(N_index);
579: N_index = NIL(long);
580: Alloced = 0;
581: }
582: else for(sp = src; sp < endsrc; ++sp)
583: {
584: i = (int)(*((unsigned char *)(sp)));
585: ind = Index[i];
586: n_ind = N_index[i];
587:
588: /* make sure we have space */
589: if((n_ind%ALPHA) == 0)
590: {
591: ind = n_ind == 0 ?
592: malloc(ALPHA*sizeof(char *)) :
593: realloc(ind,(n_ind+ALPHA)*sizeof(char*));
594: Index[i] = ind;
595: }
596: if(ind)
597: {
598: ind[n_ind] = sp;
599: N_index[i] += 1;
600: }
601: else
602: {
603: freemem(N_index,Index);
604: N_index = NIL(long);
605: Index = NIL(char**);
606: Alloced = 0;
607: break;
608: }
609: }
610: }
611: }
612:
613: /* find longest matching prefix */
614: i = (int)(*((unsigned char *)(tar)));
615: ind = Alloced ? Index[i] : NULL;
616: n_ind = Alloced ? N_index[i] : -1;
617: n_match = 0;
618: while(1)
619: {
620: register long m;
621:
622: if(ind)
623: {
624: src = n_ind > 0 ? *ind++ : endsrc;
625: n_ind -= 1;
626: }
627: else for(; src+n_match < endsrc; ++src)
628: if(*src == *tar)
629: break;
630: if(src+n_match >= endsrc)
631: break;
632:
633: if((m = domatch(src,endsrc,tar,endtar,n_match)) > n_match)
634: {
635: n_match = m;
636: *match = src;
637: if(tar+n_match >= endtar)
638: break;
639: }
640: }
641:
642: return n_match;
643: }
644: 0707070000000000041006440044230044230000010000000425126261700002500000017763suftree.c Ugsf Ggsf #define _IN_SUF_TREE
645: #include "suftree.h"
646:
647: /*
648: ** Construct a suffix tree for a source string
649: ** and perform string matching of various input strings.
650: ** This is based on the suffix tree algorithm of E. McCreight.
651: ** I extended the algorithm to remove the restriction that the
652: ** last element of the string has to be different from the rest
653: ** of the string. Note also that since the alphabet is large (256),
654: ** instead of being stored in an array, the children of a node
655: ** are kept in a linked list which is managed by a move-to-front
656: ** heuristic.
657: **
658: ** For details, see the paper:
659: ** "A Space-Economical Suffix Tree Construction Algorithm"
660: ** E.M. McCreight, JACM, v.23, No.2, 1976, pp.262-272.
661: **
662: ** BldSuftree returns either NULL or the pointer to the root of the
663: ** tree. In the latter case, the tree can be destroyed by free()-ing
664: ** the root.
665: **
666: ** Written by Kiem-Phong Vo, 5/20/88.
667: */
668:
669: /* Delete a suffix tree */
670: void delsuftree(root)
671: Suftree *root;
672: {
673: if(!root)
674: return;
675: root -= 1;
676: while(root)
677: {
678: register Suftree *next;
679: next = NEXT(root);
680: free(root);
681: root = next;
682: }
683: }
684:
685: /* Find a child whose label string starts with a given character */
686: static Suftree *child_find(node,c)
687: Suftree *node; /* the node whose children will be searched */
688: Element c; /* the element being looked for */
689: {
690: register Suftree *np, *last;
691:
692: last = NIL(Suftree);
693: for(np = CHILD(node); np != NIL(Suftree); np = SIBLING(np))
694: if(LENGTH(np) > 0 && *LABEL(np) == c)
695: break;
696: else last = np;
697:
698: if(np && last)
699: {
700: /* move-to-front heuristic */
701: SIBLING(last) = SIBLING(np);
702: SIBLING(np) = CHILD(node);
703: CHILD(node) = np;
704: }
705: return np;
706: }
707:
708: /* Get space for tree nodes. */
709: static Suftree *getmem(root,n)
710: Suftree *root;
711: int n;
712: {
713: Suftree *list, *malloc(), **realloc();
714:
715: if((list = malloc((n+1)*sizeof(Suftree))) == NIL(Suftree))
716: {
717: if(root)
718: delsuftree(root);
719: return NIL(Suftree);
720: }
721:
722: /* store where this list is for later deletion */
723: NEXT(list) = NIL(Suftree);
724: if(root)
725: {
726: for(root -= 1; NEXT(root) != NIL(Suftree); root = NEXT(root))
727: ;
728: NEXT(root) = list;
729: }
730:
731: return list+1;
732: }
733:
734: /* Build the tree.
735: ** Following are the meaning of a few important variables:
736: ** clocus: contracted locus, this variable defines the
737: ** tree node that points to the longest prefix
738: ** that terminates at a node in the current tree.
739: ** locus: defines a tree node to be constructed that
740: ** points to the longest prefix that can be defined.
741: ** Unless both clocus and locus equal the root,
742: ** we maintain the invariance that clocus is the
743: ** parent of locus.
744: ** link: defines the sublink of the locus of the prefix
745: ** of the previously inserted suffix.
746: */
747: Suftree *bldsuftree(src,len)
748: Element *src; /* the string to build the tree from */
749: long int len; /* the length of the string */
750: {
751: register Element *sp, *mp, *rescan, *endmatch, *endsrc;
752: register Suftree *match, *clocus, *locus, *link;
753: register long int mtlen, relen;
754: register int n;
755: Suftree *root, *list, *endlist;
756:
757: if(len == 0)
758: return NIL(Suftree);
759:
760: /* get initial space for the tree nodes */
761: root = NIL(Suftree);
762:
763: /* 2*len+1 is the maximum number of nodes that can be created */
764: n = 2*len+1;
765: if(n > ALLOCSIZE)
766: n = ALLOCSIZE;
767: if(!(list = getmem(root,n)))
768: return NIL(Suftree);
769: endlist = list + n;
770:
771: /* make root node */
772: root = list++;
773: LABEL(root) = NIL(Element);
774: CHILD(root) = NIL(Suftree);
775: LINK(root) = root;
776:
777: /* locus and contracted locus of the empty substring */
778: locus = clocus = root;
779:
780: /* the current match length */
781: mtlen = 0;
782:
783: /* the end of the source string */
784: endsrc = src+len;
785:
786: /* now build the tree */
787: for(; src < endsrc; ++src)
788: {
789: /* prepare for scanning the current suffix */
790: if(locus != root)
791: {
792: /* define the string to be rescanned */
793: rescan = LABEL(locus);
794: relen = LENGTH(locus);
795:
796: /* minus the first character of the previous prefix */
797: if(clocus == root)
798: {
799: rescan++;
800: if(relen > 0)
801: --relen;
802: }
803: }
804: else mtlen = relen = 0;
805:
806: /* the length of the known-to-be-matched part */
807: if(mtlen > 0)
808: --mtlen;
809: /**/ ASSERT(relen <= mtlen)
810:
811: /* use sublink to rescan */
812: link = LINK(clocus);
813:
814: /* rescan */
815: while(relen > 0)
816: {
817: /* find a child of link that starts with the
818: first character of rescan. We then know that
819: rescan must match a prefix of that child.
820: */
821: match = child_find(link,*rescan);
822: /**/ ASSERT(match != NULL)
823:
824: /* clocus will be the parent of the new link */
825: clocus = link;
826:
827: /* rescan contains LABEL(match) */
828: if(relen >= LENGTH(match))
829: {
830: link = match;
831: relen -= LENGTH(match);
832: rescan += LENGTH(match);
833: }
834: /* rescan is a proper prefix of LABEL(match) */
835: else
836: {
837: if(list >= endlist)
838: {
839: if(!(list = getmem(root,ALLOCSIZE)))
840: return NIL(Suftree);
841: else endlist = list+ALLOCSIZE;
842: }
843:
844: /* make an internal node labeled with rescan */
845: LABEL(list) = rescan;
846: LENGTH(list) = relen;
847: CHILD(list) = match;
848: SIBLING(list) = SIBLING(match);
849: LINK(list) = root;
850:
851: /* adjust label and pointer of old node */
852: SIBLING(match) = NIL(Suftree);
853: LABEL(match) += relen;
854: LENGTH(match) -= relen;
855:
856: CHILD(link) = list;
857: link = list++;
858: break;
859: }
860: }
861:
862: /* define sublink for the prefix of the last suffix */
863: if(locus != root)
864: LINK(locus) = link;
865:
866: /* scan to match as much as possible */
867: locus = link;
868: sp = src + mtlen;
869: while(sp < endsrc)
870: {
871: /* see if it matches some child of clocus */
872: if(!(match = child_find(locus,*sp)))
873: break;
874:
875: /* clocus will be the parent of the new locus */
876: clocus = locus;
877:
878: /* find the extend of the match */
879: mp = LABEL(match);
880: endmatch = mp + LENGTH(match);
881: for(; sp < endsrc && mp < endmatch; ++sp, ++mp)
882: if(*sp != *mp)
883: break;
884:
885: /* the whole node is matched */
886: if(mp >= endmatch)
887: {
888: locus = match;
889: mtlen += LENGTH(match);
890: }
891: /* found the extended locus of this suffix */
892: else
893: {
894: if(list >= endlist)
895: {
896: if(!(list = getmem(root,ALLOCSIZE)))
897: return NIL(Suftree);
898: else endlist = list+ALLOCSIZE;
899: }
900:
901: /* make a new internal node */
902: LABEL(list) = LABEL(match);
903: LENGTH(list) = mp - LABEL(match);
904: CHILD(list) = match;
905: SIBLING(list) = SIBLING(match);
906: LINK(list) = root;
907:
908: SIBLING(match) = NIL(Suftree);
909: LABEL(match) += LENGTH(list);
910: LENGTH(match) -= LENGTH(list);
911: mtlen += LENGTH(list);
912:
913: /* the new node is the locus for this suffix */
914: CHILD(locus) = list;
915: locus = list++;
916: break;
917: }
918: }
919:
920: if(list >= endlist)
921: {
922: if(!(list = getmem(root,ALLOCSIZE)))
923: return NIL(Suftree);
924: else endlist = list+ALLOCSIZE;
925: }
926:
927: /* make a new external node for the suffix */
928: SUFFIX(list) = src;
929: LABEL(list) = sp;
930: LENGTH(list) = endsrc-sp;
931: CHILD(list) = NIL(Suftree);
932:
933: /* hook it in as the first child of locus */
934: SIBLING(list) = CHILD(locus);
935: CHILD(locus) = list++;
936: }
937:
938: return root;
939: }
940:
941: /* Given a raw string and a string represented in a suffix tree,
942: match the string against the tree to find a longest matching
943: prefix of the string.
944: Return the length of the match and where it occurs in the
945: string represented by the tree.
946: */
947: long mtchsuftree(tree,str,len,mtchp)
948: Suftree *tree; /* the root of the suffix tree */
949: Element *str; /* the string to be matched */
950: long len; /* the length of the string */
951: Element **mtchp; /* to return the address of the match */
952: {
953: register Suftree *match;
954: register Element *sp, *mp, *endmp, *endstr;
955: register long mlen;
956:
957: mlen = 0;
958: endstr = str + len;
959: while(1)
960: {
961: if(!(match = child_find(tree,*str)))
962: break;
963:
964: /* find the extent of the match */
965: mp = LABEL(match);
966: endmp = mp + LENGTH(match);
967: for(sp = str; sp < endstr && mp < endmp; ++sp, ++mp)
968: if(*sp != *mp)
969: break;
970:
971: /* update the length of the match */
972: mlen += sp-str;
973:
974: /* prepare for next iteration */
975: tree = match;
976: str = sp;
977:
978: /* see if we have to work any more */
979: if(mp < endmp || str >= endstr)
980: break;
981: }
982:
983: if(mlen == 0) /* no match */
984: *mtchp = NIL(Element);
985: else
986: {
987: /* find where the match starts */
988: while(CHILD(tree))
989: tree = CHILD(tree);
990: *mtchp = SUFFIX(tree);
991: }
992:
993: return mlen;
994: }
995: 0707070000000000051006440044230044230000010000000425126261700002500000002273suftree.h Ugsf Ggsf /* the type of list elements we play with */
996: typedef char Element;
997:
998: /* for suffix trees, a tree node looks like this */
999: typedef struct _ts_
1000: {
1001: Element *_label; /* substring labeling the edge */
1002: long int _length; /* the length of the string */
1003: struct _ts_ *_child; /* list of children */
1004: struct _ts_ *_sibling; /* link for the child list */
1005: union
1006: { /* these two fields are mutual exclusive */
1007: struct _ts_ *_link; /* sub-link */
1008: Element *_suffix; /* suffix */
1009: } _uls_;
1010: } Suftree;
1011:
1012: /* short hand for various fields in a tree node */
1013: #define LABEL(n) ((n)->_label)
1014: #define LENGTH(n) ((n)->_length)
1015: #define CHILD(n) ((n)->_child)
1016: #define SIBLING(n) ((n)->_sibling)
1017: #define LINK(n) ((n)->_uls_._link)
1018: #define SUFFIX(n) ((n)->_uls_._suffix)
1019:
1020: extern Suftree *bldsuftree();
1021: extern long mtchsuftree();
1022:
1023:
1024: /* the following definitions are not to be seen by users */
1025: #ifdef _IN_SUF_TREE
1026: #ifdef DEBUG
1027: #define ASSERT(p) if(!(p)) abort();
1028: #else
1029: #define ASSERT(p)
1030: #endif /*DEBUG*/
1031:
1032: #ifndef NULL
1033: #define NULL (0L)
1034: #endif /*NULL*/
1035:
1036: #ifndef NIL
1037: #define NIL(type) ((type*)NULL)
1038: #endif /*NIL*/
1039:
1040: #define ALLOCSIZE 256 /* amount of nodes to allocate each time */
1041: #define NEXT(n) ((n)->_sibling)
1042:
1043: #endif /*_IN_SUF_TREE*/
1044: 0707070000000000061006440044230044230000010000000425126261700002400000007231update.c Ugsf Ggsf #include "update.h"
1045:
1046: /*
1047: ** Reconstruct a target file from a source file and a delta file.
1048: ** The delta file contain block move instructions computed by delta().
1049: **
1050: ** Written by Kiem-Phong Vo, 5/20/88
1051: */
1052:
1053: /* buffers for delta and target files */
1054: static unsigned char *Ddata, *Dnext, *Dend,
1055: *Tdata, *Tnext, *Tend;
1056: static int Dfd, Tfd;
1057:
1058: #define delinit(buf,size,fd) (Ddata=Dnext=Dend=buf, Dfd=fd)
1059: #define tarinit(buf,size,fd) (Tdata=Tnext=buf, Tend=buf+size, Tfd=fd)
1060: #define tarflush() (write(Tfd,Tdata,Tnext-Tdata) >= 0 ? (Tnext=Tdata,0) : -1)
1061:
1062: /* read a byte from delta file */
1063: static int delgetc()
1064: {
1065: if(Dnext >= Dend)
1066: {
1067: register int n;
1068: if((n = read(Dfd,Ddata,BUFSIZE)) <= 0)
1069: return -1;
1070: Dnext = Ddata, Dend = Ddata+n;
1071: }
1072: return (int)(*Dnext++);
1073: }
1074:
1075: /* read a long value from delta file */
1076: static long delgetl(n)
1077: register int n;
1078: {
1079: register long lv;
1080:
1081: lv = 0;
1082: for(; n > 0; --n)
1083: {
1084: register int v;
1085: if((v = delgetc()) < 0)
1086: return -1;
1087: lv = lv*256 + v;
1088: }
1089: return lv;
1090: }
1091:
1092: /* transfer a number of bytes from a file to the target file */
1093: static int filetransfer(fd,n)
1094: int fd;
1095: long n;
1096: {
1097: while(n > 0)
1098: {
1099: register int r;
1100:
1101: if(Tnext >= Tend)
1102: if(tarflush() < 0)
1103: return -1;
1104: r = n > (Tend-Tnext) ? (Tend-Tnext) : n;
1105: if(read(fd,Tnext,r) != r)
1106: return -1;
1107: Tnext += r;
1108: n -= r;
1109: }
1110: return 0;
1111: }
1112:
1113: /* transfer a number of bytes from a memory area to the target file */
1114: static int memtransfer(addr,n)
1115: unsigned char *addr;
1116: register long n;
1117: {
1118: while(n > 0)
1119: {
1120: register int r;
1121:
1122: if(Tnext >= Tend)
1123: if(tarflush() < 0)
1124: return -1;
1125: r = n > (Tend-Tnext) ? (Tend-Tnext) : n;
1126: memcopy(Tnext,addr,r);
1127: Tnext += r;
1128: addr += r;
1129: n -= r;
1130: }
1131: return 0;
1132: }
1133:
1134: /* transfer a number of bytes from delta to target */
1135: static int deltransfer(n)
1136: long n;
1137: {
1138: register int d;
1139:
1140: /* transfer what's already in core */
1141: if((d = Dend-Dnext) > 0)
1142: {
1143: register int w = n <= d ? n : d;
1144:
1145: if(w > (Tend-Tnext))
1146: if(tarflush() < 0)
1147: return -1;
1148:
1149: /* copy from the delta buffer */
1150: memcopy(Tnext,Dnext,w);
1151: Dnext += w;
1152: Tnext += w;
1153: n -= w;
1154: }
1155:
1156: return n > 0 ? filetransfer(Dfd,n) : 0;
1157: }
1158:
1159: update(srcfd,offset,delfd,tarfd)
1160: int srcfd;
1161: long offset;
1162: int delfd;
1163: int tarfd;
1164: {
1165: register int i;
1166: register long n_src, n_tar;
1167: unsigned char delbuf[BUFSIZE], tarbuf[BUFSIZE];
1168: unsigned char *src, *tar, *malloc();
1169:
1170: /* start buffering system for delta file */
1171: delinit(delbuf,BUFSIZE,delfd);
1172:
1173: /* read the file sizes */
1174: if((i = delgetc()) < 0 || (i&DELTA_TYPE) != DELTA_TYPE)
1175: return -1;
1176: if((n_src = delgetl((i>>3)&07)) < 0 || (n_tar = delgetl(i&07)) < 0)
1177: return -1;
1178:
1179: /* make data area for target file */
1180: if(tar = malloc(n_tar)) /* assignment = */
1181: tarinit(tar,n_tar,tarfd);
1182: else tarinit(tarbuf,BUFSIZE,tarfd);
1183:
1184: /* read in source file if possible to avoid lseek */
1185: if(src = malloc(n_src)) /* assignment = */
1186: {
1187: lseek(srcfd,offset,0);
1188: if(read(srcfd,src,n_src) != n_src)
1189: return -1;
1190: }
1191:
1192: while((i = delgetc()) >= 0)
1193: {
1194: register long size, addr;
1195:
1196: if((size = delgetl((i>>3)&07)) < 0)
1197: return -1;
1198: switch(i&DELTA_TYPE)
1199: {
1200: default :
1201: return -1;
1202: case DELTA_TYPE :
1203: /* sync delta file pointer */
1204: if((addr = Dend-Dnext) > 0)
1205: lseek(Dfd,-addr,1);
1206: /* flush output buffer */
1207: if(tarflush() < 0)
1208: return -1;
1209: /* free space used */
1210: if(src)
1211: free(src);
1212: if(tar)
1213: free(tar);
1214: return 0;
1215: case DELTA_MOVE :
1216: if((addr = delgetl(i&07)) < 0)
1217: return -1;
1218: if(src)
1219: {
1220: if(memtransfer(src+addr,size) < 0)
1221: return -1;
1222: }
1223: else
1224: {
1225: if(lseek(srcfd,offset+addr,0) < 0)
1226: return -1;
1227: if(filetransfer(srcfd,size) < 0)
1228: return -1;
1229: }
1230: break;
1231: case DELTA_ADD :
1232: if(deltransfer(size) < 0)
1233: return -1;
1234: break;
1235: }
1236: }
1237:
1238: /* should never get here */
1239: return -1;
1240: }
1241: 0707070000000000071006440044230044230000010000000425126261700002400000001253update.h Ugsf Ggsf /* values for the instruction field */
1242: #define DELTA_TYPE 0300
1243: #define DELTA_MOVE 0200
1244: #define DELTA_ADD 0100
1245:
1246: /* number of bytes required to code a value */
1247: #define BASE 256
1248: #define ONE (BASE)
1249: #define TWO (BASE*BASE)
1250: #define THREE (BASE*BASE*BASE)
1251: #define NBYTE(v) ((v) < ONE ? 1 : ((v) < TWO ? 2 : ((v) < THREE ? 3 : 4)))
1252:
1253: #define BUFSIZE 2048
1254:
1255: #ifndef NULL
1256: #define NULL (0L)
1257: #endif
1258:
1259: #ifndef NIL
1260: #define NIL(type) ((type*)NULL)
1261: #endif
1262:
1263: /* function to copy data from one area to another */
1264: #ifdef SYSV
1265: #define memcopy(to,fr,n) memcpy(to,fr,n)
1266: #define memzero(s,n) memset(s,0,n)
1267: #endif
1268:
1269: #ifdef BSD
1270: #define memcopy(to,fr,n) bcopy(fr,to,n)
1271: #define memzero(s,n) bzero(s,n)
1272: #endif
1273: 0707070000000000101006440044230044230000010000000475460055300002300000003501Mamfile Ugsf Ggsf note # # make abstract machine file generated from Makefile # #
1274: setv AS as
1275: setv ASFLAGS
1276: setv AR ar
1277: setv ARFLAGS cr
1278: setv CC cc
1279: setv CCFLAGS "-O"
1280: setv CPP "$CC -E"
1281: setv CPIO cpio
1282: setv CPIOFLAGS
1283: setv F77 f77
1284: setv INSTALLROOT $HOME
1285: setv LD ld
1286: setv LDFLAGS
1287: setv LEX lex
1288: setv LEXFLAGS
1289: setv LPR lpr
1290: setv LPRFLAGS
1291: setv M4FLAGS
1292: setv MAKE nmake
1293: setv MAKEFLAGS
1294: setv PR pr
1295: setv PRFLAGS
1296: setv TAR tar
1297: setv YACC yacc
1298: setv YACCFLAGS -d
1299: make install
1300: make all
1301: make libodelta.a
1302: attr arch
1303: make delta.o
1304: make delta.c
1305: attr perm
1306: attr scan
1307: make suftree.h
1308: attr perm
1309: attr scan
1310: attr impl
1311: done suftree.h
1312: make update.h
1313: attr perm
1314: attr scan
1315: attr impl
1316: done update.h
1317: done delta.c
1318: prev delta.c
1319: setv SYSV -DSYSV
1320: exec $CC $CCFLAGS -I. "$SYSV" -c delta.c
1321: done delta.o
1322: make mtchstring.o
1323: make mtchstring.c
1324: attr perm
1325: attr scan
1326: prev update.h
1327: done mtchstring.c
1328: prev mtchstring.c
1329: exec $CC $CCFLAGS -I. "$SYSV" -c mtchstring.c
1330: done mtchstring.o
1331: make suftree.o
1332: make suftree.c
1333: attr perm
1334: attr scan
1335: prev suftree.h
1336: done suftree.c
1337: prev suftree.c
1338: exec $CC $CCFLAGS -I. -c suftree.c
1339: done suftree.o
1340: make update.o
1341: make update.c
1342: attr perm
1343: attr scan
1344: prev update.h
1345: done update.c
1346: prev update.c
1347: exec $CC $CCFLAGS -I. "$SYSV" -c update.c
1348: done update.o
1349: exec $AR cr libodelta.a delta.o mtchstring.o suftree.o update.o
1350: exec (ranlib libodelta.a) >/dev/null 2>&1 || true
1351: done libodelta.a
1352: done all
1353: make $INSTALLROOT/lib
1354: exec if test ! -d $INSTALLROOT/lib
1355: .... then rm -rf $INSTALLROOT/lib && mkdir $INSTALLROOT/lib || { rm -rf $INSTALLROOT && mkdir $INSTALLROOT && mkdir $INSTALLROOT/lib ;} || true
1356: .... fi
1357: done $INSTALLROOT/lib
1358: make $INSTALLROOT/lib/libodelta.a
1359: attr arch
1360: prev libodelta.a
1361: exec { cp libodelta.a $INSTALLROOT/lib/libodelta.a 2>/dev/null ;} || true
1362: exec (ranlib $INSTALLROOT/lib/libodelta.a) >/dev/null 2>&1 || true
1363: done $INSTALLROOT/lib/libodelta.a
1364: done install
1365: 0707070000000000000000000000000000000000010000000000000000000001300000000000TRAILER!!!
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.