|
|
1.1 root 1: static char sccsid[] = "@(#)diffreg.c 4.7 2/28/83";
2:
3: #include "diff.h"
4: /*
5: * diff - compare two files.
6: */
7:
8: /*
9: * Uses an algorithm due to Harold Stone, which finds
10: * a pair of longest identical subsequences in the two
11: * files.
12: *
13: * The major goal is to generate the match vector J.
14: * J[i] is the index of the line in file1 corresponding
15: * to line i file0. J[i] = 0 if there is no
16: * such line in file1.
17: *
18: * Lines are hashed so as to work in core. All potential
19: * matches are located by sorting the lines of each file
20: * on the hash (called ``value''). In particular, this
21: * collects the equivalence classes in file1 together.
22: * Subroutine equiv replaces the value of each line in
23: * file0 by the index of the first element of its
24: * matching equivalence in (the reordered) file1.
25: * To save space equiv squeezes file1 into a single
26: * array member in which the equivalence classes
27: * are simply concatenated, except that their first
28: * members are flagged by changing sign.
29: *
30: * Next the indices that point into member are unsorted into
31: * array class according to the original order of file0.
32: *
33: * The cleverness lies in routine stone. This marches
34: * through the lines of file0, developing a vector klist
35: * of "k-candidates". At step i a k-candidate is a matched
36: * pair of lines x,y (x in file0 y in file1) such that
37: * there is a common subsequence of length k
38: * between the first i lines of file0 and the first y
39: * lines of file1, but there is no such subsequence for
40: * any smaller y. x is the earliest possible mate to y
41: * that occurs in such a subsequence.
42: *
43: * Whenever any of the members of the equivalence class of
44: * lines in file1 matable to a line in file0 has serial number
45: * less than the y of some k-candidate, that k-candidate
46: * with the smallest such y is replaced. The new
47: * k-candidate is chained (via pred) to the current
48: * k-1 candidate so that the actual subsequence can
49: * be recovered. When a member has serial number greater
50: * that the y of all k-candidates, the klist is extended.
51: * At the end, the longest subsequence is pulled out
52: * and placed in the array J by unravel
53: *
54: * With J in hand, the matches there recorded are
55: * check'ed against reality to assure that no spurious
56: * matches have crept in due to hashing. If they have,
57: * they are broken, and "jackpot" is recorded--a harmless
58: * matter except that a true match for a spuriously
59: * mated line may now be unnecessarily reported as a change.
60: *
61: * Much of the complexity of the program comes simply
62: * from trying to minimize core utilization and
63: * maximize the range of doable problems by dynamically
64: * allocating what is needed and reusing what is not.
65: * The core requirements for problems larger than somewhat
66: * are (in words) 2*length(file0) + length(file1) +
67: * 3*(number of k-candidates installed), typically about
68: * 6n words for files of length n.
69: */
70:
71: #define prints(s) fputs(s,stdout)
72:
73: FILE *input[2];
74: FILE *fopen();
75:
76: struct cand {
77: int x;
78: int y;
79: int pred;
80: } cand;
81: struct line {
82: int serial;
83: int value;
84: } *file[2], line;
85: int len[2];
86: struct line *sfile[2]; /* shortened by pruning common prefix and suffix */
87: int slen[2];
88: int pref, suff; /* length of prefix and suffix */
89: int *class; /* will be overlaid on file[0] */
90: int *member; /* will be overlaid on file[1] */
91: int *klist; /* will be overlaid on file[0] after class */
92: struct cand *clist; /* merely a free storage pot for candidates */
93: int clen = 0;
94: int *J; /* will be overlaid on class */
95: long *ixold; /* will be overlaid on klist */
96: long *ixnew; /* will be overlaid on file[1] */
97:
98: diffreg()
99: {
100: register int i, j;
101: FILE *f1, *f2;
102: char buf1[BUFSIZ], buf2[BUFSIZ];
103:
104: if (hflag) {
105: diffargv[0] = "diffh";
106: execv(diffh, diffargv);
107: fprintf(stderr, "diff: ");
108: perror(diffh);
109: done();
110: }
111: dummy = malloc(1);
112: if ((stb1.st_mode & S_IFMT) == S_IFDIR)
113: file1 = splice(file1, file2);
114: else if ((stb2.st_mode & S_IFMT) == S_IFDIR)
115: file2 = splice(file2, file1);
116: else if (!strcmp(file1, "-")) {
117: if (!strcmp(file2, "-")) {
118: fprintf(stderr, "diff: can't specify - -\n");
119: done();
120: }
121: file1 = copytemp();
122: } else if (!strcmp(file2, "-"))
123: file2 = copytemp();
124: if ((f1 = fopen(file1, "r")) == NULL) {
125: fprintf(stderr, "diff: ");
126: perror(file1);
127: done();
128: }
129: if ((f2 = fopen(file2, "r")) == NULL) {
130: fprintf(stderr, "diff: ");
131: perror(file2);
132: fclose(f1);
133: done();
134: }
135: if (stb1.st_size != stb2.st_size)
136: goto notsame;
137: for (;;) {
138: i = fread(buf1, 1, BUFSIZ, f1);
139: j = fread(buf2, 1, BUFSIZ, f2);
140: if (i < 0 || j < 0 || i != j)
141: goto notsame;
142: if (i == 0 && j == 0) {
143: fclose(f1);
144: fclose(f2);
145: status = 0; /* files don't differ */
146: goto same;
147: }
148: for (j = 0; j < i; j++)
149: if (buf1[j] != buf2[j])
150: goto notsame;
151: }
152: notsame:
153: /*
154: * Files certainly differ at this point; set status accordingly
155: */
156: status = 1;
157: if (!asciifile(f1) || !asciifile(f2)) {
158: printf("Binary files %s and %s differ\n", file1, file2);
159: fclose(f1);
160: fclose(f2);
161: done();
162: }
163: prepare(0, f1);
164: prepare(1, f2);
165: fclose(f1);
166: fclose(f2);
167: prune();
168: sort(sfile[0],slen[0]);
169: sort(sfile[1],slen[1]);
170:
171: member = (int *)file[1];
172: equiv(sfile[0], slen[0], sfile[1], slen[1], member);
173: member = (int *)ralloc((char *)member,(slen[1]+2)*sizeof(int));
174:
175: class = (int *)file[0];
176: unsort(sfile[0], slen[0], class);
177: class = (int *)ralloc((char *)class,(slen[0]+2)*sizeof(int));
178:
179: klist = (int *)talloc((slen[0]+2)*sizeof(int));
180: clist = (struct cand *)talloc(sizeof(cand));
181: i = stone(class, slen[0], member, klist);
182: free((char *)member);
183: free((char *)class);
184:
185: J = (int *)talloc((len[0]+2)*sizeof(int));
186: unravel(klist[i]);
187: free((char *)clist);
188: free((char *)klist);
189:
190: ixold = (long *)talloc((len[0]+2)*sizeof(long));
191: ixnew = (long *)talloc((len[1]+2)*sizeof(long));
192: check();
193: output();
194: status = anychange;
195: same:
196: if (opt == D_CONTEXT && anychange == 0)
197: printf("No differences encountered\n");
198: done();
199: }
200:
201: char *
202: copytemp()
203: {
204: char buf[BUFSIZ];
205: register int i, f;
206:
207: signal(SIGHUP,done);
208: signal(SIGINT,done);
209: signal(SIGPIPE,done);
210: signal(SIGTERM,done);
211: tempfile = mktemp("/tmp/dXXXXX");
212: f = creat(tempfile,0600);
213: if (f < 0) {
214: fprintf(stderr, "diff: ");
215: perror(tempfile);
216: done();
217: }
218: while ((i = read(0,buf,BUFSIZ)) > 0)
219: if (write(f,buf,i) != i) {
220: fprintf(stderr, "diff: ");
221: perror(tempfile);
222: done();
223: }
224: close(f);
225: return (tempfile);
226: }
227:
228: char *
229: splice(dir, file)
230: char *dir, *file;
231: {
232: char *tail;
233: char buf[BUFSIZ];
234:
235: if (!strcmp(file, "-")) {
236: fprintf(stderr, "diff: can't specify - with other arg directory\n");
237: done();
238: }
239: tail = rindex(file, '/');
240: if (tail == 0)
241: tail = file;
242: else
243: tail++;
244: sprintf(buf, "%s/%s", dir, tail);
245: return (savestr(buf));
246: }
247:
248: prepare(i, fd)
249: int i;
250: FILE *fd;
251: {
252: register struct line *p;
253: register j,h;
254:
255: fseek(fd, (long)0, 0);
256: p = (struct line *)talloc(3*sizeof(line));
257: for(j=0; h=readhash(fd);) {
258: p = (struct line *)ralloc((char *)p,(++j+3)*sizeof(line));
259: p[j].value = h;
260: }
261: len[i] = j;
262: file[i] = p;
263: }
264:
265: prune()
266: {
267: register i,j;
268: for(pref=0;pref<len[0]&&pref<len[1]&&
269: file[0][pref+1].value==file[1][pref+1].value;
270: pref++ ) ;
271: for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
272: file[0][len[0]-suff].value==file[1][len[1]-suff].value;
273: suff++) ;
274: for(j=0;j<2;j++) {
275: sfile[j] = file[j]+pref;
276: slen[j] = len[j]-pref-suff;
277: for(i=0;i<=slen[j];i++)
278: sfile[j][i].serial = i;
279: }
280: }
281:
282: equiv(a,n,b,m,c)
283: struct line *a, *b;
284: int *c;
285: {
286: register int i, j;
287: i = j = 1;
288: while(i<=n && j<=m) {
289: if(a[i].value <b[j].value)
290: a[i++].value = 0;
291: else if(a[i].value == b[j].value)
292: a[i++].value = j;
293: else
294: j++;
295: }
296: while(i <= n)
297: a[i++].value = 0;
298: b[m+1].value = 0;
299: j = 0;
300: while(++j <= m) {
301: c[j] = -b[j].serial;
302: while(b[j+1].value == b[j].value) {
303: j++;
304: c[j] = b[j].serial;
305: }
306: }
307: c[j] = -1;
308: }
309:
310: stone(a,n,b,c)
311: int *a;
312: int *b;
313: int *c;
314: {
315: register int i, k,y;
316: int j, l;
317: int oldc, tc;
318: int oldl;
319: k = 0;
320: c[0] = newcand(0,0,0);
321: for(i=1; i<=n; i++) {
322: j = a[i];
323: if(j==0)
324: continue;
325: y = -b[j];
326: oldl = 0;
327: oldc = c[0];
328: do {
329: if(y <= clist[oldc].y)
330: continue;
331: l = search(c, k, y);
332: if(l!=oldl+1)
333: oldc = c[l-1];
334: if(l<=k) {
335: if(clist[c[l]].y <= y)
336: continue;
337: tc = c[l];
338: c[l] = newcand(i,y,oldc);
339: oldc = tc;
340: oldl = l;
341: } else {
342: c[l] = newcand(i,y,oldc);
343: k++;
344: break;
345: }
346: } while((y=b[++j]) > 0);
347: }
348: return(k);
349: }
350:
351: newcand(x,y,pred)
352: {
353: register struct cand *q;
354: clist = (struct cand *)ralloc((char *)clist,++clen*sizeof(cand));
355: q = clist + clen -1;
356: q->x = x;
357: q->y = y;
358: q->pred = pred;
359: return(clen-1);
360: }
361:
362: search(c, k, y)
363: int *c;
364: {
365: register int i, j, l;
366: int t;
367: if(clist[c[k]].y<y) /*quick look for typical case*/
368: return(k+1);
369: i = 0;
370: j = k+1;
371: while((l=(i+j)/2) > i) {
372: t = clist[c[l]].y;
373: if(t > y)
374: j = l;
375: else if(t < y)
376: i = l;
377: else
378: return(l);
379: }
380: return(l+1);
381: }
382:
383: unravel(p)
384: {
385: register int i;
386: register struct cand *q;
387: for(i=0; i<=len[0]; i++)
388: J[i] = i<=pref ? i:
389: i>len[0]-suff ? i+len[1]-len[0]:
390: 0;
391: for(q=clist+p;q->y!=0;q=clist+q->pred)
392: J[q->x+pref] = q->y+pref;
393: }
394:
395: /* check does double duty:
396: 1. ferret out any fortuitous correspondences due
397: to confounding by hashing (which result in "jackpot")
398: 2. collect random access indexes to the two files */
399:
400: check()
401: {
402: register int i, j;
403: int jackpot;
404: long ctold, ctnew;
405: char c,d;
406:
407: if ((input[0] = fopen(file1,"r")) == NULL) {
408: perror(file1);
409: done();
410: }
411: if ((input[1] = fopen(file2,"r")) == NULL) {
412: perror(file2);
413: done();
414: }
415: j = 1;
416: ixold[0] = ixnew[0] = 0;
417: jackpot = 0;
418: ctold = ctnew = 0;
419: for(i=1;i<=len[0];i++) {
420: if(J[i]==0) {
421: ixold[i] = ctold += skipline(0);
422: continue;
423: }
424: while(j<J[i]) {
425: ixnew[j] = ctnew += skipline(1);
426: j++;
427: }
428: for(;;) {
429: c = getc(input[0]);
430: d = getc(input[1]);
431: ctold++;
432: ctnew++;
433: if(bflag && isspace(c) && isspace(d)) {
434: do {
435: if(c=='\n') break;
436: ctold++;
437: } while(isspace(c=getc(input[0])));
438: do {
439: if(d=='\n') break;
440: ctnew++;
441: } while(isspace(d=getc(input[1])));
442: }
443: if(c!=d) {
444: jackpot++;
445: J[i] = 0;
446: if(c!='\n')
447: ctold += skipline(0);
448: if(d!='\n')
449: ctnew += skipline(1);
450: break;
451: }
452: if(c=='\n')
453: break;
454: }
455: ixold[i] = ctold;
456: ixnew[j] = ctnew;
457: j++;
458: }
459: for(;j<=len[1];j++) {
460: ixnew[j] = ctnew += skipline(1);
461: }
462: fclose(input[0]);
463: fclose(input[1]);
464: /*
465: if(jackpot)
466: fprintf(stderr, "jackpot\n");
467: */
468: }
469:
470: sort(a,n) /*shellsort CACM #201*/
471: struct line *a;
472: {
473: struct line w;
474: register int j,m;
475: struct line *ai;
476: register struct line *aim;
477: int k;
478: for(j=1;j<=n;j*= 2)
479: m = 2*j - 1;
480: for(m/=2;m!=0;m/=2) {
481: k = n-m;
482: for(j=1;j<=k;j++) {
483: for(ai = &a[j]; ai > a; ai -= m) {
484: aim = &ai[m];
485: if(aim < ai)
486: break; /*wraparound*/
487: if(aim->value > ai[0].value ||
488: aim->value == ai[0].value &&
489: aim->serial > ai[0].serial)
490: break;
491: w.value = ai[0].value;
492: ai[0].value = aim->value;
493: aim->value = w.value;
494: w.serial = ai[0].serial;
495: ai[0].serial = aim->serial;
496: aim->serial = w.serial;
497: }
498: }
499: }
500: }
501:
502: unsort(f, l, b)
503: struct line *f;
504: int *b;
505: {
506: register int *a;
507: register int i;
508: a = (int *)talloc((l+1)*sizeof(int));
509: for(i=1;i<=l;i++)
510: a[f[i].serial] = f[i].value;
511: for(i=1;i<=l;i++)
512: b[i] = a[i];
513: free((char *)a);
514: }
515:
516: skipline(f)
517: {
518: register i;
519: char c;
520:
521: for(i=1;(c=getc(input[f]))!='\n';i++)
522: if (c < 0)
523: return(i);
524: return(i);
525: }
526:
527: output()
528: {
529: int m;
530: register int i0, i1, j1;
531: int j0;
532: input[0] = fopen(file1,"r");
533: input[1] = fopen(file2,"r");
534: m = len[0];
535: J[0] = 0;
536: J[m+1] = len[1]+1;
537: if(opt!=D_EDIT) for(i0=1;i0<=m;i0=i1+1) {
538: while(i0<=m&&J[i0]==J[i0-1]+1) i0++;
539: j0 = J[i0-1]+1;
540: i1 = i0-1;
541: while(i1<m&&J[i1+1]==0) i1++;
542: j1 = J[i1+1]-1;
543: J[i1] = j1;
544: change(i0,i1,j0,j1);
545: } else for(i0=m;i0>=1;i0=i1-1) {
546: while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
547: j0 = J[i0+1]-1;
548: i1 = i0+1;
549: while(i1>1&&J[i1-1]==0) i1--;
550: j1 = J[i1-1]+1;
551: J[i1] = j1;
552: change(i1,i0,j1,j0);
553: }
554: if(m==0)
555: change(1,0,1,len[1]);
556: if (opt==D_IFDEF) {
557: for (;;) {
558: #define c i0
559: c = getc(input[0]);
560: if (c < 0)
561: return;
562: putchar(c);
563: }
564: #undef c
565: }
566: }
567:
568: /* indicate that there is a difference between lines a and b of the from file
569: to get to lines c to d of the to file.
570: If a is greater then b then there are no lines in the from file involved
571: and this means that there were lines appended (beginning at b).
572: If c is greater than d then there are lines missing from the to file.
573: */
574: change(a,b,c,d)
575: {
576: char ch;
577: int lowa,upb,lowc,upd;
578: struct stat stbuf;
579:
580: if (opt != D_IFDEF && a>b && c>d)
581: return;
582: if (anychange == 0) {
583: anychange = 1;
584: if(opt == D_CONTEXT) {
585: printf("*** %s ", file1);
586: stat(file1, &stbuf);
587: printf("%s--- %s ",
588: ctime(&stbuf.st_mtime), file2);
589: stat(file2, &stbuf);
590: printf("%s", ctime(&stbuf.st_mtime));
591: }
592: }
593: if (a <= b && c <= d)
594: ch = 'c';
595: else
596: ch = (a <= b) ? 'd' : 'a';
597: if(opt == D_CONTEXT) {
598: lowa = max(1, a-context);
599: upb = min(len[0], b+context);
600: lowc = max(1, c-context);
601: upd = min(len[1], d+context);
602:
603: /* print out from file */
604: printf("***************\n*** ");
605: range(lowa,upb,",");
606: printf("\n");
607: if (ch == 'a')
608: fetch(ixold,lowa,upb,input[0]," ");
609: else {
610: fetch(ixold,lowa,a-1,input[0]," ");
611: fetch(ixold,a,b,input[0],ch == 'c' ? "! " : "- ");
612: fetch(ixold,b+1,upb,input[0]," ");
613: }
614: putchar('\n');
615: printf("--- ");
616: range(lowc,upd,",");
617: printf(" -----\n");
618: if (ch == 'd')
619: fetch(ixnew,lowc,upd,input[1]," ");
620: else {
621: fetch(ixnew,lowc,c-1,input[1]," ");
622: fetch(ixnew,c,d,input[1],ch == 'c' ? "! " : "+ ");
623: fetch(ixnew,d+1,upd,input[1]," ");
624: }
625: return;
626: }
627: switch (opt) {
628:
629: case D_NORMAL:
630: case D_EDIT:
631: range(a,b,",");
632: putchar(a>b?'a':c>d?'d':'c');
633: if(opt==D_NORMAL)
634: range(c,d,",");
635: putchar('\n');
636: break;
637: case D_REVERSE:
638: putchar(a>b?'a':c>d?'d':'c');
639: range(a,b," ");
640: putchar('\n');
641: break;
642: }
643: if(opt == D_NORMAL || opt == D_IFDEF) {
644: fetch(ixold,a,b,input[0],"< ", 1);
645: if(a<=b&&c<=d && opt == D_NORMAL)
646: prints("---\n");
647: }
648: fetch(ixnew,c,d,input[1],opt==D_NORMAL?"> ":"", 0);
649: if ((opt ==D_EDIT || opt == D_REVERSE) && c<=d)
650: prints(".\n");
651: if (inifdef) {
652: fprintf(stdout, "#endif %s\n", endifname);
653: inifdef = 0;
654: }
655: }
656:
657: range(a,b,separator)
658: char *separator;
659: {
660: printf("%d", a>b?b:a);
661: if(a<b) {
662: printf("%s%d", separator, b);
663: }
664: }
665:
666: fetch(f,a,b,lb,s,oldfile)
667: long *f;
668: FILE *lb;
669: char *s;
670: {
671: register int i, j;
672: register int nc;
673: int oneflag = (*ifdef1!='\0') != (*ifdef2!='\0');
674:
675: /*
676: * When doing #ifdef's, copy down to current line
677: * if this is the first file, so that stuff makes it to output.
678: */
679: if (opt == D_IFDEF && oldfile){
680: long curpos = ftell(lb);
681: /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
682: nc = f[a>b? b : a-1 ] - curpos;
683: for (i = 0; i < nc; i++)
684: putchar(getc(lb));
685: }
686: if (a > b)
687: return;
688: if (opt == D_IFDEF) {
689: if (inifdef)
690: fprintf(stdout, "#else %s%s\n", oneflag && oldfile==1 ? "!" : "", ifdef2);
691: else {
692: if (oneflag) {
693: /* There was only one ifdef given */
694: endifname = ifdef2;
695: if (oldfile)
696: fprintf(stdout, "#ifndef %s\n", endifname);
697: else
698: fprintf(stdout, "#ifdef %s\n", endifname);
699: }
700: else {
701: endifname = oldfile ? ifdef1 : ifdef2;
702: fprintf(stdout, "#ifdef %s\n", endifname);
703: }
704: }
705: inifdef = 1+oldfile;
706: }
707: for(i=a;i<=b;i++) {
708: fseek(lb,f[i-1],0);
709: nc = f[i]-f[i-1];
710: if (opt != D_IFDEF)
711: prints(s);
712: for(j=0;j<nc;j++)
713: putchar(getc(lb));
714: }
715: if (inifdef && !wantelses) {
716: fprintf(stdout, "#endif %s\n", endifname);
717: inifdef = 0;
718: }
719: }
720:
721: #define HALFLONG 16
722: #define low(x) (x&((1L<<HALFLONG)-1))
723: #define high(x) (x>>HALFLONG)
724:
725: /*
726: * hashing has the effect of
727: * arranging line in 7-bit bytes and then
728: * summing 1-s complement in 16-bit hunks
729: */
730: readhash(f)
731: FILE *f;
732: {
733: long sum;
734: register unsigned shift;
735: register space;
736: register t;
737: sum = 1;
738: space = 0;
739: if(!bflag) for(shift=0;(t=getc(f))!='\n';shift+=7) {
740: if(t==-1)
741: return(0);
742: sum += (long)t << (shift%=HALFLONG);
743: }
744: else for(shift=0;;) {
745: switch(t=getc(f)) {
746: case -1:
747: return(0);
748: case '\t':
749: case ' ':
750: space++;
751: continue;
752: default:
753: if(space) {
754: shift += 7;
755: space = 0;
756: }
757: sum += (long)t << (shift%=HALFLONG);
758: shift += 7;
759: continue;
760: case '\n':
761: break;
762: }
763: break;
764: }
765: sum = low(sum) + high(sum);
766: return((short)low(sum) + (short)high(sum));
767: }
768:
769: #include <a.out.h>
770:
771: asciifile(f)
772: FILE *f;
773: {
774: char buf[BUFSIZ];
775: register int cnt;
776: register char *cp;
777:
778: fseek(f, (long)0, 0);
779: cnt = fread(buf, 1, BUFSIZ, f);
780: if (cnt >= sizeof (struct exec)) {
781: struct exec hdr;
782: hdr = *(struct exec *)buf;
783: if (!N_BADMAG(hdr))
784: return (0);
785: }
786: cp = buf;
787: while (--cnt >= 0)
788: if (*cp++ & 0200)
789: return (0);
790: return (1);
791: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.