|
|
1.1 root 1: /* finddupes.c */
2:
3: /* Search for (and optionally delete) duplicate files in multiple */
4: /* directories based on size and either MD5 or CRC-32 "chksums". */
5:
6: #include <stdio.h>
7: #include <time.h>
8:
9: #include "dirwrap.h"
10: #ifdef USE_MD5
11: #include "md5.h"
12: #else /* CRC-32 */
13: #include "crc32.h"
14: #endif
15:
16: typedef struct {
17: char path[MAX_PATH+1];
18: long length;
19: time_t date;
20: #ifdef USE_MD5
21: BYTE chksum[MD5_DIGEST_SIZE];
22: #else
23: ulong chksum;
24: #endif
25: } file_t;
26:
27: file_t* file;
28: ulong file_count=0;
29:
30: int fchksum(const char* fname, long length,
31: #ifdef USE_MD5
32: BYTE*
33: #else
34: ulong*
35: #endif
36: chksum)
37: {
38: BYTE* buf=NULL;
39: FILE* fp;
40:
41: if((fp=fopen(fname,"rb"))==NULL) {
42: perror(fname);
43: return(-1);
44: }
45:
46: if(length && (buf=malloc(length))==NULL) {
47: printf("!Error allocating %ld bytes of memory for %s\n"
48: ,length,fname);
49: fclose(fp);
50: return(-1);
51: }
52:
53: if(fread(buf,sizeof(BYTE),length,fp) != length) {
54: perror(fname);
55: fclose(fp);
56: FREE_AND_NULL(buf);
57: return(-1);
58: }
59:
60: fclose(fp);
61: #ifdef USE_MD5
62: MD5_calc(chksum, buf, length);
63: #else
64: *chksum = crc32(buf, length);
65: #endif
66: FREE_AND_NULL(buf);
67: return(0);
68: }
69:
/* Return the current local time as "Mmm dd hh:mm:ss" (the ctime() text */
/* without the leading day-of-week and the trailing year/newline). */
/* The result points into ctime()'s static buffer, so it is overwritten */
/* by the next call; an empty string is returned if ctime() fails. */
char* timestr(void)
{
	static char	none[]="";
	char*		p;
	time_t		t=time(NULL);

	p=ctime(&t);
	if(p==NULL)		/* ctime() may fail for an unrepresentable time */
		return(none);
	p[19]=0;		/* chop off year and \n */
	return(p+4);	/* skip day-of-week */
}
78:
79: int searchdir(const char* path, BOOL recursive, ulong compare_bytes)
80: {
81: DIR* dir;
82: struct dirent* ent;
83: file_t* fp;
84: char fpath[MAX_PATH+1];
85:
86: printf("%s begin searching %s\n",timestr(), path);
87: if((dir = opendir(path))==NULL) {
88: perror(path);
89: return(1);
90: }
91:
92: while((ent = readdir(dir))!=NULL) {
93: if(kbhit())
94: break;
95: if(strcmp(ent->d_name,".")==0 || strcmp(ent->d_name,"..")==0)
96: continue;
97: strcpy(fpath,path);
98: backslash(fpath);
99: strcat(fpath,ent->d_name);
100: if(isdir(fpath)) {
101: if(recursive)
102: searchdir(fpath, recursive, compare_bytes);
103: continue;
104: }
105:
106: file=realloc(file,sizeof(file_t)*(file_count+1));
107: if(file==NULL) {
108: printf("!Error allocating %lu bytes\n",sizeof(file_t)*(file_count+1));
109: exit(1);
110: }
111: fp=&file[file_count];
112: memset(fp,0,sizeof(file_t));
113: strcpy(fp->path,fpath);
114: fp->date=fdate(fp->path);
115: if((fp->length=flength(fp->path))==-1) {
116: printf("!Failed to get length of %s\n",fp->path);
117: continue;
118: }
119: if(compare_bytes && fp->length > compare_bytes)
120: fp->length = compare_bytes;
121: if(fchksum(fp->path, fp->length,
122: #ifdef USE_MD5
123: fp->chksum
124: #else
125: &fp->chksum
126: #endif
127: ))
128: continue;
129: file_count++;
130: printf("%lu\r", file_count);
131: }
132:
133: closedir(dir);
134: printf("%s done searching %s\n",timestr(), path);
135: return(0);
136: }
137:
138: int compare_files(const file_t *f1, const file_t *f2 )
139: {
140: int result;
141:
142: /* Sort first by size (descending) */
143: if((result = f2->length - f1->length) != 0)
144: return(result);
145:
146: /* Then by chksum (ascending) */
147: if((result = memcmp(&f1->chksum, &f2->chksum, sizeof(f1->chksum))) != 0)
148: return(result);
149:
150: /* Then by date (descending) */
151: return(f2->date - f1->date);
152: }
153:
154: int main(int argc, char** argv)
155: {
156: char hex[32];
157: int i;
158: ulong fsize;
159: ulong dupe_count=0;
160: ulong del_files=0;
161: ulong del_bytes=0;
162: ulong compare_bytes=0;
163: BOOL recursive=FALSE;
164: BOOL del_dupes=FALSE;
165: BOOL dir_specified=FALSE;
166:
167: for(i=1;i<argc;i++) {
168: if(!stricmp(argv[i],"-d"))
169: del_dupes=TRUE;
170: else if(!stricmp(argv[i],"-r"))
171: recursive=TRUE;
172: else if(!stricmp(argv[i],"-b") && i<argc+1)
173: compare_bytes=atoi(argv[++i]);
174: else if(!stricmp(argv[i],"-k") && i<argc+1)
175: compare_bytes=atoi(argv[++i])*1024;
176: else if(argv[i][0]=='-') {
177: printf("%s [[-opt] [-opt] [...]] [[path] [path] [...]]\n", argv[0]);
178: printf("-r\t search directories recursively\n");
179: printf("-d\t delete duplicate files found\n");
180: printf("-b n\t compare up to n bytes of each file\n");
181: printf("-k n\t compare up to n kilobytes each of file\n");
182: exit(0);
183: }
184: else {
185: dir_specified=TRUE;
186: searchdir(argv[i], recursive, compare_bytes);
187: }
188: }
189: if(!dir_specified)
190: searchdir(".", recursive, compare_bytes);
191:
192: if(!file_count) {
193: printf("no files.\n");
194: return(0);
195: }
196:
197: printf("%s begin sorting (%lu files)\n", timestr(), file_count);
198: qsort(file,file_count,sizeof(file_t),compare_files);
199: printf("%s end sorting\n", timestr());
200:
201: printf("%s comparing (%lu files)\n", timestr(), file_count);
202:
203: for(i=0;i<file_count-1;i++) {
204: if(file[i].length != file[i+1].length)
205: continue; /* sizes must match */
206: if(memcmp(&file[i].chksum, &file[i+1].chksum, sizeof(file[i].chksum)))
207: continue; /* chksums must match */
208: #ifdef USE_MD5
209: MD5_hex(hex, file[i].chksum);
210: #else
211: sprintf(hex, "%08lx", file[i].chksum);
212: #endif
213: printf("Dupe: %s %7lu %s\n", hex, file[i].length, getfname(file[i].path));
214: if(del_dupes) {
215: fsize=flength(file[i].path);
216: printf("Removing %s (%lu bytes)\n", file[i].path, fsize);
217: if(remove(file[i].path)!=0)
218: perror(file[i].path);
219: else {
220: del_files++;
221: del_bytes+=fsize;
222: }
223: }
224: dupe_count++;
225: }
226:
227: printf("%s done (%lu duplicates found)\n", timestr(), dupe_count);
228: if(del_files)
229: printf("%lu bytes deleted in %lu files\n", del_bytes, del_files);
230: return(0);
231: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.