|
|
1.1 ! root 1: /* finddupes.c */ ! 2: ! 3: /* Search for (and optionally delete) duplicate files in multiple */ ! 4: /* directories based on size and either MD5 or CRC-32 "chksums". */ ! 5: ! 6: #include <stdio.h> ! 7: #include <time.h> ! 8: ! 9: #include "dirwrap.h" ! 10: #ifdef USE_MD5 ! 11: #include "md5.h" ! 12: #else /* CRC-32 */ ! 13: #include "crc32.h" ! 14: #endif ! 15: ! 16: typedef struct { ! 17: char path[MAX_PATH+1]; ! 18: long length; ! 19: time_t date; ! 20: #ifdef USE_MD5 ! 21: BYTE chksum[MD5_DIGEST_SIZE]; ! 22: #else ! 23: ulong chksum; ! 24: #endif ! 25: } file_t; ! 26: ! 27: file_t* file; ! 28: ulong file_count=0; ! 29: ! 30: int fchksum(const char* fname, long length, ! 31: #ifdef USE_MD5 ! 32: BYTE* ! 33: #else ! 34: ulong* ! 35: #endif ! 36: chksum) ! 37: { ! 38: BYTE* buf=NULL; ! 39: FILE* fp; ! 40: ! 41: if((fp=fopen(fname,"rb"))==NULL) { ! 42: perror(fname); ! 43: return(-1); ! 44: } ! 45: ! 46: if(length && (buf=malloc(length))==NULL) { ! 47: printf("!Error allocating %ld bytes of memory for %s\n" ! 48: ,length,fname); ! 49: fclose(fp); ! 50: return(-1); ! 51: } ! 52: ! 53: if(fread(buf,sizeof(BYTE),length,fp) != length) { ! 54: perror(fname); ! 55: fclose(fp); ! 56: FREE_AND_NULL(buf); ! 57: return(-1); ! 58: } ! 59: ! 60: fclose(fp); ! 61: #ifdef USE_MD5 ! 62: MD5_calc(chksum, buf, length); ! 63: #else ! 64: *chksum = crc32(buf, length); ! 65: #endif ! 66: FREE_AND_NULL(buf); ! 67: return(0); ! 68: } ! 69: ! 70: char* timestr(void) ! 71: { ! 72: char* p; ! 73: time_t t=time(NULL); ! 74: p=ctime(&t); ! 75: p[19]=0; /* chop off year and \n */ ! 76: return(p+4); /* skip day-of-week */ ! 77: } ! 78: ! 79: int searchdir(const char* path, BOOL recursive, ulong compare_bytes) ! 80: { ! 81: DIR* dir; ! 82: struct dirent* ent; ! 83: file_t* fp; ! 84: char fpath[MAX_PATH+1]; ! 85: ! 86: printf("%s begin searching %s\n",timestr(), path); ! 87: if((dir = opendir(path))==NULL) { ! 88: perror(path); ! 89: return(1); ! 90: } ! 91: ! 92: while((ent = readdir(dir))!=NULL) { ! 93: if(kbhit()) ! 94: break; ! 95: if(strcmp(ent->d_name,".")==0 || strcmp(ent->d_name,"..")==0) ! 96: continue; ! 97: strcpy(fpath,path); ! 98: backslash(fpath); ! 99: strcat(fpath,ent->d_name); ! 100: if(isdir(fpath)) { ! 101: if(recursive) ! 102: searchdir(fpath, recursive, compare_bytes); ! 103: continue; ! 104: } ! 105: ! 106: file=realloc(file,sizeof(file_t)*(file_count+1)); ! 107: if(file==NULL) { ! 108: printf("!Error allocating %lu bytes\n",sizeof(file_t)*(file_count+1)); ! 109: exit(1); ! 110: } ! 111: fp=&file[file_count]; ! 112: memset(fp,0,sizeof(file_t)); ! 113: strcpy(fp->path,fpath); ! 114: fp->date=fdate(fp->path); ! 115: if((fp->length=flength(fp->path))==-1) { ! 116: printf("!Failed to get length of %s\n",fp->path); ! 117: continue; ! 118: } ! 119: if(compare_bytes && fp->length > compare_bytes) ! 120: fp->length = compare_bytes; ! 121: if(fchksum(fp->path, fp->length, ! 122: #ifdef USE_MD5 ! 123: fp->chksum ! 124: #else ! 125: &fp->chksum ! 126: #endif ! 127: )) ! 128: continue; ! 129: file_count++; ! 130: printf("%lu\r", file_count); ! 131: } ! 132: ! 133: closedir(dir); ! 134: printf("%s done searching %s\n",timestr(), path); ! 135: return(0); ! 136: } ! 137: ! 138: int compare_files(const file_t *f1, const file_t *f2 ) ! 139: { ! 140: int result; ! 141: ! 142: /* Sort first by size (descending) */ ! 143: if((result = f2->length - f1->length) != 0) ! 144: return(result); ! 145: ! 146: /* Then by chksum (ascending) */ ! 147: if((result = memcmp(&f1->chksum, &f2->chksum, sizeof(f1->chksum))) != 0) ! 148: return(result); ! 149: ! 150: /* Then by date (descending) */ ! 151: return(f2->date - f1->date); ! 152: } ! 153: ! 154: int main(int argc, char** argv) ! 155: { ! 156: char hex[32]; ! 157: int i; ! 158: ulong fsize; ! 159: ulong dupe_count=0; ! 160: ulong del_files=0; ! 161: ulong del_bytes=0; ! 162: ulong compare_bytes=0; ! 163: BOOL recursive=FALSE; ! 164: BOOL del_dupes=FALSE; ! 165: BOOL dir_specified=FALSE; ! 166: ! 167: for(i=1;i<argc;i++) { ! 168: if(!stricmp(argv[i],"-d")) ! 169: del_dupes=TRUE; ! 170: else if(!stricmp(argv[i],"-r")) ! 171: recursive=TRUE; ! 172: else if(!stricmp(argv[i],"-b") && i<argc+1) ! 173: compare_bytes=atoi(argv[++i]); ! 174: else if(!stricmp(argv[i],"-k") && i<argc+1) ! 175: compare_bytes=atoi(argv[++i])*1024; ! 176: else if(argv[i][0]=='-') { ! 177: printf("%s [[-opt] [-opt] [...]] [[path] [path] [...]]\n", argv[0]); ! 178: printf("-r\t search directories recursively\n"); ! 179: printf("-d\t delete duplicate files found\n"); ! 180: printf("-b n\t compare up to n bytes of each file\n"); ! 181: printf("-k n\t compare up to n kilobytes each of file\n"); ! 182: exit(0); ! 183: } ! 184: else { ! 185: dir_specified=TRUE; ! 186: searchdir(argv[i], recursive, compare_bytes); ! 187: } ! 188: } ! 189: if(!dir_specified) ! 190: searchdir(".", recursive, compare_bytes); ! 191: ! 192: if(!file_count) { ! 193: printf("no files.\n"); ! 194: return(0); ! 195: } ! 196: ! 197: printf("%s begin sorting (%lu files)\n", timestr(), file_count); ! 198: qsort(file,file_count,sizeof(file_t),compare_files); ! 199: printf("%s end sorting\n", timestr()); ! 200: ! 201: printf("%s comparing (%lu files)\n", timestr(), file_count); ! 202: ! 203: for(i=0;i<file_count-1;i++) { ! 204: if(file[i].length != file[i+1].length) ! 205: continue; /* sizes must match */ ! 206: if(memcmp(&file[i].chksum, &file[i+1].chksum, sizeof(file[i].chksum))) ! 207: continue; /* chksums must match */ ! 208: #ifdef USE_MD5 ! 209: MD5_hex(hex, file[i].chksum); ! 210: #else ! 211: sprintf(hex, "%08lx", file[i].chksum); ! 212: #endif ! 213: printf("Dupe: %s %7lu %s\n", hex, file[i].length, getfname(file[i].path)); ! 214: if(del_dupes) { ! 215: fsize=flength(file[i].path); ! 216: printf("Removing %s (%lu bytes)\n", file[i].path, fsize); ! 217: if(remove(file[i].path)!=0) ! 218: perror(file[i].path); ! 219: else { ! 220: del_files++; ! 221: del_bytes+=fsize; ! 222: } ! 223: } ! 224: dupe_count++; ! 225: } ! 226: ! 227: printf("%s done (%lu duplicates found)\n", timestr(), dupe_count); ! 228: if(del_files) ! 229: printf("%lu bytes deleted in %lu files\n", del_bytes, del_files); ! 230: return(0); ! 231: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.