|
|
1.1 ! root 1: /* ! 2: * Huffman encoding program ! 3: * Adapted April 1979, from program by T.G. Szymanski, March 1978 ! 4: * Usage: pack [[ - ] filename ... ] filename ... ! 5: * - option: enable/disable listing of statistics ! 6: */ ! 7: ! 8: #include <stdio.h> ! 9: #include <sys/types.h> ! 10: #include <sys/stat.h> ! 11: ! 12: #define END 256 ! 13: #define BLKSIZE 512 ! 14: #define NAMELEN 80 ! 15: #define PACKED 017436 /* <US><RS> - Unlikely value */ ! 16: #define SUF0 '.' ! 17: #define SUF1 'z' ! 18: ! 19: struct stat status, ostatus; ! 20: ! 21: /* union for overlaying a long int with a set of four characters */ ! 22: union FOUR { ! 23: struct { long int lng; } l; ! 24: struct { char c0, c1, c2, c3; } c; ! 25: }; ! 26: ! 27: /* character counters */ ! 28: long count [END+1]; ! 29: union FOUR insize; ! 30: long outsize; ! 31: long dictsize; ! 32: int diffbytes; ! 33: ! 34: /* i/o stuff */ ! 35: char vflag = 0; ! 36: char filename [NAMELEN]; ! 37: int infile; /* unpacked file */ ! 38: int outfile; /* packed file */ ! 39: char inbuff [BLKSIZE]; ! 40: char outbuff [BLKSIZE+4]; ! 41: ! 42: /* variables associated with the tree */ ! 43: int maxlev; ! 44: int levcount [25]; ! 45: int lastnode; ! 46: int parent [2*END+1]; ! 47: ! 48: /* variables associated with the encoding process */ ! 49: char length [END+1]; ! 50: long bits [END+1]; ! 51: union FOUR mask; ! 52: long inc; ! 53: #ifndef pdp11 ! 54: char *maskshuff[4] = {&(mask.c.c3), &(mask.c.c2), &(mask.c.c1), &(mask.c.c0)}; ! 55: #else ! 56: char *maskshuff[4] = {&(mask.c.c1), &(mask.c.c0), &(mask.c.c3), &(mask.c.c2)}; ! 57: #endif ! 58: ! 59: /* the heap */ ! 60: int n; ! 61: struct heap { ! 62: long int count; ! 63: int node; ! 64: } heap [END+2]; ! 65: #define hmove(a,b) {(b).count = (a).count; (b).node = (a).node;} ! 66: ! 67: /* gather character frequency statistics */ ! 68: /* return 1 if successful, 0 otherwise */ ! 69: input () ! 70: { ! 71: register int i; ! 72: for (i=0; i<END; i++) ! 73: count[i] = 0; ! 74: while ((i = read(infile, inbuff, BLKSIZE)) > 0) ! 75: while (i > 0) ! 76: count[inbuff[--i]&0377] += 2; ! 77: if (i == 0) ! 78: return (1); ! 79: printf (": read error"); ! 80: return (0); ! 81: } ! 82: ! 83: /* encode the current file */ ! 84: /* return 1 if successful, 0 otherwise */ ! 85: output () ! 86: { ! 87: int c, i, inleft; ! 88: char *inp; ! 89: register char **q, *outp; ! 90: register int bitsleft; ! 91: long temp; ! 92: ! 93: /* output ``PACKED'' header */ ! 94: outbuff[0] = 037; /* ascii US */ ! 95: outbuff[1] = 036; /* ascii RS */ ! 96: /* output the length and the dictionary */ ! 97: temp = insize.l.lng; ! 98: for (i=5; i>=2; i--) { ! 99: outbuff[i] = (char) (temp & 0377); ! 100: temp >>= 8; ! 101: } ! 102: outp = &outbuff[6]; ! 103: *outp++ = maxlev; ! 104: for (i=1; i<maxlev; i++) ! 105: *outp++ = levcount[i]; ! 106: *outp++ = levcount[maxlev]-2; ! 107: for (i=1; i<=maxlev; i++) ! 108: for (c=0; c<END; c++) ! 109: if (length[c] == i) ! 110: *outp++ = c; ! 111: dictsize = outp-&outbuff[0]; ! 112: ! 113: /* output the text */ ! 114: lseek(infile, 0L, 0); ! 115: outsize = 0; ! 116: bitsleft = 8; ! 117: inleft = 0; ! 118: do { ! 119: if (inleft <= 0) { ! 120: inleft = read(infile, inp = &inbuff[0], BLKSIZE); ! 121: if (inleft < 0) { ! 122: printf (": read error"); ! 123: return (0); ! 124: } ! 125: } ! 126: c = (--inleft < 0) ? END : (*inp++ & 0377); ! 127: mask.l.lng = bits[c]<<bitsleft; ! 128: q = &maskshuff[0]; ! 129: if (bitsleft == 8) ! 130: *outp = **q++; ! 131: else ! 132: *outp |= **q++; ! 133: bitsleft -= length[c]; ! 134: while (bitsleft < 0) { ! 135: *++outp = **q++; ! 136: bitsleft += 8; ! 137: } ! 138: if (outp >= &outbuff[BLKSIZE]) { ! 139: if (write(outfile, outbuff, BLKSIZE) != BLKSIZE) { ! 140: wrerr: printf (".z: write error"); ! 141: return (0); ! 142: } ! 143: ((union FOUR *) outbuff)->l.lng = ((union FOUR *) &outbuff[BLKSIZE])->l.lng; ! 144: outp -= BLKSIZE; ! 145: outsize += BLKSIZE; ! 146: } ! 147: } while (c != END); ! 148: if (bitsleft < 8) ! 149: outp++; ! 150: c = outp-outbuff; ! 151: if (write(outfile, outbuff, c) != c) ! 152: goto wrerr; ! 153: outsize += c; ! 154: return (1); ! 155: } ! 156: ! 157: /* makes a heap out of heap[i],...,heap[n] */ ! 158: heapify (i) ! 159: { ! 160: register int k; ! 161: int lastparent; ! 162: struct heap heapsubi; ! 163: hmove (heap[i], heapsubi); ! 164: lastparent = n/2; ! 165: while (i <= lastparent) { ! 166: k = 2*i; ! 167: if (heap[k].count > heap[k+1].count && k < n) ! 168: k++; ! 169: if (heapsubi.count < heap[k].count) ! 170: break; ! 171: hmove (heap[k], heap[i]); ! 172: i = k; ! 173: } ! 174: hmove (heapsubi, heap[i]); ! 175: } ! 176: ! 177: /* return 1 after successful packing, 0 otherwise */ ! 178: int packfile () ! 179: { ! 180: register int c, i, p; ! 181: long bitsout; ! 182: ! 183: /* gather frequency statistics */ ! 184: if (input() == 0) ! 185: return (0); ! 186: ! 187: /* put occurring chars in heap with their counts */ ! 188: diffbytes = -1; ! 189: count[END] = 1; ! 190: insize.l.lng = n = 0; ! 191: for (i=END; i>=0; i--) { ! 192: parent[i] = 0; ! 193: if (count[i] > 0) { ! 194: diffbytes++; ! 195: insize.l.lng += count[i]; ! 196: heap[++n].count = count[i]; ! 197: heap[n].node = i; ! 198: } ! 199: } ! 200: insize.l.lng >>= 1; ! 201: for (i=n/2; i>=1; i--) ! 202: heapify(i); ! 203: ! 204: /* build Huffman tree */ ! 205: lastnode = END; ! 206: while (n > 1) { ! 207: parent[heap[1].node] = ++lastnode; ! 208: inc = heap[1].count; ! 209: hmove (heap[n], heap[1]); ! 210: n--; ! 211: heapify(1); ! 212: parent[heap[1].node] = lastnode; ! 213: heap[1].node = lastnode; ! 214: heap[1].count += inc; ! 215: heapify(1); ! 216: } ! 217: parent[lastnode] = 0; ! 218: ! 219: /* assign lengths to encoding for each character */ ! 220: bitsout = maxlev = 0; ! 221: for (i=1; i<=24; i++) ! 222: levcount[i] = 0; ! 223: for (i=0; i<=END; i++) { ! 224: c = 0; ! 225: for (p=parent[i]; p!=0; p=parent[p]) ! 226: c++; ! 227: levcount[c]++; ! 228: length[i] = c; ! 229: if (c > maxlev) ! 230: maxlev = c; ! 231: bitsout += c*(count[i]>>1); ! 232: } ! 233: if (maxlev > 24) { ! 234: /* can't occur unless insize.l.lng >= 2**24 */ ! 235: printf (": Huffman tree has too many levels"); ! 236: return(0); ! 237: } ! 238: ! 239: /* bitch if no compression, but do it anyway */ ! 240: outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes; ! 241: if ((insize.l.lng+BLKSIZE-1)/BLKSIZE <= (outsize+BLKSIZE-1)/BLKSIZE) { ! 242: printf (": no saving"); ! 243: } ! 244: ! 245: /* compute bit patterns for each character */ ! 246: inc = 1L << 24; ! 247: inc >>= maxlev; ! 248: mask.l.lng = 0; ! 249: for (i=maxlev; i>0; i--) { ! 250: for (c=0; c<=END; c++) ! 251: if (length[c] == i) { ! 252: bits[c] = mask.l.lng; ! 253: mask.l.lng += inc; ! 254: } ! 255: mask.l.lng &= ~inc; ! 256: inc <<= 1; ! 257: } ! 258: ! 259: return (output()); ! 260: } ! 261: ! 262: main(argc, argv) ! 263: int argc; char *argv[]; ! 264: { ! 265: register int i; ! 266: register char *cp; ! 267: int k, sep; ! 268: int fcount =0; /* count failures */ ! 269: ! 270: for (k=1; k<argc; k++) ! 271: { if (argv[k][0] == '-' && argv[k][1] == '\0') ! 272: { vflag = 1 - vflag; ! 273: continue; ! 274: } ! 275: fcount++; /* increase failure count - expect the worst */ ! 276: printf ("%s: %s", argv[0], argv[k]); ! 277: sep = -1; cp = filename; ! 278: for (i=0; i < (NAMELEN-3) && (*cp = argv[k][i]); i++) ! 279: if (*cp++ == '/') sep = i; ! 280: if (cp[-1]==SUF1 && cp[-2]==SUF0) ! 281: { printf (": already packed\n"); ! 282: continue; ! 283: } ! 284: if (i >= (NAMELEN-3) || (i-sep) > 13) ! 285: { printf (": file name too long\n"); ! 286: continue; ! 287: } ! 288: if ((infile = open (filename, 0)) < 0) ! 289: { printf (": cannot open\n"); ! 290: continue; ! 291: } ! 292: fstat(infile,&status); ! 293: if (status.st_mode&040000) ! 294: { printf (": cannot pack a directory\n"); ! 295: goto closein; ! 296: } ! 297: if( status.st_nlink != 1 ) ! 298: { printf(": has links\n"); ! 299: goto closein; ! 300: } ! 301: *cp++ = SUF0; *cp++ = SUF1; *cp = '\0'; ! 302: if( stat(filename, &ostatus) != -1) ! 303: { ! 304: printf(".z: already exists\n"); ! 305: goto closein; ! 306: } ! 307: if ((outfile = creat (filename, status.st_mode&07777)) < 0) ! 308: { printf (".z: cannot create\n"); ! 309: goto closein; ! 310: } ! 311: chown (filename, status.st_uid, status.st_gid); ! 312: ! 313: if (packfile()) ! 314: { unlink(argv[k]); ! 315: fcount--; /* success after all */ ! 316: if(insize.l.lng != 0){ ! 317: printf (": %.1f%% Compression\n", ! 318: ((double)(-outsize+(insize.l.lng))/(double)insize.l.lng)*100); ! 319: /* output statistics */ ! 320: if (vflag) { ! 321: printf(" from %ld to %ld bytes\n", insize.l.lng, outsize); ! 322: printf(" Huffman tree has %d levels below root\n", maxlev); ! 323: printf(" %d distinct bytes in input\n", diffbytes); ! 324: printf(" dictionary overhead = %ld bytes\n", dictsize); ! 325: printf(" effective entropy = %.2f bits/byte\n", ! 326: ((double) outsize / (double) insize.l.lng) * 8 ); ! 327: printf(" asymptotic entropy = %.2f bits/byte\n", ! 328: ((double) (outsize-dictsize) / (double) insize.l.lng) * 8 ); ! 329: } ! 330: } ! 331: else printf(": empty file\n"); ! 332: } ! 333: else ! 334: { printf (" - file unchanged\n"); ! 335: unlink(filename); ! 336: } ! 337: ! 338: closein: close (outfile); ! 339: close (infile); ! 340: utime(filename, &status.st_atime);/* preserve acc & mod times */ ! 341: } ! 342: return (fcount); ! 343: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.