|
|
1.1 root 1: /*
2: * Huffman encoding program
3: * Adapted April 1979, from program by T.G. Szymanski, March 1978
4: * Usage: pack [[ - ] filename ... ] filename ...
5: * - option: enable/disable listing of statistics
6: */
7:
8: #include <stdio.h>
9: #include <sys/types.h>
10: #include <sys/stat.h>
11:
12: #define END 256
13: #define BLKSIZE 512
14: #define NAMELEN 80
15: #define PACKED 017436 /* <US><RS> - Unlikely value */
16: #define SUF0 '.'
17: #define SUF1 'z'
18:
19: struct stat status, ostatus;
20:
21: /* union for overlaying a long int with a set of four characters */
22: union FOUR {
23: struct { long int lng; } l;
24: struct { char c0, c1, c2, c3; } c;
25: };
26:
27: /* character counters */
28: long count [END+1];
29: union FOUR insize;
30: long outsize;
31: long dictsize;
32: int diffbytes;
33:
34: /* i/o stuff */
35: char vflag = 0;
36: char filename [NAMELEN];
37: int infile; /* unpacked file */
38: int outfile; /* packed file */
39: char inbuff [BLKSIZE];
40: char outbuff [BLKSIZE+4];
41:
42: /* variables associated with the tree */
43: int maxlev;
44: int levcount [25];
45: int lastnode;
46: int parent [2*END+1];
47:
48: /* variables associated with the encoding process */
49: char length [END+1];
50: long bits [END+1];
51: union FOUR mask;
52: long inc;
53: #ifndef pdp11
54: char *maskshuff[4] = {&(mask.c.c3), &(mask.c.c2), &(mask.c.c1), &(mask.c.c0)};
55: #else
56: char *maskshuff[4] = {&(mask.c.c1), &(mask.c.c0), &(mask.c.c3), &(mask.c.c2)};
57: #endif
58:
59: /* the heap */
60: int n;
61: struct heap {
62: long int count;
63: int node;
64: } heap [END+2];
65: #define hmove(a,b) {(b).count = (a).count; (b).node = (a).node;}
66:
67: /* gather character frequency statistics */
68: /* return 1 if successful, 0 otherwise */
69: input ()
70: {
71: register int i;
72: for (i=0; i<END; i++)
73: count[i] = 0;
74: while ((i = read(infile, inbuff, BLKSIZE)) > 0)
75: while (i > 0)
76: count[inbuff[--i]&0377] += 2;
77: if (i == 0)
78: return (1);
79: printf (": read error");
80: return (0);
81: }
82:
83: /* encode the current file */
84: /* return 1 if successful, 0 otherwise */
85: output ()
86: {
87: int c, i, inleft;
88: char *inp;
89: register char **q, *outp;
90: register int bitsleft;
91: long temp;
92:
93: /* output ``PACKED'' header */
94: outbuff[0] = 037; /* ascii US */
95: outbuff[1] = 036; /* ascii RS */
96: /* output the length and the dictionary */
97: temp = insize.l.lng;
98: for (i=5; i>=2; i--) {
99: outbuff[i] = (char) (temp & 0377);
100: temp >>= 8;
101: }
102: outp = &outbuff[6];
103: *outp++ = maxlev;
104: for (i=1; i<maxlev; i++)
105: *outp++ = levcount[i];
106: *outp++ = levcount[maxlev]-2;
107: for (i=1; i<=maxlev; i++)
108: for (c=0; c<END; c++)
109: if (length[c] == i)
110: *outp++ = c;
111: dictsize = outp-&outbuff[0];
112:
113: /* output the text */
114: lseek(infile, 0L, 0);
115: outsize = 0;
116: bitsleft = 8;
117: inleft = 0;
118: do {
119: if (inleft <= 0) {
120: inleft = read(infile, inp = &inbuff[0], BLKSIZE);
121: if (inleft < 0) {
122: printf (": read error");
123: return (0);
124: }
125: }
126: c = (--inleft < 0) ? END : (*inp++ & 0377);
127: mask.l.lng = bits[c]<<bitsleft;
128: q = &maskshuff[0];
129: if (bitsleft == 8)
130: *outp = **q++;
131: else
132: *outp |= **q++;
133: bitsleft -= length[c];
134: while (bitsleft < 0) {
135: *++outp = **q++;
136: bitsleft += 8;
137: }
138: if (outp >= &outbuff[BLKSIZE]) {
139: if (write(outfile, outbuff, BLKSIZE) != BLKSIZE) {
140: wrerr: printf (".z: write error");
141: return (0);
142: }
143: ((union FOUR *) outbuff)->l.lng = ((union FOUR *) &outbuff[BLKSIZE])->l.lng;
144: outp -= BLKSIZE;
145: outsize += BLKSIZE;
146: }
147: } while (c != END);
148: if (bitsleft < 8)
149: outp++;
150: c = outp-outbuff;
151: if (write(outfile, outbuff, c) != c)
152: goto wrerr;
153: outsize += c;
154: return (1);
155: }
156:
157: /* makes a heap out of heap[i],...,heap[n] */
158: heapify (i)
159: {
160: register int k;
161: int lastparent;
162: struct heap heapsubi;
163: hmove (heap[i], heapsubi);
164: lastparent = n/2;
165: while (i <= lastparent) {
166: k = 2*i;
167: if (heap[k].count > heap[k+1].count && k < n)
168: k++;
169: if (heapsubi.count < heap[k].count)
170: break;
171: hmove (heap[k], heap[i]);
172: i = k;
173: }
174: hmove (heapsubi, heap[i]);
175: }
176:
177: /* return 1 after successful packing, 0 otherwise */
178: int packfile ()
179: {
180: register int c, i, p;
181: long bitsout;
182:
183: /* gather frequency statistics */
184: if (input() == 0)
185: return (0);
186:
187: /* put occurring chars in heap with their counts */
188: diffbytes = -1;
189: count[END] = 1;
190: insize.l.lng = n = 0;
191: for (i=END; i>=0; i--) {
192: parent[i] = 0;
193: if (count[i] > 0) {
194: diffbytes++;
195: insize.l.lng += count[i];
196: heap[++n].count = count[i];
197: heap[n].node = i;
198: }
199: }
200: insize.l.lng >>= 1;
201: for (i=n/2; i>=1; i--)
202: heapify(i);
203:
204: /* build Huffman tree */
205: lastnode = END;
206: while (n > 1) {
207: parent[heap[1].node] = ++lastnode;
208: inc = heap[1].count;
209: hmove (heap[n], heap[1]);
210: n--;
211: heapify(1);
212: parent[heap[1].node] = lastnode;
213: heap[1].node = lastnode;
214: heap[1].count += inc;
215: heapify(1);
216: }
217: parent[lastnode] = 0;
218:
219: /* assign lengths to encoding for each character */
220: bitsout = maxlev = 0;
221: for (i=1; i<=24; i++)
222: levcount[i] = 0;
223: for (i=0; i<=END; i++) {
224: c = 0;
225: for (p=parent[i]; p!=0; p=parent[p])
226: c++;
227: levcount[c]++;
228: length[i] = c;
229: if (c > maxlev)
230: maxlev = c;
231: bitsout += c*(count[i]>>1);
232: }
233: if (maxlev > 24) {
234: /* can't occur unless insize.l.lng >= 2**24 */
235: printf (": Huffman tree has too many levels");
236: return(0);
237: }
238:
239: /* bitch if no compression, but do it anyway */
240: outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes;
241: if ((insize.l.lng+BLKSIZE-1)/BLKSIZE <= (outsize+BLKSIZE-1)/BLKSIZE) {
242: printf (": no saving");
243: }
244:
245: /* compute bit patterns for each character */
246: inc = 1L << 24;
247: inc >>= maxlev;
248: mask.l.lng = 0;
249: for (i=maxlev; i>0; i--) {
250: for (c=0; c<=END; c++)
251: if (length[c] == i) {
252: bits[c] = mask.l.lng;
253: mask.l.lng += inc;
254: }
255: mask.l.lng &= ~inc;
256: inc <<= 1;
257: }
258:
259: return (output());
260: }
261:
262: main(argc, argv)
263: int argc; char *argv[];
264: {
265: register int i;
266: register char *cp;
267: int k, sep;
268: int fcount =0; /* count failures */
269:
270: for (k=1; k<argc; k++)
271: { if (argv[k][0] == '-' && argv[k][1] == '\0')
272: { vflag = 1 - vflag;
273: continue;
274: }
275: fcount++; /* increase failure count - expect the worst */
276: printf ("%s: %s", argv[0], argv[k]);
277: sep = -1; cp = filename;
278: for (i=0; i < (NAMELEN-3) && (*cp = argv[k][i]); i++)
279: if (*cp++ == '/') sep = i;
280: if (cp[-1]==SUF1 && cp[-2]==SUF0)
281: { printf (": already packed\n");
282: continue;
283: }
284: if (i >= (NAMELEN-3) || (i-sep) > 13)
285: { printf (": file name too long\n");
286: continue;
287: }
288: if ((infile = open (filename, 0)) < 0)
289: { printf (": cannot open\n");
290: continue;
291: }
292: fstat(infile,&status);
293: if (status.st_mode&040000)
294: { printf (": cannot pack a directory\n");
295: goto closein;
296: }
297: if( status.st_nlink != 1 )
298: { printf(": has links\n");
299: goto closein;
300: }
301: *cp++ = SUF0; *cp++ = SUF1; *cp = '\0';
302: if( stat(filename, &ostatus) != -1)
303: {
304: printf(".z: already exists\n");
305: goto closein;
306: }
307: if ((outfile = creat (filename, status.st_mode&07777)) < 0)
308: { printf (".z: cannot create\n");
309: goto closein;
310: }
311: chown (filename, status.st_uid, status.st_gid);
312:
313: if (packfile())
314: { unlink(argv[k]);
315: fcount--; /* success after all */
316: if(insize.l.lng != 0){
317: printf (": %.1f%% Compression\n",
318: ((double)(-outsize+(insize.l.lng))/(double)insize.l.lng)*100);
319: /* output statistics */
320: if (vflag) {
321: printf(" from %ld to %ld bytes\n", insize.l.lng, outsize);
322: printf(" Huffman tree has %d levels below root\n", maxlev);
323: printf(" %d distinct bytes in input\n", diffbytes);
324: printf(" dictionary overhead = %ld bytes\n", dictsize);
325: printf(" effective entropy = %.2f bits/byte\n",
326: ((double) outsize / (double) insize.l.lng) * 8 );
327: printf(" asymptotic entropy = %.2f bits/byte\n",
328: ((double) (outsize-dictsize) / (double) insize.l.lng) * 8 );
329: }
330: }
331: else printf(": empty file\n");
332: }
333: else
334: { printf (" - file unchanged\n");
335: unlink(filename);
336: }
337:
338: closein: close (outfile);
339: close (infile);
340: utime(filename, &status.st_atime);/* preserve acc & mod times */
341: }
342: return (fcount);
343: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.