|
|
1.1 root 1: /* inflate.c -- Not copyrighted 1992 by Mark Adler
2: version c4, 15 April 1992 */
3:
4:
5: /* You can do whatever you like with this source file, though I would
6: prefer that if you modify it and redistribute it that you include
7: comments to that effect with your name and the date. Thank you.
8:
9: History:
10: vers date who what
11: ---- --------- -------------- ------------------------------------
12: a ~~ Feb 92 M. Adler used full (large, one-step) lookup table
13: b1 21 Mar 92 M. Adler first version with partial lookup tables
14: b2 21 Mar 92 M. Adler fixed bug in fixed-code blocks
15: b3 22 Mar 92 M. Adler sped up match copies, cleaned up some
16: b4 25 Mar 92 M. Adler added prototypes; removed window[] (now
17: is the responsibility of unzip.h--also
18: changed name to slide[]), so needs diffs
19: for unzip.c and unzip.h (this allows
20: compiling in the small model on MSDOS);
21: fixed cast of q in huft_build();
22: b5 26 Mar 92 M. Adler got rid of unintended macro recursion.
23: b6 27 Mar 92 M. Adler got rid of nextbyte() routine. fixed
24: bug in inflate_fixed().
25: c1 30 Mar 92 M. Adler removed lbits, dbits environment variables.
26: changed BMAX to 16 for explode. Removed
27: OUTB usage, and replaced it with flush()--
28: this was a 20% speed improvement! Added
29: an explode.c (to replace unimplode.c) that
30: uses the huft routines here. Removed
31: register union.
32: c2 4 Apr 92 M. Adler fixed bug for file sizes a multiple of 32k.
33: c3 10 Apr 92 M. Adler reduced memory of code tables made by
34: huft_build significantly (factor of two to
35: three).
36: c4 15 Apr 92 M. Adler added NOMEMCPY do kill use of memcpy().
37: worked around a Turbo C optimization bug.
38: c5 21 Apr 92 M. Adler added the WSIZE #define to allow reducing
39: the 32K window size for specialized
40: applications.
41: c6 27 May 92 J.loup Gailly Adapted for pgp
42: */
43:
44:
45: /*
46: Inflate deflated (PKZIP's method 8 compressed) data. The compression
47: method searches for as much of the current string of bytes (up to a
48: length of 258) in the previous 32K bytes. If it doesn't find any
49: matches (of at least length 3), it codes the next byte. Otherwise, it
50: codes the length of the matched string and its distance backwards from
51: the current position. There is a single Huffman code that codes both
52: single bytes (called "literals") and match lengths. A second Huffman
53: code codes the distance information, which follows a length code. Each
54: length or distance code actually represents a base value and a number
55: of "extra" (sometimes zero) bits to get to add to the base value. At
56: the end of each deflated block is a special end-of-block (EOB) literal/
57: length code. The decoding process is basically: get a literal/length
58: code; if EOB then done; if a literal, emit the decoded byte; if a
59: length then get the distance and emit the referred-to bytes from the
60: sliding window of previously emitted data.
61:
62: There are (currently) three kinds of inflate blocks: stored, fixed, and
63: dynamic. The compressor deals with some chunk of data at a time, and
64: decides which method to use on a chunk-by-chunk basis. A chunk might
65: typically be 32K or 64K. If the chunk is uncompressible, then the
66: "stored" method is used. In this case, the bytes are simply stored as
67: is, eight bits per byte, with none of the above coding. The bytes are
68: preceded by a count, since there is no longer an EOB code.
69:
70: If the data is compressible, then either the fixed or dynamic methods
71: are used. In the dynamic method, the compressed data is preceded by
72: an encoding of the literal/length and distance Huffman codes that are
73: to be used to decode this block. The representation is itself Huffman
74: coded, and so is preceded by a description of that code. These code
75: descriptions take up a little space, and so for small blocks, there is
76: a predefined set of codes, called the fixed codes. The fixed method is
77: used if the block codes up smaller that way (usually for quite small
78: chunks), otherwise the dynamic method is used. In the latter case, the
79: codes are customized to the probabilities in the current block, and so
80: can code it much better than the pre-determined fixed codes.
81:
82: The Huffman codes themselves are decoded using a mutli-level table
83: lookup, in order to maximize the speed of decoding plus the speed of
84: building the decoding tables. See the comments below that precede the
85: lbits and dbits tuning parameters.
86: */
87:
88:
89: /*
90: Notes beyond the 1.93a appnote.txt:
91:
92: 1. Distance pointers never point before the beginning of the output
93: stream.
94: 2. Distance pointers can point back across blocks, up to 32k away.
95: 3. There is an implied maximum of 7 bits for the bit length table and
96: 15 bits for the actual data.
97: 4. If only one code exists, then it is encoded using one bit. (Zero
98: would be more efficient, but perhaps a little confusing.) If two
99: codes exist, they are coded using one bit each (0 and 1).
100: 5. There is no way of sending zero distance codes--a dummy must be
101: sent if there are none. (History: a pre 2.0 version of PKZIP would
102: store blocks with no distance codes, but this was discovered to be
103: too harsh a criterion.)
104: 6. There are up to 286 literal/length codes. Code 256 represents the
105: end-of-block. Note however that the static length tree defines
106: 288 codes just to fill out the Huffman codes. Codes 286 and 287
107: cannot be used though, since there is no length base or extra bits
108: defined for them. Similarily, there are up to 30 distance codes.
109: However, static trees define 32 codes (all 5 bits) to fill out the
110: Huffman codes, but the last two had better not show up in the data.
111: 7. Unzip can check dynamic huffman blocks for complete code sets.
112: The exception is that a single code would not be complete (see #4).
113: 8. The five bits following the block type is really the number of
114: literal codes sent minus 257.
115: 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
116: (1+6+6). Therefore, to output three times the length, you output
117: three codes (1+1+1), whereas to output four times the same length,
118: you only need two codes (1+3). Hmm.
119: 10. In the tree reconstruction algorithm, Code = Code + Increment
120: only if BitLength(i) is not zero. (Pretty obvious.)
121: 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
122: 12. Note: length code 284 can represent 227-258, but length code 285
123: really is 258. The last length deserves its own, short code
124: since it gets used a lot in very redundant files. The length
125: 258 is special since 258 - 3 (the min match length) is 255.
126: 13. The literal/length and distance code bit lengths are read as a
127: single stream of lengths. It is possible (and advantageous) for
128: a repeat code (16, 17, or 18) to go across the boundary between
129: the two sets of lengths.
130: */
131:
132: #include "zunzip.h"
133: #define OF __
134:
135: #ifndef WSIZE
136: # define WSIZE 8192
137: /* window size--must be a power of two, <= 32K, and equal to that of zip.
138: * On 16 bit machines (MSDOS), WSIZE must be <= 16K (32K is possible
139: * with a few hacks, see the zip archiver.
140: */
141: #endif
142:
143: #ifdef DYN_ALLOC
144: extern char *window;
145: #else
146: extern char window[];
147: #endif
148: #define slide window
149:
150: /* Huffman code lookup table entry--this entry is four bytes for machines
151: that have 16-bit pointers (e.g. PC's in the small or medium model).
152: Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16
153: means that v is a literal, 16 < e < 32 means that v is a pointer to
154: the next table, which codes e - 16 bits, and lastly e == 99 indicates
155: an unused code. If a code with e == 99 is looked up, this implies an
156: error in the data. */
157: struct huft {
158: byte e; /* number of extra bits or operation */
159: byte b; /* number of bits in this code or subcode */
160: union {
161: UWORD n; /* literal, length base, or distance base */
162: struct huft *t; /* pointer to next level of table */
163: } v;
164: };
165:
166:
167: /* Function prototypes */
168: int huft_build OF((unsigned *, unsigned, unsigned, UWORD *, UWORD *,
169: struct huft **, int *));
170: int huft_free OF((struct huft *));
171: void flush OF((unsigned));
172: int inflate_codes OF((struct huft *, struct huft *, int, int));
173: int inflate_stored OF((void));
174: int inflate_fixed OF((void));
175: int inflate_dynamic OF((void));
176: int inflate_block OF((int *));
177: int inflate_entry OF((void));
178: void inflate OF((void));
179:
180:
181: /* The inflate algorithm uses a sliding 32K byte window on the uncompressed
182: stream to find repeated byte strings. This is implemented here as a
183: circular buffer. The index is updated simply by incrementing and then
184: and'ing with 0x7fff (32K-1). */
185: /* It is left to other modules to supply the 32K area. It is assumed
186: to be usable as if it were declared "byte slide[32768];" or as just
187: "byte *slide;" and then malloc'ed in the latter case. The definition
188: must be in unzip.h, included above. */
189: unsigned wp; /* current position in slide */
190:
191:
192: /* Tables for deflate from PKZIP's appnote.txt. */
193: static unsigned border[] = { /* Order of the bit length code lengths */
194: 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
195: static UWORD cplens[] = { /* Copy lengths for literal codes 257..285 */
196: 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
197: 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
198: /* note: see note #13 above about the 258 in this list. */
199: static UWORD cplext[] = { /* Extra bits for literal codes 257..285 */
200: 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
201: 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
202: static UWORD cpdist[] = { /* Copy offsets for distance codes 0..29 */
203: 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
204: 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
205: 8193, 12289, 16385, 24577};
206: static UWORD cpdext[] = { /* Extra bits for distance codes */
207: 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
208: 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
209: 12, 12, 13, 13};
210:
211:
212:
213: /* Macros for inflate() bit peeking and grabbing.
214: The usage is:
215:
216: NEEDBITS(j)
217: x = b & mask_bits[j];
218: DUMPBITS(j)
219:
220: where NEEDBITS makes sure that b has at least j bits in it, and
221: DUMPBITS removes the bits from b. The macros use the variable k
222: for the number of bits in b. Normally, b and k are register
223: variables for speed, and are initialized at the begining of a
224: routine that uses these macros from a global bit buffer and count.
225:
226: If we assume that EOB will be the longest code, then we will never
227: ask for bits with NEEDBITS that are beyond the end of the stream.
228: So, NEEDBITS should not read any more bytes than are needed to
229: meet the request. Then no bytes need to be "returned" to the buffer
230: at the end of the last block.
231:
232: However, this assumption is not true for fixed blocks--the EOB code
233: is 7 bits, but the other literal/length codes can be 8 or 9 bits.
234: (Why PK made the EOB code, which can only occur once in a block,
235: the *shortest* code in the set, I'll never know.) However, by
236: making the first table have a lookup of seven bits, the EOB code
237: will be found in that first lookup, and so will not require that too
238: many bits be pulled from the stream.
239: */
240:
241: ULONG bb; /* bit buffer */
242: unsigned bk; /* bits in bit buffer */
243:
244: UWORD bytebuf;
245: #define NEXTBYTE (ReadByte(&bytebuf), bytebuf)
246: #define NEEDBITS(n) {while(k<(n)){b|=((ULONG)NEXTBYTE)<<k;k+=8;}}
247: #define DUMPBITS(n) {b>>=(n);k-=(n);}
248:
249:
250: /*
251: Huffman code decoding is performed using a multi-level table lookup.
252: The fastest way to decode is to simply build a lookup table whose
253: size is determined by the longest code. However, the time it takes
254: to build this table can also be a factor if the data being decoded
255: is not very long. The most common codes are necessarily the
256: shortest codes, so those codes dominate the decoding time, and hence
257: the speed. The idea is you can have a shorter table that decodes the
258: shorter, more probable codes, and then point to subsidiary tables for
259: the longer codes. The time it costs to decode the longer codes is
260: then traded against the time it takes to make longer tables.
261:
262: This results of this trade are in the variables lbits and dbits
263: below. lbits is the number of bits the first level table for literal/
264: length codes can decode in one step, and dbits is the same thing for
265: the distance codes. Subsequent tables are also less than or equal to
266: those sizes. These values may be adjusted either when all of the
267: codes are shorter than that, in which case the longest code length in
268: bits is used, or when the shortest code is *longer* than the requested
269: table size, in which case the length of the shortest code in bits is
270: used.
271:
272: There are two different values for the two tables, since they code a
273: different number of possibilities each. The literal/length table
274: codes 286 possible values, or in a flat code, a little over eight
275: bits. The distance table codes 30 possible values, or a little less
276: than five bits, flat. The optimum values for speed end up being
277: about one bit more than those, so lbits is 8+1 and dbits is 5+1.
278: The optimum values may differ though from machine to machine, and
279: possibly even between compilers. Your mileage may vary.
280: */
281:
282:
283: int lbits = 9; /* bits in base literal/length lookup table */
284: int dbits = 6; /* bits in base distance lookup table */
285:
286:
287: /* If BMAX needs to be larger than 16, then h and x[] should be ULONG. */
288: #define BMAX 16 /* maximum bit length of any code (16 for explode) */
289: #define NMAX 288 /* maximum number of codes in any set */
290:
291:
292: unsigned hufts; /* track memory usage */
293:
294:
295: int huft_build(b, n, s, d, e, t, m)
296: unsigned *b; /* code lengths in bits (all assumed <= BMAX) */
297: unsigned n; /* number of codes (assumed <= NMAX) */
298: unsigned s; /* number of simple-valued codes (0..s-1) */
299: UWORD *d; /* list of base values for non-simple codes */
300: UWORD *e; /* list of extra bits for non-simple codes */
301: struct huft **t; /* result: starting table */
302: int *m; /* maximum lookup bits, returns actual */
303: /* Given a list of code lengths and a maximum table size, make a set of
304: tables to decode that set of codes. Return zero on success, one if
305: the given code set is incomplete (the tables are still built in this
306: case), two if the input is invalid (all zero length codes or an
307: oversubscribed set of lengths), and three if not enough memory. */
308: {
309: unsigned a; /* counter for codes of length k */
310: unsigned c[BMAX+1]; /* bit length count table */
311: unsigned f; /* i repeats in table every f entries */
312: int g; /* maximum code length */
313: int h; /* table level */
314: register unsigned i; /* counter, current code */
315: register unsigned j; /* counter */
316: register int k; /* number of bits in current code */
317: int l; /* bits per table (returned in m) */
318: register unsigned *p; /* pointer into c[], b[], or v[] */
319: register struct huft *q; /* points to current table */
320: struct huft r; /* table entry for structure assignment */
321: struct huft *u[BMAX]; /* table stack */
322: unsigned v[NMAX]; /* values in order of bit length */
323: register int w; /* bits before this table == (l * h) */
324: unsigned x[BMAX+1]; /* bit offsets, then code stack */
325: unsigned *xp; /* pointer into x */
326: int y; /* number of dummy codes added */
327: unsigned z; /* number of entries in current table */
328:
329:
330: /* Generate counts for each bit length */
331: memset(c, 0, sizeof(c));
332: p = b; i = n;
333: do {
334: c[*p++]++; /* assume all entries <= BMAX */
335: } while (--i);
336: if (c[0] == n)
337: return 2; /* bad input--all zero length codes */
338:
339:
340: /* Find minimum and maximum length, bound *m by those */
341: l = *m;
342: for (j = 1; j <= BMAX; j++)
343: if (c[j])
344: break;
345: k = j; /* minimum code length */
346: if (l < j)
347: l = j;
348: for (i = BMAX; i; i--)
349: if (c[i])
350: break;
351: g = i; /* maximum code length */
352: if (l > i)
353: l = i;
354: *m = l;
355:
356:
357: /* Adjust last length count to fill out codes, if needed */
358: for (y = 1 << j; j < i; j++, y <<= 1)
359: if ((y -= c[j]) < 0)
360: return 2; /* bad input: more codes than bits */
361: if ((y -= c[i]) < 0)
362: return 2;
363: c[i] += y;
364:
365:
366: /* Generate starting offsets into the value table for each length */
367: x[1] = j = 0;
368: p = c + 1; xp = x + 2;
369: while (--i) { /* note that i == g from above */
370: *xp++ = (j += *p++);
371: }
372:
373:
374: /* Make a table of values in order of bit lengths */
375: p = b; i = 0;
376: do {
377: if ((j = *p++) != 0)
378: v[x[j]++] = i;
379: } while (++i < n);
380:
381:
382: /* Generate the Huffman codes and for each, make the table entries */
383: x[0] = i = 0; /* first Huffman code is zero */
384: p = v; /* grab values in bit order */
385: h = -1; /* no tables yet--level -1 */
386: w = -l; /* bits decoded == (l * h) */
387: u[0] = NULL; q = NULL; z = 0; /* just to keep compilers happy */
388:
389: /* go through the bit lengths (k already is bits in shortest code) */
390: for (; k <= g; k++)
391: {
392: a = c[k];
393: while (a--)
394: {
395: /* here i is the Huffman code of length k bits for value *p */
396: /* make tables up to required level */
397: while (k > w + l)
398: {
399: h++;
400: w += l; /* previous table always l bits */
401:
402: /* compute minimum size table less than or equal to l bits */
403: z = (z = g - w) > l ? l : z; /* upper limit on table size */
404: if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
405: { /* too few codes for k-w bit table */
406: f -= a + 1; /* deduct codes from patterns left */
407: xp = c + k;
408: while (++j < z) /* try smaller tables up to z bits */
409: {
410: if ((f <<= 1) <= *++xp)
411: break; /* enough codes to use up j bits */
412: f -= *xp; /* else deduct codes from patterns */
413: }
414: }
415: z = 1 << j; /* table entries for j-bit table */
416:
417: /* allocate and link in new table */
418: if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) == NULL)
419: {
420: if (h)
421: huft_free(u[0]);
422: fprintf(stderr, "\n*** inflate out of memory *** ");
423: return 3; /* not enough memory */
424: }
425: hufts += z + 1; /* track memory usage */
426: *t = q + 1; /* link to list for huft_free() */
427: *(t = &(q->v.t)) = NULL;
428: u[h] = ++q; /* table starts after link */
429:
430: /* connect to last table, if there is one */
431: if (h)
432: {
433: x[h] = i; /* save pattern for backing up */
434: r.b = l; /* bits to dump before this table */
435: r.e = 16 + j; /* bits in this table */
436: r.v.t = q; /* pointer to this table */
437: j = i >> (w - l); /* (get around Turbo C bug) */
438: u[h-1][j] = r; /* connect to last table */
439: }
440: }
441:
442: /* set up table entry in r */
443: r.b = k - w;
444: if (p >= v + n)
445: r.e = 99; /* out of values--invalid code */
446: else if (*p < s)
447: {
448: r.e = *p < 256 ? 16 : 15; /* 256 is end-of-block code */
449: r.v.n = *p++; /* simple code is just the value */
450: }
451: else
452: {
453: r.e = e[*p - s]; /* non-simple--look up in lists */
454: r.v.n = d[*p++ - s];
455: }
456:
457: /* fill code-like entries with r */
458: f = 1 << (k - w);
459: for (j = i >> w; j < z; j += f)
460: q[j] = r;
461:
462: /* backwards increment the k-bit code i */
463: for (j = 1 << (k - 1); i & j; j >>= 1)
464: i ^= j;
465: i ^= j;
466:
467: /* backup over finished tables */
468: while ((i & ((1 << w) - 1)) != x[h])
469: {
470: h--; /* don't need to update q */
471: w -= l;
472: }
473: }
474: }
475:
476:
477: /* Return true (1) if we were given an incomplete table */
478: return y != 0 && n != 1;
479: }
480:
481:
482:
483: int huft_free(t)
484: struct huft *t; /* table to free */
485: /* Free the malloc'ed tables built by huft_build(), which makes a linked
486: list of the tables it made, with the links in a dummy first entry of
487: each table. */
488: {
489: register struct huft *p, *q;
490:
491:
492: /* Go through linked list, freeing from the malloced (t[-1]) address. */
493: p = t;
494: while (p != NULL)
495: {
496: q = (--p)->v.t;
497: free(p);
498: p = q;
499: }
500: return 0;
501: }
502:
503:
504:
505: void flush(w)
506: unsigned w; /* number of bytes to flush */
507: /* Do the equivalent of OUTB for the bytes slide[0..w-1]. */
508: {
509: unsigned n;
510: byte *p;
511:
512: p = (byte*)slide;
513: while (w)
514: {
515: n = (n = OUTBUFSIZ - outcnt) < w ? n : w;
516: memcpy(outptr, p, n); /* try to fill up buffer */
517: outptr += n;
518: if ((outcnt += n) == OUTBUFSIZ)
519: FlushOutput(); /* if full, empty */
520: p += n;
521: w -= n;
522: }
523: }
524:
525:
526:
527: int inflate_codes(tl, td, bl, bd)
528: struct huft *tl, *td; /* literal/length and distance decoder tables */
529: int bl, bd; /* number of bits decoded by tl[] and td[] */
530: /* inflate (decompress) the codes in a deflated (compressed) block.
531: Return an error code or zero if it all goes ok. */
532: {
533: register unsigned e; /* table entry flag/number of extra bits */
534: unsigned n, d; /* length and index for copy */
535: unsigned w; /* current window position */
536: struct huft *t; /* pointer to table entry */
537: ULONG ml, md; /* masks for bl and bd bits */
538: register ULONG b; /* bit buffer */
539: register unsigned k; /* number of bits in bit buffer */
540:
541:
542: /* make local copies of globals */
543: b = bb; /* initialize bit buffer */
544: k = bk;
545: w = wp; /* initialize window position */
546:
547:
548: /* inflate the coded data */
549: ml = mask_bits[bl]; /* precompute masks for speed */
550: md = mask_bits[bd];
551: while (1) /* do until end of block */
552: {
553: NEEDBITS(bl)
554: if ((e = (t = tl + (b & ml))->e) > 16)
555: do {
556: if (e == 99)
557: return 1;
558: DUMPBITS(t->b)
559: e -= 16;
560: NEEDBITS(e)
561: } while ((e = (t = t->v.t + (b & mask_bits[e]))->e) > 16);
562: DUMPBITS(t->b)
563: if (e == 16) /* then it's a literal */
564: {
565: slide[w++] = t->v.n;
566: if (w == WSIZE)
567: {
568: flush(w);
569: w = 0;
570: }
571: }
572: else /* it's an EOB or a length */
573: {
574: /* exit if end of block */
575: if (e == 15)
576: break;
577:
578: /* get length of block to copy */
579: NEEDBITS(e)
580: n = t->v.n + (b & mask_bits[e]);
581: DUMPBITS(e);
582:
583: /* decode distance of block to copy */
584: NEEDBITS(bd)
585: if ((e = (t = td + (b & md))->e) > 16)
586: do {
587: if (e == 99)
588: return 1;
589: DUMPBITS(t->b)
590: e -= 16;
591: NEEDBITS(e)
592: } while ((e = (t = t->v.t + (b & mask_bits[e]))->e) > 16);
593: DUMPBITS(t->b)
594: NEEDBITS(e)
595: d = w - t->v.n - (b & mask_bits[e]);
596: DUMPBITS(e)
597:
598: /* do the copy */
599: do {
600: n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
601: #ifndef NOMEMCPY
602: if (w - d >= e) /* (this test assumes unsigned comparison) */
603: {
604: memcpy(slide + w, slide + d, e);
605: w += e;
606: d += e;
607: }
608: else /* do it slow to avoid memcpy() overlap */
609: #endif /* !NOMEMCPY */
610: do {
611: slide[w++] = slide[d++];
612: } while (--e);
613: if (w == WSIZE)
614: {
615: flush(w);
616: w = 0;
617: }
618: } while (n);
619: }
620: }
621:
622:
623: /* restore the globals from the locals */
624: wp = w; /* restore global window pointer */
625: bb = b; /* restore global bit buffer */
626: bk = k;
627:
628:
629: /* done */
630: return 0;
631: }
632:
633:
634:
635: int inflate_stored()
636: /* "decompress" an inflated type 0 (stored) block. */
637: {
638: unsigned n; /* number of bytes in block */
639: unsigned w; /* current window position */
640: register ULONG b; /* bit buffer */
641: register unsigned k; /* number of bits in bit buffer */
642:
643:
644: /* make local copies of globals */
645: b = bb; /* initialize bit buffer */
646: k = bk;
647: w = wp; /* initialize window position */
648:
649:
650: /* go to byte boundary */
651: n = k & 7;
652: DUMPBITS(n);
653:
654:
655: /* get the length and its complement */
656: NEEDBITS(16)
657: n = b & 0xffff;
658: DUMPBITS(16)
659: NEEDBITS(16)
660: if (n != ((~b) & 0xffff))
661: return 1; /* error in compressed data */
662: DUMPBITS(16)
663:
664:
665: /* read and output the compressed data */
666: while (n--)
667: {
668: NEEDBITS(8)
669: slide[w++] = b;
670: if (w == WSIZE)
671: {
672: flush(w);
673: w = 0;
674: }
675: DUMPBITS(8)
676: }
677:
678:
679: /* restore the globals from the locals */
680: wp = w; /* restore global window pointer */
681: bb = b; /* restore global bit buffer */
682: bk = k;
683: return 0;
684: }
685:
686:
687:
688: int inflate_fixed()
689: /* decompress an inflated type 1 (fixed Huffman codes) block. We should
690: either replace this with a custom decoder, or at least precompute the
691: Huffman tables. */
692: {
693: int i; /* temporary variable */
694: struct huft *tl; /* literal/length code table */
695: struct huft *td; /* distance code table */
696: int bl; /* lookup bits for tl */
697: int bd; /* lookup bits for td */
698: unsigned l[288]; /* length list for huft_build */
699:
700:
701: /* set up literal table */
702: for (i = 0; i < 144; i++)
703: l[i] = 8;
704: for (; i < 256; i++)
705: l[i] = 9;
706: for (; i < 280; i++)
707: l[i] = 7;
708: for (; i < 288; i++) /* make a complete, but wrong code set */
709: l[i] = 8;
710: bl = 7;
711: if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0)
712: return i;
713:
714:
715: /* set up distance table */
716: for (i = 0; i < 30; i++) /* make an incomplete code set */
717: l[i] = 5;
718: bd = 5;
719: if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1)
720: {
721: huft_free(tl);
722: return i;
723: }
724:
725:
726: /* decompress until an end-of-block code */
727: if (inflate_codes(tl, td, bl, bd))
728: return 1;
729:
730:
731: /* free the decoding tables, return */
732: huft_free(tl);
733: huft_free(td);
734: return 0;
735: }
736:
737:
738:
739: int inflate_dynamic()
740: /* decompress an inflated type 2 (dynamic Huffman codes) block. */
741: {
742: int i; /* temporary variables */
743: unsigned j;
744: unsigned l; /* last length */
745: unsigned m; /* mask for bit lengths table */
746: unsigned n; /* number of lengths to get */
747: struct huft *tl; /* literal/length code table */
748: struct huft *td; /* distance code table */
749: int bl; /* lookup bits for tl */
750: int bd; /* lookup bits for td */
751: unsigned nb; /* number of bit length codes */
752: unsigned nl; /* number of literal/length codes */
753: unsigned nd; /* number of distance codes */
754: unsigned ll[286+30]; /* literal/length and distance code lengths */
755: register ULONG b; /* bit buffer */
756: register unsigned k; /* number of bits in bit buffer */
757:
758:
759: /* make local bit buffer */
760: b = bb;
761: k = bk;
762:
763:
764: /* read in table lengths */
765: NEEDBITS(5)
766: nl = 257 + (b & 0x1f); /* number of literal/length codes */
767: DUMPBITS(5)
768: NEEDBITS(5)
769: nd = 1 + (b & 0x1f); /* number of distance codes */
770: DUMPBITS(5)
771: NEEDBITS(4)
772: nb = 4 + (b & 0xf); /* number of bit length codes */
773: DUMPBITS(4)
774: if (nl > 286 || nd > 30)
775: return 1; /* bad lengths */
776:
777:
778: /* read in bit-length-code lengths */
779: for (i = 0; i < nb; i++)
780: {
781: NEEDBITS(3)
782: ll[border[i]] = b & 7;
783: DUMPBITS(3)
784: }
785: for (; i < 19; i++)
786: ll[border[i]] = 0;
787:
788:
789: /* build decoding table for trees--single level, 7 bit lookup */
790: bl = 7;
791: if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0)
792: {
793: if (i == 1)
794: huft_free(tl);
795: return i; /* incomplete code set */
796: }
797:
798:
799: /* read in literal and distance code lengths */
800: n = nl + nd;
801: m = mask_bits[bl];
802: i = l = 0;
803: while (i < n)
804: {
805: NEEDBITS(bl)
806: j = (td = tl + (b & m))->b;
807: DUMPBITS(j)
808: j = td->v.n;
809: if (j < 16) /* length of code in bits (0..15) */
810: ll[i++] = l = j; /* save last length in l */
811: else if (j == 16) /* repeat last length 3 to 6 times */
812: {
813: NEEDBITS(2)
814: j = 3 + (b & 3);
815: DUMPBITS(2)
816: if (i + j > n)
817: return 1;
818: while (j--)
819: ll[i++] = l;
820: }
821: else if (j == 17) /* 3 to 10 zero length codes */
822: {
823: NEEDBITS(3)
824: j = 3 + (b & 7);
825: DUMPBITS(3)
826: if (i + j > n)
827: return 1;
828: while (j--)
829: ll[i++] = 0;
830: l = 0;
831: }
832: else /* j == 18: 11 to 138 zero length codes */
833: {
834: NEEDBITS(7)
835: j = 11 + (b & 0x7f);
836: DUMPBITS(7)
837: if (i + j > n)
838: return 1;
839: while (j--)
840: ll[i++] = 0;
841: l = 0;
842: }
843: }
844:
845:
846: /* free decoding table for trees */
847: huft_free(tl);
848:
849:
850: /* restore the global bit buffer */
851: bb = b;
852: bk = k;
853:
854:
855: /* build the decoding tables for literal/length and distance codes */
856: bl = lbits;
857: if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0)
858: {
859: if (i == 1)
860: huft_free(tl);
861: return i; /* incomplete code set */
862: }
863: bd = dbits;
864: if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0)
865: {
866: if (i == 1)
867: huft_free(td);
868: huft_free(tl);
869: return i; /* incomplete code set */
870: }
871:
872:
873: /* decompress until an end-of-block code */
874: if (inflate_codes(tl, td, bl, bd))
875: return 1;
876:
877:
878: /* free the decoding tables, return */
879: huft_free(tl);
880: huft_free(td);
881: return 0;
882: }
883:
884:
885:
886: int inflate_block(e)
887: int *e; /* last block flag */
888: /* decompress an inflated block */
889: {
890: unsigned t; /* block type */
891: register ULONG b; /* bit buffer */
892: register unsigned k; /* number of bits in bit buffer */
893:
894:
895: /* make local bit buffer */
896: b = bb;
897: k = bk;
898:
899:
900: /* read in last block bit */
901: NEEDBITS(1)
902: *e = b & 1;
903: DUMPBITS(1)
904:
905:
906: /* read in block type */
907: NEEDBITS(2)
908: t = b & 3;
909: DUMPBITS(2)
910:
911:
912: /* restore the global bit buffer */
913: bb = b;
914: bk = k;
915:
916:
917: /* inflate that block type */
918: if (t == 2)
919: return inflate_dynamic();
920: if (t == 0)
921: return inflate_stored();
922: if (t == 1)
923: return inflate_fixed();
924:
925:
926: /* bad block type */
927: return 2;
928: }
929:
930:
931:
932: int inflate_entry()
933: /* decompress an inflated entry */
934: {
935: int e; /* last block flag */
936: int r; /* result code */
937: unsigned h; /* maximum struct huft's malloc'ed */
938:
939:
940: /* initialize window, bit buffer */
941: wp = 0;
942: bk = 0;
943: bb = 0;
944:
945:
946: /* decompress until the last block */
947: h = 0;
948: do {
949: hufts = 0;
950: if ((r = inflate_block(&e)) != 0)
951: return r;
952: if (hufts > h)
953: h = hufts;
954: } while (!e);
955:
956:
957: /* flush out slide */
958: flush(wp);
959:
960:
961: /* return success */
962: #ifdef DEBUG
963: fprintf(stderr, "<%u> ", h);
964: #endif /* DEBUG */
965: return 0;
966: }
967:
968:
969: void inflate()
970: /* ignore the return code for now ... */
971: {
972: #ifdef DYN_ALLOC
973: if (window == NULL) {
974: window = (char*) calloc((unsigned)WSIZE, 2*sizeof(char));
975: /* Note that inflate only needs WSIZE bytes, but the window
976: * array is shared with deflate, which needs 2*WISZE bytes.
977: */
978: if (window==NULL) err(4, "");
979: }
980: #endif
981:
982: inflate_entry();
983: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.