|
|
1.1.1.2 root 1: /*
2:
3: Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
4: Kai Uwe Rommel and Igor Mandrichenko.
5: Permission is granted to any individual or institution to use, copy, or
6: redistribute this software so long as all of the original files are included
7: unmodified, that it is not sold for profit, and that this copyright notice
8: is retained.
9:
10: */
11:
12: /*
13: * deflate.c by Jean-loup Gailly.
14: *
15: * PURPOSE
16: *
17: * Identify new text as repetitions of old text within a fixed-
18: * length sliding window trailing behind the new text.
19: *
20: * DISCUSSION
21: *
22: * The "deflation" process depends on being able to identify portions
23: * of the input text which are identical to earlier input (within a
24: * sliding window trailing behind the input currently being processed).
25: *
26: * The most straightforward technique turns out to be the fastest for
27: * most input files: try all possible matches and select the longest.
28: * The key feature is of this algorithm is that insertion and deletions
29: * from the string dictionary are very simple and thus fast. Insertions
30: * and deletions are performed at each input character, whereas string
31: * matches are performed only when the previous match ends. So it is
32: * preferable to spend more time in matches to allow very fast string
33: * insertions and deletions. The matching algorithm for small strings
34: * is inspired from that of Rabin & Karp. A brute force approach is
35: * used to find longer strings when a small match has been found.
36: * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
37: * (by Leonid Broukhis).
38: * A previous version of this file used a more sophisticated algorithm
39: * (by Fiala and Greene) which is guaranteed to run in linear amortized
40: * time, but has a larger average cost and uses more memory. However
41: * the F&G algorithm may be faster for some highly redundant files if
42: * the parameter max_chain_length (described below) is too large.
43: *
44: * ACKNOWLEDGEMENTS
45: *
46: * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
47: * I found it in 'freeze' written by Leonid Broukhis.
48: * Thanks to many info-zippers for bug reports and testing.
49: *
50: * REFERENCES
51: *
52: * APPNOTE.TXT documentation file in PKZIP 2.0 distribution.
53: *
54: * A description of the Rabin and Karp algorithm is given in the book
55: * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
56: *
57: * Fiala,E.R., and Greene,D.H.
58: * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
59: *
60: * INTERFACE
61: *
62: * void lm_init (int pack_level, ush *flags)
63: * Initialize the "longest match" routines for a new file
64: *
65: * ulg deflate (void)
66: * Processes a new input file and return its compressed length. Sets
67: * the compressed length, crc, deflate flags and internal file
68: * attributes.
69: */
70:
1.1.1.3 ! root 71: #include "zunzip.h"
1.1.1.2 root 72: #include "zip.h"
73:
74: /* ===========================================================================
75: * Configuration parameters
76: */
77:
78: /* Compile with MEDIUM_MEM to reduce the memory requirements or
79: * with SMALL_MEM to use as little memory as possible.
80: * Warning: defining these symbols affects MATCH_BUFSIZE and HASH_BITS
81: * (see below) and thus affects the compression ratio. The compressed output
82: * is still correct, and might even be smaller in some cases.
83: */
84:
85: #ifdef SMALL_MEM
86: # define HASH_BITS 13 /* Number of bits used to hash strings */
87: #else
88: #ifdef MEDIUM_MEM
89: # define HASH_BITS 14
90: #else
91: # define HASH_BITS 15
92: /* For portability to 16 bit machines, do not use values above 15. */
93: #endif
94: #endif
95:
96: #define HASH_SIZE (unsigned)(1<<HASH_BITS)
97: #define HASH_MASK (HASH_SIZE-1)
98: #define WMASK (WSIZE-1)
99: /* HASH_SIZE and WSIZE must be powers of two */
100:
101: #define NIL 0
102: /* Tail of hash chains */
103:
104: #define FAST 4
105: #define SLOW 2
106: /* speed options for the general purpose bit flag */
107:
108: #ifndef TOO_FAR
109: # define TOO_FAR 4096
110: #endif
111: /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
112:
113: /* ===========================================================================
114: * Local data used by the "longest match" routines.
115: */
116:
117: typedef ush Pos;
118: typedef unsigned IPos;
119: /* A Pos is an index in the character window. We use short instead of int to
120: * save space in the various tables. IPos is used only for parameter passing.
121: */
122:
123: #ifndef DYN_ALLOC
124: uch far window[2L*WSIZE];
125: /* Sliding window. Input bytes are read into the second half of the window,
126: * and move to the first half later to keep a dictionary of at least WSIZE
127: * bytes. With this organization, matches are limited to a distance of
128: * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
129: * performed with a length multiple of the block size. Also, it limits
130: * the window size to 64K, which is quite useful on MSDOS.
131: * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
132: * be less efficient since the data would have to be copied WSIZE/BSZ times)
133: */
134: Pos far prev[WSIZE];
135: /* Link to older string with same hash index. To limit the size of this
136: * array to 64K, this link is maintained only for the last 32K strings.
137: * An index in this array is thus a window index modulo 32K.
138: */
139: Pos far head[HASH_SIZE];
140: /* Heads of the hash chains or NIL */
141: #else
142: uch far * near window = NULL;
143: Pos far * near prev = NULL;
1.1.1.3 ! root 144: static Pos far * near head;
1.1.1.2 root 145: #endif
146:
147: long block_start;
148: /* window position at the beginning of the current output block. Gets
149: * negative when the window is moved backwards.
150: */
151:
152: local unsigned near ins_h; /* hash index of string to be inserted */
153:
154: #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
155: /* Number of bits by which ins_h and del_h must be shifted at each
156: * input step. It must be such that after MIN_MATCH steps, the oldest
157: * byte no longer takes part in the hash key, that is:
158: * H_SHIFT * MIN_MATCH >= HASH_BITS
159: */
160:
161: unsigned int near prev_length;
162: /* Length of the best match at previous step. Matches not greater than this
163: * are discarded. This is used in the lazy match evaluation.
164: */
165:
166: unsigned near strstart; /* start of string to insert */
1.1.1.3 ! root 167: unsigned near match_start; /* start of matching string */
1.1.1.2 root 168: local int near eofile; /* flag set at end of input file */
169: local unsigned near lookahead; /* number of valid bytes ahead in window */
170:
171: unsigned near max_chain_length;
172: /* To speed up deflation, hash chains are never searched beyond this length.
173: * A higher limit improves compression ratio but degrades the speed.
174: */
175:
176: local unsigned int max_lazy_match;
177: /* Attempt to find a better match only when the current match is strictly
178: * smaller than this value.
179: */
180:
181: int near good_match;
182: /* Use a faster search when the previous match is longer than this */
183:
184:
185: /* Values for max_lazy_match, good_match and max_chain_length, depending on
186: * the desired pack level (0..9). The values given below have been tuned to
187: * exclude worst case performance for pathological files. Better values may be
188: * found for specific files.
189: */
190: typedef struct config {
191: int good_length;
192: int max_lazy;
193: unsigned max_chain;
194: uch flag;
195: } config;
196:
197: local config configuration_table[10] = {
198: /* good lazy chain flag */
199: /* 0 */ {0, 0, 0, 0}, /* store only */
200: /* 1 */ {4, 4, 16, FAST}, /* maximum speed */
201: /* 2 */ {6, 8, 16, 0},
202: /* 3 */ {8, 16, 32, 0},
203: /* 4 */ {8, 32, 64, 0},
204: /* 5 */ {8, 64, 128, 0},
205: /* 6 */ {8, 128, 256, 0},
206: /* 7 */ {8, 128, 512, 0},
207: /* 8 */ {32, 258, 1024, 0},
208: /* 9 */ {32, 258, 4096, SLOW}}; /* maximum compression */
209:
210: /* Note: the current code requires max_lazy >= MIN_MATCH and max_chain >= 4
211: * but these restrictions can easily be removed at a small cost.
212: */
213:
214: #define EQUAL 0
215: /* result of memcmp for equal strings */
216:
217: /* ===========================================================================
218: * Prototypes for local functions. Use asm version by default for
219: * MSDOS but not Unix. However the asm version version is recommended
220: * for 386 Unix.
221: */
222: #ifdef ATARI_ST
223: # undef MSDOS /* avoid the processor specific parts */
224: #endif
225: #if defined(MSDOS) && !defined(NO_ASM) && !defined(ASM)
226: # define ASM
227: #endif
228:
229: local void fill_window OF((void));
230: int longest_match OF((IPos cur_match));
231: #ifdef ASM
232: void match_init OF((void)); /* asm code initialization */
233: #endif
234:
235: #ifdef DEBUG
236: local void check_match OF((IPos start, IPos match, int length));
237: #endif
238:
1.1.1.3 ! root 239: #undef MIN
1.1.1.2 root 240: #define MIN(a,b) ((a) <= (b) ? (a) : (b))
241: /* The arguments must not have side effects. */
242:
243: /* ===========================================================================
244: * Update a hash value with the given input byte
245: * IN assertion: all calls to to UPDATE_HASH are made with consecutive
246: * input characters, so that a running hash key can be computed from the
247: * previous key instead of complete recalculation each time.
248: */
249: #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
250:
251: /* ===========================================================================
252: * Insert string s in the dictionary and set match_head to the previous head
253: * of the hash chain (the most recent string with same hash key). Return
254: * the previous length of the hash chain.
255: * IN assertion: all calls to to INSERT_STRING are made with consecutive
256: * input characters and the first MIN_MATCH bytes of s are valid
257: * (except for the last MIN_MATCH-1 bytes of the input file).
258: */
259: #define INSERT_STRING(s, match_head) \
260: (UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]), \
261: prev[(s) & WMASK] = match_head = head[ins_h], \
262: head[ins_h] = (s))
263:
264: /* ===========================================================================
265: * Initialize the "longest match" routines for a new file
266: */
267: void lm_init (pack_level, flags)
268: int pack_level; /* 0: store, 1: best speed, 9: best compression */
269: ush *flags; /* general purpose bit flag */
270: {
271: register unsigned j;
272:
273: if (pack_level < 1 || pack_level > 9) error("bad pack level");
274:
275: /* Use dynamic allocation if compiler does not like big static arrays: */
276: #ifdef DYN_ALLOC
1.1.1.3 ! root 277: window = (uch far*) fcalloc(WSIZE, 2*sizeof(uch));
! 278: prev = (Pos far*) fcalloc(WSIZE, sizeof(Pos));
! 279: head = (Pos far*) fcalloc(HASH_SIZE, sizeof(Pos));
! 280:
! 281: if (window == NULL || prev == NULL || head == NULL) {
! 282: err(ZE_MEM, "window allocation");
! 283: }
1.1.1.2 root 284: #endif /* DYN_ALLOC */
285: #ifdef ASM
286: match_init(); /* initialize the asm code */
287: #endif
288: /* Initialize the hash table. */
289: for (j = 0; j < HASH_SIZE; j++) head[j] = NIL;
290: /* prev will be initialized on the fly */
291:
292: /* Set the default configuration parameters:
293: */
294: max_lazy_match = configuration_table[pack_level].max_lazy;
295: good_match = configuration_table[pack_level].good_length;
296: max_chain_length = configuration_table[pack_level].max_chain;
297: *flags |= configuration_table[pack_level].flag;
298: /* ??? reduce max_chain_length for binary files */
299:
300: strstart = 0;
301: block_start = 0L;
302:
303: #if defined(MSDOS) && !defined(__32BIT__)
304: /* Can't read a 64K block under MSDOS */
305: lookahead = read_buf((char*)window, (unsigned)WSIZE);
306: #else
307: lookahead = read_buf((char*)window, 2*WSIZE);
308: #endif
309: if (lookahead == 0 || lookahead == (unsigned)EOF) {
310: eofile = 1, lookahead = 0;
311: return;
312: }
313: eofile = 0;
314: /* Make sure that we always have enough lookahead. This is important
315: * if input comes from a device such as a tty.
316: */
317: while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();
318:
319: ins_h = 0;
320: for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);
321: /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
322: * not important since only literal bytes will be emitted.
323: */
324: }
325:
1.1.1.3 ! root 326: void lm_free()
! 327: {
! 328: #ifdef DYN_ALLOC
! 329: free(window);
! 330: free(prev);
! 331: free(head);
! 332: window = NULL;
! 333: prev = head = NULL;
! 334: #endif
! 335: }
! 336:
1.1.1.2 root 337: /* ===========================================================================
338: * Set match_start to the longest match starting at the given string and
339: * return its length. Matches shorter or equal to prev_length are discarded,
340: * in which case the result is equal to prev_length and match_start is
341: * garbage.
342: * IN assertions: cur_match is the head of the hash chain for the current
343: * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
344: */
345: #ifndef ASM
346: /* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm. The code
347: * is functionally equivalent, so you can use the C version if desired.
348: */
349: int longest_match(cur_match)
350: IPos cur_match; /* current match */
351: {
352: unsigned chain_length = max_chain_length; /* max hash chain length */
353: register uch far *scan = window + strstart; /* current string */
354: register uch far *match = scan; /* matched string */
355: register int len; /* length of current match */
356: int best_len = prev_length; /* best match length so far */
357: IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL;
358: /* Stop when cur_match becomes <= limit. To simplify the code,
359: * we prevent matches with the string of window index 0.
360: */
361: #ifdef UNALIGNED_OK
362: register ush scan_start = *(ush*)scan;
363: register ush scan_end = *(ush*)(scan+best_len-1);
364: #else
365: register uch scan_start = *scan;
366: register uch scan_end1 = scan[best_len-1];
367: register uch scan_end = scan[best_len];
368: #endif
369:
370: /* Do not waste too much time if we already have a good match: */
371: if (prev_length >= good_match) {
372: chain_length >>= 2;
373: }
374:
375: do {
376: Assert(cur_match < strstart, "no future");
377: match = window + cur_match;
378:
379: /* Skip to next match if the match length cannot increase
380: * or if the match length is less than 2:
381: */
382: #if (defined(UNALIGNED_OK) && HASH_BITS >= 8)
383: /* This code assumes sizeof(unsigned short) == 2 and
384: * sizeof(unsigned long) == 4. Do not use UNALIGNED_OK if your
385: * compiler uses different sizes.
386: */
387: if (*(ush*)(match+best_len-1) != scan_end ||
388: *(ush*)match != scan_start) continue;
389:
390: len = MIN_MATCH - 4;
391: /* It is not necessary to compare scan[2] and match[2] since they are
392: * always equal when the other bytes match, given that the hash keys
393: * are equal and that HASH_BITS >= 8.
394: */
395: do {} while ((len+=4) < MAX_MATCH-3 &&
396: *(ulg*)(scan+len) == *(ulg*)(match+len));
397: /* The funny do {} generates better code for most compilers */
398:
399: if (*(ush*)(scan+len) == *(ush*)(match+len)) len += 2;
400: if (scan[len] == match[len]) len++;
401:
402: #else /* UNALIGNED_OK */
403: if (match[best_len] != scan_end ||
404: match[best_len-1] != scan_end1 || *match != scan_start)
405: continue;
406: /* It is not necessary to compare scan[1] and match[1] since they
407: * are always equal when the other bytes match, given that
408: * the hash keys are equal and that h_shift+8 <= HASH_BITS,
409: * that is, when the last byte is entirely included in the hash key.
410: * The condition is equivalent to
411: * (HASH_BITS+2)/3 + 8 <= HASH_BITS
412: * or: HASH_BITS >= 13
413: * Also, we check for a match at best_len-1 to get rid quickly of
414: * the match with the suffix of the match made at the previous step,
415: * which is known to fail.
416: */
417: #if HASH_BITS >= 13
418: len = 1;
419: #else
420: len = 0;
421: #endif
422: do {} while (++len < MAX_MATCH && scan[len] == match[len]);
423:
424: #endif /* UNALIGNED_OK */
425:
426: if (len > best_len) {
427: match_start = cur_match;
428: best_len = len;
429: if (len == MAX_MATCH) break;
430: #ifdef UNALIGNED_OK
431: scan_end = *(ush*)(scan+best_len-1);
432: #else
433: scan_end1 = scan[best_len-1];
434: scan_end = scan[best_len];
435: #endif
436: }
437: } while (--chain_length != 0 &&
438: (cur_match = prev[cur_match & WMASK]) > limit);
439:
440: return best_len;
441: }
442: #endif /* NO_ASM */
443:
444: #ifdef DEBUG
445: /* ===========================================================================
446: * Check that the match at match_start is indeed a match.
447: */
448: local void check_match(start, match, length)
449: IPos start, match;
450: int length;
451: {
452: /* check that the match is indeed a match */
453: if (memcmp((char*)window + match,
454: (char*)window + start, length) != EQUAL) {
455: fprintf(stderr,
456: " start %d, match %d, length %d\n",
457: start, match, length);
458: error("invalid match");
459: }
460: if (verbose > 1) {
461: fprintf(stderr,"\\[%d,%d]", start-match, length);
462: do { putc(window[start++], stderr); } while (--length != 0);
463: }
464: }
465: #else
466: # define check_match(start, match, length)
467: #endif
468:
469: /* ===========================================================================
470: * Fill the window when the lookahead becomes insufficient.
471: * Updates strstart and lookahead, and sets eofile if end of input file.
472: * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
473: * OUT assertions: at least one byte has been read, or eofile is set;
474: * file reads are performed for at least two bytes (required for the
475: * translate_eol option).
476: */
477: local void fill_window()
478: {
479: register unsigned n, m;
480: unsigned more = (unsigned)((ulg)2*WSIZE - (ulg)lookahead - (ulg)strstart);
481: /* Amount of free space at the end of the window. */
482:
483: /* If the window is full, move the upper half to the lower one to make
484: * room in the upper half.
485: */
486: if (more == (unsigned)EOF) {
487: /* Very unlikely, but possible on 16 bit machine if strstart == 0
488: * and lookahead == 1 (input done one byte at time)
489: */
490: more--;
491: } else if (more <= 1) {
492: /* By the IN assertion, the window is not empty so we can't confuse
493: * more == 0 with more == 64K on a 16 bit machine.
494: */
495: memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
496: match_start -= WSIZE;
497: strstart -= WSIZE;
498: /* strstart - WSIZE >= WSIZE - 1 - lookahead >= WSIZE - MIN_LOOKAHEAD
499: * so we now have strstart >= MAX_DIST:
500: */
501: Assert (strstart >= MAX_DIST, "window slide too early");
502: block_start -= (long) WSIZE;
503:
504: for (n = 0; n < HASH_SIZE; n++) {
505: m = head[n];
506: head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
507: }
508: for (n = 0; n < WSIZE; n++) {
509: m = prev[n];
510: prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
511: /* If n is not on any hash chain, prev[n] is garbage but
512: * its value will never be used.
513: */
514: }
515: more += WSIZE;
516: #ifdef ZIP
517: if (verbose) putc('.', stderr);
518: #endif
519: }
520: /* At this point, more >= 2 */
521: n = read_buf((char*)window+strstart+lookahead, more);
522: if (n == 0 || n == (unsigned)EOF) {
523: eofile = 1;
524: } else {
525: lookahead += n;
526: }
527: }
528:
529: /* ===========================================================================
530: * Flush the current block, with given end-of-file flag.
531: * IN assertion: strstart is set to the end of the current match.
532: */
533: #define FLUSH_BLOCK(eof) \
534: flush_block(block_start >= 0L ? (char*)&window[block_start] : (char*)NULL,\
535: (long)strstart - block_start, (eof))
536:
537: /* ===========================================================================
538: * Processes a new input file and return its compressed length.
539: */
540: #ifdef NO_LAZY
541: ulg deflate()
542: {
543: IPos hash_head; /* head of the hash chain */
544: int flush; /* set if current block must be flushed */
545: unsigned match_length = 0; /* length of best match */
546:
547: prev_length = MIN_MATCH-1;
548: while (lookahead != 0) {
549: /* Insert the string window[strstart .. strstart+2] in the
550: * dictionary, and set hash_head to the head of the hash chain:
551: */
552: INSERT_STRING(strstart, hash_head);
553:
554: /* Find the longest match, discarding those <= prev_length.
555: * At this point we have always match_length < MIN_MATCH
556: */
557: if (hash_head != NIL && strstart - hash_head <= MAX_DIST) {
558: /* To simplify the code, we prevent matches with the string
559: * of window index 0 (in particular we have to avoid a match
560: * of the string with itself at the start of the input file).
561: */
562: match_length = longest_match (hash_head);
563: /* longest_match() sets match_start */
564: if (match_length > lookahead) match_length = lookahead;
565: }
566: if (match_length >= MIN_MATCH) {
567: check_match(strstart, match_start, match_length);
568:
569: flush = ct_tally(strstart-match_start, match_length - MIN_MATCH);
570:
571: lookahead -= match_length;
572: match_length--; /* string at strstart already in hash table */
573: do {
574: strstart++;
575: INSERT_STRING(strstart, hash_head);
576: /* strstart never exceeds WSIZE-MAX_MATCH, so there are
577: * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
578: * these bytes are garbage, but it does not matter since the
579: * next lookahead bytes will always be emitted as literals.
580: */
581: } while (--match_length != 0);
582: } else {
583: /* No match, output a literal byte */
584: flush = ct_tally (0, window[strstart]);
585: lookahead--;
586: }
587: strstart++;
588: if (flush) FLUSH_BLOCK(0), block_start = strstart;
589:
590: /* Make sure that we always have enough lookahead, except
591: * at the end of the input file. We need MAX_MATCH bytes
592: * for the next match, plus MIN_MATCH bytes to insert the
593: * string following the next match.
594: */
595: while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();
596:
597: }
598: return FLUSH_BLOCK(1); /* eof */
599: }
600: #else /* LAZY */
601: /* ===========================================================================
602: * Same as above, but achieves better compression. We use a lazy
603: * evaluation for matches: a match is finally adopted only if there is
604: * no better match at the next window position.
605: */
606: ulg deflate()
607: {
608: IPos hash_head; /* head of hash chain */
609: IPos prev_match; /* previous match */
610: int flush; /* set if current block must be flushed */
611: int match_available = 0; /* set if previous match exists */
612: register unsigned match_length = MIN_MATCH-1; /* length of best match */
613: #ifdef DEBUG
614: extern ulg isize; /* byte length of input file, for debug only */
615: #endif
616:
617: /* Process the input block. */
618: while (lookahead != 0) {
619: /* Insert the string window[strstart .. strstart+2] in the
620: * dictionary, and set hash_head to the head of the hash chain:
621: */
622: INSERT_STRING(strstart, hash_head);
623:
624: /* Find the longest match, discarding those <= prev_length.
625: */
626: prev_length = match_length, prev_match = match_start;
627: match_length = MIN_MATCH-1;
628:
629: if (hash_head != NIL && prev_length < max_lazy_match &&
630: strstart - hash_head <= MAX_DIST) {
631: /* To simplify the code, we prevent matches with the string
632: * of window index 0 (in particular we have to avoid a match
633: * of the string with itself at the start of the input file).
634: */
635: match_length = longest_match (hash_head);
636: /* longest_match() sets match_start */
637: if (match_length > lookahead) match_length = lookahead;
638: /* Ignore a length 3 match if it is too distant: */
639: if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){
640: /* If prev_match is also MIN_MATCH, match_start is garbage
641: * but we will ignore the current match anyway.
642: */
643: match_length--;
644: }
645: }
646: /* If there was a match at the previous step and the current
647: * match is not better, output the previous match:
648: */
649: if (prev_length >= MIN_MATCH && match_length <= prev_length) {
650:
651: check_match(strstart-1, prev_match, prev_length);
652:
653: flush = ct_tally(strstart-1-prev_match, prev_length - MIN_MATCH);
654:
655: /* Insert in hash table all strings up to the end of the match.
656: * strstart-1 and strstart are already inserted.
657: */
658: lookahead -= prev_length-1;
659: prev_length -= 2;
660: do {
661: strstart++;
662: INSERT_STRING(strstart, hash_head);
663: /* strstart never exceeds WSIZE-MAX_MATCH, so there are
664: * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
665: * these bytes are garbage, but it does not matter since the
666: * next lookahead bytes will always be emitted as literals.
667: */
668: } while (--prev_length != 0);
669: match_available = 0;
670: match_length = MIN_MATCH-1;
671: strstart++;
672: if (flush) FLUSH_BLOCK(0), block_start = strstart;
673:
674: } else if (match_available) {
675: /* If there was no match at the previous position, output a
676: * single literal. If there was a match but the current match
677: * is longer, truncate the previous match to a single literal.
678: */
679: Tracevv((stderr,"%c",window[strstart-1]));
680: if (ct_tally (0, window[strstart-1])) {
681: FLUSH_BLOCK(0), block_start = strstart;
682: }
683: strstart++;
684: lookahead--;
685: } else {
686: /* There is no previous match to compare with, wait for
687: * the next step to decide.
688: */
689: match_available = 1;
690: strstart++;
691: lookahead--;
692: }
693: #if 0 /* for pgp: disabled to allow compiling with -DDEBUG */
694: Assert (strstart <= isize && lookahead <= isize, "a bit too far");
695: #endif
696:
697: /* Make sure that we always have enough lookahead, except
698: * at the end of the input file. We need MAX_MATCH bytes
699: * for the next match, plus MIN_MATCH bytes to insert the
700: * string following the next match.
701: */
702: while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();
703: }
704: if (match_available) ct_tally (0, window[strstart-1]);
705:
706: return FLUSH_BLOCK(1); /* eof */
707: }
708: #endif /* LAZY */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.