|
|
1.1.1.7 ! root 1: /* ! 2: ! 3: Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly, ! 4: Kai Uwe Rommel and Igor Mandrichenko. ! 5: Permission is granted to any individual or institution to use, copy, or ! 6: redistribute this software so long as all of the original files are included ! 7: unmodified, that it is not sold for profit, and that this copyright notice ! 8: is retained. ! 9: ! 10: */ ! 11: ! 12: /* ! 13: * deflate.c by Jean-loup Gailly. ! 14: * ! 15: * PURPOSE ! 16: * ! 17: * Identify new text as repetitions of old text within a fixed- ! 18: * length sliding window trailing behind the new text. ! 19: * ! 20: * DISCUSSION ! 21: * ! 22: * The "deflation" process depends on being able to identify portions ! 23: * of the input text which are identical to earlier input (within a ! 24: * sliding window trailing behind the input currently being processed). ! 25: * ! 26: * The most straightforward technique turns out to be the fastest for ! 27: * most input files: try all possible matches and select the longest. ! 28: * The key feature is of this algorithm is that insertion and deletions ! 29: * from the string dictionary are very simple and thus fast. Insertions ! 30: * and deletions are performed at each input character, whereas string ! 31: * matches are performed only when the previous match ends. So it is ! 32: * preferable to spend more time in matches to allow very fast string ! 33: * insertions and deletions. The matching algorithm for small strings ! 34: * is inspired from that of Rabin & Karp. A brute force approach is ! 35: * used to find longer strings when a small match has been found. ! 36: * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze ! 37: * (by Leonid Broukhis). ! 38: * A previous version of this file used a more sophisticated algorithm ! 39: * (by Fiala and Greene) which is guaranteed to run in linear amortized ! 40: * time, but has a larger average cost and uses more memory. However ! 41: * the F&G algorithm may be faster for some highly redundant files if ! 42: * the parameter max_chain_length (described below) is too large. ! 43: * ! 44: * ACKNOWLEDGEMENTS ! 45: * ! 46: * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and ! 47: * I found it in 'freeze' written by Leonid Broukhis. ! 48: * Thanks to many info-zippers for bug reports and testing. ! 49: * ! 50: * REFERENCES ! 51: * ! 52: * APPNOTE.TXT documentation file in PKZIP 2.0 distribution. ! 53: * ! 54: * A description of the Rabin and Karp algorithm is given in the book ! 55: * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. ! 56: * ! 57: * Fiala,E.R., and Greene,D.H. ! 58: * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 ! 59: * ! 60: * INTERFACE ! 61: * ! 62: * void lm_init (int pack_level, ush *flags) ! 63: * Initialize the "longest match" routines for a new file ! 64: * ! 65: * ulg deflate (void) ! 66: * Processes a new input file and return its compressed length. Sets ! 67: * the compressed length, crc, deflate flags and internal file ! 68: * attributes. ! 69: */ ! 70: ! 71: #include "zunzip.h" ! 72: #include "zip.h" ! 73: #ifdef MACTC5 ! 74: #include "Macutil3.h" ! 75: #endif ! 76: ! 77: /* =========================================================================== ! 78: * Configuration parameters ! 79: */ ! 80: ! 81: /* Compile with MEDIUM_MEM to reduce the memory requirements or ! 82: * with SMALL_MEM to use as little memory as possible. ! 83: * Warning: defining these symbols affects MATCH_BUFSIZE and HASH_BITS ! 84: * (see below) and thus affects the compression ratio. The compressed output ! 85: * is still correct, and might even be smaller in some cases. ! 86: */ ! 87: ! 88: #ifdef SMALL_MEM ! 89: # define HASH_BITS 13 /* Number of bits used to hash strings */ ! 90: #else ! 91: #ifdef MEDIUM_MEM ! 92: # define HASH_BITS 14 ! 93: #else ! 94: # define HASH_BITS 15 ! 95: /* For portability to 16 bit machines, do not use values above 15. */ ! 96: #endif ! 97: #endif ! 98: ! 99: #define HASH_SIZE (unsigned)(1<<HASH_BITS) ! 100: #define HASH_MASK (HASH_SIZE-1) ! 101: #define WMASK (WSIZE-1) ! 102: /* HASH_SIZE and WSIZE must be powers of two */ ! 103: ! 104: #define NIL 0 ! 105: /* Tail of hash chains */ ! 106: ! 107: #define FAST 4 ! 108: #define SLOW 2 ! 109: /* speed options for the general purpose bit flag */ ! 110: ! 111: #ifndef TOO_FAR ! 112: # define TOO_FAR 4096 ! 113: #endif ! 114: /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ ! 115: ! 116: /* =========================================================================== ! 117: * Local data used by the "longest match" routines. ! 118: */ ! 119: ! 120: typedef ush Pos; ! 121: typedef unsigned IPos; ! 122: /* A Pos is an index in the character window. We use short instead of int to ! 123: * save space in the various tables. IPos is used only for parameter passing. ! 124: */ ! 125: ! 126: #ifndef DYN_ALLOC ! 127: uch far slide[2L*WSIZE]; ! 128: /* Sliding window. Input bytes are read into the second half of the window, ! 129: * and move to the first half later to keep a dictionary of at least WSIZE ! 130: * bytes. With this organization, matches are limited to a distance of ! 131: * WSIZE-MAX_MATCH bytes, but this ensures that IO is always ! 132: * performed with a length multiple of the block size. Also, it limits ! 133: * the window size to 64K, which is quite useful on MSDOS. ! 134: * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would ! 135: * be less efficient since the data would have to be copied WSIZE/BSZ times) ! 136: */ ! 137: Pos far prev[WSIZE]; ! 138: /* Link to older string with same hash index. To limit the size of this ! 139: * array to 64K, this link is maintained only for the last 32K strings. ! 140: * An index in this array is thus a window index modulo 32K. ! 141: */ ! 142: Pos far head[HASH_SIZE]; ! 143: /* Heads of the hash chains or NIL */ ! 144: #else ! 145: uch far * near slide = NULL; ! 146: Pos far * near prev = NULL; ! 147: static void far *__slide, *__prev; ! 148: static Pos far * near head; ! 149: #endif ! 150: ! 151: long block_start; ! 152: /* window position at the beginning of the current output block. Gets ! 153: * negative when the window is moved backwards. ! 154: */ ! 155: ! 156: local unsigned near ins_h; /* hash index of string to be inserted */ ! 157: ! 158: #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH) ! 159: /* Number of bits by which ins_h and del_h must be shifted at each ! 160: * input step. It must be such that after MIN_MATCH steps, the oldest ! 161: * byte no longer takes part in the hash key, that is: ! 162: * H_SHIFT * MIN_MATCH >= HASH_BITS ! 163: */ ! 164: ! 165: unsigned int near prev_length; ! 166: /* Length of the best match at previous step. Matches not greater than this ! 167: * are discarded. This is used in the lazy match evaluation. ! 168: */ ! 169: ! 170: unsigned near strstart; /* start of string to insert */ ! 171: unsigned near match_start; /* start of matching string */ ! 172: local int near eofile; /* flag set at end of input file */ ! 173: local unsigned near lookahead; /* number of valid bytes ahead in window */ ! 174: ! 175: unsigned near max_chain_length; ! 176: /* To speed up deflation, hash chains are never searched beyond this length. ! 177: * A higher limit improves compression ratio but degrades the speed. ! 178: */ ! 179: ! 180: local unsigned int max_lazy_match; ! 181: /* Attempt to find a better match only when the current match is strictly ! 182: * smaller than this value. ! 183: */ ! 184: ! 185: int near good_match; ! 186: /* Use a faster search when the previous match is longer than this */ ! 187: ! 188: ! 189: /* Values for max_lazy_match, good_match and max_chain_length, depending on ! 190: * the desired pack level (0..9). The values given below have been tuned to ! 191: * exclude worst case performance for pathological files. Better values may be ! 192: * found for specific files. ! 193: */ ! 194: typedef struct config { ! 195: int good_length; ! 196: int max_lazy; ! 197: unsigned max_chain; ! 198: uch flag; ! 199: } config; ! 200: ! 201: local config configuration_table[10] = { ! 202: /* good lazy chain flag */ ! 203: /* 0 */ {0, 0, 0, 0}, /* store only */ ! 204: /* 1 */ {4, 4, 16, FAST}, /* maximum speed */ ! 205: /* 2 */ {6, 8, 16, 0}, ! 206: /* 3 */ {8, 16, 32, 0}, ! 207: /* 4 */ {8, 32, 64, 0}, ! 208: /* 5 */ {8, 64, 128, 0}, ! 209: /* 6 */ {8, 128, 256, 0}, ! 210: /* 7 */ {8, 128, 512, 0}, ! 211: /* 8 */ {32, 258, 1024, 0}, ! 212: /* 9 */ {32, 258, 4096, SLOW}}; /* maximum compression */ ! 213: ! 214: /* Note: the current code requires max_lazy >= MIN_MATCH and max_chain >= 4 ! 215: * but these restrictions can easily be removed at a small cost. ! 216: */ ! 217: ! 218: #define EQUAL 0 ! 219: /* result of memcmp for equal strings */ ! 220: ! 221: /* =========================================================================== ! 222: * Prototypes for local functions. Use asm version by default for ! 223: * MSDOS but not Unix. However the asm version version is recommended ! 224: * for 386 Unix. ! 225: */ ! 226: #ifdef ATARI_ST ! 227: # undef MSDOS /* avoid the processor specific parts */ ! 228: #endif ! 229: #if defined(MSDOS) && !defined(NO_ASM) && !defined(ASM) ! 230: # define ASM ! 231: #endif ! 232: ! 233: local void fill_window OF((void)); ! 234: int longest_match OF((IPos cur_match)); ! 235: #ifdef ASM ! 236: void match_init OF((void)); /* asm code initialization */ ! 237: #endif ! 238: ! 239: #ifdef DEBUG ! 240: local void check_match OF((IPos start, IPos match, int length)); ! 241: #endif ! 242: ! 243: #undef MIN ! 244: #define MIN(a,b) ((a) <= (b) ? (a) : (b)) ! 245: /* The arguments must not have side effects. */ ! 246: ! 247: /* =========================================================================== ! 248: * Update a hash value with the given input byte ! 249: * IN assertion: all calls to to UPDATE_HASH are made with consecutive ! 250: * input characters, so that a running hash key can be computed from the ! 251: * previous key instead of complete recalculation each time. ! 252: */ ! 253: #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK) ! 254: ! 255: /* =========================================================================== ! 256: * Insert string s in the dictionary and set match_head to the previous head ! 257: * of the hash chain (the most recent string with same hash key). Return ! 258: * the previous length of the hash chain. ! 259: * IN assertion: all calls to to INSERT_STRING are made with consecutive ! 260: * input characters and the first MIN_MATCH bytes of s are valid ! 261: * (except for the last MIN_MATCH-1 bytes of the input file). ! 262: */ ! 263: #define INSERT_STRING(s, match_head) \ ! 264: (UPDATE_HASH(ins_h, slide[(s) + MIN_MATCH-1]), \ ! 265: prev[(s) & WMASK] = match_head = head[ins_h], \ ! 266: head[ins_h] = (s)) ! 267: ! 268: /* =========================================================================== ! 269: * Initialize the "longest match" routines for a new file ! 270: */ ! 271: void lm_init (pack_level, flags) ! 272: int pack_level; /* 0: store, 1: best speed, 9: best compression */ ! 273: ush *flags; /* general purpose bit flag */ ! 274: { ! 275: register unsigned j; ! 276: ! 277: if (pack_level < 1 || pack_level > 9) error("bad pack level"); ! 278: ! 279: /* Use dynamic allocation if compiler does not like big static arrays: */ ! 280: #ifdef DYN_ALLOC ! 281: __slide = slide = (uch far*) fcalloc(WSIZE*2*sizeof(uch)+16, 1); ! 282: __prev = prev = (Pos far*) fcalloc(WSIZE*sizeof(Pos)+16, 1); ! 283: head = (Pos far*) fcalloc(HASH_SIZE, sizeof(Pos)); ! 284: ! 285: if (slide == NULL || prev == NULL || head == NULL) { ! 286: err(ZE_MEM, "window allocation"); ! 287: } ! 288: #endif /* DYN_ALLOC */ ! 289: #ifdef ASM ! 290: match_init(); /* initialize the asm code */ ! 291: #endif ! 292: /* Initialize the hash table. */ ! 293: for (j = 0; j < HASH_SIZE; j++) head[j] = NIL; ! 294: /* prev will be initialized on the fly */ ! 295: ! 296: /* Set the default configuration parameters: ! 297: */ ! 298: max_lazy_match = configuration_table[pack_level].max_lazy; ! 299: good_match = configuration_table[pack_level].good_length; ! 300: max_chain_length = configuration_table[pack_level].max_chain; ! 301: *flags |= configuration_table[pack_level].flag; ! 302: /* ??? reduce max_chain_length for binary files */ ! 303: ! 304: strstart = 0; ! 305: block_start = 0L; ! 306: ! 307: #if defined(MSDOS) && !defined(__32BIT__) && !defined(__GO32__) ! 308: /* Can't read a 64K block under MSDOS */ ! 309: lookahead = read_buf((char*)slide, (unsigned)WSIZE); ! 310: #else ! 311: lookahead = read_buf((char*)slide, 2*WSIZE); ! 312: #endif ! 313: if (lookahead == 0 || lookahead == (unsigned)EOF) { ! 314: eofile = 1, lookahead = 0; ! 315: return; ! 316: } ! 317: eofile = 0; ! 318: /* Make sure that we always have enough lookahead. This is important ! 319: * if input comes from a device such as a tty. ! 320: */ ! 321: while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); ! 322: ! 323: ins_h = 0; ! 324: for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, slide[j]); ! 325: /* If lookahead < MIN_MATCH, ins_h is garbage, but this is ! 326: * not important since only literal bytes will be emitted. ! 327: */ ! 328: } ! 329: ! 330: void lm_free() ! 331: { ! 332: #ifdef DYN_ALLOC ! 333: #ifndef __TURBOC__ /*EWS*/ ! 334: fcfree(__slide); ! 335: fcfree(__prev); ! 336: fcfree(head); ! 337: #else ! 338: free(__slide); ! 339: free(__prev); ! 340: free(head); ! 341: #endif ! 342: slide = NULL; ! 343: prev = head = NULL; ! 344: #endif ! 345: } ! 346: ! 347: /* =========================================================================== ! 348: * Set match_start to the longest match starting at the given string and ! 349: * return its length. Matches shorter or equal to prev_length are discarded, ! 350: * in which case the result is equal to prev_length and match_start is ! 351: * garbage. ! 352: * IN assertions: cur_match is the head of the hash chain for the current ! 353: * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 ! 354: */ ! 355: #ifndef ASM ! 356: /* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm. The code ! 357: * is functionally equivalent, so you can use the C version if desired. ! 358: */ ! 359: int longest_match(cur_match) ! 360: IPos cur_match; /* current match */ ! 361: { ! 362: unsigned chain_length = max_chain_length; /* max hash chain length */ ! 363: register uch far *scan = slide + strstart; /* current string */ ! 364: register uch far *match = scan; /* matched string */ ! 365: register int len; /* length of current match */ ! 366: int best_len = prev_length; /* best match length so far */ ! 367: IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL; ! 368: /* Stop when cur_match becomes <= limit. To simplify the code, ! 369: * we prevent matches with the string of slide index 0. ! 370: */ ! 371: #ifdef UNALIGNED_OK ! 372: register ush scan_start = *(ush*)scan; ! 373: register ush scan_end = *(ush*)(scan+best_len-1); ! 374: #else ! 375: register uch scan_start = *scan; ! 376: register uch scan_end1 = scan[best_len-1]; ! 377: register uch scan_end = scan[best_len]; ! 378: #endif ! 379: ! 380: /* Do not waste too much time if we already have a good match: */ ! 381: if (prev_length >= good_match) { ! 382: chain_length >>= 2; ! 383: } ! 384: ! 385: do { ! 386: Assert(cur_match < strstart, "no future"); ! 387: match = slide + cur_match; ! 388: ! 389: /* Skip to next match if the match length cannot increase ! 390: * or if the match length is less than 2: ! 391: */ ! 392: #if (defined(UNALIGNED_OK) && HASH_BITS >= 8) ! 393: /* This code assumes sizeof(unsigned short) == 2 and ! 394: * sizeof(unsigned long) == 4. Do not use UNALIGNED_OK if your ! 395: * compiler uses different sizes. ! 396: */ ! 397: if (*(ush*)(match+best_len-1) != scan_end || ! 398: *(ush*)match != scan_start) continue; ! 399: ! 400: len = MIN_MATCH - 4; ! 401: /* It is not necessary to compare scan[2] and match[2] since they are ! 402: * always equal when the other bytes match, given that the hash keys ! 403: * are equal and that HASH_BITS >= 8. ! 404: */ ! 405: do {} while ((len+=4) < MAX_MATCH-3 && ! 406: *(ulg*)(scan+len) == *(ulg*)(match+len)); ! 407: /* The funny do {} generates better code for most compilers */ ! 408: ! 409: if (*(ush*)(scan+len) == *(ush*)(match+len)) len += 2; ! 410: if (scan[len] == match[len]) len++; ! 411: ! 412: #else /* UNALIGNED_OK */ ! 413: if (match[best_len] != scan_end || ! 414: match[best_len-1] != scan_end1 || *match != scan_start) ! 415: continue; ! 416: /* It is not necessary to compare scan[1] and match[1] since they ! 417: * are always equal when the other bytes match, given that ! 418: * the hash keys are equal and that h_shift+8 <= HASH_BITS, ! 419: * that is, when the last byte is entirely included in the hash key. ! 420: * The condition is equivalent to ! 421: * (HASH_BITS+2)/3 + 8 <= HASH_BITS ! 422: * or: HASH_BITS >= 13 ! 423: * Also, we check for a match at best_len-1 to get rid quickly of ! 424: * the match with the suffix of the match made at the previous step, ! 425: * which is known to fail. ! 426: */ ! 427: #if HASH_BITS >= 13 ! 428: len = 1; ! 429: #else ! 430: len = 0; ! 431: #endif ! 432: do {} while (++len < MAX_MATCH && scan[len] == match[len]); ! 433: ! 434: #endif /* UNALIGNED_OK */ ! 435: ! 436: if (len > best_len) { ! 437: match_start = cur_match; ! 438: best_len = len; ! 439: if (len == MAX_MATCH) break; ! 440: #ifdef UNALIGNED_OK ! 441: scan_end = *(ush*)(scan+best_len-1); ! 442: #else ! 443: scan_end1 = scan[best_len-1]; ! 444: scan_end = scan[best_len]; ! 445: #endif ! 446: } ! 447: } while (--chain_length != 0 && ! 448: (cur_match = prev[cur_match & WMASK]) > limit); ! 449: ! 450: return best_len; ! 451: } ! 452: #endif /* NO_ASM */ ! 453: ! 454: #ifdef DEBUG ! 455: /* =========================================================================== ! 456: * Check that the match at match_start is indeed a match. ! 457: */ ! 458: local void check_match(start, match, length) ! 459: IPos start, match; ! 460: int length; ! 461: { ! 462: /* check that the match is indeed a match */ ! 463: if (memcmp((char*)slide + match, ! 464: (char*)slide + start, length) != EQUAL) { ! 465: fprintf(stderr, ! 466: " start %d, match %d, length %d\n", ! 467: start, match, length); ! 468: error("invalid match"); ! 469: } ! 470: if (verbose > 1) { ! 471: fprintf(stderr,"\\[%d,%d]", start-match, length); ! 472: /* putc a macro, not safe to modify args!! */ ! 473: do { putc(slide[start], stderr); start++; } while (--length!=0); ! 474: } ! 475: } ! 476: #else ! 477: # define check_match(start, match, length) ! 478: #endif ! 479: ! 480: /* =========================================================================== ! 481: * Fill the window when the lookahead becomes insufficient. ! 482: * Updates strstart and lookahead, and sets eofile if end of input file. ! 483: * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 ! 484: * OUT assertions: at least one byte has been read, or eofile is set; ! 485: * file reads are performed for at least two bytes (required for the ! 486: * translate_eol option). ! 487: */ ! 488: local void fill_window() ! 489: { ! 490: register unsigned n, m; ! 491: unsigned more = (unsigned)((ulg)2*WSIZE - (ulg)lookahead - (ulg)strstart); ! 492: /* Amount of free space at the end of the window. */ ! 493: ! 494: /* If the window is full, move the upper half to the lower one to make ! 495: * room in the upper half. ! 496: */ ! 497: if (more == (unsigned)EOF) { ! 498: /* Very unlikely, but possible on 16 bit machine if strstart == 0 ! 499: * and lookahead == 1 (input done one byte at time) ! 500: */ ! 501: more--; ! 502: } else if (more <= 1) { ! 503: /* By the IN assertion, the window is not empty so we can't confuse ! 504: * more == 0 with more == 64K on a 16 bit machine. ! 505: */ ! 506: memcpy((char*)slide, (char*)slide+WSIZE, (unsigned)WSIZE); ! 507: match_start -= WSIZE; ! 508: strstart -= WSIZE; ! 509: /* strstart - WSIZE >= WSIZE - 1 - lookahead >= WSIZE - MIN_LOOKAHEAD ! 510: * so we now have strstart >= MAX_DIST: ! 511: */ ! 512: Assert (strstart >= MAX_DIST, "window slide too early"); ! 513: block_start -= (long) WSIZE; ! 514: ! 515: for (n = 0; n < HASH_SIZE; n++) { ! 516: m = head[n]; ! 517: head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL); ! 518: } ! 519: for (n = 0; n < WSIZE; n++) { ! 520: m = prev[n]; ! 521: prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL); ! 522: /* If n is not on any hash chain, prev[n] is garbage but ! 523: * its value will never be used. ! 524: */ ! 525: } ! 526: more += WSIZE; ! 527: #ifdef ZIP ! 528: if (verbose) putc('.', stderr); ! 529: #endif ! 530: } ! 531: /* At this point, more >= 2 */ ! 532: n = read_buf((char*)slide+strstart+lookahead, more); ! 533: if (n == 0 || n == (unsigned)EOF) { ! 534: eofile = 1; ! 535: } else { ! 536: lookahead += n; ! 537: } ! 538: } ! 539: ! 540: /* =========================================================================== ! 541: * Flush the current block, with given end-of-file flag. ! 542: * IN assertion: strstart is set to the end of the current match. ! 543: */ ! 544: #define FLUSH_BLOCK(eof) \ ! 545: flush_block(block_start >= 0L ? (char*)&slide[block_start] : (char*)NULL,\ ! 546: (long)strstart - block_start, (eof)) ! 547: ! 548: /* =========================================================================== ! 549: * Processes a new input file and return its compressed length. ! 550: */ ! 551: #ifdef NO_LAZY ! 552: ulg deflate() ! 553: { ! 554: IPos hash_head; /* head of the hash chain */ ! 555: int flush; /* set if current block must be flushed */ ! 556: unsigned match_length = 0; /* length of best match */ ! 557: ! 558: prev_length = MIN_MATCH-1; ! 559: while (lookahead != 0) { ! 560: /* Insert the string slide[strstart .. strstart+2] in the ! 561: * dictionary, and set hash_head to the head of the hash chain: ! 562: */ ! 563: #ifdef MACTC5 ! 564: mac_poll_for_break(); ! 565: #endif ! 566: INSERT_STRING(strstart, hash_head); ! 567: ! 568: /* Find the longest match, discarding those <= prev_length. ! 569: * At this point we have always match_length < MIN_MATCH ! 570: */ ! 571: if (hash_head != NIL && strstart - hash_head <= MAX_DIST) { ! 572: /* To simplify the code, we prevent matches with the string ! 573: * of slide index 0 (in particular we have to avoid a match ! 574: * of the string with itself at the start of the input file). ! 575: */ ! 576: match_length = longest_match (hash_head); ! 577: /* longest_match() sets match_start */ ! 578: if (match_length > lookahead) match_length = lookahead; ! 579: } ! 580: if (match_length >= MIN_MATCH) { ! 581: check_match(strstart, match_start, match_length); ! 582: ! 583: flush = ct_tally(strstart-match_start, match_length - MIN_MATCH); ! 584: ! 585: lookahead -= match_length; ! 586: match_length--; /* string at strstart already in hash table */ ! 587: do { ! 588: strstart++; ! 589: INSERT_STRING(strstart, hash_head); ! 590: /* strstart never exceeds WSIZE-MAX_MATCH, so there are ! 591: * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH ! 592: * these bytes are garbage, but it does not matter since the ! 593: * next lookahead bytes will always be emitted as literals. ! 594: */ ! 595: } while (--match_length != 0); ! 596: } else { ! 597: /* No match, output a literal byte */ ! 598: flush = ct_tally (0, slide[strstart]); ! 599: lookahead--; ! 600: } ! 601: strstart++; ! 602: if (flush) FLUSH_BLOCK(0), block_start = strstart; ! 603: ! 604: /* Make sure that we always have enough lookahead, except ! 605: * at the end of the input file. We need MAX_MATCH bytes ! 606: * for the next match, plus MIN_MATCH bytes to insert the ! 607: * string following the next match. ! 608: */ ! 609: while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); ! 610: ! 611: } ! 612: return FLUSH_BLOCK(1); /* eof */ ! 613: } ! 614: #else /* LAZY */ ! 615: /* =========================================================================== ! 616: * Same as above, but achieves better compression. We use a lazy ! 617: * evaluation for matches: a match is finally adopted only if there is ! 618: * no better match at the next window position. ! 619: */ ! 620: ulg deflate() ! 621: { ! 622: IPos hash_head; /* head of hash chain */ ! 623: IPos prev_match; /* previous match */ ! 624: int flush; /* set if current block must be flushed */ ! 625: int match_available = 0; /* set if previous match exists */ ! 626: register unsigned match_length = MIN_MATCH-1; /* length of best match */ ! 627: #ifdef DEBUG ! 628: extern ulg isize; /* byte length of input file, for debug only */ ! 629: #endif ! 630: ! 631: /* Process the input block. */ ! 632: while (lookahead != 0) { ! 633: /* Insert the string slide[strstart .. strstart+2] in the ! 634: * dictionary, and set hash_head to the head of the hash chain: ! 635: */ ! 636: #ifdef MACTC5 ! 637: mac_poll_for_break(); ! 638: #endif ! 639: INSERT_STRING(strstart, hash_head); ! 640: ! 641: /* Find the longest match, discarding those <= prev_length. ! 642: */ ! 643: prev_length = match_length, prev_match = match_start; ! 644: match_length = MIN_MATCH-1; ! 645: ! 646: if (hash_head != NIL && prev_length < max_lazy_match && ! 647: strstart - hash_head <= MAX_DIST) { ! 648: /* To simplify the code, we prevent matches with the string ! 649: * of slide index 0 (in particular we have to avoid a match ! 650: * of the string with itself at the start of the input file). ! 651: */ ! 652: match_length = longest_match (hash_head); ! 653: /* longest_match() sets match_start */ ! 654: if (match_length > lookahead) match_length = lookahead; ! 655: /* Ignore a length 3 match if it is too distant: */ ! 656: if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){ ! 657: /* If prev_match is also MIN_MATCH, match_start is garbage ! 658: * but we will ignore the current match anyway. ! 659: */ ! 660: match_length--; ! 661: } ! 662: } ! 663: /* If there was a match at the previous step and the current ! 664: * match is not better, output the previous match: ! 665: */ ! 666: if (prev_length >= MIN_MATCH && match_length <= prev_length) { ! 667: ! 668: check_match(strstart-1, prev_match, prev_length); ! 669: ! 670: flush = ct_tally(strstart-1-prev_match, prev_length - MIN_MATCH); ! 671: ! 672: /* Insert in hash table all strings up to the end of the match. ! 673: * strstart-1 and strstart are already inserted. ! 674: */ ! 675: lookahead -= prev_length-1; ! 676: prev_length -= 2; ! 677: do { ! 678: strstart++; ! 679: INSERT_STRING(strstart, hash_head); ! 680: /* strstart never exceeds WSIZE-MAX_MATCH, so there are ! 681: * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH ! 682: * these bytes are garbage, but it does not matter since the ! 683: * next lookahead bytes will always be emitted as literals. ! 684: */ ! 685: } while (--prev_length != 0); ! 686: match_available = 0; ! 687: match_length = MIN_MATCH-1; ! 688: strstart++; ! 689: if (flush) FLUSH_BLOCK(0), block_start = strstart; ! 690: ! 691: } else if (match_available) { ! 692: /* If there was no match at the previous position, output a ! 693: * single literal. If there was a match but the current match ! 694: * is longer, truncate the previous match to a single literal. ! 695: */ ! 696: Tracevv((stderr,"%c",slide[strstart-1])); ! 697: if (ct_tally (0, slide[strstart-1])) { ! 698: FLUSH_BLOCK(0), block_start = strstart; ! 699: } ! 700: strstart++; ! 701: lookahead--; ! 702: } else { ! 703: /* There is no previous match to compare with, wait for ! 704: * the next step to decide. ! 705: */ ! 706: match_available = 1; ! 707: strstart++; ! 708: lookahead--; ! 709: } ! 710: #if 0 /* for pgp: disabled to allow compiling with -DDEBUG */ ! 711: Assert (strstart <= isize && lookahead <= isize, "a bit too far"); ! 712: #endif ! 713: ! 714: /* Make sure that we always have enough lookahead, except ! 715: * at the end of the input file. We need MAX_MATCH bytes ! 716: * for the next match, plus MIN_MATCH bytes to insert the ! 717: * string following the next match. ! 718: */ ! 719: while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); ! 720: } ! 721: if (match_available) ct_tally (0, slide[strstart-1]); ! 722: ! 723: return FLUSH_BLOCK(1); /* eof */ ! 724: } ! 725: #endif /* LAZY */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.