|
|
1.1 ! root 1: /*--------------------------------------------------------------------------*/ ! 2: /* lzh.c - file compression subroutines for lzss + Huffman encoding */ ! 3: /* */ ! 4: /* Source code history: */ ! 5: /* */ ! 6: /* The original lzhuf.c source was written by Haruyasu Yoshizaki on */ ! 7: /* 11/20/1988 with some minor changes 4/6/1989. Comments were */ ! 8: /* translated by Haruhiko Okumura on 4/7/1989. */ ! 9: /* */ ! 10: /* The original lzss compression was written by Haruhiko Okumura, */ ! 11: /* 12-2-404 Green Heights, 580 Nagasawa, Yokosuka 239, Japan. The */ ! 12: /* Adaptive Huffman algorithm was added by Yoshizaki to increase */ ! 13: /* speed and compression and developed it into the LHarc archiver. */ ! 14: /* */ ! 15: /* Modifications were made by Allan Hoeltje P.O. Box 18045 Boulder, */ ! 16: /* Colorado, USA 80308-8045, during the month of June 1989. These */ ! 17: /* modifications include: More comments; better file error handling; */ ! 18: /* run-length encoding input and output to increase the compression */ ! 19: /* ratio. Note: the run length encoding gives you about 2 to 5 per */ ! 20: /* cent better compression but more importantly it speeds up the */ ! 21: /* compression process on text files by about 60 per cent. */ ! 22: /* */ ! 23: /* Additional modifications made on February 17, 1991 to make the */ ! 24: /* routines more usable as subroutines for a parent application. */ ! 25: /* The two routines, lzhEncode and lzhDecode are the main entry */ ! 26: /* points. Everything else is static. */ ! 27: /* */ ! 28: /* It is my understanding that the lzHuff algorithm and source code */ ! 29: /* is in the public domain and it's use is free and unrestricted. */ ! 30: /*--------------------------------------------------------------------------*/ ! 31: ! 32: #include <stdio.h> ! 33: #include <stdlib.h> ! 34: #include <string.h> ! 35: #include <ctype.h> ! 36: #include "rsalib.h" ! 37: #include "rsaio.h" ! 38: ! 39: /* ! 40: ** Convert to or from external byte order. ! 41: ** Note that hilo_swap does nothing if this is a LSB-first CPU. ! 42: */ ! 43: ! 44: #define convert2(x,lx) hilo_swap( (byteptr)&(x), (lx) ) ! 45: #define convert(x) convert2( (x), sizeof(x) ) ! 46: ! 47: ! 48: #define EXIT_FAILURE 1 ! 49: #define EXIT_SUCCESS 0 ! 50: typedef unsigned char uchar; ! 51: ! 52: static FILE *inFile; /* clear text input */ ! 53: static FILE *outFile; /* compressed output */ ! 54: ! 55: static unsigned long int codesize = 0; ! 56: static unsigned long int inCount = 0; ! 57: static unsigned long int outCount = 0; ! 58: ! 59: void Error( char *message ) ! 60: { ! 61: printf( "\n%s\n", message ); ! 62: exit( EXIT_FAILURE ); ! 63: } ! 64: ! 65: /*--------------------------------------------------------------------------*/ ! 66: /* Run length encoded getc and putc routines. */ ! 67: /*--------------------------------------------------------------------------*/ ! 68: ! 69: /* getCHR ! 70: This does a simple getc and count. ! 71: */ ! 72: static int getCHR( FILE *f ) ! 73: { ! 74: int c; ! 75: if ((c = getc( f )) != EOF) ! 76: inCount++; ! 77: return( c ); ! 78: } ! 79: ! 80: ! 81: /* putCHR ! 82: This does a simple putc and count with a write error check. ! 83: */ ! 84: void putCHR( int c, FILE *f ) ! 85: { ! 86: if (putc( c, f ) == EOF) ! 87: { ! 88: Error( "lzh putCHR: can't write output file!" ); ! 89: } ! 90: outCount++; ! 91: } ! 92: ! 93: ! 94: #define NOHIST 0 /* don't consider previous input */ ! 95: #define INREP 1 /* sending a repeated value */ ! 96: #define SENTCHAR 1 /* lastchar set, no lookahead yet */ ! 97: #define SENDNEWC 2 /* run over, send new char next */ ! 98: #define SENDCNT 3 /* newchar set, send count next */ ! 99: #define DLE 0x90 /* repeat sequence marker */ ! 100: ! 101: static unsigned char state = NOHIST; /* current packing state */ ! 102: ! 103: /*--------------------------------------------------------------------------*/ ! 104: /* getRLC */ ! 105: /* Non-repeat compression - text is read from file "f" and passed */ ! 106: /* through normally except that a run of more than two characters is */ ! 107: /* encoded as: <char> <DLE> <count>. Special case: a count of zero */ ! 108: /* indicates that the DLE is really a DLE, not a repeat marker. */ ! 109: /*--------------------------------------------------------------------------*/ ! 110: ! 111: static int ! 112: getRLC( FILE *f ) ! 113: { ! 114: static int lastc; /* value returned on last call */ ! 115: static int repcnt; /* repetition counter */ ! 116: static int c; /* latest value seen */ ! 117: static char *badstate = "lzh getRLC: Bad packing state!"; ! 118: ! 119: switch (state) /* depends on our state */ ! 120: { ! 121: case NOHIST: /* no relevant history */ ! 122: state = SENTCHAR; ! 123: return (lastc = getCHR(f)); /* remember the value next time */ ! 124: break; ! 125: case SENTCHAR: /* char was sent. look ahead */ ! 126: switch (lastc) /* action depends on char */ ! 127: { ! 128: case DLE: /* if we sent a real DLE */ ! 129: state = NOHIST; /* then start over again */ ! 130: return (0); /* but note that the DLE was real */ ! 131: break; ! 132: case EOF: /* EOF is always a special case */ ! 133: return (EOF); ! 134: break; ! 135: default: /* else test for a repeat */ ! 136: for (repcnt = 1 ; ! 137: ((c = getCHR(f)) == lastc) && (repcnt < 255) ; ! 138: repcnt++); /* find end of run */ ! 139: ! 140: switch(repcnt) /* action depends on run size */ ! 141: { ! 142: case 1: /* not a repeat */ ! 143: return (lastc = c); /* but remember value next time */ ! 144: break; ! 145: case 2: /* a repeat, but too short */ ! 146: state = SENDNEWC; /* send the second one next time */ ! 147: return (lastc); ! 148: break; ! 149: default: /* a run - compress it */ ! 150: state = SENDCNT; /* send repeat count next time */ ! 151: return (DLE); /* send repeat marker this time */ ! 152: break; ! 153: } ! 154: } ! 155: case SENDNEWC: /* send second char of short run */ ! 156: state = SENTCHAR; ! 157: return (lastc = c); ! 158: break; ! 159: case SENDCNT: /* sent DLE, now send count */ ! 160: state = SENDNEWC; ! 161: return (repcnt); ! 162: break; ! 163: default: ! 164: { ! 165: Error( badstate ); ! 166: } ! 167: } ! 168: } ! 169: ! 170: ! 171: /*--------------------------------------------------------------------------*/ ! 172: /* putRLC */ ! 173: /* This routine is used to decode non-repeat compression bytes and */ ! 174: /* write them to file "t". Bytes are passed one at a time in coded */ ! 175: /* format, and are written out uncoded. The data is stored normally, */ ! 176: /* except that runs of more than two characters are represented as: */ ! 177: /* */ ! 178: /* <char> <DLE> <count> */ ! 179: /* with a special case that a count of zero indicates a DLE as data, */ ! 180: /* not as a repeat marker. */ ! 181: /*--------------------------------------------------------------------------*/ ! 182: ! 183: static int ! 184: putRLC( int c, FILE *t ) ! 185: { ! 186: static int lastc; /* last character seen */ ! 187: static char *badstate = "lzh putRLC: Bad unpacking state!"; ! 188: ! 189: switch (state) /* action depends on our state */ ! 190: { ! 191: case NOHIST: /* no previous history */ ! 192: if (c == DLE) /* if starting a series */ ! 193: state = INREP; /* then remember it next time */ ! 194: else ! 195: putCHR( (lastc = c), t ); /* else nothing unusual */ ! 196: break; ! 197: case INREP: /* in a repeat */ ! 198: if (c) /* if count is nonzero */ ! 199: while(--c) /* then repeatedly ... */ ! 200: putCHR( lastc, t ); /* ... output the byte */ ! 201: else ! 202: putCHR( DLE, t ); /* else output DLE as data */ ! 203: state = NOHIST; /* back to no history */ ! 204: break; ! 205: default: ! 206: { ! 207: Error( badstate ); ! 208: } ! 209: } ! 210: return(0); ! 211: } ! 212: ! 213: ! 214: /*--------------------------------------------------------------------------*/ ! 215: /* LZSS Compression */ ! 216: /*--------------------------------------------------------------------------*/ ! 217: ! 218: #define buffSize 2048 /* size of ring buffer */ ! 219: #define lookSize 60 /* lookahead buffer size */ ! 220: #define THRESHOLD 2 /* if matchLen is greater than Threshold */ ! 221: /* then code string into position & length */ ! 222: #define treeRoot buffSize /* index for root of binary search tree */ ! 223: ! 224: /* ! 225: ring buffer with extra bytes to facilitate string comparison of ! 226: longest match. This is set by the InsertNode() procedure. ! 227: */ ! 228: ! 229: static unsigned char textBuf[ buffSize + lookSize - 1 ]; ! 230: static int matchPos, matchLen; ! 231: static int lson[ buffSize + 1 ]; ! 232: static int rson[ buffSize + 257 ]; ! 233: static int dad [ buffSize + 1 ]; ! 234: ! 235: ! 236: /*--------------------------------------------------------------------------*/ ! 237: /* InitTree */ ! 238: /* Initialize the LZSS trees. */ ! 239: /*--------------------------------------------------------------------------*/ ! 240: ! 241: static void InitTree( void ) ! 242: { ! 243: register int i; ! 244: ! 245: /* ! 246: For i = 0 to buffSize, rson[i] and lson[i] will be the right and ! 247: left children of node i. These nodes need not be initialized. Also, ! 248: dad[i] is the parent of node i. These are initialized to "treeRoot" ! 249: which means 'not used.' ! 250: ! 251: For i = buffSize+1 to buffSize+256, rson[i] is the root of the tree ! 252: for strings that begin with character i. These are initialized ! 253: to "treeRoot". Note there are 256 trees. ! 254: */ ! 255: ! 256: for (i = buffSize + 1 ; i <= (buffSize + 256) ; i++) ! 257: rson[i] = treeRoot; /* root */ ! 258: for (i = 0 ; i < buffSize ; i++) ! 259: dad[i] = treeRoot; /* node */ ! 260: } ! 261: ! 262: ! 263: /*--------------------------------------------------------------------------*/ ! 264: /* InsertNode */ ! 265: /* Inserts string of length lookSize, textBuf[r..r+lookSize-1], into */ ! 266: /* one of the trees (textBuf[r]'th tree) and returns the longest-match */ ! 267: /* position and length via the global variables matchPos and matchLen. */ ! 268: /* If matchLen = lookSize, then remove the old node in favor of the new */ ! 269: /* one, because the old one will be deleted sooner. Note r plays double */ ! 270: /* role, as tree node and position in buffer. */ ! 271: /*--------------------------------------------------------------------------*/ ! 272: ! 273: static void InsertNode(int r) ! 274: { ! 275: int i, p, cmp; ! 276: unsigned char *key; ! 277: unsigned c; ! 278: ! 279: cmp = 1; ! 280: key = &textBuf[r]; ! 281: p = buffSize + 1 + key[0]; ! 282: rson[r] = lson[r] = treeRoot; ! 283: matchLen = 0; ! 284: for ( ; ; ) ! 285: { ! 286: if (cmp >= 0) ! 287: { ! 288: if (rson[p] != treeRoot) ! 289: p = rson[p]; ! 290: else ! 291: { ! 292: rson[p] = r; ! 293: dad [r] = p; ! 294: return; ! 295: } ! 296: } ! 297: else ! 298: { ! 299: if (lson[p] != treeRoot) ! 300: p = lson[p]; ! 301: else ! 302: { ! 303: lson[p] = r; ! 304: dad [r] = p; ! 305: return; ! 306: } ! 307: } ! 308: for (i = 1; i < lookSize; i++) ! 309: if ((cmp = key[i] - textBuf[p + i]) != 0) ! 310: break; ! 311: if (i > THRESHOLD) ! 312: { ! 313: if (i > matchLen) ! 314: { ! 315: matchPos = ((r - p) & (buffSize - 1)) - 1; ! 316: if ((matchLen = i) >= lookSize) ! 317: break; ! 318: } ! 319: if (i == matchLen) ! 320: { ! 321: if ((c = ((r - p) & (buffSize - 1)) - 1) < matchPos) ! 322: { ! 323: matchPos = c; ! 324: } ! 325: } ! 326: } ! 327: } ! 328: dad[r] = dad[p]; ! 329: lson[r] = lson[p]; ! 330: rson[r] = rson[p]; ! 331: dad[lson[p]] = r; ! 332: dad[rson[p]] = r; ! 333: if (rson[dad[p]] == p) ! 334: rson[dad[p]] = r; ! 335: else ! 336: lson[dad[p]] = r; ! 337: dad[p] = treeRoot; /* remove p */ ! 338: } ! 339: ! 340: ! 341: /*--------------------------------------------------------------------------*/ ! 342: /* DeleteNode */ ! 343: /* Delete node p from the tree. */ ! 344: /*--------------------------------------------------------------------------*/ ! 345: ! 346: static void DeleteNode( register int p ) ! 347: { ! 348: register int q; ! 349: ! 350: if (dad[p] == treeRoot) ! 351: return; /* not registered */ ! 352: if (rson[p] == treeRoot) ! 353: q = lson[p]; ! 354: else ! 355: if (lson[p] == treeRoot) ! 356: q = rson[p]; ! 357: else ! 358: { ! 359: q = lson[p]; ! 360: if (rson[q] != treeRoot) ! 361: { ! 362: do { ! 363: q = rson[q]; ! 364: } ! 365: while (rson[q] != treeRoot); ! 366: ! 367: rson[dad[q]] = lson[q]; ! 368: dad[lson[q]] = dad[q]; ! 369: lson[q] = lson[p]; ! 370: dad[lson[p]] = q; ! 371: } ! 372: rson[q] = rson[p]; ! 373: dad[rson[p]] = q; ! 374: } ! 375: dad[q] = dad[p]; ! 376: ! 377: if (rson[dad[p]] == p) ! 378: rson[dad[p]] = q; ! 379: else ! 380: lson[dad[p]] = q; ! 381: dad[p] = treeRoot; ! 382: } ! 383: ! 384: /*--------------------------------------------------------------------------*/ ! 385: /* Huffman Coding */ ! 386: /*--------------------------------------------------------------------------*/ ! 387: ! 388: /* kinds of characters (character code = 0..N_CHAR-1) */ ! 389: #define N_CHAR (256 - THRESHOLD + lookSize) ! 390: #define tableSize (N_CHAR * 2 - 1) /* size of table */ ! 391: #define rootSize (tableSize - 1) /* position of root */ ! 392: #define MAX_FREQ 0x8000 /* update the tree when the root */ ! 393: /* frequency comes to this value. */ ! 394: ! 395: /*--------------------------------------------------------------------------*/ ! 396: /* Tables for encoding the upper 6 bits of position */ ! 397: /*--------------------------------------------------------------------------*/ ! 398: ! 399: static uchar p_len[64] = ! 400: { ! 401: 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, ! 402: 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, ! 403: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 404: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 405: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 406: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 407: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, ! 408: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 ! 409: }; ! 410: ! 411: static uchar p_code[64] = ! 412: { ! 413: 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68, ! 414: 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C, ! 415: 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC, ! 416: 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE, ! 417: 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE, ! 418: 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE, ! 419: 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, ! 420: 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF ! 421: }; ! 422: ! 423: /*--------------------------------------------------------------------------*/ ! 424: /* Tables for decoding the upper 6 bits of position */ ! 425: /*--------------------------------------------------------------------------*/ ! 426: ! 427: static uchar d_code[256] = ! 428: { ! 429: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ! 430: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ! 431: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ! 432: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ! 433: 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, ! 434: 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, ! 435: 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ! 436: 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ! 437: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, ! 438: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, ! 439: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 440: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 441: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 442: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 443: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, ! 444: 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, ! 445: 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, ! 446: 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, ! 447: 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D, ! 448: 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F, ! 449: 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11, ! 450: 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13, ! 451: 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15, ! 452: 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17, ! 453: 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B, ! 454: 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, ! 455: 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, ! 456: 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, ! 457: 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B, ! 458: 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F, ! 459: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, ! 460: 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, ! 461: }; ! 462: ! 463: static uchar d_len[256] = ! 464: { ! 465: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, ! 466: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, ! 467: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, ! 468: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, ! 469: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 470: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 471: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 472: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 473: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 474: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, ! 475: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 476: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 477: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 478: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 479: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 480: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 481: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 482: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, ! 483: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 484: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 485: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 486: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 487: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 488: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, ! 489: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 490: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 491: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 492: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 493: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 494: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, ! 495: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, ! 496: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, ! 497: }; ! 498: ! 499: static unsigned freq[tableSize + 1]; /* frequency table */ ! 500: ! 501: static int prnt[ tableSize + N_CHAR ]; ! 502: /* pointers to parent nodes, except for the elements */ ! 503: /* [tableSize..tableSize + N_CHAR - 1] which are used */ ! 504: /* to get the positions of leaves corresponding to the */ ! 505: /* codes. */ ! 506: ! 507: static int son[ tableSize ]; /* pointers to child nodes (son[], son[] + 1) */ ! 508: ! 509: static unsigned getbuf = 0; ! 510: static uchar getlen = 0; ! 511: ! 512: ! 513: /*--------------------------------------------------------------------------*/ ! 514: /* GetBit */ ! 515: /* Get one bit. */ ! 516: /*--------------------------------------------------------------------------*/ ! 517: ! 518: static int GetBit( void ) ! 519: { ! 520: int i; ! 521: ! 522: while (getlen <= 8) ! 523: { ! 524: if ((i = getc( inFile )) < 0) ! 525: i = 0; ! 526: getbuf |= i << (8 - getlen); ! 527: getlen += 8; ! 528: } ! 529: i = getbuf; ! 530: getbuf <<= 1; ! 531: getlen--; ! 532: return (i < 0); ! 533: } ! 534: ! 535: ! 536: /*--------------------------------------------------------------------------*/ ! 537: /* GetByte */ ! 538: /* Get one byte. */ ! 539: /*--------------------------------------------------------------------------*/ ! 540: ! 541: static int GetByte( void ) ! 542: { ! 543: unsigned i; ! 544: ! 545: while (getlen <= 8) ! 546: { ! 547: if ((i = getc( inFile )) < 0) ! 548: i = 0; ! 549: getbuf |= i << (8 - getlen); ! 550: getlen += 8; ! 551: } ! 552: i = getbuf; ! 553: getbuf <<= 8; ! 554: getlen -= 8; ! 555: return i >> 8; ! 556: } ! 557: ! 558: ! 559: /*--------------------------------------------------------------------------*/ ! 560: /* Putcode */ ! 561: /* Write c bits of code. */ ! 562: /*--------------------------------------------------------------------------*/ ! 563: ! 564: static unsigned putbuf = 0; ! 565: static uchar putlen = 0; ! 566: ! 567: static void Putcode(int l, unsigned c) ! 568: { ! 569: static char *werr = "lzh Putcode: can't write output file!"; ! 570: ! 571: putbuf |= (c >> putlen); ! 572: if ((putlen += l) >= 8) ! 573: { ! 574: if (putc( putbuf >> 8, outFile ) == EOF) ! 575: { ! 576: Error( werr ); ! 577: } ! 578: if ((putlen -= 8) >= 8) ! 579: { ! 580: if (putc( putbuf, outFile ) == EOF) ! 581: { ! 582: Error( werr ); ! 583: } ! 584: codesize += 2; ! 585: putlen -= 8; ! 586: putbuf = c << (l - putlen); ! 587: } ! 588: else ! 589: { ! 590: putbuf <<= 8; ! 591: codesize++; ! 592: } ! 593: } ! 594: } ! 595: ! 596: ! 597: /*--------------------------------------------------------------------------*/ ! 598: /* StartHuff */ ! 599: /* initialize the Huffman trees. */ ! 600: /*--------------------------------------------------------------------------*/ ! 601: ! 602: static void StartHuff( void ) ! 603: { ! 604: register int i, j; ! 605: ! 606: for (i = 0; i < N_CHAR; i++) ! 607: { ! 608: freq[i] = 1; ! 609: son[i] = i + tableSize; ! 610: prnt[i + tableSize] = i; ! 611: } ! 612: i = 0; ! 613: j = N_CHAR; ! 614: ! 615: while (j <= rootSize) ! 616: { ! 617: freq[j] = freq[i] + freq[i + 1]; ! 618: son[j] = i; ! 619: prnt[i] = prnt[i + 1] = j; ! 620: i += 2; ! 621: j++; ! 622: } ! 623: freq[tableSize] = 0xffff; ! 624: prnt[rootSize] = 0; ! 625: } ! 626: ! 627: ! 628: /*--------------------------------------------------------------------------*/ ! 629: /* reconst */ ! 630: /* Reconstruction of tree. */ ! 631: /*--------------------------------------------------------------------------*/ ! 632: ! 633: static void reconst( void ) ! 634: { ! 635: register int i, j, k; ! 636: unsigned int f, l; ! 637: ! 638: /* Collect leaf nodes in the first half of the table and replace the */ ! 639: /* freq by (freq + 1) / 2. */ ! 640: ! 641: j = 0; ! 642: for (i = 0; i < tableSize; i++) ! 643: { ! 644: if (son[i] >= tableSize) ! 645: { ! 646: freq[j] = (freq[i] + 1) / 2; ! 647: son[j] = son[i]; ! 648: j++; ! 649: } ! 650: } ! 651: ! 652: /* begin constructing tree by connecting sons */ ! 653: ! 654: for (i = 0, j = N_CHAR; j < tableSize; i += 2, j++) ! 655: { ! 656: k = i + 1; ! 657: f = freq[j] = freq[i] + freq[k]; ! 658: ! 659: for (k = j - 1; f < freq[k]; k--); ! 660: ! 661: k++; ! 662: l = (j - k) * 2; ! 663: memmove( &freq[k + 1], &freq[k], l ); ! 664: freq[k] = f; ! 665: memmove( &son[k + 1], &son[k], l ); ! 666: son[k] = i; ! 667: } ! 668: ! 669: /* connect prnt */ ! 670: ! 671: for (i = 0; i < tableSize; i++) ! 672: if ((k = son[i]) >= tableSize) ! 673: prnt[k] = i; ! 674: else ! 675: prnt[k] = prnt[k + 1] = i; ! 676: } ! 677: ! 678: ! 679: /*--------------------------------------------------------------------------*/ ! 680: /* update */ ! 681: /* Increment frequency of given code by one, and update tree. */ ! 682: /*--------------------------------------------------------------------------*/ ! 683: ! 684: static void update(int c) ! 685: { ! 686: int i, j, k, l; ! 687: ! 688: if (freq[rootSize] == MAX_FREQ) ! 689: reconst(); ! 690: ! 691: c = prnt[c + tableSize]; ! 692: do { ! 693: k = ++freq[c]; ! 694: ! 695: /* if the order is disturbed, exchange nodes */ ! 696: if (k > freq[l = c + 1]) ! 697: { ! 698: while (k > freq[++l]); ! 699: l--; ! 700: freq[c] = freq[l]; ! 701: freq[l] = k; ! 702: ! 703: i = son[c]; ! 704: prnt[i] = l; ! 705: if (i < tableSize) ! 706: prnt[i + 1] = l; ! 707: ! 708: j = son[l]; ! 709: son[l] = i; ! 710: ! 711: prnt[j] = c; ! 712: if (j < tableSize) ! 713: prnt[j + 1] = c; ! 714: son[c] = j; ! 715: ! 716: c = l; ! 717: } ! 718: } ! 719: while ((c = prnt[c]) != 0); /* repeat up to root */ ! 720: } ! 721: ! 722: ! 723: /*--------------------------------------------------------------------------*/ ! 724: /* EncodeChar */ ! 725: /*--------------------------------------------------------------------------*/ ! 726: ! 727: static void EncodeChar(unsigned c) ! 728: { ! 729: unsigned i; ! 730: int j, k; ! 731: ! 732: i = j = 0; ! 733: k = prnt[c + tableSize]; ! 734: ! 735: /* travel from leaf to root */ ! 736: do { ! 737: i >>= 1; ! 738: ! 739: /* if node's address is odd-numbered, choose bigger brother node */ ! 740: if (k & 1) i += 0x8000; ! 741: ! 742: j++; ! 743: } ! 744: while ((k = prnt[k]) != rootSize); ! 745: ! 746: Putcode(j, i); ! 747: update(c); ! 748: } ! 749: ! 750: ! 751: /*--------------------------------------------------------------------------*/ ! 752: /* EncodePosition */ ! 753: /*--------------------------------------------------------------------------*/ ! 754: ! 755: static void EncodePosition(unsigned c) ! 756: { ! 757: unsigned i; ! 758: ! 759: /* output upper 6 bits by table lookup */ ! 760: ! 761: i = c >> 6; ! 762: Putcode( p_len[i], (unsigned)p_code[i] << 8 ); ! 763: ! 764: /* output lower 6 bits verbatim */ ! 765: Putcode( 6, (c & 0x3f) << 10 ); ! 766: } ! 767: ! 768: ! 769: /*--------------------------------------------------------------------------*/ ! 770: /* EncodeEnd */ ! 771: /*--------------------------------------------------------------------------*/ ! 772: ! 773: static void EncodeEnd( void ) ! 774: { ! 775: static char *werr = "lzh EncodeEnd: can't write output file!"; ! 776: ! 777: if (putlen) ! 778: { ! 779: if (putc(putbuf >> 8, outFile) == EOF) ! 780: Error( werr ); ! 781: codesize++; ! 782: } ! 783: } ! 784: ! 785: ! 786: /*--------------------------------------------------------------------------*/ ! 787: /* DecodeChar */ ! 788: /*--------------------------------------------------------------------------*/ ! 789: ! 790: static int DecodeChar( void ) ! 791: { ! 792: register unsigned c; ! 793: ! 794: /* Travel from root to leaf choosing the smaller child node (son[]) if */ ! 795: /* the read bit is 0, the bigger (son[]+1} if 1. */ ! 796: ! 797: c = son[rootSize]; ! 798: while (c < tableSize) ! 799: { ! 800: c += GetBit(); ! 801: c = son[c]; ! 802: } ! 803: c -= tableSize; ! 804: update(c); ! 805: return c; ! 806: } ! 807: ! 808: ! 809: /*--------------------------------------------------------------------------*/ ! 810: /* DecodePosition */ ! 811: /*--------------------------------------------------------------------------*/ ! 812: ! 813: static int DecodePosition( void ) ! 814: { ! 815: register unsigned i, j, c; ! 816: ! 817: /* recover upper 6 bits from table */ ! 818: i = GetByte(); ! 819: c = (unsigned)d_code[i] << 6; ! 820: j = d_len[i]; ! 821: ! 822: /* read lower 6 bits verbatim */ ! 823: j -= 2; ! 824: while (j--) ! 825: i = (i << 1) + GetBit(); ! 826: return (c | (i & 0x3f)); ! 827: } ! 828: ! 829: ! 830: /* lzhEncode ! 831: Compress the input file and write to the output file. ! 832: Return the ratio of output to input size. ! 833: */ ! 834: int lzhEncode( FILE *in, FILE *out ) ! 835: { ! 836: int i, c, len, r, s, last_matchLen; ! 837: unsigned long int textsize, beginByte; ! 838: static char *werr = "lzhEncode: can't write output file!"; ! 839: ! 840: inFile = in; /* set the global file pointers */ ! 841: outFile = out; ! 842: ! 843: /* Skip to the end of file and get the byte length. Write the length ! 844: as the first word in the compressed output file. ! 845: */ ! 846: beginByte = ftell( inFile ); /* just in case we were prepositioned */ ! 847: fseek( inFile, 0L, SEEK_END ); ! 848: textsize = ftell( inFile ) - beginByte; ! 849: fseek( inFile, beginByte, SEEK_SET ); /* go back to the beginning of the file */ ! 850: ! 851: if (textsize == 0) ! 852: return( -1 ); /* empty files are easy - signal an error */ ! 853: ! 854: convert( textsize ); /* convert to little endian if necessary */ ! 855: ! 856: if (fwrite( &textsize, sizeof textsize, 1, outFile ) < 1) ! 857: Error( werr ); ! 858: ! 859: StartHuff(); /* init the Huffman trees */ ! 860: InitTree(); /* init the LZSS trees */ ! 861: inCount = 0; /* init the input character count */ ! 862: s = 0; ! 863: r = buffSize - lookSize; ! 864: for (i = 0; i < r; i++) ! 865: textBuf[i] = ' '; ! 866: ! 867: /* fill the look ahead buffer */ ! 868: ! 869: for (len = 0; (len < lookSize) && ((c = getRLC( inFile )) != EOF); len++) ! 870: textBuf[r + len] = c; ! 871: for (i = 1; i <= lookSize; i++) ! 872: InsertNode( r - i ); ! 873: InsertNode( r ); ! 874: do { ! 875: if (matchLen > len) ! 876: matchLen = len; ! 877: if (matchLen <= THRESHOLD) ! 878: { ! 879: matchLen = 1; ! 880: EncodeChar( textBuf[r] ); ! 881: } ! 882: else ! 883: { ! 884: EncodeChar( 255 - THRESHOLD + matchLen ); ! 885: EncodePosition( matchPos ); ! 886: } ! 887: last_matchLen = matchLen; ! 888: for (i = 0; (i < last_matchLen) && ((c = getRLC( inFile )) != EOF); i++) ! 889: { ! 890: DeleteNode( s ); ! 891: textBuf[s] = c; ! 892: if (s < lookSize - 1) ! 893: textBuf[s + buffSize] = c; ! 894: s = (s + 1) & (buffSize - 1); ! 895: r = (r + 1) & (buffSize - 1); ! 896: InsertNode( r ); ! 897: } ! 898: ! 899: while (i++ < last_matchLen) ! 900: { ! 901: DeleteNode( s ); ! 902: s = (s + 1) & (buffSize - 1); ! 903: r = (r + 1) & (buffSize - 1); ! 904: if (--len) ! 905: InsertNode( r ); ! 906: } ! 907: } ! 908: while (len > 0); ! 909: ! 910: EncodeEnd(); ! 911: ! 912: return( (int)((codesize * 10) / inCount )); ! 913: } ! 914: ! 915: ! 916: /*--------------------------------------------------------------------------*/ ! 917: /* lzhDecode */ ! 918: /*--------------------------------------------------------------------------*/ ! 919: ! 920: void lzhDecode( FILE *in, FILE *out ) ! 921: { ! 922: int i, j, k, r, c; ! 923: unsigned long int textsize; ! 924: static char *werr = "lzhDecode: can't write output file!"; ! 925: ! 926: inFile = in; /* set the global file pointers */ ! 927: outFile = out; ! 928: ! 929: /* get the size of the input file in the first word */ ! 930: ! 931: if (fread( &textsize, sizeof textsize, 1, inFile ) < 1) ! 932: Error( "lzhDecode: Can't read the input file" ); ! 933: ! 934: convert( textsize ); /* convert to little endian if necessary */ ! 935: if (textsize == 0) ! 936: return; /* nothing to decode, empty file */ ! 937: ! 938: StartHuff(); ! 939: for (i = 0; i < (buffSize - lookSize); i++) ! 940: textBuf[i] = ' '; ! 941: r = buffSize - lookSize; ! 942: ! 943: outCount = 0; /* init the output character count */ ! 944: while (outCount < textsize ) ! 945: { ! 946: c = DecodeChar(); ! 947: if (c < 256) ! 948: { ! 949: if (putRLC( c, outFile ) == EOF) ! 950: Error( werr ); ! 951: ! 952: textBuf[r++] = c; ! 953: r &= (buffSize - 1); ! 954: } ! 955: else ! 956: { ! 957: i = (r - DecodePosition() - 1) & (buffSize - 1); ! 958: j = c - 255 + THRESHOLD; ! 959: for (k = 0; k < j; k++) ! 960: { ! 961: c = textBuf[(i + k) & (buffSize - 1)]; ! 962: if (putRLC( c, outFile ) == EOF) ! 963: Error( werr ); ! 964: textBuf[r++] = c; ! 965: r &= (buffSize - 1); ! 966: } ! 967: } ! 968: } ! 969: } ! 970: ! 971: /* ---- end of lzh.c */ ! 972:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.