|
|
1.1 root 1: /*--------------------------------------------------------------------------*/
2: /* lzh.c - file compression subroutines for lzss + Huffman encoding */
3: /* */
4: /* Source code history: */
5: /* */
6: /* The original lzhuf.c source was written by Haruyasu Yoshizaki on */
7: /* 11/20/1988 with some minor changes 4/6/1989. Comments were */
8: /* translated by Haruhiko Okumura on 4/7/1989. */
9: /* */
10: /* The original lzss compression was written by Haruhiko Okumura, */
11: /* 12-2-404 Green Heights, 580 Nagasawa, Yokosuka 239, Japan. The */
12: /* Adaptive Huffman algorithm was added by Yoshizaki to increase */
13: /* speed and compression and developed it into the LHarc archiver. */
14: /* */
15: /* Modifications were made by Allan Hoeltje P.O. Box 18045 Boulder, */
16: /* Colorado, USA 80308-8045, during the month of June 1989. These */
17: /* modifications include: More comments; better file error handling; */
18: /* run-length encoding input and output to increase the compression */
19: /* ratio. Note: the run length encoding gives you about 2 to 5 per */
20: /* cent better compression but more importantly it speeds up the */
21: /* compression process on text files by about 60 per cent. */
22: /* */
23: /* Additional modifications made on February 17, 1991 to make the */
24: /* routines more usable as subroutines for a parent application. */
25: /* The two routines, lzhEncode and lzhDecode are the main entry */
26: /* points. Everything else is static. */
27: /* */
28: /* It is my understanding that the lzHuff algorithm and source code */
29: /* is in the public domain and it's use is free and unrestricted. */
30: /*--------------------------------------------------------------------------*/
31:
32: #include <stdio.h>
33: #include <stdlib.h>
34: #include <string.h>
35: #include <ctype.h>
36: #include "rsalib.h"
37: #include "rsaio.h"
38:
39: /*
40: ** Convert to or from external byte order.
41: ** Note that hilo_swap does nothing if this is a LSB-first CPU.
42: */
43:
44: #define convert2(x,lx) hilo_swap( (byteptr)&(x), (lx) )
45: #define convert(x) convert2( (x), sizeof(x) )
46:
47:
48: #define EXIT_FAILURE 1
49: #define EXIT_SUCCESS 0
50: typedef unsigned char uchar;
51:
52: static FILE *inFile; /* clear text input */
53: static FILE *outFile; /* compressed output */
54:
55: static unsigned long int codesize = 0;
56: static unsigned long int inCount = 0;
57: static unsigned long int outCount = 0;
58:
59: void Error( char *message )
60: {
61: printf( "\n%s\n", message );
62: exit( EXIT_FAILURE );
63: }
64:
65: /*--------------------------------------------------------------------------*/
66: /* Run length encoded getc and putc routines. */
67: /*--------------------------------------------------------------------------*/
68:
69: /* getCHR
70: This does a simple getc and count.
71: */
72: static int getCHR( FILE *f )
73: {
74: int c;
75: if ((c = getc( f )) != EOF)
76: inCount++;
77: return( c );
78: }
79:
80:
81: /* putCHR
82: This does a simple putc and count with a write error check.
83: */
84: void putCHR( int c, FILE *f )
85: {
86: if (putc( c, f ) == EOF)
87: {
88: Error( "lzh putCHR: can't write output file!" );
89: }
90: outCount++;
91: }
92:
93:
94: #define NOHIST 0 /* don't consider previous input */
95: #define INREP 1 /* sending a repeated value */
96: #define SENTCHAR 1 /* lastchar set, no lookahead yet */
97: #define SENDNEWC 2 /* run over, send new char next */
98: #define SENDCNT 3 /* newchar set, send count next */
99: #define DLE 0x90 /* repeat sequence marker */
100:
101: static unsigned char state = NOHIST; /* current packing state */
102:
103: /*--------------------------------------------------------------------------*/
104: /* getRLC */
105: /* Non-repeat compression - text is read from file "f" and passed */
106: /* through normally except that a run of more than two characters is */
107: /* encoded as: <char> <DLE> <count>. Special case: a count of zero */
108: /* indicates that the DLE is really a DLE, not a repeat marker. */
109: /*--------------------------------------------------------------------------*/
110:
111: static int
112: getRLC( FILE *f )
113: {
114: static int lastc; /* value returned on last call */
115: static int repcnt; /* repetition counter */
116: static int c; /* latest value seen */
117: static char *badstate = "lzh getRLC: Bad packing state!";
118:
119: switch (state) /* depends on our state */
120: {
121: case NOHIST: /* no relevant history */
122: state = SENTCHAR;
123: return (lastc = getCHR(f)); /* remember the value next time */
124: break;
125: case SENTCHAR: /* char was sent. look ahead */
126: switch (lastc) /* action depends on char */
127: {
128: case DLE: /* if we sent a real DLE */
129: state = NOHIST; /* then start over again */
130: return (0); /* but note that the DLE was real */
131: break;
132: case EOF: /* EOF is always a special case */
133: return (EOF);
134: break;
135: default: /* else test for a repeat */
136: for (repcnt = 1 ;
137: ((c = getCHR(f)) == lastc) && (repcnt < 255) ;
138: repcnt++); /* find end of run */
139:
140: switch(repcnt) /* action depends on run size */
141: {
142: case 1: /* not a repeat */
143: return (lastc = c); /* but remember value next time */
144: break;
145: case 2: /* a repeat, but too short */
146: state = SENDNEWC; /* send the second one next time */
147: return (lastc);
148: break;
149: default: /* a run - compress it */
150: state = SENDCNT; /* send repeat count next time */
151: return (DLE); /* send repeat marker this time */
152: break;
153: }
154: }
155: case SENDNEWC: /* send second char of short run */
156: state = SENTCHAR;
157: return (lastc = c);
158: break;
159: case SENDCNT: /* sent DLE, now send count */
160: state = SENDNEWC;
161: return (repcnt);
162: break;
163: default:
164: {
165: Error( badstate );
166: }
167: }
168: }
169:
170:
171: /*--------------------------------------------------------------------------*/
172: /* putRLC */
173: /* This routine is used to decode non-repeat compression bytes and */
174: /* write them to file "t". Bytes are passed one at a time in coded */
175: /* format, and are written out uncoded. The data is stored normally, */
176: /* except that runs of more than two characters are represented as: */
177: /* */
178: /* <char> <DLE> <count> */
179: /* with a special case that a count of zero indicates a DLE as data, */
180: /* not as a repeat marker. */
181: /*--------------------------------------------------------------------------*/
182:
183: static int
184: putRLC( int c, FILE *t )
185: {
186: static int lastc; /* last character seen */
187: static char *badstate = "lzh putRLC: Bad unpacking state!";
188:
189: switch (state) /* action depends on our state */
190: {
191: case NOHIST: /* no previous history */
192: if (c == DLE) /* if starting a series */
193: state = INREP; /* then remember it next time */
194: else
195: putCHR( (lastc = c), t ); /* else nothing unusual */
196: break;
197: case INREP: /* in a repeat */
198: if (c) /* if count is nonzero */
199: while(--c) /* then repeatedly ... */
200: putCHR( lastc, t ); /* ... output the byte */
201: else
202: putCHR( DLE, t ); /* else output DLE as data */
203: state = NOHIST; /* back to no history */
204: break;
205: default:
206: {
207: Error( badstate );
208: }
209: }
210: return(0);
211: }
212:
213:
214: /*--------------------------------------------------------------------------*/
215: /* LZSS Compression */
216: /*--------------------------------------------------------------------------*/
217:
218: #define buffSize 2048 /* size of ring buffer */
219: #define lookSize 60 /* lookahead buffer size */
220: #define THRESHOLD 2 /* if matchLen is greater than Threshold */
221: /* then code string into position & length */
222: #define treeRoot buffSize /* index for root of binary search tree */
223:
224: /*
225: ring buffer with extra bytes to facilitate string comparison of
226: longest match. This is set by the InsertNode() procedure.
227: */
228:
229: static unsigned char textBuf[ buffSize + lookSize - 1 ];
230: static int matchPos, matchLen;
231: static int lson[ buffSize + 1 ];
232: static int rson[ buffSize + 257 ];
233: static int dad [ buffSize + 1 ];
234:
235:
236: /*--------------------------------------------------------------------------*/
237: /* InitTree */
238: /* Initialize the LZSS trees. */
239: /*--------------------------------------------------------------------------*/
240:
241: static void InitTree( void )
242: {
243: register int i;
244:
245: /*
246: For i = 0 to buffSize, rson[i] and lson[i] will be the right and
247: left children of node i. These nodes need not be initialized. Also,
248: dad[i] is the parent of node i. These are initialized to "treeRoot"
249: which means 'not used.'
250:
251: For i = buffSize+1 to buffSize+256, rson[i] is the root of the tree
252: for strings that begin with character i. These are initialized
253: to "treeRoot". Note there are 256 trees.
254: */
255:
256: for (i = buffSize + 1 ; i <= (buffSize + 256) ; i++)
257: rson[i] = treeRoot; /* root */
258: for (i = 0 ; i < buffSize ; i++)
259: dad[i] = treeRoot; /* node */
260: }
261:
262:
263: /*--------------------------------------------------------------------------*/
264: /* InsertNode */
265: /* Inserts string of length lookSize, textBuf[r..r+lookSize-1], into */
266: /* one of the trees (textBuf[r]'th tree) and returns the longest-match */
267: /* position and length via the global variables matchPos and matchLen. */
268: /* If matchLen = lookSize, then remove the old node in favor of the new */
269: /* one, because the old one will be deleted sooner. Note r plays double */
270: /* role, as tree node and position in buffer. */
271: /*--------------------------------------------------------------------------*/
272:
273: static void InsertNode(int r)
274: {
275: int i, p, cmp;
276: unsigned char *key;
277: unsigned c;
278:
279: cmp = 1;
280: key = &textBuf[r];
281: p = buffSize + 1 + key[0];
282: rson[r] = lson[r] = treeRoot;
283: matchLen = 0;
284: for ( ; ; )
285: {
286: if (cmp >= 0)
287: {
288: if (rson[p] != treeRoot)
289: p = rson[p];
290: else
291: {
292: rson[p] = r;
293: dad [r] = p;
294: return;
295: }
296: }
297: else
298: {
299: if (lson[p] != treeRoot)
300: p = lson[p];
301: else
302: {
303: lson[p] = r;
304: dad [r] = p;
305: return;
306: }
307: }
308: for (i = 1; i < lookSize; i++)
309: if ((cmp = key[i] - textBuf[p + i]) != 0)
310: break;
311: if (i > THRESHOLD)
312: {
313: if (i > matchLen)
314: {
315: matchPos = ((r - p) & (buffSize - 1)) - 1;
316: if ((matchLen = i) >= lookSize)
317: break;
318: }
319: if (i == matchLen)
320: {
321: if ((c = ((r - p) & (buffSize - 1)) - 1) < matchPos)
322: {
323: matchPos = c;
324: }
325: }
326: }
327: }
328: dad[r] = dad[p];
329: lson[r] = lson[p];
330: rson[r] = rson[p];
331: dad[lson[p]] = r;
332: dad[rson[p]] = r;
333: if (rson[dad[p]] == p)
334: rson[dad[p]] = r;
335: else
336: lson[dad[p]] = r;
337: dad[p] = treeRoot; /* remove p */
338: }
339:
340:
341: /*--------------------------------------------------------------------------*/
342: /* DeleteNode */
343: /* Delete node p from the tree. */
344: /*--------------------------------------------------------------------------*/
345:
346: static void DeleteNode( register int p )
347: {
348: register int q;
349:
350: if (dad[p] == treeRoot)
351: return; /* not registered */
352: if (rson[p] == treeRoot)
353: q = lson[p];
354: else
355: if (lson[p] == treeRoot)
356: q = rson[p];
357: else
358: {
359: q = lson[p];
360: if (rson[q] != treeRoot)
361: {
362: do {
363: q = rson[q];
364: }
365: while (rson[q] != treeRoot);
366:
367: rson[dad[q]] = lson[q];
368: dad[lson[q]] = dad[q];
369: lson[q] = lson[p];
370: dad[lson[p]] = q;
371: }
372: rson[q] = rson[p];
373: dad[rson[p]] = q;
374: }
375: dad[q] = dad[p];
376:
377: if (rson[dad[p]] == p)
378: rson[dad[p]] = q;
379: else
380: lson[dad[p]] = q;
381: dad[p] = treeRoot;
382: }
383:
384: /*--------------------------------------------------------------------------*/
385: /* Huffman Coding */
386: /*--------------------------------------------------------------------------*/
387:
388: /* kinds of characters (character code = 0..N_CHAR-1) */
389: #define N_CHAR (256 - THRESHOLD + lookSize)
390: #define tableSize (N_CHAR * 2 - 1) /* size of table */
391: #define rootSize (tableSize - 1) /* position of root */
392: #define MAX_FREQ 0x8000 /* update the tree when the root */
393: /* frequency comes to this value. */
394:
395: /*--------------------------------------------------------------------------*/
396: /* Tables for encoding the upper 6 bits of position */
397: /*--------------------------------------------------------------------------*/
398:
399: static uchar p_len[64] =
400: {
401: 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
402: 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
403: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
404: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
405: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
406: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
407: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
408: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
409: };
410:
411: static uchar p_code[64] =
412: {
413: 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
414: 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
415: 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
416: 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
417: 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
418: 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
419: 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
420: 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
421: };
422:
423: /*--------------------------------------------------------------------------*/
424: /* Tables for decoding the upper 6 bits of position */
425: /*--------------------------------------------------------------------------*/
426:
427: static uchar d_code[256] =
428: {
429: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
430: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
431: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
432: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
433: 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
434: 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
435: 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
436: 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
437: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
438: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
439: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
440: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
441: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
442: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
443: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
444: 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
445: 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
446: 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
447: 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
448: 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
449: 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
450: 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
451: 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
452: 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
453: 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
454: 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
455: 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
456: 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
457: 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
458: 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
459: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
460: 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
461: };
462:
463: static uchar d_len[256] =
464: {
465: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
466: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
467: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
468: 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
469: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
470: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
471: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
472: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
473: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
474: 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
475: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
476: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
477: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
478: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
479: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
480: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
481: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
482: 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
483: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
484: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
485: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
486: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
487: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
488: 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
489: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
490: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
491: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
492: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
493: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
494: 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
495: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
496: 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
497: };
498:
499: static unsigned freq[tableSize + 1]; /* frequency table */
500:
501: static int prnt[ tableSize + N_CHAR ];
502: /* pointers to parent nodes, except for the elements */
503: /* [tableSize..tableSize + N_CHAR - 1] which are used */
504: /* to get the positions of leaves corresponding to the */
505: /* codes. */
506:
507: static int son[ tableSize ]; /* pointers to child nodes (son[], son[] + 1) */
508:
509: static unsigned getbuf = 0;
510: static uchar getlen = 0;
511:
512:
513: /*--------------------------------------------------------------------------*/
514: /* GetBit */
515: /* Get one bit. */
516: /*--------------------------------------------------------------------------*/
517:
518: static int GetBit( void )
519: {
520: int i;
521:
522: while (getlen <= 8)
523: {
524: if ((i = getc( inFile )) < 0)
525: i = 0;
526: getbuf |= i << (8 - getlen);
527: getlen += 8;
528: }
529: i = getbuf;
530: getbuf <<= 1;
531: getlen--;
532: return (i < 0);
533: }
534:
535:
536: /*--------------------------------------------------------------------------*/
537: /* GetByte */
538: /* Get one byte. */
539: /*--------------------------------------------------------------------------*/
540:
541: static int GetByte( void )
542: {
543: unsigned i;
544:
545: while (getlen <= 8)
546: {
547: if ((i = getc( inFile )) < 0)
548: i = 0;
549: getbuf |= i << (8 - getlen);
550: getlen += 8;
551: }
552: i = getbuf;
553: getbuf <<= 8;
554: getlen -= 8;
555: return i >> 8;
556: }
557:
558:
559: /*--------------------------------------------------------------------------*/
560: /* Putcode */
561: /* Write c bits of code. */
562: /*--------------------------------------------------------------------------*/
563:
564: static unsigned putbuf = 0;
565: static uchar putlen = 0;
566:
567: static void Putcode(int l, unsigned c)
568: {
569: static char *werr = "lzh Putcode: can't write output file!";
570:
571: putbuf |= (c >> putlen);
572: if ((putlen += l) >= 8)
573: {
574: if (putc( putbuf >> 8, outFile ) == EOF)
575: {
576: Error( werr );
577: }
578: if ((putlen -= 8) >= 8)
579: {
580: if (putc( putbuf, outFile ) == EOF)
581: {
582: Error( werr );
583: }
584: codesize += 2;
585: putlen -= 8;
586: putbuf = c << (l - putlen);
587: }
588: else
589: {
590: putbuf <<= 8;
591: codesize++;
592: }
593: }
594: }
595:
596:
597: /*--------------------------------------------------------------------------*/
598: /* StartHuff */
599: /* initialize the Huffman trees. */
600: /*--------------------------------------------------------------------------*/
601:
602: static void StartHuff( void )
603: {
604: register int i, j;
605:
606: for (i = 0; i < N_CHAR; i++)
607: {
608: freq[i] = 1;
609: son[i] = i + tableSize;
610: prnt[i + tableSize] = i;
611: }
612: i = 0;
613: j = N_CHAR;
614:
615: while (j <= rootSize)
616: {
617: freq[j] = freq[i] + freq[i + 1];
618: son[j] = i;
619: prnt[i] = prnt[i + 1] = j;
620: i += 2;
621: j++;
622: }
623: freq[tableSize] = 0xffff;
624: prnt[rootSize] = 0;
625: }
626:
627:
628: /*--------------------------------------------------------------------------*/
629: /* reconst */
630: /* Reconstruction of tree. */
631: /*--------------------------------------------------------------------------*/
632:
633: static void reconst( void )
634: {
635: register int i, j, k;
636: unsigned int f, l;
637:
638: /* Collect leaf nodes in the first half of the table and replace the */
639: /* freq by (freq + 1) / 2. */
640:
641: j = 0;
642: for (i = 0; i < tableSize; i++)
643: {
644: if (son[i] >= tableSize)
645: {
646: freq[j] = (freq[i] + 1) / 2;
647: son[j] = son[i];
648: j++;
649: }
650: }
651:
652: /* begin constructing tree by connecting sons */
653:
654: for (i = 0, j = N_CHAR; j < tableSize; i += 2, j++)
655: {
656: k = i + 1;
657: f = freq[j] = freq[i] + freq[k];
658:
659: for (k = j - 1; f < freq[k]; k--);
660:
661: k++;
662: l = (j - k) * 2;
663: memmove( &freq[k + 1], &freq[k], l );
664: freq[k] = f;
665: memmove( &son[k + 1], &son[k], l );
666: son[k] = i;
667: }
668:
669: /* connect prnt */
670:
671: for (i = 0; i < tableSize; i++)
672: if ((k = son[i]) >= tableSize)
673: prnt[k] = i;
674: else
675: prnt[k] = prnt[k + 1] = i;
676: }
677:
678:
679: /*--------------------------------------------------------------------------*/
680: /* update */
681: /* Increment frequency of given code by one, and update tree. */
682: /*--------------------------------------------------------------------------*/
683:
684: static void update(int c)
685: {
686: int i, j, k, l;
687:
688: if (freq[rootSize] == MAX_FREQ)
689: reconst();
690:
691: c = prnt[c + tableSize];
692: do {
693: k = ++freq[c];
694:
695: /* if the order is disturbed, exchange nodes */
696: if (k > freq[l = c + 1])
697: {
698: while (k > freq[++l]);
699: l--;
700: freq[c] = freq[l];
701: freq[l] = k;
702:
703: i = son[c];
704: prnt[i] = l;
705: if (i < tableSize)
706: prnt[i + 1] = l;
707:
708: j = son[l];
709: son[l] = i;
710:
711: prnt[j] = c;
712: if (j < tableSize)
713: prnt[j + 1] = c;
714: son[c] = j;
715:
716: c = l;
717: }
718: }
719: while ((c = prnt[c]) != 0); /* repeat up to root */
720: }
721:
722:
723: /*--------------------------------------------------------------------------*/
724: /* EncodeChar */
725: /*--------------------------------------------------------------------------*/
726:
727: static void EncodeChar(unsigned c)
728: {
729: unsigned i;
730: int j, k;
731:
732: i = j = 0;
733: k = prnt[c + tableSize];
734:
735: /* travel from leaf to root */
736: do {
737: i >>= 1;
738:
739: /* if node's address is odd-numbered, choose bigger brother node */
740: if (k & 1) i += 0x8000;
741:
742: j++;
743: }
744: while ((k = prnt[k]) != rootSize);
745:
746: Putcode(j, i);
747: update(c);
748: }
749:
750:
751: /*--------------------------------------------------------------------------*/
752: /* EncodePosition */
753: /*--------------------------------------------------------------------------*/
754:
755: static void EncodePosition(unsigned c)
756: {
757: unsigned i;
758:
759: /* output upper 6 bits by table lookup */
760:
761: i = c >> 6;
762: Putcode( p_len[i], (unsigned)p_code[i] << 8 );
763:
764: /* output lower 6 bits verbatim */
765: Putcode( 6, (c & 0x3f) << 10 );
766: }
767:
768:
769: /*--------------------------------------------------------------------------*/
770: /* EncodeEnd */
771: /*--------------------------------------------------------------------------*/
772:
773: static void EncodeEnd( void )
774: {
775: static char *werr = "lzh EncodeEnd: can't write output file!";
776:
777: if (putlen)
778: {
779: if (putc(putbuf >> 8, outFile) == EOF)
780: Error( werr );
781: codesize++;
782: }
783: }
784:
785:
786: /*--------------------------------------------------------------------------*/
787: /* DecodeChar */
788: /*--------------------------------------------------------------------------*/
789:
790: static int DecodeChar( void )
791: {
792: register unsigned c;
793:
794: /* Travel from root to leaf choosing the smaller child node (son[]) if */
795: /* the read bit is 0, the bigger (son[]+1} if 1. */
796:
797: c = son[rootSize];
798: while (c < tableSize)
799: {
800: c += GetBit();
801: c = son[c];
802: }
803: c -= tableSize;
804: update(c);
805: return c;
806: }
807:
808:
809: /*--------------------------------------------------------------------------*/
810: /* DecodePosition */
811: /*--------------------------------------------------------------------------*/
812:
813: static int DecodePosition( void )
814: {
815: register unsigned i, j, c;
816:
817: /* recover upper 6 bits from table */
818: i = GetByte();
819: c = (unsigned)d_code[i] << 6;
820: j = d_len[i];
821:
822: /* read lower 6 bits verbatim */
823: j -= 2;
824: while (j--)
825: i = (i << 1) + GetBit();
826: return (c | (i & 0x3f));
827: }
828:
829:
830: /* lzhEncode
831: Compress the input file and write to the output file.
832: Return the ratio of output to input size.
833: */
834: int lzhEncode( FILE *in, FILE *out )
835: {
836: int i, c, len, r, s, last_matchLen;
837: unsigned long int textsize, beginByte;
838: static char *werr = "lzhEncode: can't write output file!";
839:
840: inFile = in; /* set the global file pointers */
841: outFile = out;
842:
843: /* Skip to the end of file and get the byte length. Write the length
844: as the first word in the compressed output file.
845: */
846: beginByte = ftell( inFile ); /* just in case we were prepositioned */
847: fseek( inFile, 0L, SEEK_END );
848: textsize = ftell( inFile ) - beginByte;
849: fseek( inFile, beginByte, SEEK_SET ); /* go back to the beginning of the file */
850:
851: if (textsize == 0)
852: return( -1 ); /* empty files are easy - signal an error */
853:
854: convert( textsize ); /* convert to little endian if necessary */
855:
856: if (fwrite( &textsize, sizeof textsize, 1, outFile ) < 1)
857: Error( werr );
858:
859: StartHuff(); /* init the Huffman trees */
860: InitTree(); /* init the LZSS trees */
861: inCount = 0; /* init the input character count */
862: s = 0;
863: r = buffSize - lookSize;
864: for (i = 0; i < r; i++)
865: textBuf[i] = ' ';
866:
867: /* fill the look ahead buffer */
868:
869: for (len = 0; (len < lookSize) && ((c = getRLC( inFile )) != EOF); len++)
870: textBuf[r + len] = c;
871: for (i = 1; i <= lookSize; i++)
872: InsertNode( r - i );
873: InsertNode( r );
874: do {
875: if (matchLen > len)
876: matchLen = len;
877: if (matchLen <= THRESHOLD)
878: {
879: matchLen = 1;
880: EncodeChar( textBuf[r] );
881: }
882: else
883: {
884: EncodeChar( 255 - THRESHOLD + matchLen );
885: EncodePosition( matchPos );
886: }
887: last_matchLen = matchLen;
888: for (i = 0; (i < last_matchLen) && ((c = getRLC( inFile )) != EOF); i++)
889: {
890: DeleteNode( s );
891: textBuf[s] = c;
892: if (s < lookSize - 1)
893: textBuf[s + buffSize] = c;
894: s = (s + 1) & (buffSize - 1);
895: r = (r + 1) & (buffSize - 1);
896: InsertNode( r );
897: }
898:
899: while (i++ < last_matchLen)
900: {
901: DeleteNode( s );
902: s = (s + 1) & (buffSize - 1);
903: r = (r + 1) & (buffSize - 1);
904: if (--len)
905: InsertNode( r );
906: }
907: }
908: while (len > 0);
909:
910: EncodeEnd();
911:
912: return( (int)((codesize * 10) / inCount ));
913: }
914:
915:
916: /*--------------------------------------------------------------------------*/
917: /* lzhDecode */
918: /*--------------------------------------------------------------------------*/
919:
920: void lzhDecode( FILE *in, FILE *out )
921: {
922: int i, j, k, r, c;
923: unsigned long int textsize;
924: static char *werr = "lzhDecode: can't write output file!";
925:
926: inFile = in; /* set the global file pointers */
927: outFile = out;
928:
929: /* get the size of the input file in the first word */
930:
931: if (fread( &textsize, sizeof textsize, 1, inFile ) < 1)
932: Error( "lzhDecode: Can't read the input file" );
933:
934: convert( textsize ); /* convert to little endian if necessary */
935: if (textsize == 0)
936: return; /* nothing to decode, empty file */
937:
938: StartHuff();
939: for (i = 0; i < (buffSize - lookSize); i++)
940: textBuf[i] = ' ';
941: r = buffSize - lookSize;
942:
943: outCount = 0; /* init the output character count */
944: while (outCount < textsize )
945: {
946: c = DecodeChar();
947: if (c < 256)
948: {
949: if (putRLC( c, outFile ) == EOF)
950: Error( werr );
951:
952: textBuf[r++] = c;
953: r &= (buffSize - 1);
954: }
955: else
956: {
957: i = (r - DecodePosition() - 1) & (buffSize - 1);
958: j = c - 255 + THRESHOLD;
959: for (k = 0; k < j; k++)
960: {
961: c = textBuf[(i + k) & (buffSize - 1)];
962: if (putRLC( c, outFile ) == EOF)
963: Error( werr );
964: textBuf[r++] = c;
965: r &= (buffSize - 1);
966: }
967: }
968: }
969: }
970:
971: /* ---- end of lzh.c */
972:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.