|
|
1.1 root 1: #ifdef SCCSID
2: static char *SccsId = "@(#)compress.c 1.14 9/24/87";
3: #endif /* SCCSID */
4: static char rcs_ident[] = "Based on compress.c,v 4.0 85/07/30 12:50:00 joe Release";
5:
6: /*
7: * Compress - data compression program
8: */
9: #define min(a,b) ((a>b) ? b : a)
10:
11: /*
12: * machine variants which require cc -Dmachine: pdp11, z8000, pcxt
13: */
14:
15: /*
16: * Set USERMEM to the maximum amount of physical user memory available
17: * in bytes. USERMEM is used to determine the maximum BITS that can be used
18: * for compression.
19: *
20: * SACREDMEM is the amount of physical memory saved for others; compress
21: * will hog the rest.
22: */
23: #ifndef SACREDMEM
24: #define SACREDMEM 0
25: #endif
26:
27: #ifndef USERMEM
28: # define USERMEM 450000 /* default user memory */
29: #endif
30:
31: #ifdef interdata /* (Perkin-Elmer) */
32: #define SIGNED_COMPARE_SLOW /* signed compare is slower than unsigned */
33: #endif
34:
35: #ifdef pdp11
36: # define BITS 12 /* max bits/code for 16-bit machine */
37: # define NO_UCHAR /* also if "unsigned char" functions as signed char */
38: # undef USERMEM
39: #endif /* pdp11 */ /* don't forget to compile with -i */
40:
41: #ifdef z8000
42: # define BITS 12
43: # undef vax /* weird preprocessor */
44: # undef USERMEM
45: #endif /* z8000 */
46:
47: #ifdef pcxt
48: # define BITS 12
49: # undef USERMEM
50: #endif /* pcxt */
51:
52: #ifdef USERMEM
53: # if USERMEM >= (433484+SACREDMEM)
54: # define PBITS 16
55: # else
56: # if USERMEM >= (229600+SACREDMEM)
57: # define PBITS 15
58: # else
59: # if USERMEM >= (127536+SACREDMEM)
60: # define PBITS 14
61: # else
62: # if USERMEM >= (73464+SACREDMEM)
63: # define PBITS 13
64: # else
65: # define PBITS 12
66: # endif
67: # endif
68: # endif
69: # endif
70: # undef USERMEM
71: #endif /* USERMEM */
72:
73: #ifdef PBITS /* Preferred BITS for this memory size */
74: # ifndef BITS
75: # define BITS PBITS
76: # endif /* BITS */
77: #endif /* PBITS */
78:
79: #if BITS == 16
80: # define HSIZE 69001 /* 95% occupancy */
81: #endif
82: #if BITS == 15
83: # define HSIZE 35023 /* 94% occupancy */
84: #endif
85: #if BITS == 14
86: # define HSIZE 18013 /* 91% occupancy */
87: #endif
88: #if BITS == 13
89: # define HSIZE 9001 /* 91% occupancy */
90: #endif
91: #if BITS <= 12
92: # define HSIZE 5003 /* 80% occupancy */
93: #endif
94:
95: #ifdef M_XENIX /* Stupid compiler can't handle arrays with */
96: # if BITS == 16 /* more than 65535 bytes - so we fake it */
97: # define XENIX_16
98: # else
99: # if BITS > 13 /* Code only handles BITS = 12, 13, or 16 */
100: # define BITS 13
101: # endif
102: # endif
103: #endif
104:
105: /*
106: * a code_int must be able to hold 2**BITS values of type int, and also -1
107: */
108: #if BITS > 15
109: typedef long int code_int;
110: #else
111: typedef int code_int;
112: #endif
113:
114: #ifdef SIGNED_COMPARE_SLOW
115: typedef unsigned long int count_int;
116: typedef unsigned short int count_short;
117: #else
118: typedef long int count_int;
119: #endif
120:
121: #ifdef NO_UCHAR
122: typedef char char_type;
123: #else
124: typedef unsigned char char_type;
125: #endif /* UCHAR */
126: char_type magic_header[] = { "\037\235" }; /* 1F 9D */
127:
128: /* Defines for third byte of header */
129: #define BIT_MASK 0x1f
130: #define BLOCK_MASK 0x80
131: /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
132: a fourth header byte (for expansion).
133: */
134: #define INIT_BITS 9 /* initial number of bits/code */
135:
136: /*
137: * compress.c - File compression ala IEEE Computer, June 1984.
138: *
139: * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
140: * Jim McKie (decvax!mcvax!jim)
141: * Steve Davies (decvax!vax135!petsd!peora!srd)
142: * Ken Turkowski (decvax!decwrl!turtlevax!ken)
143: * James A. Woods (decvax!ihnp4!ames!jaw)
144: * Joe Orost (decvax!vax135!petsd!joe)
145: *
146: */
147:
148: #include <stdio.h>
149: #include <ctype.h>
150: #include <signal.h>
151: #include <sys/types.h>
152: #include <sys/stat.h>
153:
154: #define ARGVAL() (*++(*argv) || (--argc && *++argv))
155:
156: int n_bits; /* number of bits/code */
157: int maxbits = BITS; /* user settable max # bits/code */
158: code_int maxcode; /* maximum code, given n_bits */
159: code_int maxmaxcode = 1L << BITS; /* should NEVER generate this code */
160: #ifdef COMPATIBLE /* But wrong! */
161: # define MAXCODE(n_bits) (1L << (n_bits) - 1)
162: #else
163: # define MAXCODE(n_bits) ((1L << (n_bits)) - 1)
164: #endif /* COMPATIBLE */
165:
166: #ifdef XENIX_16
167: count_int htab0[8192];
168: count_int htab1[8192];
169: count_int htab2[8192];
170: count_int htab3[8192];
171: count_int htab4[8192];
172: count_int htab5[8192];
173: count_int htab6[8192];
174: count_int htab7[8192];
175: count_int htab8[HSIZE-65536];
176: count_int * htab[9] = {
177: htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 };
178:
179: #define htabof(i) (htab[(i) >> 13][(i) & 0x1fff])
180: unsigned short code0tab[16384];
181: unsigned short code1tab[16384];
182: unsigned short code2tab[16384];
183: unsigned short code3tab[16384];
184: unsigned short code4tab[16384];
185: unsigned short * codetab[5] = {
186: code0tab, code1tab, code2tab, code3tab, code4tab };
187:
188: #define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff])
189:
190: #else /* Normal machine */
191: # ifdef sel
192: /* support gould base register problems */
193: /*NOBASE*/
194: count_int htab [HSIZE];
195: unsigned short codetab [HSIZE];
196: /*NOBASE*/
197: # else /* !gould */
198: count_int htab [HSIZE];
199: unsigned short codetab [HSIZE];
200: # endif /* !gould */
201: #define htabof(i) htab[i]
202: #define codetabof(i) codetab[i]
203: #endif /* !XENIX_16 */
204: code_int hsize = HSIZE; /* for dynamic table sizing */
205: count_int fsize;
206:
207: /*
208: * To save much memory, we overlay the table used by compress() with those
209: * used by decompress(). The tab_prefix table is the same size and type
210: * as the codetab. The tab_suffix table needs 2**BITS characters. We
211: * get this from the beginning of htab. The output stack uses the rest
212: * of htab, and contains characters. There is plenty of room for any
213: * possible stack (stack used to be 8000 characters).
214: */
215:
216: #define tab_prefixof(i) codetabof(i)
217: #ifdef XENIX_16
218: # define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]
219: # define de_stack ((char_type *)(htab2))
220: #else /* Normal machine */
221: # define tab_suffixof(i) ((char_type *)(htab))[i]
222: # define de_stack ((char_type *)&tab_suffixof(1<<BITS))
223: #endif /* XENIX_16 */
224:
225: code_int free_ent = 0; /* first unused entry */
226: int exit_stat = 0;
227:
228: code_int getcode();
229:
230: Usage() {
231: #ifdef DEBUG
232: fprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n");
233: }
234: int debug = 0;
235: #else
236: fprintf(stderr,"Usage: compress [-dfvcV] [-b maxbits] [file ...]\n");
237: }
238: #endif /* DEBUG */
239: int nomagic = 0; /* Use a 3-byte magic number header, unless old file */
240: int zcat_flg = 0; /* Write output on stdout, suppress messages */
241: int quiet = 1; /* don't tell me about compression */
242:
243: /*
244: * block compression parameters -- after all codes are used up,
245: * and compression rate changes, start over.
246: */
247: int block_compress = BLOCK_MASK;
248: int clear_flg = 0;
249: long int ratio = 0;
250: #define CHECK_GAP 10000 /* ratio check interval */
251: count_int checkpoint = CHECK_GAP;
252: /*
253: * the next two codes should not be changed lightly, as they must not
254: * lie within the contiguous general code space.
255: */
256: #define FIRST 257 /* first free entry */
257: #define CLEAR 256 /* table clear output code */
258:
259: int force = 0;
260: char ofname [100];
261: #ifdef DEBUG
262: int verbose = 0;
263: #endif /* DEBUG */
264: int (*bgnd_flag)();
265:
266: int do_decomp = 0;
267:
268: /*****************************************************************
269: * TAG( main )
270: *
271: * Algorithm from "A Technique for High Performance Data Compression",
272: * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
273: *
274: * Usage: compress [-dfvc] [-b bits] [file ...]
275: * Inputs:
276: * -d: If given, decompression is done instead.
277: *
278: * -c: Write output on stdout, don't remove original.
279: *
280: * -b: Parameter limits the max number of bits/code.
281: *
282: * -f: Forces output file to be generated, even if one already
283: * exists, and even if no space is saved by compressing.
284: * If -f is not used, the user will be prompted if stdin is
285: * a tty, otherwise, the output file will not be overwritten.
286: *
287: * -v: Write compression statistics
288: *
289: * file ...: Files to be compressed. If none specified, stdin
290: * is used.
291: * Outputs:
292: * file.Z: Compressed form of file with same mode, owner, and utimes
293: * or stdout (if stdin used as input)
294: *
295: * Assumptions:
296: * When filenames are given, replaces with the compressed version
297: * (.Z suffix) only if the file decreases in size.
298: * Algorithm:
299: * Modified Lempel-Ziv method (LZW). Basically finds common
300: * substrings and replaces them with a variable size code. This is
301: * deterministic, and can be done on the fly. Thus, the decompression
302: * procedure needs no input table, but tracks the way the table was built.
303: */
304:
305: main( argc, argv )
306: register int argc; char **argv;
307: {
308: int overwrite = 0; /* Do not overwrite unless given -f flag */
309: char tempname[100];
310: char **filelist, **fileptr;
311: char *cp, *rindex(), *malloc();
312: struct stat statbuf;
313: extern onintr(), oops();
314:
315:
316: if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
317: signal ( SIGINT, onintr );
318: signal ( SIGSEGV, oops );
319: }
320:
321: #ifdef COMPATIBLE
322: nomagic = 1; /* Original didn't have a magic number */
323: #endif /* COMPATIBLE */
324:
325: filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
326: *filelist = NULL;
327:
328: if((cp = rindex(argv[0], '/')) != 0) {
329: cp++;
330: } else {
331: cp = argv[0];
332: }
333: if(strcmp(cp, "uncompress") == 0) {
334: do_decomp = 1;
335: } else if(strcmp(cp, "zcat") == 0) {
336: do_decomp = 1;
337: zcat_flg = 1;
338: }
339:
340: #ifdef BSD4_2
341: /* 4.2BSD dependent - take it out if not */
342: setlinebuf( stderr );
343: #endif /* BSD4_2 */
344:
345: /* Argument Processing
346: * All flags are optional.
347: * -D => debug
348: * -V => print Version; debug verbose
349: * -d => do_decomp
350: * -v => unquiet
351: * -f => force overwrite of output file
352: * -n => no header: useful to uncompress old files
353: * -b maxbits => maxbits. If -b is specified, then maxbits MUST be
354: * given also.
355: * -c => cat all output to stdout
356: * -C => generate output compatible with compress 2.0.
357: * if a string is left, must be an input filename.
358: */
359: for (argc--, argv++; argc > 0; argc--, argv++) {
360: if (**argv == '-') { /* A flag argument */
361: while (*++(*argv)) { /* Process all flags in this arg */
362: switch (**argv) {
363: #ifdef DEBUG
364: case 'D':
365: debug = 1;
366: break;
367: case 'V':
368: verbose = 1;
369: version();
370: break;
371: #else
372: case 'V':
373: version();
374: break;
375: #endif /* DEBUG */
376: case 'v':
377: quiet = 0;
378: break;
379: case 'd':
380: do_decomp = 1;
381: break;
382: case 'f':
383: case 'F':
384: overwrite = 1;
385: force = 1;
386: break;
387: case 'n':
388: nomagic = 1;
389: break;
390: case 'C':
391: block_compress = 0;
392: break;
393: case 'b':
394: if (!ARGVAL()) {
395: fprintf(stderr, "Missing maxbits\n");
396: Usage();
397: exit(1);
398: }
399: maxbits = atoi(*argv);
400: goto nextarg;
401: case 'c':
402: zcat_flg = 1;
403: break;
404: case 'q':
405: quiet = 1;
406: break;
407: default:
408: fprintf(stderr, "Unknown flag: '%c'; ", **argv);
409: Usage();
410: exit(1);
411: }
412: }
413: }
414: else { /* Input file name */
415: *fileptr++ = *argv; /* Build input file list */
416: *fileptr = NULL;
417: /* process nextarg; */
418: }
419: nextarg: continue;
420: }
421:
422: if(maxbits < INIT_BITS) maxbits = INIT_BITS;
423: if (maxbits > BITS) maxbits = BITS;
424: maxmaxcode = 1L << maxbits;
425:
426: if (*filelist != NULL) {
427: for (fileptr = filelist; *fileptr; fileptr++) {
428: exit_stat = 0;
429: if (do_decomp != 0) { /* DECOMPRESSION */
430: /* Check for .Z suffix */
431: if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
432: /* No .Z: tack one on */
433: strcpy(tempname, *fileptr);
434: strcat(tempname, ".Z");
435: *fileptr = tempname;
436: }
437: /* Open input file */
438: if ((freopen(*fileptr, "r", stdin)) == NULL) {
439: perror(*fileptr); continue;
440: }
441: /* Check the magic number */
442: if (nomagic == 0) {
443: if ((getchar() != (magic_header[0] & 0xFF))
444: || (getchar() != (magic_header[1] & 0xFF))) {
445: fprintf(stderr, "%s: not in compressed format\n",
446: *fileptr);
447: continue;
448: }
449: maxbits = getchar(); /* set -b from file */
450: block_compress = maxbits & BLOCK_MASK;
451: maxbits &= BIT_MASK;
452: maxmaxcode = 1L << maxbits;
453: if(maxbits > BITS) {
454: fprintf(stderr,
455: "%s: compressed with %d bits, can only handle %d bits\n",
456: *fileptr, maxbits, BITS);
457: continue;
458: }
459: }
460: /* Generate output filename */
461: strcpy(ofname, *fileptr);
462: ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */
463: } else { /* COMPRESSION */
464: if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
465: fprintf(stderr, "%s: already has .Z suffix -- no change\n",
466: *fileptr);
467: continue;
468: }
469: /* Open input file */
470: if ((freopen(*fileptr, "r", stdin)) == NULL) {
471: perror(*fileptr); continue;
472: }
473: stat ( *fileptr, &statbuf );
474: fsize = (long) statbuf.st_size;
475: /*
476: * tune hash table size for small files -- ad hoc,
477: * but the sizes match earlier #defines, which
478: * serve as upper bounds on the number of output codes.
479: */
480: hsize = HSIZE;
481: if ( fsize < (1 << 12) )
482: hsize = min ( 5003, HSIZE );
483: else if ( fsize < (1 << 13) )
484: hsize = min ( 9001, HSIZE );
485: else if ( fsize < (1 << 14) )
486: hsize = min ( 18013, HSIZE );
487: else if ( fsize < (1 << 15) )
488: hsize = min ( 35023, HSIZE );
489: else if ( fsize < 47000 )
490: hsize = min ( 50021, HSIZE );
491:
492: /* Generate output filename */
493: strcpy(ofname, *fileptr);
494: #ifndef BSD4_2 /* Short filenames */
495: if ((cp=rindex(ofname,'/')) != NULL) cp++;
496: else cp = ofname;
497: if (strlen(cp) > 12) {
498: fprintf(stderr,"%s: filename too long to tack on .Z\n",cp);
499: continue;
500: }
501: #endif /* BSD4_2 Long filenames allowed */
502: strcat(ofname, ".Z");
503: }
504: /* Check for overwrite of existing file */
505: if (overwrite == 0 && zcat_flg == 0) {
506: if (stat(ofname, &statbuf) == 0) {
507: char response[2];
508: response[0] = 'n';
509: fprintf(stderr, "%s already exists;", ofname);
510: if (foreground()) {
511: fprintf(stderr, " do you wish to overwrite %s (y or n)? ",
512: ofname);
513: fflush(stderr);
514: read(2, response, 2);
515: while (response[1] != '\n') {
516: if (read(2, response+1, 1) < 0) { /* Ack! */
517: perror("stderr"); break;
518: }
519: }
520: }
521: if (response[0] != 'y') {
522: fprintf(stderr, "\tnot overwritten\n");
523: continue;
524: }
525: }
526: }
527: if(zcat_flg == 0) { /* Open output file */
528: if (freopen(ofname, "w", stdout) == NULL) {
529: perror(ofname);
530: continue;
531: }
532: if(!quiet)
533: fprintf(stderr, "%s: ", *fileptr);
534: }
535:
536: /* Actually do the compression/decompression */
537: if (do_decomp == 0) compress();
538: #ifndef DEBUG
539: else decompress();
540: #else
541: else if (debug == 0) decompress();
542: else printcodes();
543: if (verbose) dump_tab();
544: #endif /* DEBUG */
545: if(zcat_flg == 0) {
546: copystat(*fileptr, ofname); /* Copy stats */
547: if((exit_stat == 1) || (!quiet))
548: putc('\n', stderr);
549: }
550: }
551: } else { /* Standard input */
552: if (do_decomp == 0) {
553: compress();
554: #ifdef DEBUG
555: if(verbose) dump_tab();
556: #endif /* DEBUG */
557: if(!quiet)
558: putc('\n', stderr);
559: } else {
560: /* Check the magic number */
561: if (nomagic == 0) {
562: if ((getchar()!=(magic_header[0] & 0xFF))
563: || (getchar()!=(magic_header[1] & 0xFF))) {
564: fprintf(stderr, "stdin: not in compressed format\n");
565: exit(1);
566: }
567: maxbits = getchar(); /* set -b from file */
568: block_compress = maxbits & BLOCK_MASK;
569: maxbits &= BIT_MASK;
570: maxmaxcode = 1L << maxbits;
571: fsize = 100000; /* assume stdin large for USERMEM */
572: if(maxbits > BITS) {
573: fprintf(stderr,
574: "stdin: compressed with %d bits, can only handle %d bits\n",
575: maxbits, BITS);
576: exit(1);
577: }
578: }
579: #ifndef DEBUG
580: decompress();
581: #else
582: if (debug == 0) decompress();
583: else printcodes();
584: if (verbose) dump_tab();
585: #endif /* DEBUG */
586: }
587: }
588: exit(exit_stat);
589: }
590:
591: static int offset;
592: long int in_count = 1; /* length of input */
593: long int bytes_out; /* length of compressed output */
594: long int out_count = 0; /* # of codes output (for debugging) */
595:
596: /*
597: * compress stdin to stdout
598: *
599: * Algorithm: use open addressing double hashing (no chaining) on the
600: * prefix code / next character combination. We do a variant of Knuth's
601: * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
602: * secondary probe. Here, the modular division first probe is gives way
603: * to a faster exclusive-or manipulation. Also do block compression with
604: * an adaptive reset, whereby the code table is cleared when the compression
605: * ratio decreases, but after the table fills. The variable-length output
606: * codes are re-sized at this point, and a special CLEAR code is generated
607: * for the decompressor. Late addition: construct the table according to
608: * file size for noticeable speed improvement on small files. Please direct
609: * questions about this implementation to ames!jaw.
610: */
611:
612: compress() {
613: register long fcode;
614: register code_int i = 0;
615: register int c;
616: register code_int ent;
617: #ifdef XENIX_16
618: register code_int disp;
619: #else /* Normal machine */
620: register int disp;
621: #endif
622: register code_int hsize_reg;
623: register int hshift;
624:
625: #ifndef COMPATIBLE
626: if (nomagic == 0) {
627: putchar(magic_header[0]); putchar(magic_header[1]);
628: putchar((char)(maxbits | block_compress));
629: if(ferror(stdout))
630: writeerr();
631: }
632: #endif /* COMPATIBLE */
633:
634: offset = 0;
635: bytes_out = 3; /* includes 3-byte header mojo */
636: out_count = 0;
637: clear_flg = 0;
638: ratio = 0;
639: in_count = 1;
640: checkpoint = CHECK_GAP;
641: maxcode = MAXCODE(n_bits = INIT_BITS);
642: free_ent = ((block_compress) ? FIRST : 256 );
643:
644: ent = getchar ();
645:
646: hshift = 0;
647: for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
648: hshift++;
649: hshift = 8 - hshift; /* set hash code range bound */
650:
651: hsize_reg = hsize;
652: cl_hash( (count_int) hsize_reg); /* clear hash table */
653:
654: #ifdef SIGNED_COMPARE_SLOW
655: while ( (c = getchar()) != (unsigned) EOF ) {
656: #else
657: while ( (c = getchar()) != EOF ) {
658: #endif
659: in_count++;
660: fcode = (long) (((long) c << maxbits) + ent);
661: i = (((long)c << hshift) ^ ent); /* xor hashing */
662:
663: if ( htabof (i) == fcode ) {
664: ent = codetabof (i);
665: continue;
666: } else if ( (long)htabof (i) < 0 ) /* empty slot */
667: goto nomatch;
668: disp = hsize_reg - i; /* secondary hash (after G. Knott) */
669: if ( i == 0 )
670: disp = 1;
671: probe:
672: if ( (i -= disp) < 0 )
673: i += hsize_reg;
674:
675: if ( htabof (i) == fcode ) {
676: ent = codetabof (i);
677: continue;
678: }
679: if ( (long)htabof (i) > 0 )
680: goto probe;
681: nomatch:
682: output ( (code_int) ent );
683: out_count++;
684: ent = c;
685: #ifdef SIGNED_COMPARE_SLOW
686: if ( (unsigned) free_ent < (unsigned) maxmaxcode) {
687: #else
688: if ( free_ent < maxmaxcode ) {
689: #endif
690: codetabof (i) = free_ent++; /* code -> hashtable */
691: htabof (i) = fcode;
692: }
693: else if ( (count_int)in_count >= checkpoint && block_compress )
694: cl_block ();
695: }
696: /*
697: * Put out the final code.
698: */
699: output( (code_int)ent );
700: out_count++;
701: output( (code_int)-1 );
702:
703: /*
704: * Print out stats on stderr
705: */
706: if(zcat_flg == 0 && !quiet) {
707: #ifdef DEBUG
708: fprintf( stderr,
709: "%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
710: in_count, out_count, bytes_out );
711: prratio( stderr, in_count, bytes_out );
712: fprintf( stderr, "\n");
713: fprintf( stderr, "\tCompression as in compact: " );
714: prratio( stderr, in_count-bytes_out, in_count );
715: fprintf( stderr, "\n");
716: fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
717: free_ent - 1, n_bits );
718: #else /* !DEBUG */
719: fprintf( stderr, "Compression: " );
720: prratio( stderr, in_count-bytes_out, in_count );
721: #endif /* DEBUG */
722: }
723: if(bytes_out > in_count) /* exit(2) if no savings */
724: exit_stat = 2;
725: return;
726: }
727:
728: /*****************************************************************
729: * TAG( output )
730: *
731: * Output the given code.
732: * Inputs:
733: * code: A n_bits-bit integer. If == -1, then EOF. This assumes
734: * that n_bits =< (long)wordsize - 1.
735: * Outputs:
736: * Outputs code to the file.
737: * Assumptions:
738: * Chars are 8 bits long.
739: * Algorithm:
740: * Maintain a BITS character long buffer (so that 8 codes will
741: * fit in it exactly). Use the VAX insv instruction to insert each
742: * code in turn. When the buffer fills up empty it and start over.
743: */
744:
745: static char buf[BITS];
746:
747: #ifndef vax
748: char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
749: char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
750: #endif /* vax */
751:
752: output( code )
753: code_int code;
754: {
755: #ifdef DEBUG
756: static int col = 0;
757: #endif /* DEBUG */
758:
759: /*
760: * On the VAX, it is important to have the register declarations
761: * in exactly the order given, or the asm will break.
762: */
763: register int r_off = offset, bits= n_bits;
764: register char * bp = buf;
765:
766: #ifdef DEBUG
767: if ( verbose )
768: fprintf( stderr, "%5d%c", code,
769: (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
770: #endif /* DEBUG */
771: if ( code >= 0 ) {
772: #ifdef vax
773: /* VAX DEPENDENT!! Implementation on other machines is below.
774: *
775: * Translation: Insert BITS bits from the argument starting at
776: * offset bits from the beginning of buf.
777: */
778: 0; /* Work around for pcc -O bug with asm and if stmt */
779: asm( "insv 4(ap),r11,r10,(r9)" );
780: #else /* not a vax */
781: /*
782: * byte/bit numbering on the VAX is simulated by the following code
783: */
784: /*
785: * Get to the first byte.
786: */
787: bp += (r_off >> 3);
788: r_off &= 7;
789: /*
790: * Since code is always >= 8 bits, only need to mask the first
791: * hunk on the left.
792: */
793: *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
794: bp++;
795: bits -= (8 - r_off);
796: code >>= 8 - r_off;
797: /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
798: if ( bits >= 8 ) {
799: *bp++ = code;
800: code >>= 8;
801: bits -= 8;
802: }
803: /* Last bits. */
804: if(bits)
805: *bp = code;
806: #endif /* vax */
807: offset += n_bits;
808: if ( offset == (n_bits << 3) ) {
809: bp = buf;
810: bits = n_bits;
811: bytes_out += bits;
812: do
813: putchar(*bp++);
814: while(--bits);
815: offset = 0;
816: }
817:
818: /*
819: * If the next entry is going to be too big for the code size,
820: * then increase it, if possible.
821: */
822: if ( free_ent > maxcode || (clear_flg > 0))
823: {
824: /*
825: * Write the whole buffer, because the input side won't
826: * discover the size increase until after it has read it.
827: */
828: if ( offset > 0 ) {
829: if( fwrite( buf, 1, n_bits, stdout ) != n_bits)
830: writeerr();
831: bytes_out += n_bits;
832: }
833: offset = 0;
834:
835: if ( clear_flg ) {
836: maxcode = MAXCODE (n_bits = INIT_BITS);
837: clear_flg = 0;
838: }
839: else {
840: n_bits++;
841: if ( n_bits == maxbits )
842: maxcode = maxmaxcode;
843: else
844: maxcode = MAXCODE(n_bits);
845: }
846: #ifdef DEBUG
847: if ( debug ) {
848: fprintf( stderr, "\nChange to %d bits\n", n_bits );
849: col = 0;
850: }
851: #endif /* DEBUG */
852: }
853: } else {
854: /*
855: * At EOF, write the rest of the buffer.
856: */
857: if ( offset > 0 )
858: fwrite( buf, 1, (offset + 7) / 8, stdout );
859: bytes_out += (offset + 7) / 8;
860: offset = 0;
861: fflush( stdout );
862: #ifdef DEBUG
863: if ( verbose )
864: fprintf( stderr, "\n" );
865: #endif /* DEBUG */
866: if( ferror( stdout ) )
867: writeerr();
868: }
869: }
870:
871: /*
872: * Decompress stdin to stdout. This routine adapts to the codes in the
873: * file building the "string" table on-the-fly; requiring no table to
874: * be stored in the compressed file. The tables used herein are shared
875: * with those of the compress() routine. See the definitions above.
876: */
877:
878: decompress() {
879: register char_type *stackp;
880: register int finchar;
881: register code_int code, oldcode, incode;
882:
883: /*
884: * As above, initialize the first 256 entries in the table.
885: */
886: maxcode = MAXCODE(n_bits = INIT_BITS);
887: for ( code = 255; code >= 0; code-- ) {
888: tab_prefixof(code) = 0;
889: tab_suffixof(code) = (char_type)code;
890: }
891: free_ent = ((block_compress) ? FIRST : 256 );
892:
893: finchar = oldcode = getcode();
894: if(oldcode == -1) /* EOF already? */
895: return; /* Get out of here */
896: putchar( (char)finchar ); /* first code must be 8 bits = char */
897: if(ferror(stdout)) /* Crash if can't write */
898: writeerr();
899: stackp = de_stack;
900:
901: while ( (code = getcode()) > -1 ) {
902:
903: if ( (code == CLEAR) && block_compress ) {
904: for ( code = 255; code >= 0; code-- )
905: tab_prefixof(code) = 0;
906: clear_flg = 1;
907: free_ent = FIRST - 1;
908: if ( (code = getcode ()) == -1 ) /* O, untimely death! */
909: break;
910: }
911: incode = code;
912: /*
913: * Special case for KwKwK string.
914: */
915: if ( code >= free_ent ) {
916: *stackp++ = finchar;
917: code = oldcode;
918: }
919:
920: /*
921: * Generate output characters in reverse order
922: */
923: #ifdef SIGNED_COMPARE_SLOW
924: while ( ((unsigned long)code) >= ((unsigned long)256) ) {
925: #else
926: while ( code >= 256 ) {
927: #endif
928: *stackp++ = tab_suffixof(code);
929: code = tab_prefixof(code);
930: }
931: *stackp++ = finchar = tab_suffixof(code);
932:
933: /*
934: * And put them out in forward order
935: */
936: do
937: putchar ( *--stackp );
938: while ( stackp > de_stack );
939:
940: /*
941: * Generate the new entry.
942: */
943: if ( (code=free_ent) < maxmaxcode ) {
944: tab_prefixof(code) = (unsigned short)oldcode;
945: tab_suffixof(code) = finchar;
946: free_ent = code+1;
947: }
948: /*
949: * Remember previous code.
950: */
951: oldcode = incode;
952: }
953: fflush( stdout );
954: if(ferror(stdout))
955: writeerr();
956: }
957:
958: /*****************************************************************
959: * TAG( getcode )
960: *
961: * Read one code from the standard input. If EOF, return -1.
962: * Inputs:
963: * stdin
964: * Outputs:
965: * code or -1 is returned.
966: */
967:
968: code_int
969: getcode() {
970: /*
971: * On the VAX, it is important to have the register declarations
972: * in exactly the order given, or the asm will break.
973: */
974: register code_int code;
975: static int offset = 0, size = 0;
976: static char_type buf[BITS];
977: register int r_off, bits;
978: register char_type *bp = buf;
979:
980: if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
981: /*
982: * If the next entry will be too big for the current code
983: * size, then we must increase the size. This implies reading
984: * a new buffer full, too.
985: */
986: if ( free_ent > maxcode ) {
987: n_bits++;
988: if ( n_bits == maxbits )
989: maxcode = maxmaxcode; /* won't get any bigger now */
990: else
991: maxcode = MAXCODE(n_bits);
992: }
993: if ( clear_flg > 0) {
994: maxcode = MAXCODE (n_bits = INIT_BITS);
995: clear_flg = 0;
996: }
997: size = fread( buf, 1, n_bits, stdin );
998: if ( size <= 0 )
999: return -1; /* end of file */
1000: offset = 0;
1001: /* Round size down to integral number of codes */
1002: size = (size << 3) - (n_bits - 1);
1003: }
1004: r_off = offset;
1005: bits = n_bits;
1006: #ifdef vax
1007: asm( "extzv r10,r9,(r8),r11" );
1008: #else /* not a vax */
1009: /*
1010: * Get to the first byte.
1011: */
1012: bp += (r_off >> 3);
1013: r_off &= 7;
1014: /* Get first part (low order bits) */
1015: #ifdef NO_UCHAR
1016: code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
1017: #else
1018: code = (*bp++ >> r_off);
1019: #endif /* NO_UCHAR */
1020: bits -= (8 - r_off);
1021: r_off = 8 - r_off; /* now, offset into code word */
1022: /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1023: if ( bits >= 8 ) {
1024: #ifdef NO_UCHAR
1025: code |= (*bp++ & 0xff) << r_off;
1026: #else
1027: code |= *bp++ << r_off;
1028: #endif /* NO_UCHAR */
1029: r_off += 8;
1030: bits -= 8;
1031: }
1032: /* high order bits. */
1033: code |= (*bp & rmask[bits]) << r_off;
1034: #endif /* vax */
1035: offset += n_bits;
1036:
1037: return code;
1038: }
1039:
1040: char *
1041: rindex(s, c) /* For those who don't have it in libc.a */
1042: register char *s, c;
1043: {
1044: char *p;
1045: for (p = NULL; *s; s++)
1046: if (*s == c)
1047: p = s;
1048: return(p);
1049: }
1050:
1051: #ifdef DEBUG
1052: printcodes()
1053: {
1054: /*
1055: * Just print out codes from input file. For debugging.
1056: */
1057: code_int code;
1058: int col = 0, bits;
1059:
1060: bits = n_bits = INIT_BITS;
1061: maxcode = MAXCODE(n_bits);
1062: free_ent = ((block_compress) ? FIRST : 256 );
1063: while ( ( code = getcode() ) >= 0 ) {
1064: if ( (code == CLEAR) && block_compress ) {
1065: free_ent = FIRST - 1;
1066: clear_flg = 1;
1067: }
1068: else if ( free_ent < maxmaxcode )
1069: free_ent++;
1070: if ( bits != n_bits ) {
1071: fprintf(stderr, "\nChange to %d bits\n", n_bits );
1072: bits = n_bits;
1073: col = 0;
1074: }
1075: fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
1076: }
1077: putc( '\n', stderr );
1078: exit( 0 );
1079: }
1080:
1081: #ifdef XENIX_16
1082: code_int stab1[8192] ;
1083: code_int stab2[8192] ;
1084: code_int stab3[8192] ;
1085: code_int stab4[8192] ;
1086: code_int stab5[8192] ;
1087: code_int stab6[8192] ;
1088: code_int stab7[8192] ;
1089: code_int stab8[8192] ;
1090: code_int * sorttab[8] = {stab1, stab2, stab3, stab4, stab5, stab6, stab7,
1091: stab8 } ;
1092: #define stabof(i) (sorttab[(i) >> 13][(i) & 0x1fff])
1093: #else
1094: code_int sorttab[HSIZE]; /* sorted pointers into htab */
1095: #define stabof(i) (sorttab[i])
1096: #endif
1097:
1098: dump_tab() /* dump string table */
1099: {
1100: register int i, first;
1101: register ent;
1102: #define STACK_SIZE 15000
1103: int stack_top = STACK_SIZE;
1104: register c;
1105: unsigned mbshift ;
1106:
1107: if(do_decomp == 0) { /* compressing */
1108: register int flag = 1;
1109:
1110: for(i=0; i<hsize; i++) { /* build sort pointers */
1111: if((long)htabof(i) >= 0) {
1112: stabof(codetabof(i)) = i;
1113: }
1114: }
1115: first = block_compress ? FIRST : 256;
1116: for(i = first; i < free_ent; i++) {
1117: fprintf(stderr, "%5d: \"", i);
1118: de_stack[--stack_top] = '\n';
1119: de_stack[--stack_top] = '"';
1120: stack_top = in_stack((htabof(stabof(i))>>maxbits)&0xff,
1121: stack_top);
1122: /* for(ent=htabof(stabof(i)) & ((1<<maxbits)-1); */
1123: mbshift = ((1 << maxbits) - 1) ;
1124: ent = htabof(stabof(i)) & mbshift ;
1125: for(;
1126: ent > 256;
1127: /* ent=htabof(stabof(ent)) & ((1<<maxbits)-1)) { */
1128: ent=htabof(stabof(ent)) & mbshift) {
1129: stack_top = in_stack(htabof(stabof(ent)) >> maxbits,
1130: stack_top);
1131: }
1132: stack_top = in_stack(ent, stack_top);
1133: fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr);
1134: stack_top = STACK_SIZE;
1135: }
1136: } else if(!debug) { /* decompressing */
1137:
1138: for ( i = 0; i < free_ent; i++ ) {
1139: ent = i;
1140: c = tab_suffixof(ent);
1141: if ( isascii(c) && isprint(c) )
1142: fprintf( stderr, "%5d: %5d/'%c' \"",
1143: ent, tab_prefixof(ent), c );
1144: else
1145: fprintf( stderr, "%5d: %5d/\\%03o \"",
1146: ent, tab_prefixof(ent), c );
1147: de_stack[--stack_top] = '\n';
1148: de_stack[--stack_top] = '"';
1149: for ( ; ent != 0;
1150: ent = (ent >= FIRST ? tab_prefixof(ent) : 0) ) {
1151: stack_top = in_stack(tab_suffixof(ent), stack_top);
1152: }
1153: fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
1154: stack_top = STACK_SIZE;
1155: }
1156: }
1157: }
1158:
1159: int
1160: in_stack(c, stack_top)
1161: register c, stack_top;
1162: {
1163: if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
1164: de_stack[--stack_top] = c;
1165: } else {
1166: switch( c ) {
1167: case '\n': de_stack[--stack_top] = 'n'; break;
1168: case '\t': de_stack[--stack_top] = 't'; break;
1169: case '\b': de_stack[--stack_top] = 'b'; break;
1170: case '\f': de_stack[--stack_top] = 'f'; break;
1171: case '\r': de_stack[--stack_top] = 'r'; break;
1172: case '\\': de_stack[--stack_top] = '\\'; break;
1173: default:
1174: de_stack[--stack_top] = '0' + c % 8;
1175: de_stack[--stack_top] = '0' + (c / 8) % 8;
1176: de_stack[--stack_top] = '0' + c / 64;
1177: break;
1178: }
1179: de_stack[--stack_top] = '\\';
1180: }
1181: return stack_top;
1182: }
1183: #endif /* DEBUG */
1184:
1185: writeerr()
1186: {
1187: perror ( ofname );
1188: unlink ( ofname );
1189: exit ( 1 );
1190: }
1191:
1192: copystat(ifname, ofname)
1193: char *ifname, *ofname;
1194: {
1195: struct stat statbuf;
1196: int mode;
1197: time_t timep[2];
1198:
1199: fclose(stdout);
1200: if (stat(ifname, &statbuf)) { /* Get stat on input file */
1201: perror(ifname);
1202: return;
1203: }
1204: if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) {
1205: if(quiet)
1206: fprintf(stderr, "%s: ", ifname);
1207: fprintf(stderr, " -- not a regular file: unchanged");
1208: exit_stat = 1;
1209: } else if (statbuf.st_nlink > 1) {
1210: if(quiet)
1211: fprintf(stderr, "%s: ", ifname);
1212: fprintf(stderr, " -- has %d other links: unchanged",
1213: statbuf.st_nlink - 1);
1214: exit_stat = 1;
1215: } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */
1216: if(!quiet)
1217: fprintf(stderr, " -- file unchanged");
1218: } else { /* ***** Successful Compression ***** */
1219: exit_stat = 0;
1220: mode = statbuf.st_mode & 07777;
1221: if (chmod(ofname, mode)) /* Copy modes */
1222: perror(ofname);
1223: chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */
1224: timep[0] = statbuf.st_atime;
1225: timep[1] = statbuf.st_mtime;
1226: utime(ofname, timep); /* Update last accessed and modified times */
1227: if (unlink(ifname)) /* Remove input file */
1228: perror(ifname);
1229: if(!quiet)
1230: fprintf(stderr, " -- replaced with %s", ofname);
1231: return; /* Successful return */
1232: }
1233:
1234: /* Unsuccessful return -- one of the tests failed */
1235: if (unlink(ofname))
1236: perror(ofname);
1237: }
1238: /*
1239: * This routine returns 1 if we are running in the foreground and stderr
1240: * is a tty.
1241: */
1242: foreground()
1243: {
1244: if(bgnd_flag) { /* background? */
1245: return(0);
1246: } else { /* foreground */
1247: if(isatty(2)) { /* and stderr is a tty */
1248: return(1);
1249: } else {
1250: return(0);
1251: }
1252: }
1253: }
1254:
1255: onintr ( )
1256: {
1257: unlink ( ofname );
1258: exit ( 1 );
1259: }
1260:
1261: oops ( ) /* wild pointer -- assume bad input */
1262: {
1263: if ( do_decomp == 1 )
1264: fprintf ( stderr, "uncompress: corrupt input\n" );
1265: unlink ( ofname );
1266: exit ( 1 );
1267: }
1268:
1269: cl_block () /* table clear for block compress */
1270: {
1271: register long int rat;
1272:
1273: checkpoint = in_count + CHECK_GAP;
1274: #ifdef DEBUG
1275: if ( debug ) {
1276: fprintf ( stderr, "count: %ld, ratio: ", in_count );
1277: prratio ( stderr, in_count, bytes_out );
1278: fprintf ( stderr, "\n");
1279: }
1280: #endif /* DEBUG */
1281:
1282: if(in_count > 0x007fffff) { /* shift will overflow */
1283: rat = bytes_out >> 8;
1284: if(rat == 0) { /* Don't divide by zero */
1285: rat = 0x7fffffff;
1286: } else {
1287: rat = in_count / rat;
1288: }
1289: } else {
1290: rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
1291: }
1292: if ( rat > ratio ) {
1293: ratio = rat;
1294: } else {
1295: ratio = 0;
1296: #ifdef DEBUG
1297: if(verbose)
1298: dump_tab(); /* dump string table */
1299: #endif
1300: cl_hash ( (count_int) hsize );
1301: free_ent = FIRST;
1302: clear_flg = 1;
1303: output ( (code_int) CLEAR );
1304: #ifdef DEBUG
1305: if(debug)
1306: fprintf ( stderr, "clear\n" );
1307: #endif /* DEBUG */
1308: }
1309: }
1310:
1311: cl_hash(hsize) /* reset code table */
1312: register count_int hsize;
1313: {
1314: #ifndef XENIX_16 /* Normal machine */
1315: register count_int *htab_p = htab+hsize;
1316: #else
1317: register j;
1318: register long k = hsize;
1319: register count_int *htab_p;
1320: #endif
1321: register long i;
1322: register long m1 = -1;
1323:
1324: #ifdef XENIX_16
1325: for(j=0; j<=8 && k>=0; j++,k-=8192) {
1326: i = 8192;
1327: if(k < 8192) {
1328: i = k;
1329: }
1330: htab_p = &(htab[j][i]);
1331: i -= 16;
1332: if(i > 0) {
1333: #else
1334: i = hsize - 16;
1335: #endif
1336: do { /* might use Sys V memset(3) here */
1337: *(htab_p-16) = m1;
1338: *(htab_p-15) = m1;
1339: *(htab_p-14) = m1;
1340: *(htab_p-13) = m1;
1341: *(htab_p-12) = m1;
1342: *(htab_p-11) = m1;
1343: *(htab_p-10) = m1;
1344: *(htab_p-9) = m1;
1345: *(htab_p-8) = m1;
1346: *(htab_p-7) = m1;
1347: *(htab_p-6) = m1;
1348: *(htab_p-5) = m1;
1349: *(htab_p-4) = m1;
1350: *(htab_p-3) = m1;
1351: *(htab_p-2) = m1;
1352: *(htab_p-1) = m1;
1353: htab_p -= 16;
1354: } while ((i -= 16) >= 0);
1355: #ifdef XENIX_16
1356: }
1357: }
1358: #endif
1359: for ( i += 16; i > 0; i-- )
1360: *--htab_p = m1;
1361: }
1362:
1363: prratio(stream, num, den)
1364: FILE *stream;
1365: long int num, den;
1366: {
1367: register int q; /* Doesn't need to be long */
1368:
1369: if(num > 214748L) { /* 2147483647/10000 */
1370: q = num / (den / 10000L);
1371: } else {
1372: q = 10000L * num / den; /* Long calculations, though */
1373: }
1374: if (q < 0) {
1375: putc('-', stream);
1376: q = -q;
1377: }
1378: fprintf(stream, "%d.%02d%%", q / 100, q % 100);
1379: }
1380:
1381: version()
1382: {
1383: fprintf(stderr, "%s\n", rcs_ident);
1384: fprintf(stderr, "Options: ");
1385: #ifdef vax
1386: fprintf(stderr, "vax, ");
1387: #endif
1388: #ifdef NO_UCHAR
1389: fprintf(stderr, "NO_UCHAR, ");
1390: #endif
1391: #ifdef SIGNED_COMPARE_SLOW
1392: fprintf(stderr, "SIGNED_COMPARE_SLOW, ");
1393: #endif
1394: #ifdef XENIX_16
1395: fprintf(stderr, "XENIX_16, ");
1396: #endif
1397: #ifdef COMPATIBLE
1398: fprintf(stderr, "COMPATIBLE, ");
1399: #endif
1400: #ifdef DEBUG
1401: fprintf(stderr, "DEBUG, ");
1402: #endif
1403: #ifdef BSD4_2
1404: fprintf(stderr, "BSD4_2, ");
1405: #endif
1406: fprintf(stderr, "BITS = %d\n", BITS);
1407: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.