|
|
1.1 ! root 1: #ifndef lint ! 2: static char sccsid[] = "@(#)code.c 4.2 (Berkeley) 7/21/83"; ! 3: #endif not lint ! 4: ! 5: /* ! 6: * PURPOSE: sorted list compressor (works with a modified 'find' ! 7: * to encode/decode a filename database) ! 8: * ! 9: * USAGE: bigram < list > bigrams ! 10: * process bigrams (see updatedb) > common_bigrams ! 11: * code common_bigrams < list > squozen_list ! 12: * ! 13: * METHOD: Uses 'front compression' (see ";login:", March 1983, p. 8 ). ! 14: * Output format is, per line, an offset differential count byte ! 15: * followed by a partially bigram-encoded ascii residue. ! 16: * ! 17: * The codes are: ! 18: * ! 19: * 0-28 likeliest differential counts + offset to make nonnegative ! 20: * 30 escape code for out-of-range count to follow in next word ! 21: * 128-255 bigram codes, (128 most common, as determined by 'updatedb') ! 22: * 32-127 single character (printable) ascii residue ! 23: * ! 24: * SEE ALSO: updatedb.csh, bigram.c, find.c ! 25: * ! 26: * AUTHOR: James A. Woods, Informatics General Corp., ! 27: * NASA Ames Research Center, 10/82 ! 28: */ ! 29: ! 30: #include <stdio.h> ! 31: ! 32: #define MAXPATH 1024 /* maximum pathname length */ ! 33: #define RESET 30 /* switch code */ ! 34: ! 35: char path[MAXPATH]; ! 36: char oldpath[MAXPATH] = " "; ! 37: char bigrams[257] = { 0 }; ! 38: ! 39: main ( argc, argv ) ! 40: int argc; char *argv[]; ! 41: { ! 42: int count, oldcount, diffcount; ! 43: int j, code; ! 44: char bigram[3]; ! 45: FILE *fp; ! 46: ! 47: oldcount = 0; ! 48: bigram[2] = NULL; ! 49: ! 50: if ((fp = fopen(argv[1], "r")) == NULL) { ! 51: printf("Usage: code common_bigrams < list > coded_list\n"); ! 52: exit(1); ! 53: } ! 54: fgets ( bigrams, 257, fp ); ! 55: fwrite ( bigrams, 1, 256, stdout ); ! 56: ! 57: while ( gets ( path ) != NULL ) { ! 58: /* ! 59: squelch unprintable chars so as not to botch decoding ! 60: */ ! 61: for ( j = 0; path[j] != NULL; j++ ) { ! 62: path[j] &= 0177; ! 63: if ( path[j] < 040 || path[j] == 0177 ) ! 64: path[j] = '?'; ! 65: } ! 66: count = prefix_length ( oldpath, path ); ! 67: diffcount = count - oldcount; ! 68: if ( (diffcount < -14) || (diffcount > 14) ) { ! 69: putc ( RESET, stdout ); ! 70: putw ( diffcount + 14, stdout ); ! 71: } ! 72: else ! 73: putc ( diffcount + 14, stdout ); ! 74: ! 75: for ( j = count; path[j] != NULL; j += 2 ) { ! 76: if ( path[j + 1] == NULL ) { ! 77: putchar ( path[j] ); ! 78: break; ! 79: } ! 80: bigram[0] = path[j]; ! 81: bigram[1] = path[j + 1]; ! 82: /* ! 83: linear search for specific bigram in string table ! 84: */ ! 85: if ( (code = strindex ( bigrams, bigram )) % 2 == 0 ) ! 86: putchar ( (code / 2) | 0200 ); ! 87: else ! 88: fputs ( bigram, stdout ); ! 89: } ! 90: strcpy ( oldpath, path ); ! 91: oldcount = count; ! 92: } ! 93: } ! 94: ! 95: strindex ( string, pattern ) /* return location of pattern in string or -1 */ ! 96: char *string, *pattern; ! 97: { ! 98: register char *s, *p, *q; ! 99: ! 100: for ( s = string; *s != NULL; s++ ) ! 101: if ( *s == *pattern ) { /* fast first char check */ ! 102: for ( p = pattern + 1, q = s + 1; *p != NULL; p++, q++ ) ! 103: if ( *q != *p ) ! 104: break; ! 105: if ( *p == NULL ) ! 106: return ( q - strlen ( pattern ) - string ); ! 107: } ! 108: return ( -1 ); ! 109: } ! 110: ! 111: prefix_length ( s1, s2 ) /* return length of longest common prefix */ ! 112: char *s1, *s2; /* ... of strings s1 and s2 */ ! 113: { ! 114: register char *start; ! 115: ! 116: for ( start = s1; *s1 == *s2; s1++, s2++ ) ! 117: if ( *s1 == NULL ) ! 118: break; ! 119: return ( s1 - start ); ! 120: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.