Annotation of 43BSD/usr.bin/find/code.c, revision 1.1.1.1

1.1       root        1: #ifndef lint
                      2: static char sccsid[] = "@(#)code.c     4.2     (Berkeley)      7/21/83";
                      3: #endif not lint
                      4: 
                      5: /*
                      6:  * PURPOSE:    sorted list compressor (works with a modified 'find'
                      7:  *             to encode/decode a filename database)
                      8:  *
                      9:  * USAGE:      bigram < list > bigrams
                     10:  *             process bigrams (see updatedb) > common_bigrams
                     11:  *             code common_bigrams < list > squozen_list
                     12:  *
                     13:  * METHOD:     Uses 'front compression' (see ";login:", March 1983, p. 8 ).
                     14:  *             Output format is, per line, an offset differential count byte
                     15:  *             followed by a partially bigram-encoded ascii residue. 
                     16:  *
                     17:  *     The codes are:
                     18:  *
                     19:  *     0-28    likeliest differential counts + offset to make nonnegative 
                     20:  *     30      escape code for out-of-range count to follow in next word
                     21:  *     128-255 bigram codes, (128 most common, as determined by 'updatedb')
                     22:  *     32-127  single character (printable) ascii residue
                     23:  *
                     24:  * SEE ALSO:   updatedb.csh, bigram.c, find.c
                     25:  * 
                     26:  * AUTHOR:     James A. Woods, Informatics General Corp.,
                     27:  *             NASA Ames Research Center, 10/82
                     28:  */
                     29: 
                     30: #include <stdio.h>
                     31: 
                     32: #define MAXPATH 1024           /* maximum pathname length */
                     33: #define        RESET   30              /* switch code */
                     34: 
                     35: char path[MAXPATH];
                     36: char oldpath[MAXPATH] = " ";   
                     37: char bigrams[257] = { 0 };
                     38: 
                     39: main ( argc, argv )
                     40:        int argc; char *argv[];
                     41: {
                     42:        int count, oldcount, diffcount;
                     43:        int j, code;
                     44:        char bigram[3];
                     45:        FILE *fp;
                     46: 
                     47:        oldcount = 0;
                     48:        bigram[2] = NULL;
                     49: 
                     50:        if ((fp = fopen(argv[1], "r")) == NULL) {
                     51:                printf("Usage: code common_bigrams < list > coded_list\n");
                     52:                exit(1);
                     53:        }
                     54:        fgets ( bigrams, 257, fp );
                     55:        fwrite ( bigrams, 1, 256, stdout );
                     56: 
                     57:        while ( gets ( path ) != NULL ) {
                     58:                /*
                     59:                   squelch unprintable chars so as not to botch decoding
                     60:                */
                     61:                for ( j = 0; path[j] != NULL; j++ ) {   
                     62:                        path[j] &= 0177;                
                     63:                        if ( path[j] < 040 || path[j] == 0177 )
                     64:                                path[j] = '?';
                     65:                }
                     66:                count = prefix_length ( oldpath, path );
                     67:                diffcount = count - oldcount;
                     68:                if ( (diffcount < -14) || (diffcount > 14) ) {
                     69:                        putc ( RESET, stdout );
                     70:                        putw ( diffcount + 14, stdout );
                     71:                }
                     72:                else
                     73:                        putc ( diffcount + 14, stdout );        
                     74: 
                     75:                for ( j = count; path[j] != NULL; j += 2 ) {
                     76:                        if ( path[j + 1] == NULL ) {
                     77:                                putchar ( path[j] );
                     78:                                break;
                     79:                        }
                     80:                        bigram[0] = path[j];
                     81:                        bigram[1] = path[j + 1];
                     82:                        /*
                     83:                            linear search for specific bigram in string table
                     84:                        */
                     85:                        if ( (code = strindex ( bigrams, bigram )) % 2 == 0 )
                     86:                                putchar ( (code / 2) | 0200 );  
                     87:                        else            
                     88:                                fputs ( bigram, stdout );
                     89:                }
                     90:                strcpy ( oldpath, path );       
                     91:                oldcount = count;
                     92:        }
                     93: }
                     94: 
                     95: strindex ( string, pattern )   /* return location of pattern in string or -1 */
                     96:        char *string, *pattern;
                     97: {
                     98:        register char *s, *p, *q;
                     99: 
                    100:        for ( s = string; *s != NULL; s++ ) 
                    101:                if ( *s == *pattern ) {         /* fast first char check */
                    102:                        for ( p = pattern + 1, q = s + 1; *p != NULL; p++, q++ )
                    103:                                if ( *q != *p )
                    104:                                        break;
                    105:                        if ( *p == NULL )       
                    106:                                return ( q - strlen ( pattern ) - string );
                    107:                }
                    108:        return ( -1 );
                    109: }
                    110: 
                    111: prefix_length ( s1, s2 )       /* return length of longest common prefix */
                    112:        char *s1, *s2;          /* ... of strings s1 and s2 */
                    113: {
                    114:        register char *start;
                    115: 
                    116:        for ( start = s1; *s1 == *s2; s1++, s2++ )      
                    117:                if ( *s1 == NULL )              
                    118:                        break;
                    119:        return ( s1 - start );
                    120: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.