Annotation of 43BSDReno/usr.bin/locate/code/locate.code.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Copyright (c) 1989 The Regents of the University of California.
        !             3:  * All rights reserved.
        !             4:  *
        !             5:  * This code is derived from software contributed to Berkeley by
        !             6:  * James A. Woods.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms are permitted
        !             9:  * provided that: (1) source distributions retain this entire copyright
        !            10:  * notice and comment, and (2) distributions including binaries display
        !            11:  * the following acknowledgement:  ``This product includes software
        !            12:  * developed by the University of California, Berkeley and its contributors''
        !            13:  * in the documentation or other materials provided with the distribution
        !            14:  * and in all advertising materials mentioning features or use of this
        !            15:  * software. Neither the name of the University nor the names of its
        !            16:  * contributors may be used to endorse or promote products derived
        !            17:  * from this software without specific prior written permission.
        !            18:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
        !            19:  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
        !            20:  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
        !            21:  */
        !            22: 
        !            23: #ifndef lint
        !            24: char copyright[] =
        !            25: "@(#) Copyright (c) 1989 The Regents of the University of California.\n\
        !            26:  All rights reserved.\n";
        !            27: #endif /* not lint */
        !            28: 
        !            29: #ifndef lint
        !            30: static char sccsid[] = "@(#)locate.code.c      4.8 (Berkeley) 6/1/90";
        !            31: #endif /* not lint */
        !            32: 
        !            33: /*
        !            34:  * PURPOSE:    sorted list compressor (works with a modified 'find'
        !            35:  *             to encode/decode a filename database)
        !            36:  *
        !            37:  * USAGE:      bigram < list > bigrams
        !            38:  *             process bigrams (see updatedb) > common_bigrams
        !            39:  *             code common_bigrams < list > squozen_list
        !            40:  *
        !            41:  * METHOD:     Uses 'front compression' (see ";login:", Volume 8, Number 1
        !            42:  *             February/March 1983, p. 8 ).  Output format is, per line, an
        !            43:  *             offset differential count byte followed by a partially bigram-
        !            44:  *             encoded ascii residue.  A bigram is a two-character sequence,
        !            45:  *             the first 128 most common of which are encoded in one byte.
        !            46:  *
        !            47:  * EXAMPLE:    For simple front compression with no bigram encoding,
        !            48:  *             if the input is...              then the output is...
        !            49:  *
        !            50:  *             /usr/src                         0 /usr/src
        !            51:  *             /usr/src/cmd/aardvark.c          8 /cmd/aardvark.c
        !            52:  *             /usr/src/cmd/armadillo.c        14 armadillo.c
        !            53:  *             /usr/tmp/zoo                     5 tmp/zoo
        !            54:  *
        !            55:  *     The codes are:
        !            56:  *
        !            57:  *     0-28    likeliest differential counts + offset to make nonnegative 
        !            58:  *     30      switch code for out-of-range count to follow in next word
        !            59:  *     128-255 bigram codes (128 most common, as determined by 'updatedb')
        !            60:  *     32-127  single character (printable) ascii residue (ie, literal)
        !            61:  *
        !            62:  * SEE ALSO:   updatedb.csh, bigram.c, find.c
        !            63:  * 
        !            64:  * AUTHOR:     James A. Woods, Informatics General Corp.,
        !            65:  *             NASA Ames Research Center, 10/82
        !            66:  */
        !            67: 
#include <sys/param.h>
#include <stdio.h>
#include <string.h>
#include "locate.h"
        !            71: 
        !            72: #define BGBUFSIZE      (NBG * 2)       /* bytes in the bigram table: NBG entries of two characters each */
        !            73: 
        !            74: char buf1[MAXPATHLEN] = " ";   /* path buffer; main() starts with oldpath here, so the first input line is compared against " " */
        !            75: char buf2[MAXPATHLEN];	/* path buffer; main() reads the first input line here, then alternates with buf1 */
        !            76: char bigrams[BGBUFSIZE + 1] = { 0 };	/* bigram table filled by main(); the extra byte holds the NUL that ends bgindex()'s scan */
        !            77: 
        !            78: main ( argc, argv )
        !            79:        int argc; char *argv[];
        !            80: {
        !            81:        register char *cp, *oldpath = buf1, *path = buf2;
        !            82:        int code, count, diffcount, oldcount = 0;
        !            83:        FILE *fp;
        !            84: 
        !            85:        if ((fp = fopen(argv[1], "r")) == NULL) {
        !            86:                printf("Usage: code common_bigrams < list > squozen_list\n");
        !            87:                exit(1);
        !            88:        }
        !            89:        /* first copy bigram array to stdout */
        !            90:        fgets ( bigrams, BGBUFSIZE + 1, fp );
        !            91:        fwrite ( bigrams, 1, BGBUFSIZE, stdout );
        !            92:        fclose( fp );
        !            93: 
        !            94:        while ( fgets ( path, sizeof(buf2), stdin ) != NULL ) {
        !            95:                /* truncate newline */
        !            96:                cp = path + strlen(path) - 1;
        !            97:                if (cp > path && *cp == '\n')
        !            98:                        *cp = '\0';
        !            99:                /* squelch characters that would botch the decoding */
        !           100:                for ( cp = path; *cp != NULL; cp++ ) {
        !           101:                        if ( (unsigned char)*cp >= PARITY )
        !           102:                                *cp &= PARITY-1;
        !           103:                        else if ( *cp <= SWITCH )
        !           104:                                *cp = '?';
        !           105:                }
        !           106:                /* skip longest common prefix */
        !           107:                for ( cp = path; *cp == *oldpath; cp++, oldpath++ )
        !           108:                        if ( *oldpath == NULL )
        !           109:                                break;
        !           110:                count = cp - path;
        !           111:                diffcount = count - oldcount + OFFSET;
        !           112:                oldcount = count;
        !           113:                if ( diffcount < 0 || diffcount > 2*OFFSET ) {
        !           114:                        putc ( SWITCH, stdout );
        !           115:                        putw ( diffcount, stdout );
        !           116:                }
        !           117:                else
        !           118:                        putc ( diffcount, stdout );     
        !           119: 
        !           120:                while ( *cp != NULL ) {
        !           121:                        if ( *(cp + 1) == NULL ) {
        !           122:                                putchar ( *cp );
        !           123:                                break;
        !           124:                        }
        !           125:                        if ( (code = bgindex ( cp )) < 0 ) {
        !           126:                                putchar ( *cp++ );
        !           127:                                putchar ( *cp++ );
        !           128:                        }
        !           129:                        else {  /* found, so mark byte with parity bit */
        !           130:                                putchar ( (code / 2) | PARITY );
        !           131:                                cp += 2;
        !           132:                        }
        !           133:                }
        !           134:                if ( path == buf1 )             /* swap pointers */
        !           135:                        path = buf2, oldpath = buf1;
        !           136:                else
        !           137:                        path = buf1, oldpath = buf2;
        !           138:        }
        !           139: }
        !           140: 
        !           141: bgindex ( bg )                 /* return location of bg in bigrams or -1 */
        !           142:        char *bg;
        !           143: {
        !           144:        register char *p;
        !           145:        register char bg0 = bg[0], bg1 = bg[1];
        !           146: 
        !           147:        for ( p = bigrams; *p != NULL; p++ )
        !           148:                if ( *p++ == bg0 && *p == bg1 )
        !           149:                        break;
        !           150:        return ( *p == NULL ? -1 : --p - bigrams );
        !           151: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.