GNUtools/libg++/libio/dbz/dbz.c - annotate

Return to dbz.c CVS log
Up to [Apple XNU] / GNUtools / libg++ / libio / dbz
Annotation of GNUtools/libg++/libio/dbz/dbz.c, revision 1.1.1.1

1.1       root        1: /*
                      2: 
                      3: dbz.c  V3.2
                      4: 
                      5: Copyright 1988 Jon Zeeff ([email protected])
                      6: You can use this code in any manner, as long as you leave my name on it
                      7: and don't hold me responsible for any problems with it.
                      8: 
                      9: Hacked on by [email protected] (David Butler); Sun Jun  5 00:27:08 CDT 1988
                     10: 
                     11: Various improvments + INCORE by [email protected] (Mark Moraes)
                     12: 
                     13: Major reworking by Henry Spencer as part of the C News project.
                     14: 
                     15: These routines replace dbm as used by the usenet news software
                     16: (it's not a full dbm replacement by any means).  It's fast and
                     17: simple.  It contains no AT&T code.
                     18: 
                     19: In general, dbz's files are 1/20 the size of dbm's.  Lookup performance
                     20: is somewhat better, while file creation is spectacularly faster, especially
                     21: if the incore facility is used.
                     22: 
                     23: */
                     24: 
                     25: #include <stdio.h>
                     26: #include <sys/types.h>
                     27: #include <string.h>
                     28: #include <ctype.h>
                     29: #include <errno.h>
                     30: #ifndef __STDC__
                     31: extern int errno;
                     32: #endif
                     33: #include <dbz.h>
                     34: 
                     35: /*
                     36:  * #ifdef index.  "LIA" = "leave it alone unless you know what you're doing".
                     37:  *
                     38:  * FUNNYSEEKS  SEEK_SET is not 0, get it from <unistd.h>
                     39:  * INDEX_SIZE  backward compatibility with old dbz; avoid using this
                     40:  * NMEMORY     number of days of memory for use in sizing new table (LIA)
                     41:  * INCORE      backward compatibility with old dbz; use dbzincore() instead
                     42:  * DBZDEBUG    enable debugging
                     43:  * DEFSIZE     default table size (not as critical as in old dbz)
                     44:  * OLDBNEWS    default case mapping as in old B News; set NOBUFFER
                     45:  * BNEWS       default case mapping as in current B News; set NOBUFFER
                     46:  * DEFCASE     default case-map algorithm selector
                     47:  * NOTAGS      fseek offsets are strange, do not do tagging (see below)
                     48:  * NPAGBUF     size of .pag buffer, in longs (LIA)
                     49:  * SHISTBUF    size of ASCII-file buffer, in bytes (LIA)
                     50:  * MAXRUN      length of run which shifts to next table (see below) (LIA)
                     51:  * OVERFLOW    long-int arithmetic overflow must be avoided, will trap
                     52:  * NOBUFFER    do not buffer hash-table i/o, B News locking is defective
                     53:  */
                     54: 
                     55: #ifdef FUNNYSEEKS
                     56: #include <unistd.h>
                     57: #else
                     58: #define        SEEK_SET        0
                     59: #endif
                     60: #ifdef OVERFLOW
                     61: #include <limits.h>
                     62: #endif
                     63: 
                     64: static int dbzversion = 3;     /* for validating .dir file format */
                     65: 
                     66: /*
                     67:  * The dbz database exploits the fact that when news stores a <key,value>
                     68:  * tuple, the `value' part is a seek offset into a text file, pointing to
                     69:  * a copy of the `key' part.  This avoids the need to store a copy of
                     70:  * the key in the dbz files.  However, the text file *must* exist and be
                     71:  * consistent with the dbz files, or things will fail.
                     72:  *
                     73:  * The basic format of the database is a simple hash table containing the
                     74:  * values.  A value is stored by indexing into the table using a hash value
                     75:  * computed from the key; collisions are resolved by linear probing (just
                     76:  * search forward for an empty slot, wrapping around to the beginning of
                     77:  * the table if necessary).  Linear probing is a performance disaster when
                     78:  * the table starts to get full, so a complication is introduced.  The
                     79:  * database is actually one *or more* tables, stored sequentially in the
                     80:  * .pag file, and the length of linear-probe sequences is limited.  The
                     81:  * search (for an existing item or an empty slot) always starts in the
                     82:  * first table, and whenever MAXRUN probes have been done in table N,
                     83:  * probing continues in table N+1.  This behaves reasonably well even in
                     84:  * cases of massive overflow.  There are some other small complications
                     85:  * added, see comments below.
                     86:  *
                     87:  * The table size is fixed for any particular database, but is determined
                     88:  * dynamically when a database is rebuilt.  The strategy is to try to pick
                     89:  * the size so the first table will be no more than 2/3 full, that being
                     90:  * slightly before the point where performance starts to degrade.  (It is
                     91:  * desirable to be a bit conservative because the overflow strategy tends
                     92:  * to produce files with holes in them, which is a nuisance.)
                     93:  */
                     94: 
                     95: /*
                     96:  * The following is for backward compatibility.
                     97:  */
                     98: #ifdef INDEX_SIZE
                     99: #define        DEFSIZE INDEX_SIZE
                    100: #endif
                    101: 
                    102: /*
                    103:  * ANSI C says an offset into a file is a long, not an off_t, for some
                    104:  * reason.  This actually does simplify life a bit, but it's still nice
                    105:  * to have a distinctive name for it.  Beware, this is just for readability,
                    106:  * don't try to change this.
                    107:  */
                    108: #define        of_t    long
                    109: #define        SOF     (sizeof(of_t))
                    110: 
                    111: /*
                    112:  * We assume that unused areas of a binary file are zeros, and that the
                    113:  * bit pattern of `(of_t)0' is all zeros.  The alternative is rather
                    114:  * painful file initialization.  Note that okayvalue(), if OVERFLOW is
                    115:  * defined, knows what value of an offset would cause overflow.
                    116:  */
                    117: #define        VACANT          ((of_t)0)
                    118: #define        BIAS(o)         ((o)+1)         /* make any valid of_t non-VACANT */
                    119: #define        UNBIAS(o)       ((o)-1)         /* reverse BIAS() effect */
                    120: 
                    121: /*
                    122:  * In a Unix implementation, or indeed any in which an of_t is a byte
                    123:  * count, there are a bunch of high bits free in an of_t.  There is a
                    124:  * use for them.  Checking a possible hit by looking it up in the base
                    125:  * file is relatively expensive, and the cost can be dramatically reduced
                    126:  * by using some of those high bits to tag the value with a few more bits
                    127:  * of the key's hash.  This detects most false hits without the overhead of
                    128:  * seek+read+strcmp.  We use the top bit to indicate whether the value is
                    129:  * tagged or not, and don't tag a value which is using the tag bits itself.
                    130:  * We're in trouble if the of_t representation wants to use the top bit.
                    131:  * The actual bitmasks and offset come from the configuration stuff,
                    132:  * which permits fiddling with them as necessary, and also suppressing
                    133:  * them completely (by defining the masks to 0).  We build pre-shifted
                    134:  * versions of the masks for efficiency.
                    135:  */
                    136: static of_t tagbits;           /* pre-shifted tag mask */
                    137: static of_t taghere;           /* pre-shifted tag-enable bit */
                    138: static of_t tagboth;           /* tagbits|taghere */
                    139: #define        HASTAG(o)       ((o)&taghere)
                    140: #define        TAG(o)          ((o)&tagbits)
                    141: #define        NOTAG(o)        ((o)&~tagboth)
                    142: #define        CANTAG(o)       (((o)&tagboth) == 0)
                    143: #define        MKTAG(v)        (((v)<<conf.tagshift)&tagbits)
                    144: 
                    145: /*
                    146:  * A new, from-scratch database, not built as a rebuild of an old one,
                    147:  * needs to know table size, casemap algorithm, and tagging.  Normally
                    148:  * the user supplies this info, but there have to be defaults.
                    149:  */
                    150: #ifndef DEFSIZE
                    151: #define        DEFSIZE 120011          /* 300007 might be better */
                    152: #endif
                    153: #ifdef OLDBNEWS
                    154: #define        DEFCASE '0'             /* B2.10 -- no mapping */
                    155: #define        NOBUFFER                /* B News locking is defective */
                    156: #endif
                    157: #ifdef BNEWS
                    158: #define        DEFCASE '='             /* B2.11 -- all mapped */
                    159: #define        NOBUFFER                /* B News locking is defective */
                    160: #endif
                    161: #ifndef DEFCASE                        /* C News compatibility is the default */
                    162: #define        DEFCASE 'C'             /* C News -- RFC822 mapping */
                    163: #endif
                    164: #ifndef NOTAGS
                    165: #define        TAGENB  0x80            /* tag enable is top bit, tag is next 7 */
                    166: #define        TAGMASK 0x7f
                    167: #define        TAGSHIFT        24
                    168: #else
                    169: #define        TAGENB  0               /* no tags */
                    170: #define        TAGMASK 0
                    171: #define        TAGSHIFT        0
                    172: #endif
                    173: 
                    174: /*
                    175:  * We read configuration info from the .dir file into this structure,
                    176:  * so we can avoid wired-in assumptions for an existing database.
                    177:  *
                    178:  * Among the info is a record of recent peak usages, so that a new table
                    179:  * size can be chosen intelligently when rebuilding.  10 is a good
                    180:  * number of usages to keep, since news displays marked fluctuations
                    181:  * in volume on a 7-day cycle.
                    182:  */
                    183: struct dbzconfig {
                    184:        int olddbz;             /* .dir file empty but .pag not? */
                    185:        of_t tsize;             /* table size */
                    186: #      ifndef NMEMORY
                    187: #      define  NMEMORY 10      /* # days of use info to remember */
                    188: #      endif
                    189: #      define  NUSEDS  (1+NMEMORY)
                    190:        of_t used[NUSEDS];      /* entries used today, yesterday, ... */
                    191:        int valuesize;          /* size of table values, == SOF */
                    192:        int bytemap[SOF];       /* byte-order map */
                    193:        char casemap;           /* case-mapping algorithm (see cipoint()) */
                    194:        char fieldsep;          /* field separator in base file, if any */
                    195:        of_t tagenb;            /* unshifted tag-enable bit */
                    196:        of_t tagmask;           /* unshifted tag mask */
                    197:        int tagshift;           /* shift count for tagmask and tagenb */
                    198: };
                    199: static struct dbzconfig conf;
                    200: static int getconf();
                    201: static long getno();
                    202: static int putconf();
                    203: static void mybytemap();
                    204: static of_t bytemap();
                    205: 
                    206: /* 
                    207:  * For a program that makes many, many references to the database, it
                    208:  * is a large performance win to keep the table in core, if it will fit.
                    209:  * Note that this does hurt robustness in the event of crashes, and
                    210:  * dbmclose() *must* be called to flush the in-core database to disk.
                    211:  * The code is prepared to deal with the possibility that there isn't
                    212:  * enough memory.  There *is* an assumption that a size_t is big enough
                    213:  * to hold the size (in bytes) of one table, so dbminit() tries to figure
                    214:  * out whether this is possible first.
                    215:  *
                    216:  * The preferred way to ask for an in-core table is to do dbzincore(1)
                    217:  * before dbminit().  The default is not to do it, although -DINCORE
                    218:  * overrides this for backward compatibility with old dbz.
                    219:  *
                    220:  * We keep only the first table in core.  This greatly simplifies the
                    221:  * code, and bounds memory demand.  Furthermore, doing this is a large
                    222:  * performance win even in the event of massive overflow.
                    223:  */
                    224: #ifdef INCORE
                    225: static int incore = 1;
                    226: #else
                    227: static int incore = 0;
                    228: #endif
                    229: 
                    230: /*
                    231:  * Stdio buffer for .pag reads.  Buffering more than about 16 does not help
                    232:  * significantly at the densities we try to maintain, and the much larger
                    233:  * buffers that most stdios default to are much more expensive to fill.
                    234:  * With small buffers, stdio is performance-competitive with raw read(),
                    235:  * and it's much more portable.
                    236:  */
                    237: #ifndef NPAGBUF
                    238: #define        NPAGBUF 16
                    239: #endif
                    240: #ifndef NOBUFFER
                    241: #ifdef _IOFBF
                    242: static of_t pagbuf[NPAGBUF];   /* only needed if !NOBUFFER && _IOFBF */
                    243: #endif
                    244: #endif
                    245: 
                    246: /*
                    247:  * Stdio buffer for base-file reads.  Message-IDs (all news ever needs to
                    248:  * read) are essentially never longer than 64 bytes, and the typical stdio
                    249:  * buffer is so much larger that it is much more expensive to fill.
                    250:  */
                    251: #ifndef SHISTBUF
                    252: #define        SHISTBUF        64
                    253: #endif
                    254: #ifdef _IOFBF
                    255: static char basebuf[SHISTBUF];         /* only needed if _IOFBF exists */
                    256: #endif
                    257: 
                    258: /*
                    259:  * Data structure for recording info about searches.
                    260:  */
                    261: struct searcher {
                    262:        of_t place;             /* current location in file */
                    263:        int tabno;              /* which table we're in */
                    264:        int run;                /* how long we'll stay in this table */
                    265: #              ifndef MAXRUN
                    266: #              define  MAXRUN  100
                    267: #              endif
                    268:        long hash;              /* the key's hash code (for optimization) */
                    269:        of_t tag;               /* tag we are looking for */
                    270:        int seen;               /* have we examined current location? */
                    271:        int aborted;            /* has i/o error aborted search? */
                    272: };
                    273: static void start();
                    274: #define        FRESH   ((struct searcher *)NULL)
                    275: static of_t search();
                    276: #define        NOTFOUND        ((of_t)-1)
                    277: static int okayvalue();
                    278: static int set();
                    279: 
                    280: /*
                    281:  * Arguably the searcher struct for a given routine ought to be local to
                    282:  * it, but a fetch() is very often immediately followed by a store(), and
                    283:  * in some circumstances it is a useful performance win to remember where
                    284:  * the fetch() completed.  So we use a global struct and remember whether
                    285:  * it is current.
                    286:  */
                    287: static struct searcher srch;
                    288: static struct searcher *prevp; /* &srch or FRESH */
                    289: 
                    290: /* byte-ordering stuff */
                    291: static int mybmap[SOF];                        /* my byte order (see mybytemap()) */
                    292: static int bytesame;                   /* is database order same as mine? */
                    293: #define        MAPIN(o)        ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
                    294: #define        MAPOUT(o)       ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
                    295: 
                    296: /*
                    297:  * The double parentheses needed to make this work are ugly, but the
                    298:  * alternative (under most compilers) is to pack around 2K of unused
                    299:  * strings -- there's just no way to get rid of them.
                    300:  */
                    301: static int debug;                      /* controlled by dbzdebug() */
                    302: #ifdef DBZDEBUG
                    303: #define DEBUG(args) if (debug) { (void) printf args ; }
                    304: #else
                    305: #define        DEBUG(args)     ;
                    306: #endif
                    307: 
                    308: /* externals used */
                    309: extern char *malloc();
                    310: extern char *calloc();
                    311: extern void free();            /* ANSI C; some old implementations say int */
                    312: extern int atoi();
                    313: extern long atol();
                    314: 
                    315: /* misc. forwards */
                    316: static long hash();
                    317: static void crcinit();
                    318: static char *cipoint();
                    319: static char *mapcase();
                    320: static int isprime();
                    321: static FILE *latebase();
                    322: 
                    323: /* file-naming stuff */
                    324: static char dir[] = ".dir";
                    325: static char pag[] = ".pag";
                    326: static char *enstring();
                    327: 
                    328: /* central data structures */
                    329: static FILE *basef;            /* descriptor for base file */
                    330: static char *basefname;                /* name for not-yet-opened base file */
                    331: static FILE *dirf;             /* descriptor for .dir file */
                    332: static int dirronly;           /* dirf open read-only? */
                    333: static FILE *pagf = NULL;      /* descriptor for .pag file */
                    334: static of_t pagpos;            /* posn in pagf; only search may set != -1 */
                    335: static int pagronly;           /* pagf open read-only? */
                    336: static of_t *corepag;          /* incore version of .pag file, if any */
                    337: static FILE *bufpagf;          /* well-buffered pagf, for incore rewrite */
                    338: static of_t *getcore();
                    339: static int putcore();
                    340: static int written;            /* has a store() been done? */
                    341: 
                    342: /*
                    343:  - dbzfresh - set up a new database, no historical info
                    344:  */
                    345: int                            /* 0 success, -1 failure */
                    346: dbzfresh(name, size, fs, cmap, tagmask)
                    347: char *name;                    /* base name; .dir and .pag must exist */
                    348: long size;                     /* table size (0 means default) */
                    349: int fs;                                /* field-separator character in base file */
                    350: int cmap;                      /* case-map algorithm (0 means default) */
                    351: of_t tagmask;                  /* 0 default, 1 no tags */
                    352: {
                    353:        register char *fn;
                    354:        struct dbzconfig c;
                    355:        register of_t m;
                    356:        register FILE *f;
                    357: 
                    358:        if (pagf != NULL) {
                    359:                DEBUG(("dbzfresh: database already open\n"));
                    360:                return(-1);
                    361:        }
                    362:        if (size != 0 && size < 2) {
                    363:                DEBUG(("dbzfresh: preposterous size (%ld)\n", size));
                    364:                return(-1);
                    365:        }
                    366: 
                    367:        /* get default configuration */
                    368:        if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)
                    369:                return(-1);     /* "can't happen" */
                    370: 
                    371:        /* and mess with it as specified */
                    372:        if (size != 0)
                    373:                c.tsize = size;
                    374:        c.fieldsep = fs;
                    375:        switch (cmap) {
                    376:        case 0:
                    377:        case '0':
                    378:        case 'B':               /* 2.10 compat */
                    379:                c.casemap = '0';        /* '\0' nicer, but '0' printable! */
                    380:                break;
                    381:        case '=':
                    382:        case 'b':               /* 2.11 compat */
                    383:                c.casemap = '=';
                    384:                break;
                    385:        case 'C':
                    386:                c.casemap = 'C';
                    387:                break;
                    388:        case '?':
                    389:                c.casemap = DEFCASE;
                    390:                break;
                    391:        default:
                    392:                DEBUG(("dbzfresh case map `%c' unknown\n", cmap));
                    393:                return(-1);
                    394:                break;
                    395:        }
                    396:        switch (tagmask) {
                    397:        case 0:                 /* default */
                    398:                break;
                    399:        case 1:                 /* no tags */
                    400:                c.tagshift = 0;
                    401:                c.tagmask = 0;
                    402:                c.tagenb = 0;
                    403:                break;
                    404:        default:
                    405:                m = tagmask;
                    406:                c.tagshift = 0;
                    407:                while (!(m&01)) {
                    408:                        m >>= 1;
                    409:                        c.tagshift++;
                    410:                }
                    411:                c.tagmask = m;
                    412:                c.tagenb = (m << 1) & ~m;
                    413:                break;
                    414:        }
                    415: 
                    416:        /* write it out */
                    417:        fn = enstring(name, dir);
                    418:        if (fn == NULL)
                    419:                return(-1);
                    420:        f = fopen(fn, "w");
                    421:        free(fn);
                    422:        if (f == NULL) {
                    423:                DEBUG(("dbzfresh: unable to write config\n"));
                    424:                return(-1);
                    425:        }
                    426:        if (putconf(f, &c) < 0) {
                    427:                (void) fclose(f);
                    428:                return(-1);
                    429:        }
                    430:        if (fclose(f) == EOF) {
                    431:                DEBUG(("dbzfresh: fclose failure\n"));
                    432:                return(-1);
                    433:        }
                    434: 
                    435:        /* create/truncate .pag */
                    436:        fn = enstring(name, pag);
                    437:        if (fn == NULL)
                    438:                return(-1);
                    439:        f = fopen(fn, "w");
                    440:        free(fn);
                    441:        if (f == NULL) {
                    442:                DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
                    443:                return(-1);
                    444:        } else
                    445:                (void) fclose(f);
                    446: 
                    447:        /* and punt to dbminit for the hard work */
                    448:        return(dbminit(name));
                    449: }
                    450: 
                    451: /*
                    452:  - dbzsize - what's a good table size to hold this many entries?
                    453:  */
                    454: long
                    455: dbzsize(contents)
                    456: long contents;                 /* 0 means what's the default */
                    457: {
                    458:        register long n;
                    459: 
                    460:        if (contents <= 0) {    /* foulup or default inquiry */
                    461:                DEBUG(("dbzsize: preposterous input (%ld)\n", contents));
                    462:                return(DEFSIZE);
                    463:        }
                    464:        n = (contents/2)*3;     /* try to keep table at most 2/3 full */
                    465:        if (!(n&01))            /* make it odd */
                    466:                n++;
                    467:        DEBUG(("dbzsize: tentative size %ld\n", n));
                    468:        while (!isprime(n))     /* and look for a prime */
                    469:                n += 2;
                    470:        DEBUG(("dbzsize: final size %ld\n", n));
                    471: 
                    472:        return(n);
                    473: }
                    474: 
                    475: /*
                    476:  - isprime - is a number prime?
                    477:  *
                    478:  * This is not a terribly efficient approach.
                    479:  */
                    480: static int                     /* predicate */
                    481: isprime(x)
                    482: register long x;
                    483: {
                    484:        static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
                    485:        register int *ip;
                    486:        register long div;
                    487:        register long stop;
                    488: 
                    489:        /* hit the first few primes quickly to eliminate easy ones */
                    490:        /* this incidentally prevents ridiculously small tables */
                    491:        for (ip = quick; (div = *ip) != 0; ip++)
                    492:                if (x%div == 0) {
                    493:                        DEBUG(("isprime: quick result on %ld\n", (long)x));
                    494:                        return(0);
                    495:                }
                    496: 
                    497:        /* approximate square root of x */
                    498:        for (stop = x; x/stop < stop; stop >>= 1)
                    499:                continue;
                    500:        stop <<= 1;
                    501: 
                    502:        /* try odd numbers up to stop */
                    503:        for (div = *--ip; div < stop; div += 2)
                    504:                if (x%div == 0)
                    505:                        return(0);
                    506: 
                    507:        return(1);
                    508: }
                    509: 
                    510: /*
                    511:  - dbzagain - set up a new database to be a rebuild of an old one
                    512:  */
                    513: int                            /* 0 success, -1 failure */
                    514: dbzagain(name, oldname)
                    515: char *name;                    /* base name; .dir and .pag must exist */
                    516: char *oldname;                 /* base name; all must exist */
                    517: {
                    518:        register char *fn;
                    519:        struct dbzconfig c;
                    520:        register int i;
                    521:        register long top;
                    522:        register FILE *f;
                    523:        register int newtable;
                    524:        register of_t newsize;
                    525: 
                    526:        if (pagf != NULL) {
                    527:                DEBUG(("dbzagain: database already open\n"));
                    528:                return(-1);
                    529:        }
                    530: 
                    531:        /* pick up the old configuration */
                    532:        fn = enstring(oldname, dir);
                    533:        if (fn == NULL)
                    534:                return(-1);
                    535:        f = fopen(fn, "r");
                    536:        free(fn);
                    537:        if (f == NULL) {
                    538:                DEBUG(("dbzagain: cannot open old .dir file\n"));
                    539:                return(-1);
                    540:        }
                    541:        i = getconf(f, (FILE *)NULL, &c);
                    542:        (void) fclose(f);
                    543:        if (i < 0) {
                    544:                DEBUG(("dbzagain: getconf failed\n"));
                    545:                return(-1);
                    546:        }
                    547: 
                    548:        /* tinker with it */
                    549:        top = 0;
                    550:        newtable = 0;
                    551:        for (i = 0; i < NUSEDS; i++) {
                    552:                if (top < c.used[i])
                    553:                        top = c.used[i];
                    554:                if (c.used[i] == 0)
                    555:                        newtable = 1;   /* hasn't got full usage history yet */
                    556:        }
                    557:        if (top == 0) {
                    558:                DEBUG(("dbzagain: old table has no contents!\n"));
                    559:                newtable = 1;
                    560:        }
                    561:        for (i = NUSEDS-1; i > 0; i--)
                    562:                c.used[i] = c.used[i-1];
                    563:        c.used[0] = 0;
                    564:        newsize = dbzsize(top);
                    565:        if (!newtable || newsize > c.tsize)     /* don't shrink new table */
                    566:                c.tsize = newsize;
                    567: 
                    568:        /* write it out */
                    569:        fn = enstring(name, dir);
                    570:        if (fn == NULL)
                    571:                return(-1);
                    572:        f = fopen(fn, "w");
                    573:        free(fn);
                    574:        if (f == NULL) {
                    575:                DEBUG(("dbzagain: unable to write new .dir\n"));
                    576:                return(-1);
                    577:        }
                    578:        i = putconf(f, &c);
                    579:        (void) fclose(f);
                    580:        if (i < 0) {
                    581:                DEBUG(("dbzagain: putconf failed\n"));
                    582:                return(-1);
                    583:        }
                    584: 
                    585:        /* create/truncate .pag */
                    586:        fn = enstring(name, pag);
                    587:        if (fn == NULL)
                    588:                return(-1);
                    589:        f = fopen(fn, "w");
                    590:        free(fn);
                    591:        if (f == NULL) {
                    592:                DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
                    593:                return(-1);
                    594:        } else
                    595:                (void) fclose(f);
                    596: 
                    597:        /* and let dbminit do the work */
                    598:        return(dbminit(name));
                    599: }
                    600: 
                    601: /*
                    602:  - dbminit - open a database, creating it (using defaults) if necessary
                    603:  *
                    604:  * We try to leave errno set plausibly, to the extent that underlying
                    605:  * functions permit this, since many people consult it if dbminit() fails.
                    606:  */
                    607: int                            /* 0 success, -1 failure */
                    608: dbminit(name)
                    609: char *name;
                    610: {
                    611:        register int i;
                    612:        register size_t s;
                    613:        register char *dirfname;
                    614:        register char *pagfname;
                    615: 
                    616:        if (pagf != NULL) {
                    617:                DEBUG(("dbminit: dbminit already called once\n"));
                    618:                errno = 0;
                    619:                return(-1);
                    620:        }
                    621: 
                    622:        /* open the .dir file */
                    623:        dirfname = enstring(name, dir);
                    624:        if (dirfname == NULL)
                    625:                return(-1);
                    626:        dirf = fopen(dirfname, "r+");
                    627:        if (dirf == NULL) {
                    628:                dirf = fopen(dirfname, "r");
                    629:                dirronly = 1;
                    630:        } else
                    631:                dirronly = 0;
                    632:        free(dirfname);
                    633:        if (dirf == NULL) {
                    634:                DEBUG(("dbminit: can't open .dir file\n"));
                    635:                return(-1);
                    636:        }
                    637: 
                    638:        /* open the .pag file */
                    639:        pagfname = enstring(name, pag);
                    640:        if (pagfname == NULL) {
                    641:                (void) fclose(dirf);
                    642:                return(-1);
                    643:        }
                    644:        pagf = fopen(pagfname, "r+b");
                    645:        if (pagf == NULL) {
                    646:                pagf = fopen(pagfname, "rb");
                    647:                if (pagf == NULL) {
                    648:                        DEBUG(("dbminit: .pag open failed\n"));
                    649:                        (void) fclose(dirf);
                    650:                        free(pagfname);
                    651:                        return(-1);
                    652:                }
                    653:                pagronly = 1;
                    654:        } else if (dirronly)
                    655:                pagronly = 1;
                    656:        else
                    657:                pagronly = 0;
                    658: #ifdef NOBUFFER
                    659:        /*
                    660:         * B News does not do adequate locking on its database accesses.
                    661:         * Why it doesn't get into trouble using dbm is a mystery.  In any
                    662:         * case, doing unbuffered i/o does not cure the problem, but does
                    663:         * enormously reduce its incidence.
                    664:         */
                    665:        (void) setbuf(pagf, (char *)NULL);
                    666: #else
                    667: #ifdef _IOFBF
                    668:        (void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf));
                    669: #endif
                    670: #endif
                    671:        pagpos = -1;
                    672:        /* don't free pagfname, need it below */
                    673: 
                    674:        /* open the base file */
                    675:        basef = fopen(name, "r");
                    676:        if (basef == NULL) {
                    677:                DEBUG(("dbminit: basefile open failed\n"));
                    678:                basefname = enstring(name, "");
                    679:                if (basefname == NULL) {
                    680:                        (void) fclose(pagf);
                    681:                        (void) fclose(dirf);
                    682:                        free(pagfname);
                    683:                        pagf = NULL;
                    684:                        return(-1);
                    685:                }
                    686:        } else
                    687:                basefname = NULL;
                    688: #ifdef _IOFBF
                    689:        if (basef != NULL)
                    690:                (void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf));
                    691: #endif
                    692: 
                    693:        /* pick up configuration */
                    694:        if (getconf(dirf, pagf, &conf) < 0) {
                    695:                DEBUG(("dbminit: getconf failure\n"));
                    696:                (void) fclose(basef);
                    697:                (void) fclose(pagf);
                    698:                (void) fclose(dirf);
                    699:                free(pagfname);
                    700:                pagf = NULL;
                    701:                errno = EDOM;   /* kind of a kludge, but very portable */
                    702:                return(-1);
                    703:        }
                    704:        tagbits = conf.tagmask << conf.tagshift;
                    705:        taghere = conf.tagenb << conf.tagshift;
                    706:        tagboth = tagbits | taghere;
                    707:        mybytemap(mybmap);
                    708:        bytesame = 1;
                    709:        for (i = 0; i < SOF; i++)
                    710:                if (mybmap[i] != conf.bytemap[i])
                    711:                        bytesame = 0;
                    712: 
                    713:        /* get first table into core, if it looks desirable and feasible */
                    714:        s = (size_t)conf.tsize * SOF;
                    715:        if (incore && (of_t)(s/SOF) == conf.tsize) {
                    716:                bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b");
                    717:                if (bufpagf != NULL)
                    718:                        corepag = getcore(bufpagf);
                    719:        } else {
                    720:                bufpagf = NULL;
                    721:                corepag = NULL;
                    722:        }
                    723:        free(pagfname);
                    724: 
                    725:        /* misc. setup */
                    726:        crcinit();
                    727:        written = 0;
                    728:        prevp = FRESH;
                    729:        DEBUG(("dbminit: succeeded\n"));
                    730:        return(0);
                    731: }
                    732: 
                    733: /*
                    734:  - enstring - concatenate two strings into a malloced area
                    735:  */
                    736: static char *                  /* NULL if malloc fails */
                    737: enstring(s1, s2)
                    738: char *s1;
                    739: char *s2;
                    740: {
                    741:        register char *p;
                    742: 
                    743:        p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1);
                    744:        if (p != NULL) {
                    745:                (void) strcpy(p, s1);
                    746:                (void) strcat(p, s2);
                    747:        } else {
                    748:                DEBUG(("enstring(%s, %s) out of memory\n", s1, s2));
                    749:        }
                    750:        return(p);
                    751: }
                    752: 
                    753: /*
                    754:  - dbmclose - close a database
                    755:  */
                    756: int
                    757: dbmclose()
                    758: {
                    759:        register int ret = 0;
                    760: 
                    761:        if (pagf == NULL) {
                    762:                DEBUG(("dbmclose: not opened!\n"));
                    763:                return(-1);
                    764:        }
                    765: 
                    766:        if (fclose(pagf) == EOF) {
                    767:                DEBUG(("dbmclose: fclose(pagf) failed\n"));
                    768:                ret = -1;
                    769:        }
                    770:        pagf = basef;           /* ensure valid pointer; dbzsync checks it */
                    771:        if (dbzsync() < 0)
                    772:                ret = -1;
                    773:        if (bufpagf != NULL && fclose(bufpagf) == EOF) {
                    774:                DEBUG(("dbmclose: fclose(bufpagf) failed\n"));
                    775:                ret = -1;
                    776:        }
                    777:        if (corepag != NULL)
                    778:                free((char *)corepag);
                    779:        corepag = NULL;
                    780:        if (fclose(basef) == EOF) {
                    781:                DEBUG(("dbmclose: fclose(basef) failed\n"));
                    782:                ret = -1;
                    783:        }
                    784:        if (basefname != NULL)
                    785:                free(basefname);
                    786:        basef = NULL;
                    787:        pagf = NULL;
                    788:        if (fclose(dirf) == EOF) {
                    789:                DEBUG(("dbmclose: fclose(dirf) failed\n"));
                    790:                ret = -1;
                    791:        }
                    792: 
                    793:        DEBUG(("dbmclose: %s\n", (ret == 0) ? "succeeded" : "failed"));
                    794:        return(ret);
                    795: }
                    796: 
                    797: /*
                    798:  - dbzsync - push all in-core data out to disk
                    799:  */
                    800: int
                    801: dbzsync()
                    802: {
                    803:        register int ret = 0;
                    804: 
                    805:        if (pagf == NULL) {
                    806:                DEBUG(("dbzsync: not opened!\n"));
                    807:                return(-1);
                    808:        }
                    809:        if (!written)
                    810:                return(0);
                    811: 
                    812:        if (corepag != NULL) {
                    813:                if (putcore(corepag, bufpagf) < 0) {
                    814:                        DEBUG(("dbzsync: putcore failed\n"));
                    815:                        ret = -1;
                    816:                }
                    817:        }
                    818:        if (!conf.olddbz)
                    819:                if (putconf(dirf, &conf) < 0)
                    820:                        ret = -1;
                    821: 
                    822:        DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed"));
                    823:        return(ret);
                    824: }
                    825: 
                    826: /*
                    827:  - dbzcancel - cancel writing of in-core data
                    828:  * Mostly for use from child processes.
                    829:  * Note that we don't need to futz around with stdio buffers, because we
                    830:  * always fflush them immediately anyway and so they never have stale data.
                    831:  */
                    832: int
                    833: dbzcancel()
                    834: {
                    835:        if (pagf == NULL) {
                    836:                DEBUG(("dbzcancel: not opened!\n"));
                    837:                return(-1);
                    838:        }
                    839: 
                    840:        written = 0;
                    841:        return(0);
                    842: }
                    843: 
                    844: /*
                    845:  - dbzfetch - fetch() with case mapping built in
                    846:  */
                    847: datum
                    848: dbzfetch(key)
                    849: datum key;
                    850: {
                    851:        char buffer[DBZMAXKEY + 1];
                    852:        datum mappedkey;
                    853:        register size_t keysize;
                    854: 
                    855:        DEBUG(("dbzfetch: (%s)\n", key.dptr));
                    856: 
                    857:        /* Key is supposed to be less than DBZMAXKEY */
                    858:        keysize = key.dsize;
                    859:        if (keysize >= DBZMAXKEY) {
                    860:                keysize = DBZMAXKEY;
                    861:                DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
                    862:        }
                    863: 
                    864:        mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
                    865:        buffer[keysize] = '\0'; /* just a debug aid */
                    866:        mappedkey.dsize = keysize;
                    867: 
                    868:        return(fetch(mappedkey));
                    869: }
                    870: 
                    871: /*
                    872:  - fetch - get an entry from the database
                    873:  *
                    874:  * Disgusting fine point, in the name of backward compatibility:  if the
                    875:  * last character of "key" is a NUL, that character is (effectively) not
                    876:  * part of the comparison against the stored keys.
                    877:  */
                    878: datum                          /* dptr NULL, dsize 0 means failure */
                    879: fetch(key)
                    880: datum key;
                    881: {
                    882:        char buffer[DBZMAXKEY + 1];
                    883:        static of_t key_ptr;            /* return value points here */
                    884:        datum output;
                    885:        register size_t keysize;
                    886:        register size_t cmplen;
                    887:        register char *sepp;
                    888: 
                    889:        DEBUG(("fetch: (%s)\n", key.dptr));
                    890:        output.dptr = NULL;
                    891:        output.dsize = 0;
                    892:        prevp = FRESH;
                    893: 
                    894:        /* Key is supposed to be less than DBZMAXKEY */
                    895:        keysize = key.dsize;
                    896:        if (keysize >= DBZMAXKEY) {
                    897:                keysize = DBZMAXKEY;
                    898:                DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
                    899:        }
                    900: 
                    901:        if (pagf == NULL) {
                    902:                DEBUG(("fetch: database not open!\n"));
                    903:                return(output);
                    904:        } else if (basef == NULL) {     /* basef didn't exist yet */
                    905:                basef = latebase();
                    906:                if (basef == NULL)
                    907:                        return(output);
                    908:        }
                    909: 
                    910:        cmplen = keysize;
                    911:        sepp = &conf.fieldsep;
                    912:        if (key.dptr[keysize-1] == '\0') {
                    913:                cmplen--;
                    914:                sepp = &buffer[keysize-1];
                    915:        }
                    916:        start(&srch, &key, FRESH);
                    917:        while ((key_ptr = search(&srch)) != NOTFOUND) {
                    918:                DEBUG(("got 0x%lx\n", key_ptr));
                    919: 
                    920:                /* fetch the key */
                    921:                if (fseek(basef, key_ptr, SEEK_SET) != 0) {
                    922:                        DEBUG(("fetch: seek failed\n"));
                    923:                        return(output);
                    924:                }
                    925:                if (fread(buffer, 1, keysize, basef) != keysize) {
                    926:                        DEBUG(("fetch: read failed\n"));
                    927:                        return(output);
                    928:                }
                    929: 
                    930:                /* try it */
                    931:                buffer[keysize] = '\0';         /* terminated for DEBUG */
                    932:                (void) mapcase(buffer, buffer, keysize);
                    933:                DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n", 
                    934:                                                buffer, key.dptr, keysize));
                    935:                if (memcmp(key.dptr, buffer, cmplen) == 0 &&
                    936:                                (*sepp == conf.fieldsep || *sepp == '\0')) {
                    937:                        /* we found it */
                    938:                        output.dptr = (char *)&key_ptr;
                    939:                        output.dsize = SOF;
                    940:                        DEBUG(("fetch: successful\n"));
                    941:                        return(output);
                    942:                }
                    943:        }
                    944: 
                    945:        /* we didn't find it */
                    946:        DEBUG(("fetch: failed\n"));
                    947:        prevp = &srch;                  /* remember where we stopped */
                    948:        return(output);
                    949: }
                    950: 
                    951: /*
                    952:  - latebase - try to open a base file that wasn't there at the start
                    953:  */
                    954: static FILE *
                    955: latebase()
                    956: {
                    957:        register FILE *it;
                    958: 
                    959:        if (basefname == NULL) {
                    960:                DEBUG(("latebase: name foulup\n"));
                    961:                return(NULL);
                    962:        }
                    963:        it = fopen(basefname, "r");
                    964:        if (it == NULL) {
                    965:                DEBUG(("latebase: still can't open base\n"));
                    966:        } else {
                    967:                DEBUG(("latebase: late open succeeded\n"));
                    968:                free(basefname);
                    969:                basefname = NULL;
                    970: #ifdef _IOFBF
                    971:                (void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf));
                    972: #endif
                    973:        }
                    974:        return(it);
                    975: }
                    976: 
                    977: /*
                    978:  - dbzstore - store() with case mapping built in
                    979:  */
                    980: int
                    981: dbzstore(key, data)
                    982: datum key;
                    983: datum data;
                    984: {
                    985:        char buffer[DBZMAXKEY + 1];
                    986:        datum mappedkey;
                    987:        register size_t keysize;
                    988: 
                    989:        DEBUG(("dbzstore: (%s)\n", key.dptr));
                    990: 
                    991:        /* Key is supposed to be less than DBZMAXKEY */
                    992:        keysize = key.dsize;
                    993:        if (keysize >= DBZMAXKEY) {
                    994:                DEBUG(("dbzstore: key size too big (%d)\n", key.dsize));
                    995:                return(-1);
                    996:        }
                    997: 
                    998:        mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
                    999:        buffer[keysize] = '\0'; /* just a debug aid */
                   1000:        mappedkey.dsize = keysize;
                   1001: 
                   1002:        return(store(mappedkey, data));
                   1003: }
                   1004: 
                   1005: /*
                   1006:  - store - add an entry to the database
                   1007:  */
                   1008: int                            /* 0 success, -1 failure */
                   1009: store(key, data)
                   1010: datum key;
                   1011: datum data;
                   1012: {
                   1013:        of_t value;
                   1014: 
                   1015:        if (pagf == NULL) {
                   1016:                DEBUG(("store: database not open!\n"));
                   1017:                return(-1);
                   1018:        } else if (basef == NULL) {     /* basef didn't exist yet */
                   1019:                basef = latebase();
                   1020:                if (basef == NULL)
                   1021:                        return(-1);
                   1022:        }
                   1023:        if (pagronly) {
                   1024:                DEBUG(("store: database open read-only\n"));
                   1025:                return(-1);
                   1026:        }
                   1027:        if (data.dsize != SOF) {
                   1028:                DEBUG(("store: value size wrong (%d)\n", data.dsize));
                   1029:                return(-1);
                   1030:        }
                   1031:        if (key.dsize >= DBZMAXKEY) {
                   1032:                DEBUG(("store: key size too big (%d)\n", key.dsize));
                   1033:                return(-1);
                   1034:        }
                   1035: 
                   1036:        /* copy the value in to ensure alignment */
                   1037:        (void) memcpy((char *)&value, data.dptr, SOF);
                   1038:        DEBUG(("store: (%s, %ld)\n", key.dptr, (long)value));
                   1039:        if (!okayvalue(value)) {
                   1040:                DEBUG(("store: reserved bit or overflow in 0x%lx\n", value));
                   1041:                return(-1);
                   1042:        }
                   1043: 
                   1044:        /* find the place, exploiting previous search if possible */
                   1045:        start(&srch, &key, prevp);
                   1046:        while (search(&srch) != NOTFOUND)
                   1047:                continue;
                   1048: 
                   1049:        prevp = FRESH;
                   1050:        conf.used[0]++;
                   1051:        DEBUG(("store: used count %ld\n", conf.used[0]));
                   1052:        written = 1;
                   1053:        return(set(&srch, value));
                   1054: }
                   1055: 
                   1056: /*
                   1057:  - dbzincore - control attempts to keep .pag file in core
                   1058:  */
                   1059: int                            /* old setting */
                   1060: dbzincore(value)
                   1061: int value;
                   1062: {
                   1063:        register int old = incore;
                   1064: 
                   1065:        incore = value;
                   1066:        return(old);
                   1067: }
                   1068: 
                   1069: /*
                   1070:  - getconf - get configuration from .dir file
                   1071:  */
                   1072: static int                     /* 0 success, -1 failure */
                   1073: getconf(df, pf, cp)
                   1074: register FILE *df;             /* NULL means just give me the default */
                   1075: register FILE *pf;             /* NULL means don't care about .pag */
                   1076: register struct dbzconfig *cp;
                   1077: {
                   1078:        register int c;
                   1079:        register int i;
                   1080:        int err = 0;
                   1081: 
                   1082:        c = (df != NULL) ? getc(df) : EOF;
                   1083:        if (c == EOF) {         /* empty file, no configuration known */
                   1084:                cp->olddbz = 0;
                   1085:                if (df != NULL && pf != NULL && getc(pf) != EOF)
                   1086:                        cp->olddbz = 1;
                   1087:                cp->tsize = DEFSIZE;
                   1088:                cp->fieldsep = '\t';
                   1089:                for (i = 0; i < NUSEDS; i++)
                   1090:                        cp->used[i] = 0;
                   1091:                cp->valuesize = SOF;
                   1092:                mybytemap(cp->bytemap);
                   1093:                cp->casemap = DEFCASE;
                   1094:                cp->tagenb = TAGENB;
                   1095:                cp->tagmask = TAGMASK;
                   1096:                cp->tagshift = TAGSHIFT;
                   1097:                DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
                   1098:                        cp->tsize, cp->casemap, cp->tagenb, 
                   1099:                        cp->tagmask, cp->tagshift));
                   1100:                return(0);
                   1101:        }
                   1102:        (void) ungetc(c, df);
                   1103: 
                   1104:        /* first line, the vital stuff */
                   1105:        if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z')
                   1106:                err = -1;
                   1107:        if (getno(df, &err) != dbzversion)
                   1108:                err = -1;
                   1109:        cp->tsize = getno(df, &err);
                   1110:        cp->fieldsep = getno(df, &err);
                   1111:        while ((c = getc(df)) == ' ')
                   1112:                continue;
                   1113:        cp->casemap = c;
                   1114:        cp->tagenb = getno(df, &err);
                   1115:        cp->tagmask = getno(df, &err);
                   1116:        cp->tagshift = getno(df, &err);
                   1117:        cp->valuesize = getno(df, &err);
                   1118:        if (cp->valuesize != SOF) {
                   1119:                DEBUG(("getconf: wrong of_t size (%d)\n", cp->valuesize));
                   1120:                err = -1;
                   1121:                cp->valuesize = SOF;    /* to protect the loops below */
                   1122:        }
                   1123:        for (i = 0; i < cp->valuesize; i++)
                   1124:                cp->bytemap[i] = getno(df, &err);
                   1125:        if (getc(df) != '\n')
                   1126:                err = -1;
                   1127:        DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize,
                   1128:                        cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask,
                   1129:                        cp->tagshift));
                   1130:        DEBUG(("bytemap (%d)", cp->valuesize));
                   1131:        for (i = 0; i < cp->valuesize; i++) {
                   1132:                DEBUG((" %d", cp->bytemap[i]));
                   1133:        }
                   1134:        DEBUG(("\n"));
                   1135: 
                   1136:        /* second line, the usages */
                   1137:        for (i = 0; i < NUSEDS; i++)
                   1138:                cp->used[i] = getno(df, &err);
                   1139:        if (getc(df) != '\n')
                   1140:                err = -1;
                   1141:        DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2]));
                   1142: 
                   1143:        if (err < 0) {
                   1144:                DEBUG(("getconf error\n"));
                   1145:                return(-1);
                   1146:        }
                   1147:        return(0);
                   1148: }
                   1149: 
                   1150: /*
                   1151:  - getno - get a long
                   1152:  */
                   1153: static long
                   1154: getno(f, ep)
                   1155: FILE *f;
                   1156: int *ep;
                   1157: {
                   1158:        register char *p;
                   1159: #      define  MAXN    50
                   1160:        char getbuf[MAXN];
                   1161:        register int c;
                   1162: 
                   1163:        while ((c = getc(f)) == ' ')
                   1164:                continue;
                   1165:        if (c == EOF || c == '\n') {
                   1166:                DEBUG(("getno: missing number\n"));
                   1167:                *ep = -1;
                   1168:                return(0);
                   1169:        }
                   1170:        p = getbuf;
                   1171:        *p++ = c;
                   1172:        while ((c = getc(f)) != EOF && c != '\n' && c != ' ')
                   1173:                if (p < &getbuf[MAXN-1])
                   1174:                        *p++ = c;
                   1175:        if (c == EOF) {
                   1176:                DEBUG(("getno: EOF\n"));
                   1177:                *ep = -1;
                   1178:        } else
                   1179:                (void) ungetc(c, f);
                   1180:        *p = '\0';
                   1181: 
                   1182:        if (strspn(getbuf, "-1234567890") != strlen(getbuf)) {
                   1183:                DEBUG(("getno: `%s' non-numeric\n", getbuf));
                   1184:                *ep = -1;
                   1185:        }
                   1186:        return(atol(getbuf));
                   1187: }
                   1188: 
                   1189: /*
                   1190:  - putconf - write configuration to .dir file
                   1191:  */
                   1192: static int                     /* 0 success, -1 failure */
                   1193: putconf(f, cp)
                   1194: register FILE *f;
                   1195: register struct dbzconfig *cp;
                   1196: {
                   1197:        register int i;
                   1198:        register int ret = 0;
                   1199: 
                   1200:        if (fseek(f, (of_t)0, SEEK_SET) != 0) {
                   1201:                DEBUG(("fseek failure in putconf\n"));
                   1202:                ret = -1;
                   1203:        }
                   1204:        fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion, cp->tsize,
                   1205:                                cp->fieldsep, cp->casemap, cp->tagenb,
                   1206:                                cp->tagmask, cp->tagshift, cp->valuesize);
                   1207:        for (i = 0; i < cp->valuesize; i++)
                   1208:                fprintf(f, " %d", cp->bytemap[i]);
                   1209:        fprintf(f, "\n");
                   1210:        for (i = 0; i < NUSEDS; i++)
                   1211:                fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
                   1212: 
                   1213:        (void) fflush(f);
                   1214:        if (ferror(f))
                   1215:                ret = -1;
                   1216: 
                   1217:        DEBUG(("putconf status %d\n", ret));
                   1218:        return(ret);
                   1219: }
                   1220: 
                   1221: /*
                   1222:  - getcore - try to set up an in-core copy of .pag file
                   1223:  */
                   1224: static of_t *                  /* pointer to copy, or NULL */
                   1225: getcore(f)
                   1226: FILE *f;
                   1227: {
                   1228:        register of_t *p;
                   1229:        register size_t i;
                   1230:        register size_t nread;
                   1231:        register char *it;
                   1232: 
                   1233:        it = malloc((size_t)conf.tsize * SOF);
                   1234:        if (it == NULL) {
                   1235:                DEBUG(("getcore: malloc failed\n"));
                   1236:                return(NULL);
                   1237:        }
                   1238: 
                   1239:        nread = fread(it, SOF, (size_t)conf.tsize, f);
                   1240:        if (ferror(f)) {
                   1241:                DEBUG(("getcore: read failed\n"));
                   1242:                free(it);
                   1243:                return(NULL);
                   1244:        }
                   1245: 
                   1246:        p = (of_t *)it + nread;
                   1247:        i = (size_t)conf.tsize - nread;
                   1248:        while (i-- > 0)
                   1249:                *p++ = VACANT;
                   1250:        return((of_t *)it);
                   1251: }
                   1252: 
                   1253: /*
                   1254:  - putcore - try to rewrite an in-core table
                   1255:  */
                   1256: static int                     /* 0 okay, -1 fail */
                   1257: putcore(tab, f)
                   1258: of_t *tab;
                   1259: FILE *f;
                   1260: {
                   1261:        if (fseek(f, (of_t)0, SEEK_SET) != 0) {
                   1262:                DEBUG(("fseek failure in putcore\n"));
                   1263:                return(-1);
                   1264:        }
                   1265:        (void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f);
                   1266:        (void) fflush(f);
                   1267:        return((ferror(f)) ? -1 : 0);
                   1268: }
                   1269: 
                   1270: /*
                   1271:  - start - set up to start or restart a search
                   1272:  */
                   1273: static void
                   1274: start(sp, kp, osp)
                   1275: register struct searcher *sp;
                   1276: register datum *kp;
                   1277: register struct searcher *osp;         /* may be FRESH, i.e. NULL */
                   1278: {
                   1279:        register long h;
                   1280: 
                   1281:        h = hash(kp->dptr, kp->dsize);
                   1282:        if (osp != FRESH && osp->hash == h) {
                   1283:                if (sp != osp)
                   1284:                        *sp = *osp;
                   1285:                DEBUG(("search restarted\n"));
                   1286:        } else {
                   1287:                sp->hash = h;
                   1288:                sp->tag = MKTAG(h / conf.tsize);
                   1289:                DEBUG(("tag 0x%lx\n", sp->tag));
                   1290:                sp->place = h % conf.tsize;
                   1291:                sp->tabno = 0;
                   1292:                sp->run = (conf.olddbz) ? conf.tsize : MAXRUN;
                   1293:                sp->aborted = 0;
                   1294:        }
                   1295:        sp->seen = 0;
                   1296: }
                   1297: 
                   1298: /*
                   1299:  - search - conduct part of a search
                   1300:  */
                   1301: static of_t                    /* NOTFOUND if we hit VACANT or error */
                   1302: search(sp)
                   1303: register struct searcher *sp;
                   1304: {
                   1305:        register of_t dest;
                   1306:        register of_t value;
                   1307:        of_t val;               /* buffer for value (can't fread register) */
                   1308:        register of_t place;
                   1309: 
                   1310:        if (sp->aborted)
                   1311:                return(NOTFOUND);
                   1312: 
                   1313:        for (;;) {
                   1314:                /* determine location to be examined */
                   1315:                place = sp->place;
                   1316:                if (sp->seen) {
                   1317:                        /* go to next location */
                   1318:                        if (--sp->run <= 0) {
                   1319:                                sp->tabno++;
                   1320:                                sp->run = MAXRUN;
                   1321:                        }
                   1322:                        place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
                   1323:                        sp->place = place;
                   1324:                } else
                   1325:                        sp->seen = 1;   /* now looking at current location */
                   1326:                DEBUG(("search @ %ld\n", place));
                   1327: 
                   1328:                /* get the tagged value */
                   1329:                if (corepag != NULL && place < conf.tsize) {
                   1330:                        DEBUG(("search: in core\n"));
                   1331:                        value = MAPIN(corepag[place]);
                   1332:                } else {
                   1333:                        /* seek, if necessary */
                   1334:                        dest = place * SOF;
                   1335:                        if (pagpos != dest) {
                   1336:                                if (fseek(pagf, dest, SEEK_SET) != 0) {
                   1337:                                        DEBUG(("search: seek failed\n"));
                   1338:                                        pagpos = -1;
                   1339:                                        sp->aborted = 1;
                   1340:                                        return(NOTFOUND);
                   1341:                                }
                   1342:                                pagpos = dest;
                   1343:                        }
                   1344: 
                   1345:                        /* read it */
                   1346:                        if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
                   1347:                                value = MAPIN(val);
                   1348:                        else if (ferror(pagf)) {
                   1349:                                DEBUG(("search: read failed\n"));
                   1350:                                pagpos = -1;
                   1351:                                sp->aborted = 1;
                   1352:                                return(NOTFOUND);
                   1353:                        } else
                   1354:                                value = VACANT;
                   1355: 
                   1356:                        /* and finish up */
                   1357:                        pagpos += sizeof(val);
                   1358:                }
                   1359: 
                   1360:                /* vacant slot is always cause to return */
                   1361:                if (value == VACANT) {
                   1362:                        DEBUG(("search: empty slot\n"));
                   1363:                        return(NOTFOUND);
                   1364:                };
                   1365: 
                   1366:                /* check the tag */
                   1367:                value = UNBIAS(value);
                   1368:                DEBUG(("got 0x%lx\n", value));
                   1369:                if (!HASTAG(value)) {
                   1370:                        DEBUG(("tagless\n"));
                   1371:                        return(value);
                   1372:                } else if (TAG(value) == sp->tag) {
                   1373:                        DEBUG(("match\n"));
                   1374:                        return(NOTAG(value));
                   1375:                } else {
                   1376:                        DEBUG(("mismatch 0x%lx\n", TAG(value)));
                   1377:                }
                   1378:        }
                   1379:        /* NOTREACHED */
                   1380: }
                   1381: 
                   1382: /*
                   1383:  - okayvalue - check that a value can be stored
                   1384:  */
                   1385: static int                     /* predicate */
                   1386: okayvalue(value)
                   1387: of_t value;
                   1388: {
                   1389:        if (HASTAG(value))
                   1390:                return(0);
                   1391: #ifdef OVERFLOW
                   1392:        if (value == LONG_MAX)  /* BIAS() and UNBIAS() will overflow */
                   1393:                return(0);
                   1394: #endif
                   1395:        return(1);
                   1396: }
                   1397: 
                   1398: /*
                   1399:  - set - store a value into a location previously found by search
                   1400:  */
                   1401: static int                     /* 0 success, -1 failure */
                   1402: set(sp, value)
                   1403: register struct searcher *sp;
                   1404: of_t value;
                   1405: {
                   1406:        register of_t place = sp->place;
                   1407:        register of_t v = value;
                   1408: 
                   1409:        if (sp->aborted)
                   1410:                return(-1);
                   1411: 
                   1412:        if (CANTAG(v) && !conf.olddbz) {
                   1413:                v |= sp->tag | taghere;
                   1414:                if (v != UNBIAS(VACANT))        /* BIAS(v) won't look VACANT */
                   1415: #ifdef OVERFLOW
                   1416:                        if (v != LONG_MAX)      /* and it won't overflow */
                   1417: #endif
                   1418:                        value = v;
                   1419:        }
                   1420:        DEBUG(("tagged value is 0x%lx\n", value));
                   1421:        value = BIAS(value);
                   1422:        value = MAPOUT(value);
                   1423: 
                   1424:        /* If we have the index file in memory, use it */
                   1425:        if (corepag != NULL && place < conf.tsize) {
                   1426:                corepag[place] = value;
                   1427:                DEBUG(("set: incore\n"));
                   1428:                return(0);
                   1429:        }
                   1430: 
                   1431:        /* seek to spot */
                   1432:        pagpos = -1;            /* invalidate position memory */
                   1433:        if (fseek(pagf, place * SOF, SEEK_SET) != 0) {
                   1434:                DEBUG(("set: seek failed\n"));
                   1435:                sp->aborted = 1;
                   1436:                return(-1);
                   1437:        }
                   1438: 
                   1439:        /* write in data */
                   1440:        if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
                   1441:                DEBUG(("set: write failed\n"));
                   1442:                sp->aborted = 1;
                   1443:                return(-1);
                   1444:        }
                   1445:        /* fflush improves robustness, and buffer re-use is rare anyway */
                   1446:        if (fflush(pagf) == EOF) {
                   1447:                DEBUG(("set: fflush failed\n"));
                   1448:                sp->aborted = 1;
                   1449:                return(-1);
                   1450:        }
                   1451: 
                   1452:        DEBUG(("set: succeeded\n"));
                   1453:        return(0);
                   1454: }
                   1455: 
                   1456: /*
                   1457:  - mybytemap - determine this machine's byte map
                   1458:  *
                   1459:  * A byte map is an array of ints, sizeof(of_t) of them.  The 0th int
                   1460:  * is the byte number of the high-order byte in my of_t, and so forth.
                   1461:  */
                   1462: static void
                   1463: mybytemap(map)
                   1464: int map[];                     /* -> int[SOF] */
                   1465: {
                   1466:        union {
                   1467:                of_t o;
                   1468:                char c[SOF];
                   1469:        } u;
                   1470:        register int *mp = &map[SOF];
                   1471:        register int ntodo;
                   1472:        register int i;
                   1473: 
                   1474:        u.o = 1;
                   1475:        for (ntodo = (int)SOF; ntodo > 0; ntodo--) {
                   1476:                for (i = 0; i < SOF; i++)
                   1477:                        if (u.c[i] != 0)
                   1478:                                break;
                   1479:                if (i == SOF) {
                   1480:                        /* trouble -- set it to *something* consistent */
                   1481:                        DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo));
                   1482:                        for (i = 0; i < SOF; i++)
                   1483:                                map[i] = i;
                   1484:                        return;
                   1485:                }
                   1486:                DEBUG(("mybytemap: byte %d\n", i));
                   1487:                *--mp = i;
                   1488:                while (u.c[i] != 0)
                   1489:                        u.o <<= 1;
                   1490:        }
                   1491: }
                   1492: 
                   1493: /*
                   1494:  - bytemap - transform an of_t from byte ordering map1 to map2
                   1495:  */
                   1496: static of_t                    /* transformed result */
                   1497: bytemap(ino, map1, map2)
                   1498: of_t ino;
                   1499: int *map1;
                   1500: int *map2;
                   1501: {
                   1502:        union oc {
                   1503:                of_t o;
                   1504:                char c[SOF];
                   1505:        };
                   1506:        union oc in;
                   1507:        union oc out;
                   1508:        register int i;
                   1509: 
                   1510:        in.o = ino;
                   1511:        for (i = 0; i < SOF; i++)
                   1512:                out.c[map2[i]] = in.c[map1[i]];
                   1513:        return(out.o);
                   1514: }
                   1515: 
                   1516: /*
                   1517:  * This is a simplified version of the pathalias hashing function.
                   1518:  * Thanks to Steve Belovin and Peter Honeyman
                   1519:  *
                   1520:  * hash a string into a long int.  31 bit crc (from andrew appel).
                   1521:  * the crc table is computed at run time by crcinit() -- we could
                   1522:  * precompute, but it takes 1 clock tick on a 750.
                   1523:  *
                   1524:  * This fast table calculation works only if POLY is a prime polynomial
                   1525:  * in the field of integers modulo 2.  Since the coefficients of a
                   1526:  * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
                   1527:  * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
                   1528:  * 31 down to 25 are zero.  Happily, we have candidates, from
                   1529:  * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
                   1530:  *     x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
                   1531:  *     x^31 + x^3 + x^0
                   1532:  *
                   1533:  * We reverse the bits to get:
                   1534:  *     111101010000000000000000000000001 but drop the last 1
                   1535:  *         f   5   0   0   0   0   0   0
                   1536:  *     010010000000000000000000000000001 ditto, for 31-bit crc
                   1537:  *        4   8   0   0   0   0   0   0
                   1538:  */
                   1539: 
                   1540: #define POLY 0x48000000L       /* 31-bit polynomial (avoids sign problems) */
                   1541: 
                   1542: static long CrcTable[128];
                   1543: 
                   1544: /*
                   1545:  - crcinit - initialize tables for hash function
                   1546:  */
                   1547: static void
                   1548: crcinit()
                   1549: {
                   1550:        register int i, j;
                   1551:        register long sum;
                   1552: 
                   1553:        for (i = 0; i < 128; ++i) {
                   1554:                sum = 0L;
                   1555:                for (j = 7 - 1; j >= 0; --j)
                   1556:                        if (i & (1 << j))
                   1557:                                sum ^= POLY >> j;
                   1558:                CrcTable[i] = sum;
                   1559:        }
                   1560:        DEBUG(("crcinit: done\n"));
                   1561: }
                   1562: 
                   1563: /*
                   1564:  - hash - Honeyman's nice hashing function
                   1565:  */
                   1566: static long
                   1567: hash(name, size)
                   1568: register char *name;
                   1569: register int size;
                   1570: {
                   1571:        register long sum = 0L;
                   1572: 
                   1573:        while (size--) {
                   1574:                sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
                   1575:        }
                   1576:        DEBUG(("hash: returns (%ld)\n", sum));
                   1577:        return(sum);
                   1578: }
                   1579: 
                   1580: /*
                   1581:  * case-mapping stuff
                   1582:  *
                   1583:  * Borrowed from C News, by permission of the authors.  Somewhat modified.
                   1584:  *
                   1585:  * We exploit the fact that we are dealing only with headers here, and
                   1586:  * headers are limited to the ASCII characters by RFC822.  It is barely
                   1587:  * possible that we might be dealing with a translation into another
                   1588:  * character set, but in particular it's very unlikely for a header
                   1589:  * character to be outside -128..255.
                   1590:  *
                   1591:  * Life would be a whole lot simpler if tolower() could safely and portably
                   1592:  * be applied to any char.
                   1593:  */
                   1594: 
                   1595: #define        OFFSET  128             /* avoid trouble with negative chars */
                   1596: 
                   1597: /* must call casencmp before invoking TOLOW... */
                   1598: #define        TOLOW(c)        (cmap[(c)+OFFSET])
                   1599: 
                   1600: /* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
                   1601: /* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
                   1602: #define CISTREQN(a, b, n) \
                   1603:        (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)
                   1604: 
                   1605: #define        MAPSIZE (256+OFFSET)
                   1606: static char cmap[MAPSIZE];     /* relies on init to '\0' */
                   1607: static int mprimed = 0;                /* has cmap been set up? */
                   1608: 
                   1609: /*
                   1610:  - mapprime - set up case-mapping stuff
                   1611:  */
                   1612: static void
                   1613: mapprime()
                   1614: {
                   1615:        register char *lp;
                   1616:        register char *up;
                   1617:        register int c;
                   1618:        register int i;
                   1619:        static char lower[] = "abcdefghijklmnopqrstuvwxyz";
                   1620:        static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
                   1621: 
                   1622:        for (lp = lower, up = upper; *lp != '\0'; lp++, up++) {
                   1623:                c = *lp;
                   1624:                cmap[c+OFFSET] = c;
                   1625:                cmap[*up+OFFSET] = c;
                   1626:        }
                   1627:        for (i = 0; i < MAPSIZE; i++)
                   1628:                if (cmap[i] == '\0')
                   1629:                        cmap[i] = (char)(i-OFFSET);
                   1630:        mprimed = 1;
                   1631: }
                   1632: 
                   1633: /*
                   1634:  - casencmp - case-independent strncmp
                   1635:  */
                   1636: static int                     /* < == > 0 */
                   1637: casencmp(s1, s2, len)
                   1638: char *s1;
                   1639: char *s2;
                   1640: int len;
                   1641: {
                   1642:        register char *p1;
                   1643:        register char *p2;
                   1644:        register int n;
                   1645: 
                   1646:        if (!mprimed)
                   1647:                mapprime();
                   1648: 
                   1649:        p1 = s1;
                   1650:        p2 = s2;
                   1651:        n = len;
                   1652:        while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) {
                   1653:                p1++;
                   1654:                p2++;
                   1655:        }
                   1656:        if (n < 0)
                   1657:                return(0);
                   1658: 
                   1659:        /*
                   1660:         * The following case analysis is necessary so that characters
                   1661:         * which look negative collate low against normal characters but
                   1662:         * high against the end-of-string NUL.
                   1663:         */
                   1664:        if (*p1 == '\0' && *p2 == '\0')
                   1665:                return(0);
                   1666:        else if (*p1 == '\0')
                   1667:                return(-1);
                   1668:        else if (*p2 == '\0')
                   1669:                return(1);
                   1670:        else
                   1671:                return(TOLOW(*p1) - TOLOW(*p2));
                   1672: }
                   1673: 
                   1674: /*
                   1675:  - mapcase - do case-mapped copy
                   1676:  */
                   1677: static char *                  /* returns src or dst */
                   1678: mapcase(dst, src, siz)
                   1679: char *dst;                     /* destination, used only if mapping needed */
                   1680: char *src;                     /* source; src == dst is legal */
                   1681: size_t siz;
                   1682: {
                   1683:        register char *s;
                   1684:        register char *d;
                   1685:        register char *c;       /* case break */
                   1686:        register char *e;       /* end of source */
                   1687: 
                   1688: 
                   1689:        c = cipoint(src, siz);
                   1690:        if (c == NULL)
                   1691:                return(src);
                   1692: 
                   1693:        if (!mprimed)
                   1694:                mapprime();
                   1695:        s = src;
                   1696:        e = s + siz;
                   1697:        d = dst;
                   1698: 
                   1699:        while (s < c)
                   1700:                *d++ = *s++;
                   1701:        while (s < e)
                   1702:                *d++ = TOLOW(*s++);
                   1703: 
                   1704:        return(dst);
                   1705: }
                   1706: 
                   1707: /*
                   1708:  - cipoint - where in this message-ID does it become case-insensitive?
                   1709:  *
                   1710:  * The RFC822 code is not quite complete.  Absolute, total, full RFC822
                   1711:  * compliance requires a horrible parsing job, because of the arcane
                   1712:  * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
                   1713:  * for example.  There are three or four things that might occur in the
                   1714:  * domain part of a message-id that are case-sensitive.  They don't seem
                   1715:  * to ever occur in real news, thank Cthulhu.  (What?  You were expecting
                   1716:  * a merciful and forgiving deity to be invoked in connection with RFC822?
                   1717:  * Forget it; none of them would come near it.)
                   1718:  */
                   1719: static char *                  /* pointer into s, or NULL for "nowhere" */
                   1720: cipoint(s, siz)
                   1721: char *s;
                   1722: size_t siz;
                   1723: {
                   1724:        register char *p;
                   1725:        static char post[] = "postmaster";
                   1726:        static int plen = sizeof(post)-1;
                   1727: 
                   1728:        switch (conf.casemap) {
                   1729:        case '0':               /* unmapped, sensible */
                   1730:                return(NULL);
                   1731:                break;
                   1732:        case 'C':               /* C News, RFC 822 conformant (approx.) */
                   1733:                p = memchr(s, '@', siz);
                   1734:                if (p == NULL)                  /* no local/domain split */
                   1735:                        return(NULL);           /* assume all local */
                   1736:                else if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) {
                   1737:                        /* crazy -- "postmaster" is case-insensitive */
                   1738:                        return(s);
                   1739:                } else
                   1740:                        return(p);
                   1741:                break;
                   1742:        case '=':               /* 2.11, neither sensible nor conformant */
                   1743:                return(s);      /* all case-insensitive */
                   1744:                break;
                   1745:        }
                   1746: 
                   1747:        DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap));
                   1748:        return(NULL);           /* just leave it alone */
                   1749: }
                   1750: 
                   1751: /*
                   1752:  - dbzdebug - control dbz debugging at run time
                   1753:  */
                   1754: int                            /* old value */
                   1755: dbzdebug(value)
                   1756: int value;
                   1757: {
                   1758: #ifdef DBZDEBUG
                   1759:        register int old = debug;
                   1760: 
                   1761:        debug = value;
                   1762:        return(old);
                   1763: #else
                   1764:        return(-1);
                   1765: #endif
                   1766: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.