|
|
1.1 ! root 1: /* ! 2: ! 3: dbz.c V3.2 ! 4: ! 5: Copyright 1988 Jon Zeeff ([email protected]) ! 6: You can use this code in any manner, as long as you leave my name on it ! 7: and don't hold me responsible for any problems with it. ! 8: ! 9: Hacked on by [email protected] (David Butler); Sun Jun 5 00:27:08 CDT 1988 ! 10: ! 11: Various improvments + INCORE by [email protected] (Mark Moraes) ! 12: ! 13: Major reworking by Henry Spencer as part of the C News project. ! 14: ! 15: These routines replace dbm as used by the usenet news software ! 16: (it's not a full dbm replacement by any means). It's fast and ! 17: simple. It contains no AT&T code. ! 18: ! 19: In general, dbz's files are 1/20 the size of dbm's. Lookup performance ! 20: is somewhat better, while file creation is spectacularly faster, especially ! 21: if the incore facility is used. ! 22: ! 23: */ ! 24: ! 25: #include <stdio.h> ! 26: #include <sys/types.h> ! 27: #include <string.h> ! 28: #include <ctype.h> ! 29: #include <errno.h> ! 30: #ifndef __STDC__ ! 31: extern int errno; ! 32: #endif ! 33: #include <dbz.h> ! 34: ! 35: /* ! 36: * #ifdef index. "LIA" = "leave it alone unless you know what you're doing". ! 37: * ! 38: * FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h> ! 39: * INDEX_SIZE backward compatibility with old dbz; avoid using this ! 40: * NMEMORY number of days of memory for use in sizing new table (LIA) ! 41: * INCORE backward compatibility with old dbz; use dbzincore() instead ! 42: * DBZDEBUG enable debugging ! 43: * DEFSIZE default table size (not as critical as in old dbz) ! 44: * OLDBNEWS default case mapping as in old B News; set NOBUFFER ! 45: * BNEWS default case mapping as in current B News; set NOBUFFER ! 46: * DEFCASE default case-map algorithm selector ! 47: * NOTAGS fseek offsets are strange, do not do tagging (see below) ! 48: * NPAGBUF size of .pag buffer, in longs (LIA) ! 49: * SHISTBUF size of ASCII-file buffer, in bytes (LIA) ! 50: * MAXRUN length of run which shifts to next table (see below) (LIA) ! 51: * OVERFLOW long-int arithmetic overflow must be avoided, will trap ! 52: * NOBUFFER do not buffer hash-table i/o, B News locking is defective ! 53: */ ! 54: ! 55: #ifdef FUNNYSEEKS ! 56: #include <unistd.h> ! 57: #else ! 58: #define SEEK_SET 0 ! 59: #endif ! 60: #ifdef OVERFLOW ! 61: #include <limits.h> ! 62: #endif ! 63: ! 64: static int dbzversion = 3; /* for validating .dir file format */ ! 65: ! 66: /* ! 67: * The dbz database exploits the fact that when news stores a <key,value> ! 68: * tuple, the `value' part is a seek offset into a text file, pointing to ! 69: * a copy of the `key' part. This avoids the need to store a copy of ! 70: * the key in the dbz files. However, the text file *must* exist and be ! 71: * consistent with the dbz files, or things will fail. ! 72: * ! 73: * The basic format of the database is a simple hash table containing the ! 74: * values. A value is stored by indexing into the table using a hash value ! 75: * computed from the key; collisions are resolved by linear probing (just ! 76: * search forward for an empty slot, wrapping around to the beginning of ! 77: * the table if necessary). Linear probing is a performance disaster when ! 78: * the table starts to get full, so a complication is introduced. The ! 79: * database is actually one *or more* tables, stored sequentially in the ! 80: * .pag file, and the length of linear-probe sequences is limited. The ! 81: * search (for an existing item or an empty slot) always starts in the ! 82: * first table, and whenever MAXRUN probes have been done in table N, ! 83: * probing continues in table N+1. This behaves reasonably well even in ! 84: * cases of massive overflow. There are some other small complications ! 85: * added, see comments below. ! 86: * ! 87: * The table size is fixed for any particular database, but is determined ! 88: * dynamically when a database is rebuilt. The strategy is to try to pick ! 89: * the size so the first table will be no more than 2/3 full, that being ! 90: * slightly before the point where performance starts to degrade. (It is ! 91: * desirable to be a bit conservative because the overflow strategy tends ! 92: * to produce files with holes in them, which is a nuisance.) ! 93: */ ! 94: ! 95: /* ! 96: * The following is for backward compatibility. ! 97: */ ! 98: #ifdef INDEX_SIZE ! 99: #define DEFSIZE INDEX_SIZE ! 100: #endif ! 101: ! 102: /* ! 103: * ANSI C says an offset into a file is a long, not an off_t, for some ! 104: * reason. This actually does simplify life a bit, but it's still nice ! 105: * to have a distinctive name for it. Beware, this is just for readability, ! 106: * don't try to change this. ! 107: */ ! 108: #define of_t long ! 109: #define SOF (sizeof(of_t)) ! 110: ! 111: /* ! 112: * We assume that unused areas of a binary file are zeros, and that the ! 113: * bit pattern of `(of_t)0' is all zeros. The alternative is rather ! 114: * painful file initialization. Note that okayvalue(), if OVERFLOW is ! 115: * defined, knows what value of an offset would cause overflow. ! 116: */ ! 117: #define VACANT ((of_t)0) ! 118: #define BIAS(o) ((o)+1) /* make any valid of_t non-VACANT */ ! 119: #define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */ ! 120: ! 121: /* ! 122: * In a Unix implementation, or indeed any in which an of_t is a byte ! 123: * count, there are a bunch of high bits free in an of_t. There is a ! 124: * use for them. Checking a possible hit by looking it up in the base ! 125: * file is relatively expensive, and the cost can be dramatically reduced ! 126: * by using some of those high bits to tag the value with a few more bits ! 127: * of the key's hash. This detects most false hits without the overhead of ! 128: * seek+read+strcmp. We use the top bit to indicate whether the value is ! 129: * tagged or not, and don't tag a value which is using the tag bits itself. ! 130: * We're in trouble if the of_t representation wants to use the top bit. ! 131: * The actual bitmasks and offset come from the configuration stuff, ! 132: * which permits fiddling with them as necessary, and also suppressing ! 133: * them completely (by defining the masks to 0). We build pre-shifted ! 134: * versions of the masks for efficiency. ! 135: */ ! 136: static of_t tagbits; /* pre-shifted tag mask */ ! 137: static of_t taghere; /* pre-shifted tag-enable bit */ ! 138: static of_t tagboth; /* tagbits|taghere */ ! 139: #define HASTAG(o) ((o)&taghere) ! 140: #define TAG(o) ((o)&tagbits) ! 141: #define NOTAG(o) ((o)&~tagboth) ! 142: #define CANTAG(o) (((o)&tagboth) == 0) ! 143: #define MKTAG(v) (((v)<<conf.tagshift)&tagbits) ! 144: ! 145: /* ! 146: * A new, from-scratch database, not built as a rebuild of an old one, ! 147: * needs to know table size, casemap algorithm, and tagging. Normally ! 148: * the user supplies this info, but there have to be defaults. ! 149: */ ! 150: #ifndef DEFSIZE ! 151: #define DEFSIZE 120011 /* 300007 might be better */ ! 152: #endif ! 153: #ifdef OLDBNEWS ! 154: #define DEFCASE '0' /* B2.10 -- no mapping */ ! 155: #define NOBUFFER /* B News locking is defective */ ! 156: #endif ! 157: #ifdef BNEWS ! 158: #define DEFCASE '=' /* B2.11 -- all mapped */ ! 159: #define NOBUFFER /* B News locking is defective */ ! 160: #endif ! 161: #ifndef DEFCASE /* C News compatibility is the default */ ! 162: #define DEFCASE 'C' /* C News -- RFC822 mapping */ ! 163: #endif ! 164: #ifndef NOTAGS ! 165: #define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */ ! 166: #define TAGMASK 0x7f ! 167: #define TAGSHIFT 24 ! 168: #else ! 169: #define TAGENB 0 /* no tags */ ! 170: #define TAGMASK 0 ! 171: #define TAGSHIFT 0 ! 172: #endif ! 173: ! 174: /* ! 175: * We read configuration info from the .dir file into this structure, ! 176: * so we can avoid wired-in assumptions for an existing database. ! 177: * ! 178: * Among the info is a record of recent peak usages, so that a new table ! 179: * size can be chosen intelligently when rebuilding. 10 is a good ! 180: * number of usages to keep, since news displays marked fluctuations ! 181: * in volume on a 7-day cycle. ! 182: */ ! 183: struct dbzconfig { ! 184: int olddbz; /* .dir file empty but .pag not? */ ! 185: of_t tsize; /* table size */ ! 186: # ifndef NMEMORY ! 187: # define NMEMORY 10 /* # days of use info to remember */ ! 188: # endif ! 189: # define NUSEDS (1+NMEMORY) ! 190: of_t used[NUSEDS]; /* entries used today, yesterday, ... */ ! 191: int valuesize; /* size of table values, == SOF */ ! 192: int bytemap[SOF]; /* byte-order map */ ! 193: char casemap; /* case-mapping algorithm (see cipoint()) */ ! 194: char fieldsep; /* field separator in base file, if any */ ! 195: of_t tagenb; /* unshifted tag-enable bit */ ! 196: of_t tagmask; /* unshifted tag mask */ ! 197: int tagshift; /* shift count for tagmask and tagenb */ ! 198: }; ! 199: static struct dbzconfig conf; ! 200: static int getconf(); ! 201: static long getno(); ! 202: static int putconf(); ! 203: static void mybytemap(); ! 204: static of_t bytemap(); ! 205: ! 206: /* ! 207: * For a program that makes many, many references to the database, it ! 208: * is a large performance win to keep the table in core, if it will fit. ! 209: * Note that this does hurt robustness in the event of crashes, and ! 210: * dbmclose() *must* be called to flush the in-core database to disk. ! 211: * The code is prepared to deal with the possibility that there isn't ! 212: * enough memory. There *is* an assumption that a size_t is big enough ! 213: * to hold the size (in bytes) of one table, so dbminit() tries to figure ! 214: * out whether this is possible first. ! 215: * ! 216: * The preferred way to ask for an in-core table is to do dbzincore(1) ! 217: * before dbminit(). The default is not to do it, although -DINCORE ! 218: * overrides this for backward compatibility with old dbz. ! 219: * ! 220: * We keep only the first table in core. This greatly simplifies the ! 221: * code, and bounds memory demand. Furthermore, doing this is a large ! 222: * performance win even in the event of massive overflow. ! 223: */ ! 224: #ifdef INCORE ! 225: static int incore = 1; ! 226: #else ! 227: static int incore = 0; ! 228: #endif ! 229: ! 230: /* ! 231: * Stdio buffer for .pag reads. Buffering more than about 16 does not help ! 232: * significantly at the densities we try to maintain, and the much larger ! 233: * buffers that most stdios default to are much more expensive to fill. ! 234: * With small buffers, stdio is performance-competitive with raw read(), ! 235: * and it's much more portable. ! 236: */ ! 237: #ifndef NPAGBUF ! 238: #define NPAGBUF 16 ! 239: #endif ! 240: #ifndef NOBUFFER ! 241: #ifdef _IOFBF ! 242: static of_t pagbuf[NPAGBUF]; /* only needed if !NOBUFFER && _IOFBF */ ! 243: #endif ! 244: #endif ! 245: ! 246: /* ! 247: * Stdio buffer for base-file reads. Message-IDs (all news ever needs to ! 248: * read) are essentially never longer than 64 bytes, and the typical stdio ! 249: * buffer is so much larger that it is much more expensive to fill. ! 250: */ ! 251: #ifndef SHISTBUF ! 252: #define SHISTBUF 64 ! 253: #endif ! 254: #ifdef _IOFBF ! 255: static char basebuf[SHISTBUF]; /* only needed if _IOFBF exists */ ! 256: #endif ! 257: ! 258: /* ! 259: * Data structure for recording info about searches. ! 260: */ ! 261: struct searcher { ! 262: of_t place; /* current location in file */ ! 263: int tabno; /* which table we're in */ ! 264: int run; /* how long we'll stay in this table */ ! 265: # ifndef MAXRUN ! 266: # define MAXRUN 100 ! 267: # endif ! 268: long hash; /* the key's hash code (for optimization) */ ! 269: of_t tag; /* tag we are looking for */ ! 270: int seen; /* have we examined current location? */ ! 271: int aborted; /* has i/o error aborted search? */ ! 272: }; ! 273: static void start(); ! 274: #define FRESH ((struct searcher *)NULL) ! 275: static of_t search(); ! 276: #define NOTFOUND ((of_t)-1) ! 277: static int okayvalue(); ! 278: static int set(); ! 279: ! 280: /* ! 281: * Arguably the searcher struct for a given routine ought to be local to ! 282: * it, but a fetch() is very often immediately followed by a store(), and ! 283: * in some circumstances it is a useful performance win to remember where ! 284: * the fetch() completed. So we use a global struct and remember whether ! 285: * it is current. ! 286: */ ! 287: static struct searcher srch; ! 288: static struct searcher *prevp; /* &srch or FRESH */ ! 289: ! 290: /* byte-ordering stuff */ ! 291: static int mybmap[SOF]; /* my byte order (see mybytemap()) */ ! 292: static int bytesame; /* is database order same as mine? */ ! 293: #define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap)) ! 294: #define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap)) ! 295: ! 296: /* ! 297: * The double parentheses needed to make this work are ugly, but the ! 298: * alternative (under most compilers) is to pack around 2K of unused ! 299: * strings -- there's just no way to get rid of them. ! 300: */ ! 301: static int debug; /* controlled by dbzdebug() */ ! 302: #ifdef DBZDEBUG ! 303: #define DEBUG(args) if (debug) { (void) printf args ; } ! 304: #else ! 305: #define DEBUG(args) ; ! 306: #endif ! 307: ! 308: /* externals used */ ! 309: extern char *malloc(); ! 310: extern char *calloc(); ! 311: extern void free(); /* ANSI C; some old implementations say int */ ! 312: extern int atoi(); ! 313: extern long atol(); ! 314: ! 315: /* misc. forwards */ ! 316: static long hash(); ! 317: static void crcinit(); ! 318: static char *cipoint(); ! 319: static char *mapcase(); ! 320: static int isprime(); ! 321: static FILE *latebase(); ! 322: ! 323: /* file-naming stuff */ ! 324: static char dir[] = ".dir"; ! 325: static char pag[] = ".pag"; ! 326: static char *enstring(); ! 327: ! 328: /* central data structures */ ! 329: static FILE *basef; /* descriptor for base file */ ! 330: static char *basefname; /* name for not-yet-opened base file */ ! 331: static FILE *dirf; /* descriptor for .dir file */ ! 332: static int dirronly; /* dirf open read-only? */ ! 333: static FILE *pagf = NULL; /* descriptor for .pag file */ ! 334: static of_t pagpos; /* posn in pagf; only search may set != -1 */ ! 335: static int pagronly; /* pagf open read-only? */ ! 336: static of_t *corepag; /* incore version of .pag file, if any */ ! 337: static FILE *bufpagf; /* well-buffered pagf, for incore rewrite */ ! 338: static of_t *getcore(); ! 339: static int putcore(); ! 340: static int written; /* has a store() been done? */ ! 341: ! 342: /* ! 343: - dbzfresh - set up a new database, no historical info ! 344: */ ! 345: int /* 0 success, -1 failure */ ! 346: dbzfresh(name, size, fs, cmap, tagmask) ! 347: char *name; /* base name; .dir and .pag must exist */ ! 348: long size; /* table size (0 means default) */ ! 349: int fs; /* field-separator character in base file */ ! 350: int cmap; /* case-map algorithm (0 means default) */ ! 351: of_t tagmask; /* 0 default, 1 no tags */ ! 352: { ! 353: register char *fn; ! 354: struct dbzconfig c; ! 355: register of_t m; ! 356: register FILE *f; ! 357: ! 358: if (pagf != NULL) { ! 359: DEBUG(("dbzfresh: database already open\n")); ! 360: return(-1); ! 361: } ! 362: if (size != 0 && size < 2) { ! 363: DEBUG(("dbzfresh: preposterous size (%ld)\n", size)); ! 364: return(-1); ! 365: } ! 366: ! 367: /* get default configuration */ ! 368: if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0) ! 369: return(-1); /* "can't happen" */ ! 370: ! 371: /* and mess with it as specified */ ! 372: if (size != 0) ! 373: c.tsize = size; ! 374: c.fieldsep = fs; ! 375: switch (cmap) { ! 376: case 0: ! 377: case '0': ! 378: case 'B': /* 2.10 compat */ ! 379: c.casemap = '0'; /* '\0' nicer, but '0' printable! */ ! 380: break; ! 381: case '=': ! 382: case 'b': /* 2.11 compat */ ! 383: c.casemap = '='; ! 384: break; ! 385: case 'C': ! 386: c.casemap = 'C'; ! 387: break; ! 388: case '?': ! 389: c.casemap = DEFCASE; ! 390: break; ! 391: default: ! 392: DEBUG(("dbzfresh case map `%c' unknown\n", cmap)); ! 393: return(-1); ! 394: break; ! 395: } ! 396: switch (tagmask) { ! 397: case 0: /* default */ ! 398: break; ! 399: case 1: /* no tags */ ! 400: c.tagshift = 0; ! 401: c.tagmask = 0; ! 402: c.tagenb = 0; ! 403: break; ! 404: default: ! 405: m = tagmask; ! 406: c.tagshift = 0; ! 407: while (!(m&01)) { ! 408: m >>= 1; ! 409: c.tagshift++; ! 410: } ! 411: c.tagmask = m; ! 412: c.tagenb = (m << 1) & ~m; ! 413: break; ! 414: } ! 415: ! 416: /* write it out */ ! 417: fn = enstring(name, dir); ! 418: if (fn == NULL) ! 419: return(-1); ! 420: f = fopen(fn, "w"); ! 421: free(fn); ! 422: if (f == NULL) { ! 423: DEBUG(("dbzfresh: unable to write config\n")); ! 424: return(-1); ! 425: } ! 426: if (putconf(f, &c) < 0) { ! 427: (void) fclose(f); ! 428: return(-1); ! 429: } ! 430: if (fclose(f) == EOF) { ! 431: DEBUG(("dbzfresh: fclose failure\n")); ! 432: return(-1); ! 433: } ! 434: ! 435: /* create/truncate .pag */ ! 436: fn = enstring(name, pag); ! 437: if (fn == NULL) ! 438: return(-1); ! 439: f = fopen(fn, "w"); ! 440: free(fn); ! 441: if (f == NULL) { ! 442: DEBUG(("dbzfresh: unable to create/truncate .pag file\n")); ! 443: return(-1); ! 444: } else ! 445: (void) fclose(f); ! 446: ! 447: /* and punt to dbminit for the hard work */ ! 448: return(dbminit(name)); ! 449: } ! 450: ! 451: /* ! 452: - dbzsize - what's a good table size to hold this many entries? ! 453: */ ! 454: long ! 455: dbzsize(contents) ! 456: long contents; /* 0 means what's the default */ ! 457: { ! 458: register long n; ! 459: ! 460: if (contents <= 0) { /* foulup or default inquiry */ ! 461: DEBUG(("dbzsize: preposterous input (%ld)\n", contents)); ! 462: return(DEFSIZE); ! 463: } ! 464: n = (contents/2)*3; /* try to keep table at most 2/3 full */ ! 465: if (!(n&01)) /* make it odd */ ! 466: n++; ! 467: DEBUG(("dbzsize: tentative size %ld\n", n)); ! 468: while (!isprime(n)) /* and look for a prime */ ! 469: n += 2; ! 470: DEBUG(("dbzsize: final size %ld\n", n)); ! 471: ! 472: return(n); ! 473: } ! 474: ! 475: /* ! 476: - isprime - is a number prime? ! 477: * ! 478: * This is not a terribly efficient approach. ! 479: */ ! 480: static int /* predicate */ ! 481: isprime(x) ! 482: register long x; ! 483: { ! 484: static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 }; ! 485: register int *ip; ! 486: register long div; ! 487: register long stop; ! 488: ! 489: /* hit the first few primes quickly to eliminate easy ones */ ! 490: /* this incidentally prevents ridiculously small tables */ ! 491: for (ip = quick; (div = *ip) != 0; ip++) ! 492: if (x%div == 0) { ! 493: DEBUG(("isprime: quick result on %ld\n", (long)x)); ! 494: return(0); ! 495: } ! 496: ! 497: /* approximate square root of x */ ! 498: for (stop = x; x/stop < stop; stop >>= 1) ! 499: continue; ! 500: stop <<= 1; ! 501: ! 502: /* try odd numbers up to stop */ ! 503: for (div = *--ip; div < stop; div += 2) ! 504: if (x%div == 0) ! 505: return(0); ! 506: ! 507: return(1); ! 508: } ! 509: ! 510: /* ! 511: - dbzagain - set up a new database to be a rebuild of an old one ! 512: */ ! 513: int /* 0 success, -1 failure */ ! 514: dbzagain(name, oldname) ! 515: char *name; /* base name; .dir and .pag must exist */ ! 516: char *oldname; /* base name; all must exist */ ! 517: { ! 518: register char *fn; ! 519: struct dbzconfig c; ! 520: register int i; ! 521: register long top; ! 522: register FILE *f; ! 523: register int newtable; ! 524: register of_t newsize; ! 525: ! 526: if (pagf != NULL) { ! 527: DEBUG(("dbzagain: database already open\n")); ! 528: return(-1); ! 529: } ! 530: ! 531: /* pick up the old configuration */ ! 532: fn = enstring(oldname, dir); ! 533: if (fn == NULL) ! 534: return(-1); ! 535: f = fopen(fn, "r"); ! 536: free(fn); ! 537: if (f == NULL) { ! 538: DEBUG(("dbzagain: cannot open old .dir file\n")); ! 539: return(-1); ! 540: } ! 541: i = getconf(f, (FILE *)NULL, &c); ! 542: (void) fclose(f); ! 543: if (i < 0) { ! 544: DEBUG(("dbzagain: getconf failed\n")); ! 545: return(-1); ! 546: } ! 547: ! 548: /* tinker with it */ ! 549: top = 0; ! 550: newtable = 0; ! 551: for (i = 0; i < NUSEDS; i++) { ! 552: if (top < c.used[i]) ! 553: top = c.used[i]; ! 554: if (c.used[i] == 0) ! 555: newtable = 1; /* hasn't got full usage history yet */ ! 556: } ! 557: if (top == 0) { ! 558: DEBUG(("dbzagain: old table has no contents!\n")); ! 559: newtable = 1; ! 560: } ! 561: for (i = NUSEDS-1; i > 0; i--) ! 562: c.used[i] = c.used[i-1]; ! 563: c.used[0] = 0; ! 564: newsize = dbzsize(top); ! 565: if (!newtable || newsize > c.tsize) /* don't shrink new table */ ! 566: c.tsize = newsize; ! 567: ! 568: /* write it out */ ! 569: fn = enstring(name, dir); ! 570: if (fn == NULL) ! 571: return(-1); ! 572: f = fopen(fn, "w"); ! 573: free(fn); ! 574: if (f == NULL) { ! 575: DEBUG(("dbzagain: unable to write new .dir\n")); ! 576: return(-1); ! 577: } ! 578: i = putconf(f, &c); ! 579: (void) fclose(f); ! 580: if (i < 0) { ! 581: DEBUG(("dbzagain: putconf failed\n")); ! 582: return(-1); ! 583: } ! 584: ! 585: /* create/truncate .pag */ ! 586: fn = enstring(name, pag); ! 587: if (fn == NULL) ! 588: return(-1); ! 589: f = fopen(fn, "w"); ! 590: free(fn); ! 591: if (f == NULL) { ! 592: DEBUG(("dbzagain: unable to create/truncate .pag file\n")); ! 593: return(-1); ! 594: } else ! 595: (void) fclose(f); ! 596: ! 597: /* and let dbminit do the work */ ! 598: return(dbminit(name)); ! 599: } ! 600: ! 601: /* ! 602: - dbminit - open a database, creating it (using defaults) if necessary ! 603: * ! 604: * We try to leave errno set plausibly, to the extent that underlying ! 605: * functions permit this, since many people consult it if dbminit() fails. ! 606: */ ! 607: int /* 0 success, -1 failure */ ! 608: dbminit(name) ! 609: char *name; ! 610: { ! 611: register int i; ! 612: register size_t s; ! 613: register char *dirfname; ! 614: register char *pagfname; ! 615: ! 616: if (pagf != NULL) { ! 617: DEBUG(("dbminit: dbminit already called once\n")); ! 618: errno = 0; ! 619: return(-1); ! 620: } ! 621: ! 622: /* open the .dir file */ ! 623: dirfname = enstring(name, dir); ! 624: if (dirfname == NULL) ! 625: return(-1); ! 626: dirf = fopen(dirfname, "r+"); ! 627: if (dirf == NULL) { ! 628: dirf = fopen(dirfname, "r"); ! 629: dirronly = 1; ! 630: } else ! 631: dirronly = 0; ! 632: free(dirfname); ! 633: if (dirf == NULL) { ! 634: DEBUG(("dbminit: can't open .dir file\n")); ! 635: return(-1); ! 636: } ! 637: ! 638: /* open the .pag file */ ! 639: pagfname = enstring(name, pag); ! 640: if (pagfname == NULL) { ! 641: (void) fclose(dirf); ! 642: return(-1); ! 643: } ! 644: pagf = fopen(pagfname, "r+b"); ! 645: if (pagf == NULL) { ! 646: pagf = fopen(pagfname, "rb"); ! 647: if (pagf == NULL) { ! 648: DEBUG(("dbminit: .pag open failed\n")); ! 649: (void) fclose(dirf); ! 650: free(pagfname); ! 651: return(-1); ! 652: } ! 653: pagronly = 1; ! 654: } else if (dirronly) ! 655: pagronly = 1; ! 656: else ! 657: pagronly = 0; ! 658: #ifdef NOBUFFER ! 659: /* ! 660: * B News does not do adequate locking on its database accesses. ! 661: * Why it doesn't get into trouble using dbm is a mystery. In any ! 662: * case, doing unbuffered i/o does not cure the problem, but does ! 663: * enormously reduce its incidence. ! 664: */ ! 665: (void) setbuf(pagf, (char *)NULL); ! 666: #else ! 667: #ifdef _IOFBF ! 668: (void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf)); ! 669: #endif ! 670: #endif ! 671: pagpos = -1; ! 672: /* don't free pagfname, need it below */ ! 673: ! 674: /* open the base file */ ! 675: basef = fopen(name, "r"); ! 676: if (basef == NULL) { ! 677: DEBUG(("dbminit: basefile open failed\n")); ! 678: basefname = enstring(name, ""); ! 679: if (basefname == NULL) { ! 680: (void) fclose(pagf); ! 681: (void) fclose(dirf); ! 682: free(pagfname); ! 683: pagf = NULL; ! 684: return(-1); ! 685: } ! 686: } else ! 687: basefname = NULL; ! 688: #ifdef _IOFBF ! 689: if (basef != NULL) ! 690: (void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf)); ! 691: #endif ! 692: ! 693: /* pick up configuration */ ! 694: if (getconf(dirf, pagf, &conf) < 0) { ! 695: DEBUG(("dbminit: getconf failure\n")); ! 696: (void) fclose(basef); ! 697: (void) fclose(pagf); ! 698: (void) fclose(dirf); ! 699: free(pagfname); ! 700: pagf = NULL; ! 701: errno = EDOM; /* kind of a kludge, but very portable */ ! 702: return(-1); ! 703: } ! 704: tagbits = conf.tagmask << conf.tagshift; ! 705: taghere = conf.tagenb << conf.tagshift; ! 706: tagboth = tagbits | taghere; ! 707: mybytemap(mybmap); ! 708: bytesame = 1; ! 709: for (i = 0; i < SOF; i++) ! 710: if (mybmap[i] != conf.bytemap[i]) ! 711: bytesame = 0; ! 712: ! 713: /* get first table into core, if it looks desirable and feasible */ ! 714: s = (size_t)conf.tsize * SOF; ! 715: if (incore && (of_t)(s/SOF) == conf.tsize) { ! 716: bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b"); ! 717: if (bufpagf != NULL) ! 718: corepag = getcore(bufpagf); ! 719: } else { ! 720: bufpagf = NULL; ! 721: corepag = NULL; ! 722: } ! 723: free(pagfname); ! 724: ! 725: /* misc. setup */ ! 726: crcinit(); ! 727: written = 0; ! 728: prevp = FRESH; ! 729: DEBUG(("dbminit: succeeded\n")); ! 730: return(0); ! 731: } ! 732: ! 733: /* ! 734: - enstring - concatenate two strings into a malloced area ! 735: */ ! 736: static char * /* NULL if malloc fails */ ! 737: enstring(s1, s2) ! 738: char *s1; ! 739: char *s2; ! 740: { ! 741: register char *p; ! 742: ! 743: p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1); ! 744: if (p != NULL) { ! 745: (void) strcpy(p, s1); ! 746: (void) strcat(p, s2); ! 747: } else { ! 748: DEBUG(("enstring(%s, %s) out of memory\n", s1, s2)); ! 749: } ! 750: return(p); ! 751: } ! 752: ! 753: /* ! 754: - dbmclose - close a database ! 755: */ ! 756: int ! 757: dbmclose() ! 758: { ! 759: register int ret = 0; ! 760: ! 761: if (pagf == NULL) { ! 762: DEBUG(("dbmclose: not opened!\n")); ! 763: return(-1); ! 764: } ! 765: ! 766: if (fclose(pagf) == EOF) { ! 767: DEBUG(("dbmclose: fclose(pagf) failed\n")); ! 768: ret = -1; ! 769: } ! 770: pagf = basef; /* ensure valid pointer; dbzsync checks it */ ! 771: if (dbzsync() < 0) ! 772: ret = -1; ! 773: if (bufpagf != NULL && fclose(bufpagf) == EOF) { ! 774: DEBUG(("dbmclose: fclose(bufpagf) failed\n")); ! 775: ret = -1; ! 776: } ! 777: if (corepag != NULL) ! 778: free((char *)corepag); ! 779: corepag = NULL; ! 780: if (fclose(basef) == EOF) { ! 781: DEBUG(("dbmclose: fclose(basef) failed\n")); ! 782: ret = -1; ! 783: } ! 784: if (basefname != NULL) ! 785: free(basefname); ! 786: basef = NULL; ! 787: pagf = NULL; ! 788: if (fclose(dirf) == EOF) { ! 789: DEBUG(("dbmclose: fclose(dirf) failed\n")); ! 790: ret = -1; ! 791: } ! 792: ! 793: DEBUG(("dbmclose: %s\n", (ret == 0) ? "succeeded" : "failed")); ! 794: return(ret); ! 795: } ! 796: ! 797: /* ! 798: - dbzsync - push all in-core data out to disk ! 799: */ ! 800: int ! 801: dbzsync() ! 802: { ! 803: register int ret = 0; ! 804: ! 805: if (pagf == NULL) { ! 806: DEBUG(("dbzsync: not opened!\n")); ! 807: return(-1); ! 808: } ! 809: if (!written) ! 810: return(0); ! 811: ! 812: if (corepag != NULL) { ! 813: if (putcore(corepag, bufpagf) < 0) { ! 814: DEBUG(("dbzsync: putcore failed\n")); ! 815: ret = -1; ! 816: } ! 817: } ! 818: if (!conf.olddbz) ! 819: if (putconf(dirf, &conf) < 0) ! 820: ret = -1; ! 821: ! 822: DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed")); ! 823: return(ret); ! 824: } ! 825: ! 826: /* ! 827: - dbzcancel - cancel writing of in-core data ! 828: * Mostly for use from child processes. ! 829: * Note that we don't need to futz around with stdio buffers, because we ! 830: * always fflush them immediately anyway and so they never have stale data. ! 831: */ ! 832: int ! 833: dbzcancel() ! 834: { ! 835: if (pagf == NULL) { ! 836: DEBUG(("dbzcancel: not opened!\n")); ! 837: return(-1); ! 838: } ! 839: ! 840: written = 0; ! 841: return(0); ! 842: } ! 843: ! 844: /* ! 845: - dbzfetch - fetch() with case mapping built in ! 846: */ ! 847: datum ! 848: dbzfetch(key) ! 849: datum key; ! 850: { ! 851: char buffer[DBZMAXKEY + 1]; ! 852: datum mappedkey; ! 853: register size_t keysize; ! 854: ! 855: DEBUG(("dbzfetch: (%s)\n", key.dptr)); ! 856: ! 857: /* Key is supposed to be less than DBZMAXKEY */ ! 858: keysize = key.dsize; ! 859: if (keysize >= DBZMAXKEY) { ! 860: keysize = DBZMAXKEY; ! 861: DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY)); ! 862: } ! 863: ! 864: mappedkey.dptr = mapcase(buffer, key.dptr, keysize); ! 865: buffer[keysize] = '\0'; /* just a debug aid */ ! 866: mappedkey.dsize = keysize; ! 867: ! 868: return(fetch(mappedkey)); ! 869: } ! 870: ! 871: /* ! 872: - fetch - get an entry from the database ! 873: * ! 874: * Disgusting fine point, in the name of backward compatibility: if the ! 875: * last character of "key" is a NUL, that character is (effectively) not ! 876: * part of the comparison against the stored keys. ! 877: */ ! 878: datum /* dptr NULL, dsize 0 means failure */ ! 879: fetch(key) ! 880: datum key; ! 881: { ! 882: char buffer[DBZMAXKEY + 1]; ! 883: static of_t key_ptr; /* return value points here */ ! 884: datum output; ! 885: register size_t keysize; ! 886: register size_t cmplen; ! 887: register char *sepp; ! 888: ! 889: DEBUG(("fetch: (%s)\n", key.dptr)); ! 890: output.dptr = NULL; ! 891: output.dsize = 0; ! 892: prevp = FRESH; ! 893: ! 894: /* Key is supposed to be less than DBZMAXKEY */ ! 895: keysize = key.dsize; ! 896: if (keysize >= DBZMAXKEY) { ! 897: keysize = DBZMAXKEY; ! 898: DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY)); ! 899: } ! 900: ! 901: if (pagf == NULL) { ! 902: DEBUG(("fetch: database not open!\n")); ! 903: return(output); ! 904: } else if (basef == NULL) { /* basef didn't exist yet */ ! 905: basef = latebase(); ! 906: if (basef == NULL) ! 907: return(output); ! 908: } ! 909: ! 910: cmplen = keysize; ! 911: sepp = &conf.fieldsep; ! 912: if (key.dptr[keysize-1] == '\0') { ! 913: cmplen--; ! 914: sepp = &buffer[keysize-1]; ! 915: } ! 916: start(&srch, &key, FRESH); ! 917: while ((key_ptr = search(&srch)) != NOTFOUND) { ! 918: DEBUG(("got 0x%lx\n", key_ptr)); ! 919: ! 920: /* fetch the key */ ! 921: if (fseek(basef, key_ptr, SEEK_SET) != 0) { ! 922: DEBUG(("fetch: seek failed\n")); ! 923: return(output); ! 924: } ! 925: if (fread(buffer, 1, keysize, basef) != keysize) { ! 926: DEBUG(("fetch: read failed\n")); ! 927: return(output); ! 928: } ! 929: ! 930: /* try it */ ! 931: buffer[keysize] = '\0'; /* terminated for DEBUG */ ! 932: (void) mapcase(buffer, buffer, keysize); ! 933: DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n", ! 934: buffer, key.dptr, keysize)); ! 935: if (memcmp(key.dptr, buffer, cmplen) == 0 && ! 936: (*sepp == conf.fieldsep || *sepp == '\0')) { ! 937: /* we found it */ ! 938: output.dptr = (char *)&key_ptr; ! 939: output.dsize = SOF; ! 940: DEBUG(("fetch: successful\n")); ! 941: return(output); ! 942: } ! 943: } ! 944: ! 945: /* we didn't find it */ ! 946: DEBUG(("fetch: failed\n")); ! 947: prevp = &srch; /* remember where we stopped */ ! 948: return(output); ! 949: } ! 950: ! 951: /* ! 952: - latebase - try to open a base file that wasn't there at the start ! 953: */ ! 954: static FILE * ! 955: latebase() ! 956: { ! 957: register FILE *it; ! 958: ! 959: if (basefname == NULL) { ! 960: DEBUG(("latebase: name foulup\n")); ! 961: return(NULL); ! 962: } ! 963: it = fopen(basefname, "r"); ! 964: if (it == NULL) { ! 965: DEBUG(("latebase: still can't open base\n")); ! 966: } else { ! 967: DEBUG(("latebase: late open succeeded\n")); ! 968: free(basefname); ! 969: basefname = NULL; ! 970: #ifdef _IOFBF ! 971: (void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf)); ! 972: #endif ! 973: } ! 974: return(it); ! 975: } ! 976: ! 977: /* ! 978: - dbzstore - store() with case mapping built in ! 979: */ ! 980: int ! 981: dbzstore(key, data) ! 982: datum key; ! 983: datum data; ! 984: { ! 985: char buffer[DBZMAXKEY + 1]; ! 986: datum mappedkey; ! 987: register size_t keysize; ! 988: ! 989: DEBUG(("dbzstore: (%s)\n", key.dptr)); ! 990: ! 991: /* Key is supposed to be less than DBZMAXKEY */ ! 992: keysize = key.dsize; ! 993: if (keysize >= DBZMAXKEY) { ! 994: DEBUG(("dbzstore: key size too big (%d)\n", key.dsize)); ! 995: return(-1); ! 996: } ! 997: ! 998: mappedkey.dptr = mapcase(buffer, key.dptr, keysize); ! 999: buffer[keysize] = '\0'; /* just a debug aid */ ! 1000: mappedkey.dsize = keysize; ! 1001: ! 1002: return(store(mappedkey, data)); ! 1003: } ! 1004: ! 1005: /* ! 1006: - store - add an entry to the database ! 1007: */ ! 1008: int /* 0 success, -1 failure */ ! 1009: store(key, data) ! 1010: datum key; ! 1011: datum data; ! 1012: { ! 1013: of_t value; ! 1014: ! 1015: if (pagf == NULL) { ! 1016: DEBUG(("store: database not open!\n")); ! 1017: return(-1); ! 1018: } else if (basef == NULL) { /* basef didn't exist yet */ ! 1019: basef = latebase(); ! 1020: if (basef == NULL) ! 1021: return(-1); ! 1022: } ! 1023: if (pagronly) { ! 1024: DEBUG(("store: database open read-only\n")); ! 1025: return(-1); ! 1026: } ! 1027: if (data.dsize != SOF) { ! 1028: DEBUG(("store: value size wrong (%d)\n", data.dsize)); ! 1029: return(-1); ! 1030: } ! 1031: if (key.dsize >= DBZMAXKEY) { ! 1032: DEBUG(("store: key size too big (%d)\n", key.dsize)); ! 1033: return(-1); ! 1034: } ! 1035: ! 1036: /* copy the value in to ensure alignment */ ! 1037: (void) memcpy((char *)&value, data.dptr, SOF); ! 1038: DEBUG(("store: (%s, %ld)\n", key.dptr, (long)value)); ! 1039: if (!okayvalue(value)) { ! 1040: DEBUG(("store: reserved bit or overflow in 0x%lx\n", value)); ! 1041: return(-1); ! 1042: } ! 1043: ! 1044: /* find the place, exploiting previous search if possible */ ! 1045: start(&srch, &key, prevp); ! 1046: while (search(&srch) != NOTFOUND) ! 1047: continue; ! 1048: ! 1049: prevp = FRESH; ! 1050: conf.used[0]++; ! 1051: DEBUG(("store: used count %ld\n", conf.used[0])); ! 1052: written = 1; ! 1053: return(set(&srch, value)); ! 1054: } ! 1055: ! 1056: /* ! 1057: - dbzincore - control attempts to keep .pag file in core ! 1058: */ ! 1059: int /* old setting */ ! 1060: dbzincore(value) ! 1061: int value; ! 1062: { ! 1063: register int old = incore; ! 1064: ! 1065: incore = value; ! 1066: return(old); ! 1067: } ! 1068: ! 1069: /* ! 1070: - getconf - get configuration from .dir file ! 1071: */ ! 1072: static int /* 0 success, -1 failure */ ! 1073: getconf(df, pf, cp) ! 1074: register FILE *df; /* NULL means just give me the default */ ! 1075: register FILE *pf; /* NULL means don't care about .pag */ ! 1076: register struct dbzconfig *cp; ! 1077: { ! 1078: register int c; ! 1079: register int i; ! 1080: int err = 0; ! 1081: ! 1082: c = (df != NULL) ? getc(df) : EOF; ! 1083: if (c == EOF) { /* empty file, no configuration known */ ! 1084: cp->olddbz = 0; ! 1085: if (df != NULL && pf != NULL && getc(pf) != EOF) ! 1086: cp->olddbz = 1; ! 1087: cp->tsize = DEFSIZE; ! 1088: cp->fieldsep = '\t'; ! 1089: for (i = 0; i < NUSEDS; i++) ! 1090: cp->used[i] = 0; ! 1091: cp->valuesize = SOF; ! 1092: mybytemap(cp->bytemap); ! 1093: cp->casemap = DEFCASE; ! 1094: cp->tagenb = TAGENB; ! 1095: cp->tagmask = TAGMASK; ! 1096: cp->tagshift = TAGSHIFT; ! 1097: DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n", ! 1098: cp->tsize, cp->casemap, cp->tagenb, ! 1099: cp->tagmask, cp->tagshift)); ! 1100: return(0); ! 1101: } ! 1102: (void) ungetc(c, df); ! 1103: ! 1104: /* first line, the vital stuff */ ! 1105: if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z') ! 1106: err = -1; ! 1107: if (getno(df, &err) != dbzversion) ! 1108: err = -1; ! 1109: cp->tsize = getno(df, &err); ! 1110: cp->fieldsep = getno(df, &err); ! 1111: while ((c = getc(df)) == ' ') ! 1112: continue; ! 1113: cp->casemap = c; ! 1114: cp->tagenb = getno(df, &err); ! 1115: cp->tagmask = getno(df, &err); ! 1116: cp->tagshift = getno(df, &err); ! 1117: cp->valuesize = getno(df, &err); ! 1118: if (cp->valuesize != SOF) { ! 1119: DEBUG(("getconf: wrong of_t size (%d)\n", cp->valuesize)); ! 1120: err = -1; ! 1121: cp->valuesize = SOF; /* to protect the loops below */ ! 1122: } ! 1123: for (i = 0; i < cp->valuesize; i++) ! 1124: cp->bytemap[i] = getno(df, &err); ! 1125: if (getc(df) != '\n') ! 1126: err = -1; ! 1127: DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize, ! 1128: cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask, ! 1129: cp->tagshift)); ! 1130: DEBUG(("bytemap (%d)", cp->valuesize)); ! 1131: for (i = 0; i < cp->valuesize; i++) { ! 1132: DEBUG((" %d", cp->bytemap[i])); ! 1133: } ! 1134: DEBUG(("\n")); ! 1135: ! 1136: /* second line, the usages */ ! 1137: for (i = 0; i < NUSEDS; i++) ! 1138: cp->used[i] = getno(df, &err); ! 1139: if (getc(df) != '\n') ! 1140: err = -1; ! 1141: DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2])); ! 1142: ! 1143: if (err < 0) { ! 1144: DEBUG(("getconf error\n")); ! 1145: return(-1); ! 1146: } ! 1147: return(0); ! 1148: } ! 1149: ! 1150: /* ! 1151: - getno - get a long ! 1152: */ ! 1153: static long ! 1154: getno(f, ep) ! 1155: FILE *f; ! 1156: int *ep; ! 1157: { ! 1158: register char *p; ! 1159: # define MAXN 50 ! 1160: char getbuf[MAXN]; ! 1161: register int c; ! 1162: ! 1163: while ((c = getc(f)) == ' ') ! 1164: continue; ! 1165: if (c == EOF || c == '\n') { ! 1166: DEBUG(("getno: missing number\n")); ! 1167: *ep = -1; ! 1168: return(0); ! 1169: } ! 1170: p = getbuf; ! 1171: *p++ = c; ! 1172: while ((c = getc(f)) != EOF && c != '\n' && c != ' ') ! 1173: if (p < &getbuf[MAXN-1]) ! 1174: *p++ = c; ! 1175: if (c == EOF) { ! 1176: DEBUG(("getno: EOF\n")); ! 1177: *ep = -1; ! 1178: } else ! 1179: (void) ungetc(c, f); ! 1180: *p = '\0'; ! 1181: ! 1182: if (strspn(getbuf, "-1234567890") != strlen(getbuf)) { ! 1183: DEBUG(("getno: `%s' non-numeric\n", getbuf)); ! 1184: *ep = -1; ! 1185: } ! 1186: return(atol(getbuf)); ! 1187: } ! 1188: ! 1189: /* ! 1190: - putconf - write configuration to .dir file ! 1191: */ ! 1192: static int /* 0 success, -1 failure */ ! 1193: putconf(f, cp) ! 1194: register FILE *f; ! 1195: register struct dbzconfig *cp; ! 1196: { ! 1197: register int i; ! 1198: register int ret = 0; ! 1199: ! 1200: if (fseek(f, (of_t)0, SEEK_SET) != 0) { ! 1201: DEBUG(("fseek failure in putconf\n")); ! 1202: ret = -1; ! 1203: } ! 1204: fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion, cp->tsize, ! 1205: cp->fieldsep, cp->casemap, cp->tagenb, ! 1206: cp->tagmask, cp->tagshift, cp->valuesize); ! 1207: for (i = 0; i < cp->valuesize; i++) ! 1208: fprintf(f, " %d", cp->bytemap[i]); ! 1209: fprintf(f, "\n"); ! 1210: for (i = 0; i < NUSEDS; i++) ! 1211: fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? ' ' : '\n'); ! 1212: ! 1213: (void) fflush(f); ! 1214: if (ferror(f)) ! 1215: ret = -1; ! 1216: ! 1217: DEBUG(("putconf status %d\n", ret)); ! 1218: return(ret); ! 1219: } ! 1220: ! 1221: /* ! 1222: - getcore - try to set up an in-core copy of .pag file ! 1223: */ ! 1224: static of_t * /* pointer to copy, or NULL */ ! 1225: getcore(f) ! 1226: FILE *f; ! 1227: { ! 1228: register of_t *p; ! 1229: register size_t i; ! 1230: register size_t nread; ! 1231: register char *it; ! 1232: ! 1233: it = malloc((size_t)conf.tsize * SOF); ! 1234: if (it == NULL) { ! 1235: DEBUG(("getcore: malloc failed\n")); ! 1236: return(NULL); ! 1237: } ! 1238: ! 1239: nread = fread(it, SOF, (size_t)conf.tsize, f); ! 1240: if (ferror(f)) { ! 1241: DEBUG(("getcore: read failed\n")); ! 1242: free(it); ! 1243: return(NULL); ! 1244: } ! 1245: ! 1246: p = (of_t *)it + nread; ! 1247: i = (size_t)conf.tsize - nread; ! 1248: while (i-- > 0) ! 1249: *p++ = VACANT; ! 1250: return((of_t *)it); ! 1251: } ! 1252: ! 1253: /* ! 1254: - putcore - try to rewrite an in-core table ! 1255: */ ! 1256: static int /* 0 okay, -1 fail */ ! 1257: putcore(tab, f) ! 1258: of_t *tab; ! 1259: FILE *f; ! 1260: { ! 1261: if (fseek(f, (of_t)0, SEEK_SET) != 0) { ! 1262: DEBUG(("fseek failure in putcore\n")); ! 1263: return(-1); ! 1264: } ! 1265: (void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f); ! 1266: (void) fflush(f); ! 1267: return((ferror(f)) ? -1 : 0); ! 1268: } ! 1269: ! 1270: /* ! 1271: - start - set up to start or restart a search ! 1272: */ ! 1273: static void ! 1274: start(sp, kp, osp) ! 1275: register struct searcher *sp; ! 1276: register datum *kp; ! 1277: register struct searcher *osp; /* may be FRESH, i.e. NULL */ ! 1278: { ! 1279: register long h; ! 1280: ! 1281: h = hash(kp->dptr, kp->dsize); ! 1282: if (osp != FRESH && osp->hash == h) { ! 1283: if (sp != osp) ! 1284: *sp = *osp; ! 1285: DEBUG(("search restarted\n")); ! 1286: } else { ! 1287: sp->hash = h; ! 1288: sp->tag = MKTAG(h / conf.tsize); ! 1289: DEBUG(("tag 0x%lx\n", sp->tag)); ! 1290: sp->place = h % conf.tsize; ! 1291: sp->tabno = 0; ! 1292: sp->run = (conf.olddbz) ? conf.tsize : MAXRUN; ! 1293: sp->aborted = 0; ! 1294: } ! 1295: sp->seen = 0; ! 1296: } ! 1297: ! 1298: /* ! 1299: - search - conduct part of a search ! 1300: */ ! 1301: static of_t /* NOTFOUND if we hit VACANT or error */ ! 1302: search(sp) ! 1303: register struct searcher *sp; ! 1304: { ! 1305: register of_t dest; ! 1306: register of_t value; ! 1307: of_t val; /* buffer for value (can't fread register) */ ! 1308: register of_t place; ! 1309: ! 1310: if (sp->aborted) ! 1311: return(NOTFOUND); ! 1312: ! 1313: for (;;) { ! 1314: /* determine location to be examined */ ! 1315: place = sp->place; ! 1316: if (sp->seen) { ! 1317: /* go to next location */ ! 1318: if (--sp->run <= 0) { ! 1319: sp->tabno++; ! 1320: sp->run = MAXRUN; ! 1321: } ! 1322: place = (place+1)%conf.tsize + sp->tabno*conf.tsize; ! 1323: sp->place = place; ! 1324: } else ! 1325: sp->seen = 1; /* now looking at current location */ ! 1326: DEBUG(("search @ %ld\n", place)); ! 1327: ! 1328: /* get the tagged value */ ! 1329: if (corepag != NULL && place < conf.tsize) { ! 1330: DEBUG(("search: in core\n")); ! 1331: value = MAPIN(corepag[place]); ! 1332: } else { ! 1333: /* seek, if necessary */ ! 1334: dest = place * SOF; ! 1335: if (pagpos != dest) { ! 1336: if (fseek(pagf, dest, SEEK_SET) != 0) { ! 1337: DEBUG(("search: seek failed\n")); ! 1338: pagpos = -1; ! 1339: sp->aborted = 1; ! 1340: return(NOTFOUND); ! 1341: } ! 1342: pagpos = dest; ! 1343: } ! 1344: ! 1345: /* read it */ ! 1346: if (fread((char *)&val, sizeof(val), 1, pagf) == 1) ! 1347: value = MAPIN(val); ! 1348: else if (ferror(pagf)) { ! 1349: DEBUG(("search: read failed\n")); ! 1350: pagpos = -1; ! 1351: sp->aborted = 1; ! 1352: return(NOTFOUND); ! 1353: } else ! 1354: value = VACANT; ! 1355: ! 1356: /* and finish up */ ! 1357: pagpos += sizeof(val); ! 1358: } ! 1359: ! 1360: /* vacant slot is always cause to return */ ! 1361: if (value == VACANT) { ! 1362: DEBUG(("search: empty slot\n")); ! 1363: return(NOTFOUND); ! 1364: }; ! 1365: ! 1366: /* check the tag */ ! 1367: value = UNBIAS(value); ! 1368: DEBUG(("got 0x%lx\n", value)); ! 1369: if (!HASTAG(value)) { ! 1370: DEBUG(("tagless\n")); ! 1371: return(value); ! 1372: } else if (TAG(value) == sp->tag) { ! 1373: DEBUG(("match\n")); ! 1374: return(NOTAG(value)); ! 1375: } else { ! 1376: DEBUG(("mismatch 0x%lx\n", TAG(value))); ! 1377: } ! 1378: } ! 1379: /* NOTREACHED */ ! 1380: } ! 1381: ! 1382: /* ! 1383: - okayvalue - check that a value can be stored ! 1384: */ ! 1385: static int /* predicate */ ! 1386: okayvalue(value) ! 1387: of_t value; ! 1388: { ! 1389: if (HASTAG(value)) ! 1390: return(0); ! 1391: #ifdef OVERFLOW ! 1392: if (value == LONG_MAX) /* BIAS() and UNBIAS() will overflow */ ! 1393: return(0); ! 1394: #endif ! 1395: return(1); ! 1396: } ! 1397: ! 1398: /* ! 1399: - set - store a value into a location previously found by search ! 1400: */ ! 1401: static int /* 0 success, -1 failure */ ! 1402: set(sp, value) ! 1403: register struct searcher *sp; ! 1404: of_t value; ! 1405: { ! 1406: register of_t place = sp->place; ! 1407: register of_t v = value; ! 1408: ! 1409: if (sp->aborted) ! 1410: return(-1); ! 1411: ! 1412: if (CANTAG(v) && !conf.olddbz) { ! 1413: v |= sp->tag | taghere; ! 1414: if (v != UNBIAS(VACANT)) /* BIAS(v) won't look VACANT */ ! 1415: #ifdef OVERFLOW ! 1416: if (v != LONG_MAX) /* and it won't overflow */ ! 1417: #endif ! 1418: value = v; ! 1419: } ! 1420: DEBUG(("tagged value is 0x%lx\n", value)); ! 1421: value = BIAS(value); ! 1422: value = MAPOUT(value); ! 1423: ! 1424: /* If we have the index file in memory, use it */ ! 1425: if (corepag != NULL && place < conf.tsize) { ! 1426: corepag[place] = value; ! 1427: DEBUG(("set: incore\n")); ! 1428: return(0); ! 1429: } ! 1430: ! 1431: /* seek to spot */ ! 1432: pagpos = -1; /* invalidate position memory */ ! 1433: if (fseek(pagf, place * SOF, SEEK_SET) != 0) { ! 1434: DEBUG(("set: seek failed\n")); ! 1435: sp->aborted = 1; ! 1436: return(-1); ! 1437: } ! 1438: ! 1439: /* write in data */ ! 1440: if (fwrite((char *)&value, SOF, 1, pagf) != 1) { ! 1441: DEBUG(("set: write failed\n")); ! 1442: sp->aborted = 1; ! 1443: return(-1); ! 1444: } ! 1445: /* fflush improves robustness, and buffer re-use is rare anyway */ ! 1446: if (fflush(pagf) == EOF) { ! 1447: DEBUG(("set: fflush failed\n")); ! 1448: sp->aborted = 1; ! 1449: return(-1); ! 1450: } ! 1451: ! 1452: DEBUG(("set: succeeded\n")); ! 1453: return(0); ! 1454: } ! 1455: ! 1456: /* ! 1457: - mybytemap - determine this machine's byte map ! 1458: * ! 1459: * A byte map is an array of ints, sizeof(of_t) of them. The 0th int ! 1460: * is the byte number of the high-order byte in my of_t, and so forth. ! 1461: */ ! 1462: static void ! 1463: mybytemap(map) ! 1464: int map[]; /* -> int[SOF] */ ! 1465: { ! 1466: union { ! 1467: of_t o; ! 1468: char c[SOF]; ! 1469: } u; ! 1470: register int *mp = &map[SOF]; ! 1471: register int ntodo; ! 1472: register int i; ! 1473: ! 1474: u.o = 1; ! 1475: for (ntodo = (int)SOF; ntodo > 0; ntodo--) { ! 1476: for (i = 0; i < SOF; i++) ! 1477: if (u.c[i] != 0) ! 1478: break; ! 1479: if (i == SOF) { ! 1480: /* trouble -- set it to *something* consistent */ ! 1481: DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo)); ! 1482: for (i = 0; i < SOF; i++) ! 1483: map[i] = i; ! 1484: return; ! 1485: } ! 1486: DEBUG(("mybytemap: byte %d\n", i)); ! 1487: *--mp = i; ! 1488: while (u.c[i] != 0) ! 1489: u.o <<= 1; ! 1490: } ! 1491: } ! 1492: ! 1493: /* ! 1494: - bytemap - transform an of_t from byte ordering map1 to map2 ! 1495: */ ! 1496: static of_t /* transformed result */ ! 1497: bytemap(ino, map1, map2) ! 1498: of_t ino; ! 1499: int *map1; ! 1500: int *map2; ! 1501: { ! 1502: union oc { ! 1503: of_t o; ! 1504: char c[SOF]; ! 1505: }; ! 1506: union oc in; ! 1507: union oc out; ! 1508: register int i; ! 1509: ! 1510: in.o = ino; ! 1511: for (i = 0; i < SOF; i++) ! 1512: out.c[map2[i]] = in.c[map1[i]]; ! 1513: return(out.o); ! 1514: } ! 1515: ! 1516: /* ! 1517: * This is a simplified version of the pathalias hashing function. ! 1518: * Thanks to Steve Belovin and Peter Honeyman ! 1519: * ! 1520: * hash a string into a long int. 31 bit crc (from andrew appel). ! 1521: * the crc table is computed at run time by crcinit() -- we could ! 1522: * precompute, but it takes 1 clock tick on a 750. ! 1523: * ! 1524: * This fast table calculation works only if POLY is a prime polynomial ! 1525: * in the field of integers modulo 2. Since the coefficients of a ! 1526: * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is ! 1527: * implicit. IT MUST ALSO BE THE CASE that the coefficients of orders ! 1528: * 31 down to 25 are zero. Happily, we have candidates, from ! 1529: * E. J. Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962): ! 1530: * x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0 ! 1531: * x^31 + x^3 + x^0 ! 1532: * ! 1533: * We reverse the bits to get: ! 1534: * 111101010000000000000000000000001 but drop the last 1 ! 1535: * f 5 0 0 0 0 0 0 ! 1536: * 010010000000000000000000000000001 ditto, for 31-bit crc ! 1537: * 4 8 0 0 0 0 0 0 ! 1538: */ ! 1539: ! 1540: #define POLY 0x48000000L /* 31-bit polynomial (avoids sign problems) */ ! 1541: ! 1542: static long CrcTable[128]; ! 1543: ! 1544: /* ! 1545: - crcinit - initialize tables for hash function ! 1546: */ ! 1547: static void ! 1548: crcinit() ! 1549: { ! 1550: register int i, j; ! 1551: register long sum; ! 1552: ! 1553: for (i = 0; i < 128; ++i) { ! 1554: sum = 0L; ! 1555: for (j = 7 - 1; j >= 0; --j) ! 1556: if (i & (1 << j)) ! 1557: sum ^= POLY >> j; ! 1558: CrcTable[i] = sum; ! 1559: } ! 1560: DEBUG(("crcinit: done\n")); ! 1561: } ! 1562: ! 1563: /* ! 1564: - hash - Honeyman's nice hashing function ! 1565: */ ! 1566: static long ! 1567: hash(name, size) ! 1568: register char *name; ! 1569: register int size; ! 1570: { ! 1571: register long sum = 0L; ! 1572: ! 1573: while (size--) { ! 1574: sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f]; ! 1575: } ! 1576: DEBUG(("hash: returns (%ld)\n", sum)); ! 1577: return(sum); ! 1578: } ! 1579: ! 1580: /* ! 1581: * case-mapping stuff ! 1582: * ! 1583: * Borrowed from C News, by permission of the authors. Somewhat modified. ! 1584: * ! 1585: * We exploit the fact that we are dealing only with headers here, and ! 1586: * headers are limited to the ASCII characters by RFC822. It is barely ! 1587: * possible that we might be dealing with a translation into another ! 1588: * character set, but in particular it's very unlikely for a header ! 1589: * character to be outside -128..255. ! 1590: * ! 1591: * Life would be a whole lot simpler if tolower() could safely and portably ! 1592: * be applied to any char. ! 1593: */ ! 1594: ! 1595: #define OFFSET 128 /* avoid trouble with negative chars */ ! 1596: ! 1597: /* must call casencmp before invoking TOLOW... */ ! 1598: #define TOLOW(c) (cmap[(c)+OFFSET]) ! 1599: ! 1600: /* ...but the use of it in CISTREQN is safe without the preliminary call (!) */ ! 1601: /* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */ ! 1602: #define CISTREQN(a, b, n) \ ! 1603: (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0) ! 1604: ! 1605: #define MAPSIZE (256+OFFSET) ! 1606: static char cmap[MAPSIZE]; /* relies on init to '\0' */ ! 1607: static int mprimed = 0; /* has cmap been set up? */ ! 1608: ! 1609: /* ! 1610: - mapprime - set up case-mapping stuff ! 1611: */ ! 1612: static void ! 1613: mapprime() ! 1614: { ! 1615: register char *lp; ! 1616: register char *up; ! 1617: register int c; ! 1618: register int i; ! 1619: static char lower[] = "abcdefghijklmnopqrstuvwxyz"; ! 1620: static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 1621: ! 1622: for (lp = lower, up = upper; *lp != '\0'; lp++, up++) { ! 1623: c = *lp; ! 1624: cmap[c+OFFSET] = c; ! 1625: cmap[*up+OFFSET] = c; ! 1626: } ! 1627: for (i = 0; i < MAPSIZE; i++) ! 1628: if (cmap[i] == '\0') ! 1629: cmap[i] = (char)(i-OFFSET); ! 1630: mprimed = 1; ! 1631: } ! 1632: ! 1633: /* ! 1634: - casencmp - case-independent strncmp ! 1635: */ ! 1636: static int /* < == > 0 */ ! 1637: casencmp(s1, s2, len) ! 1638: char *s1; ! 1639: char *s2; ! 1640: int len; ! 1641: { ! 1642: register char *p1; ! 1643: register char *p2; ! 1644: register int n; ! 1645: ! 1646: if (!mprimed) ! 1647: mapprime(); ! 1648: ! 1649: p1 = s1; ! 1650: p2 = s2; ! 1651: n = len; ! 1652: while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) { ! 1653: p1++; ! 1654: p2++; ! 1655: } ! 1656: if (n < 0) ! 1657: return(0); ! 1658: ! 1659: /* ! 1660: * The following case analysis is necessary so that characters ! 1661: * which look negative collate low against normal characters but ! 1662: * high against the end-of-string NUL. ! 1663: */ ! 1664: if (*p1 == '\0' && *p2 == '\0') ! 1665: return(0); ! 1666: else if (*p1 == '\0') ! 1667: return(-1); ! 1668: else if (*p2 == '\0') ! 1669: return(1); ! 1670: else ! 1671: return(TOLOW(*p1) - TOLOW(*p2)); ! 1672: } ! 1673: ! 1674: /* ! 1675: - mapcase - do case-mapped copy ! 1676: */ ! 1677: static char * /* returns src or dst */ ! 1678: mapcase(dst, src, siz) ! 1679: char *dst; /* destination, used only if mapping needed */ ! 1680: char *src; /* source; src == dst is legal */ ! 1681: size_t siz; ! 1682: { ! 1683: register char *s; ! 1684: register char *d; ! 1685: register char *c; /* case break */ ! 1686: register char *e; /* end of source */ ! 1687: ! 1688: ! 1689: c = cipoint(src, siz); ! 1690: if (c == NULL) ! 1691: return(src); ! 1692: ! 1693: if (!mprimed) ! 1694: mapprime(); ! 1695: s = src; ! 1696: e = s + siz; ! 1697: d = dst; ! 1698: ! 1699: while (s < c) ! 1700: *d++ = *s++; ! 1701: while (s < e) ! 1702: *d++ = TOLOW(*s++); ! 1703: ! 1704: return(dst); ! 1705: } ! 1706: ! 1707: /* ! 1708: - cipoint - where in this message-ID does it become case-insensitive? ! 1709: * ! 1710: * The RFC822 code is not quite complete. Absolute, total, full RFC822 ! 1711: * compliance requires a horrible parsing job, because of the arcane ! 1712: * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi, ! 1713: * for example. There are three or four things that might occur in the ! 1714: * domain part of a message-id that are case-sensitive. They don't seem ! 1715: * to ever occur in real news, thank Cthulhu. (What? You were expecting ! 1716: * a merciful and forgiving deity to be invoked in connection with RFC822? ! 1717: * Forget it; none of them would come near it.) ! 1718: */ ! 1719: static char * /* pointer into s, or NULL for "nowhere" */ ! 1720: cipoint(s, siz) ! 1721: char *s; ! 1722: size_t siz; ! 1723: { ! 1724: register char *p; ! 1725: static char post[] = "postmaster"; ! 1726: static int plen = sizeof(post)-1; ! 1727: ! 1728: switch (conf.casemap) { ! 1729: case '0': /* unmapped, sensible */ ! 1730: return(NULL); ! 1731: break; ! 1732: case 'C': /* C News, RFC 822 conformant (approx.) */ ! 1733: p = memchr(s, '@', siz); ! 1734: if (p == NULL) /* no local/domain split */ ! 1735: return(NULL); /* assume all local */ ! 1736: else if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) { ! 1737: /* crazy -- "postmaster" is case-insensitive */ ! 1738: return(s); ! 1739: } else ! 1740: return(p); ! 1741: break; ! 1742: case '=': /* 2.11, neither sensible nor conformant */ ! 1743: return(s); /* all case-insensitive */ ! 1744: break; ! 1745: } ! 1746: ! 1747: DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap)); ! 1748: return(NULL); /* just leave it alone */ ! 1749: } ! 1750: ! 1751: /* ! 1752: - dbzdebug - control dbz debugging at run time ! 1753: */ ! 1754: int /* old value */ ! 1755: dbzdebug(value) ! 1756: int value; ! 1757: { ! 1758: #ifdef DBZDEBUG ! 1759: register int old = debug; ! 1760: ! 1761: debug = value; ! 1762: return(old); ! 1763: #else ! 1764: return(-1); ! 1765: #endif ! 1766: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.