|
|
1.1 root 1: /*
2:
3: dbz.c V3.2
4:
5: Copyright 1988 Jon Zeeff ([email protected])
6: You can use this code in any manner, as long as you leave my name on it
7: and don't hold me responsible for any problems with it.
8:
9: Hacked on by [email protected] (David Butler); Sun Jun 5 00:27:08 CDT 1988
10:
11: Various improvments + INCORE by [email protected] (Mark Moraes)
12:
13: Major reworking by Henry Spencer as part of the C News project.
14:
15: These routines replace dbm as used by the usenet news software
16: (it's not a full dbm replacement by any means). It's fast and
17: simple. It contains no AT&T code.
18:
19: In general, dbz's files are 1/20 the size of dbm's. Lookup performance
20: is somewhat better, while file creation is spectacularly faster, especially
21: if the incore facility is used.
22:
23: */
24:
25: #include <stdio.h>
26: #include <sys/types.h>
27: #include <string.h>
28: #include <ctype.h>
29: #include <errno.h>
30: #ifndef __STDC__
31: extern int errno;
32: #endif
33: #include <dbz.h>
34:
35: /*
36: * #ifdef index. "LIA" = "leave it alone unless you know what you're doing".
37: *
38: * FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h>
39: * INDEX_SIZE backward compatibility with old dbz; avoid using this
40: * NMEMORY number of days of memory for use in sizing new table (LIA)
41: * INCORE backward compatibility with old dbz; use dbzincore() instead
42: * DBZDEBUG enable debugging
43: * DEFSIZE default table size (not as critical as in old dbz)
44: * OLDBNEWS default case mapping as in old B News; set NOBUFFER
45: * BNEWS default case mapping as in current B News; set NOBUFFER
46: * DEFCASE default case-map algorithm selector
47: * NOTAGS fseek offsets are strange, do not do tagging (see below)
48: * NPAGBUF size of .pag buffer, in longs (LIA)
49: * SHISTBUF size of ASCII-file buffer, in bytes (LIA)
50: * MAXRUN length of run which shifts to next table (see below) (LIA)
51: * OVERFLOW long-int arithmetic overflow must be avoided, will trap
52: * NOBUFFER do not buffer hash-table i/o, B News locking is defective
53: */
54:
55: #ifdef FUNNYSEEKS
56: #include <unistd.h>
57: #else
58: #define SEEK_SET 0
59: #endif
60: #ifdef OVERFLOW
61: #include <limits.h>
62: #endif
63:
64: static int dbzversion = 3; /* for validating .dir file format */
65:
66: /*
67: * The dbz database exploits the fact that when news stores a <key,value>
68: * tuple, the `value' part is a seek offset into a text file, pointing to
69: * a copy of the `key' part. This avoids the need to store a copy of
70: * the key in the dbz files. However, the text file *must* exist and be
71: * consistent with the dbz files, or things will fail.
72: *
73: * The basic format of the database is a simple hash table containing the
74: * values. A value is stored by indexing into the table using a hash value
75: * computed from the key; collisions are resolved by linear probing (just
76: * search forward for an empty slot, wrapping around to the beginning of
77: * the table if necessary). Linear probing is a performance disaster when
78: * the table starts to get full, so a complication is introduced. The
79: * database is actually one *or more* tables, stored sequentially in the
80: * .pag file, and the length of linear-probe sequences is limited. The
81: * search (for an existing item or an empty slot) always starts in the
82: * first table, and whenever MAXRUN probes have been done in table N,
83: * probing continues in table N+1. This behaves reasonably well even in
84: * cases of massive overflow. There are some other small complications
85: * added, see comments below.
86: *
87: * The table size is fixed for any particular database, but is determined
88: * dynamically when a database is rebuilt. The strategy is to try to pick
89: * the size so the first table will be no more than 2/3 full, that being
90: * slightly before the point where performance starts to degrade. (It is
91: * desirable to be a bit conservative because the overflow strategy tends
92: * to produce files with holes in them, which is a nuisance.)
93: */
94:
95: /*
96: * The following is for backward compatibility.
97: */
98: #ifdef INDEX_SIZE
99: #define DEFSIZE INDEX_SIZE
100: #endif
101:
102: /*
103: * ANSI C says an offset into a file is a long, not an off_t, for some
104: * reason. This actually does simplify life a bit, but it's still nice
105: * to have a distinctive name for it. Beware, this is just for readability,
106: * don't try to change this.
107: */
108: #define of_t long
109: #define SOF (sizeof(of_t))
110:
111: /*
112: * We assume that unused areas of a binary file are zeros, and that the
113: * bit pattern of `(of_t)0' is all zeros. The alternative is rather
114: * painful file initialization. Note that okayvalue(), if OVERFLOW is
115: * defined, knows what value of an offset would cause overflow.
116: */
117: #define VACANT ((of_t)0)
118: #define BIAS(o) ((o)+1) /* make any valid of_t non-VACANT */
119: #define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */
120:
121: /*
122: * In a Unix implementation, or indeed any in which an of_t is a byte
123: * count, there are a bunch of high bits free in an of_t. There is a
124: * use for them. Checking a possible hit by looking it up in the base
125: * file is relatively expensive, and the cost can be dramatically reduced
126: * by using some of those high bits to tag the value with a few more bits
127: * of the key's hash. This detects most false hits without the overhead of
128: * seek+read+strcmp. We use the top bit to indicate whether the value is
129: * tagged or not, and don't tag a value which is using the tag bits itself.
130: * We're in trouble if the of_t representation wants to use the top bit.
131: * The actual bitmasks and offset come from the configuration stuff,
132: * which permits fiddling with them as necessary, and also suppressing
133: * them completely (by defining the masks to 0). We build pre-shifted
134: * versions of the masks for efficiency.
135: */
136: static of_t tagbits; /* pre-shifted tag mask */
137: static of_t taghere; /* pre-shifted tag-enable bit */
138: static of_t tagboth; /* tagbits|taghere */
139: #define HASTAG(o) ((o)&taghere)
140: #define TAG(o) ((o)&tagbits)
141: #define NOTAG(o) ((o)&~tagboth)
142: #define CANTAG(o) (((o)&tagboth) == 0)
143: #define MKTAG(v) (((v)<<conf.tagshift)&tagbits)
144:
145: /*
146: * A new, from-scratch database, not built as a rebuild of an old one,
147: * needs to know table size, casemap algorithm, and tagging. Normally
148: * the user supplies this info, but there have to be defaults.
149: */
150: #ifndef DEFSIZE
151: #define DEFSIZE 120011 /* 300007 might be better */
152: #endif
153: #ifdef OLDBNEWS
154: #define DEFCASE '0' /* B2.10 -- no mapping */
155: #define NOBUFFER /* B News locking is defective */
156: #endif
157: #ifdef BNEWS
158: #define DEFCASE '=' /* B2.11 -- all mapped */
159: #define NOBUFFER /* B News locking is defective */
160: #endif
161: #ifndef DEFCASE /* C News compatibility is the default */
162: #define DEFCASE 'C' /* C News -- RFC822 mapping */
163: #endif
164: #ifndef NOTAGS
165: #define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */
166: #define TAGMASK 0x7f
167: #define TAGSHIFT 24
168: #else
169: #define TAGENB 0 /* no tags */
170: #define TAGMASK 0
171: #define TAGSHIFT 0
172: #endif
173:
174: /*
175: * We read configuration info from the .dir file into this structure,
176: * so we can avoid wired-in assumptions for an existing database.
177: *
178: * Among the info is a record of recent peak usages, so that a new table
179: * size can be chosen intelligently when rebuilding. 10 is a good
180: * number of usages to keep, since news displays marked fluctuations
181: * in volume on a 7-day cycle.
182: */
183: struct dbzconfig {
184: int olddbz; /* .dir file empty but .pag not? */
185: of_t tsize; /* table size */
186: # ifndef NMEMORY
187: # define NMEMORY 10 /* # days of use info to remember */
188: # endif
189: # define NUSEDS (1+NMEMORY)
190: of_t used[NUSEDS]; /* entries used today, yesterday, ... */
191: int valuesize; /* size of table values, == SOF */
192: int bytemap[SOF]; /* byte-order map */
193: char casemap; /* case-mapping algorithm (see cipoint()) */
194: char fieldsep; /* field separator in base file, if any */
195: of_t tagenb; /* unshifted tag-enable bit */
196: of_t tagmask; /* unshifted tag mask */
197: int tagshift; /* shift count for tagmask and tagenb */
198: };
199: static struct dbzconfig conf;
200: static int getconf();
201: static long getno();
202: static int putconf();
203: static void mybytemap();
204: static of_t bytemap();
205:
206: /*
207: * For a program that makes many, many references to the database, it
208: * is a large performance win to keep the table in core, if it will fit.
209: * Note that this does hurt robustness in the event of crashes, and
210: * dbmclose() *must* be called to flush the in-core database to disk.
211: * The code is prepared to deal with the possibility that there isn't
212: * enough memory. There *is* an assumption that a size_t is big enough
213: * to hold the size (in bytes) of one table, so dbminit() tries to figure
214: * out whether this is possible first.
215: *
216: * The preferred way to ask for an in-core table is to do dbzincore(1)
217: * before dbminit(). The default is not to do it, although -DINCORE
218: * overrides this for backward compatibility with old dbz.
219: *
220: * We keep only the first table in core. This greatly simplifies the
221: * code, and bounds memory demand. Furthermore, doing this is a large
222: * performance win even in the event of massive overflow.
223: */
224: #ifdef INCORE
225: static int incore = 1;
226: #else
227: static int incore = 0;
228: #endif
229:
230: /*
231: * Stdio buffer for .pag reads. Buffering more than about 16 does not help
232: * significantly at the densities we try to maintain, and the much larger
233: * buffers that most stdios default to are much more expensive to fill.
234: * With small buffers, stdio is performance-competitive with raw read(),
235: * and it's much more portable.
236: */
237: #ifndef NPAGBUF
238: #define NPAGBUF 16
239: #endif
240: #ifndef NOBUFFER
241: #ifdef _IOFBF
242: static of_t pagbuf[NPAGBUF]; /* only needed if !NOBUFFER && _IOFBF */
243: #endif
244: #endif
245:
246: /*
247: * Stdio buffer for base-file reads. Message-IDs (all news ever needs to
248: * read) are essentially never longer than 64 bytes, and the typical stdio
249: * buffer is so much larger that it is much more expensive to fill.
250: */
251: #ifndef SHISTBUF
252: #define SHISTBUF 64
253: #endif
254: #ifdef _IOFBF
255: static char basebuf[SHISTBUF]; /* only needed if _IOFBF exists */
256: #endif
257:
258: /*
259: * Data structure for recording info about searches.
260: */
261: struct searcher {
262: of_t place; /* current location in file */
263: int tabno; /* which table we're in */
264: int run; /* how long we'll stay in this table */
265: # ifndef MAXRUN
266: # define MAXRUN 100
267: # endif
268: long hash; /* the key's hash code (for optimization) */
269: of_t tag; /* tag we are looking for */
270: int seen; /* have we examined current location? */
271: int aborted; /* has i/o error aborted search? */
272: };
273: static void start();
274: #define FRESH ((struct searcher *)NULL)
275: static of_t search();
276: #define NOTFOUND ((of_t)-1)
277: static int okayvalue();
278: static int set();
279:
280: /*
281: * Arguably the searcher struct for a given routine ought to be local to
282: * it, but a fetch() is very often immediately followed by a store(), and
283: * in some circumstances it is a useful performance win to remember where
284: * the fetch() completed. So we use a global struct and remember whether
285: * it is current.
286: */
287: static struct searcher srch;
288: static struct searcher *prevp; /* &srch or FRESH */
289:
290: /* byte-ordering stuff */
291: static int mybmap[SOF]; /* my byte order (see mybytemap()) */
292: static int bytesame; /* is database order same as mine? */
293: #define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
294: #define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
295:
296: /*
297: * The double parentheses needed to make this work are ugly, but the
298: * alternative (under most compilers) is to pack around 2K of unused
299: * strings -- there's just no way to get rid of them.
300: */
301: static int debug; /* controlled by dbzdebug() */
302: #ifdef DBZDEBUG
303: #define DEBUG(args) if (debug) { (void) printf args ; }
304: #else
305: #define DEBUG(args) ;
306: #endif
307:
308: /* externals used */
309: extern char *malloc();
310: extern char *calloc();
311: extern void free(); /* ANSI C; some old implementations say int */
312: extern int atoi();
313: extern long atol();
314:
315: /* misc. forwards */
316: static long hash();
317: static void crcinit();
318: static char *cipoint();
319: static char *mapcase();
320: static int isprime();
321: static FILE *latebase();
322:
323: /* file-naming stuff */
324: static char dir[] = ".dir";
325: static char pag[] = ".pag";
326: static char *enstring();
327:
328: /* central data structures */
329: static FILE *basef; /* descriptor for base file */
330: static char *basefname; /* name for not-yet-opened base file */
331: static FILE *dirf; /* descriptor for .dir file */
332: static int dirronly; /* dirf open read-only? */
333: static FILE *pagf = NULL; /* descriptor for .pag file */
334: static of_t pagpos; /* posn in pagf; only search may set != -1 */
335: static int pagronly; /* pagf open read-only? */
336: static of_t *corepag; /* incore version of .pag file, if any */
337: static FILE *bufpagf; /* well-buffered pagf, for incore rewrite */
338: static of_t *getcore();
339: static int putcore();
340: static int written; /* has a store() been done? */
341:
342: /*
343: - dbzfresh - set up a new database, no historical info
344: */
345: int /* 0 success, -1 failure */
346: dbzfresh(name, size, fs, cmap, tagmask)
347: char *name; /* base name; .dir and .pag must exist */
348: long size; /* table size (0 means default) */
349: int fs; /* field-separator character in base file */
350: int cmap; /* case-map algorithm (0 means default) */
351: of_t tagmask; /* 0 default, 1 no tags */
352: {
353: register char *fn;
354: struct dbzconfig c;
355: register of_t m;
356: register FILE *f;
357:
358: if (pagf != NULL) {
359: DEBUG(("dbzfresh: database already open\n"));
360: return(-1);
361: }
362: if (size != 0 && size < 2) {
363: DEBUG(("dbzfresh: preposterous size (%ld)\n", size));
364: return(-1);
365: }
366:
367: /* get default configuration */
368: if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)
369: return(-1); /* "can't happen" */
370:
371: /* and mess with it as specified */
372: if (size != 0)
373: c.tsize = size;
374: c.fieldsep = fs;
375: switch (cmap) {
376: case 0:
377: case '0':
378: case 'B': /* 2.10 compat */
379: c.casemap = '0'; /* '\0' nicer, but '0' printable! */
380: break;
381: case '=':
382: case 'b': /* 2.11 compat */
383: c.casemap = '=';
384: break;
385: case 'C':
386: c.casemap = 'C';
387: break;
388: case '?':
389: c.casemap = DEFCASE;
390: break;
391: default:
392: DEBUG(("dbzfresh case map `%c' unknown\n", cmap));
393: return(-1);
394: break;
395: }
396: switch (tagmask) {
397: case 0: /* default */
398: break;
399: case 1: /* no tags */
400: c.tagshift = 0;
401: c.tagmask = 0;
402: c.tagenb = 0;
403: break;
404: default:
405: m = tagmask;
406: c.tagshift = 0;
407: while (!(m&01)) {
408: m >>= 1;
409: c.tagshift++;
410: }
411: c.tagmask = m;
412: c.tagenb = (m << 1) & ~m;
413: break;
414: }
415:
416: /* write it out */
417: fn = enstring(name, dir);
418: if (fn == NULL)
419: return(-1);
420: f = fopen(fn, "w");
421: free(fn);
422: if (f == NULL) {
423: DEBUG(("dbzfresh: unable to write config\n"));
424: return(-1);
425: }
426: if (putconf(f, &c) < 0) {
427: (void) fclose(f);
428: return(-1);
429: }
430: if (fclose(f) == EOF) {
431: DEBUG(("dbzfresh: fclose failure\n"));
432: return(-1);
433: }
434:
435: /* create/truncate .pag */
436: fn = enstring(name, pag);
437: if (fn == NULL)
438: return(-1);
439: f = fopen(fn, "w");
440: free(fn);
441: if (f == NULL) {
442: DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
443: return(-1);
444: } else
445: (void) fclose(f);
446:
447: /* and punt to dbminit for the hard work */
448: return(dbminit(name));
449: }
450:
451: /*
452: - dbzsize - what's a good table size to hold this many entries?
453: */
454: long
455: dbzsize(contents)
456: long contents; /* 0 means what's the default */
457: {
458: register long n;
459:
460: if (contents <= 0) { /* foulup or default inquiry */
461: DEBUG(("dbzsize: preposterous input (%ld)\n", contents));
462: return(DEFSIZE);
463: }
464: n = (contents/2)*3; /* try to keep table at most 2/3 full */
465: if (!(n&01)) /* make it odd */
466: n++;
467: DEBUG(("dbzsize: tentative size %ld\n", n));
468: while (!isprime(n)) /* and look for a prime */
469: n += 2;
470: DEBUG(("dbzsize: final size %ld\n", n));
471:
472: return(n);
473: }
474:
475: /*
476: - isprime - is a number prime?
477: *
478: * This is not a terribly efficient approach.
479: */
480: static int /* predicate */
481: isprime(x)
482: register long x;
483: {
484: static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
485: register int *ip;
486: register long div;
487: register long stop;
488:
489: /* hit the first few primes quickly to eliminate easy ones */
490: /* this incidentally prevents ridiculously small tables */
491: for (ip = quick; (div = *ip) != 0; ip++)
492: if (x%div == 0) {
493: DEBUG(("isprime: quick result on %ld\n", (long)x));
494: return(0);
495: }
496:
497: /* approximate square root of x */
498: for (stop = x; x/stop < stop; stop >>= 1)
499: continue;
500: stop <<= 1;
501:
502: /* try odd numbers up to stop */
503: for (div = *--ip; div < stop; div += 2)
504: if (x%div == 0)
505: return(0);
506:
507: return(1);
508: }
509:
510: /*
511: - dbzagain - set up a new database to be a rebuild of an old one
512: */
513: int /* 0 success, -1 failure */
514: dbzagain(name, oldname)
515: char *name; /* base name; .dir and .pag must exist */
516: char *oldname; /* base name; all must exist */
517: {
518: register char *fn;
519: struct dbzconfig c;
520: register int i;
521: register long top;
522: register FILE *f;
523: register int newtable;
524: register of_t newsize;
525:
526: if (pagf != NULL) {
527: DEBUG(("dbzagain: database already open\n"));
528: return(-1);
529: }
530:
531: /* pick up the old configuration */
532: fn = enstring(oldname, dir);
533: if (fn == NULL)
534: return(-1);
535: f = fopen(fn, "r");
536: free(fn);
537: if (f == NULL) {
538: DEBUG(("dbzagain: cannot open old .dir file\n"));
539: return(-1);
540: }
541: i = getconf(f, (FILE *)NULL, &c);
542: (void) fclose(f);
543: if (i < 0) {
544: DEBUG(("dbzagain: getconf failed\n"));
545: return(-1);
546: }
547:
548: /* tinker with it */
549: top = 0;
550: newtable = 0;
551: for (i = 0; i < NUSEDS; i++) {
552: if (top < c.used[i])
553: top = c.used[i];
554: if (c.used[i] == 0)
555: newtable = 1; /* hasn't got full usage history yet */
556: }
557: if (top == 0) {
558: DEBUG(("dbzagain: old table has no contents!\n"));
559: newtable = 1;
560: }
561: for (i = NUSEDS-1; i > 0; i--)
562: c.used[i] = c.used[i-1];
563: c.used[0] = 0;
564: newsize = dbzsize(top);
565: if (!newtable || newsize > c.tsize) /* don't shrink new table */
566: c.tsize = newsize;
567:
568: /* write it out */
569: fn = enstring(name, dir);
570: if (fn == NULL)
571: return(-1);
572: f = fopen(fn, "w");
573: free(fn);
574: if (f == NULL) {
575: DEBUG(("dbzagain: unable to write new .dir\n"));
576: return(-1);
577: }
578: i = putconf(f, &c);
579: (void) fclose(f);
580: if (i < 0) {
581: DEBUG(("dbzagain: putconf failed\n"));
582: return(-1);
583: }
584:
585: /* create/truncate .pag */
586: fn = enstring(name, pag);
587: if (fn == NULL)
588: return(-1);
589: f = fopen(fn, "w");
590: free(fn);
591: if (f == NULL) {
592: DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
593: return(-1);
594: } else
595: (void) fclose(f);
596:
597: /* and let dbminit do the work */
598: return(dbminit(name));
599: }
600:
601: /*
602: - dbminit - open a database, creating it (using defaults) if necessary
603: *
604: * We try to leave errno set plausibly, to the extent that underlying
605: * functions permit this, since many people consult it if dbminit() fails.
606: */
607: int /* 0 success, -1 failure */
608: dbminit(name)
609: char *name;
610: {
611: register int i;
612: register size_t s;
613: register char *dirfname;
614: register char *pagfname;
615:
616: if (pagf != NULL) {
617: DEBUG(("dbminit: dbminit already called once\n"));
618: errno = 0;
619: return(-1);
620: }
621:
622: /* open the .dir file */
623: dirfname = enstring(name, dir);
624: if (dirfname == NULL)
625: return(-1);
626: dirf = fopen(dirfname, "r+");
627: if (dirf == NULL) {
628: dirf = fopen(dirfname, "r");
629: dirronly = 1;
630: } else
631: dirronly = 0;
632: free(dirfname);
633: if (dirf == NULL) {
634: DEBUG(("dbminit: can't open .dir file\n"));
635: return(-1);
636: }
637:
638: /* open the .pag file */
639: pagfname = enstring(name, pag);
640: if (pagfname == NULL) {
641: (void) fclose(dirf);
642: return(-1);
643: }
644: pagf = fopen(pagfname, "r+b");
645: if (pagf == NULL) {
646: pagf = fopen(pagfname, "rb");
647: if (pagf == NULL) {
648: DEBUG(("dbminit: .pag open failed\n"));
649: (void) fclose(dirf);
650: free(pagfname);
651: return(-1);
652: }
653: pagronly = 1;
654: } else if (dirronly)
655: pagronly = 1;
656: else
657: pagronly = 0;
658: #ifdef NOBUFFER
659: /*
660: * B News does not do adequate locking on its database accesses.
661: * Why it doesn't get into trouble using dbm is a mystery. In any
662: * case, doing unbuffered i/o does not cure the problem, but does
663: * enormously reduce its incidence.
664: */
665: (void) setbuf(pagf, (char *)NULL);
666: #else
667: #ifdef _IOFBF
668: (void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf));
669: #endif
670: #endif
671: pagpos = -1;
672: /* don't free pagfname, need it below */
673:
674: /* open the base file */
675: basef = fopen(name, "r");
676: if (basef == NULL) {
677: DEBUG(("dbminit: basefile open failed\n"));
678: basefname = enstring(name, "");
679: if (basefname == NULL) {
680: (void) fclose(pagf);
681: (void) fclose(dirf);
682: free(pagfname);
683: pagf = NULL;
684: return(-1);
685: }
686: } else
687: basefname = NULL;
688: #ifdef _IOFBF
689: if (basef != NULL)
690: (void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf));
691: #endif
692:
693: /* pick up configuration */
694: if (getconf(dirf, pagf, &conf) < 0) {
695: DEBUG(("dbminit: getconf failure\n"));
696: (void) fclose(basef);
697: (void) fclose(pagf);
698: (void) fclose(dirf);
699: free(pagfname);
700: pagf = NULL;
701: errno = EDOM; /* kind of a kludge, but very portable */
702: return(-1);
703: }
704: tagbits = conf.tagmask << conf.tagshift;
705: taghere = conf.tagenb << conf.tagshift;
706: tagboth = tagbits | taghere;
707: mybytemap(mybmap);
708: bytesame = 1;
709: for (i = 0; i < SOF; i++)
710: if (mybmap[i] != conf.bytemap[i])
711: bytesame = 0;
712:
713: /* get first table into core, if it looks desirable and feasible */
714: s = (size_t)conf.tsize * SOF;
715: if (incore && (of_t)(s/SOF) == conf.tsize) {
716: bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b");
717: if (bufpagf != NULL)
718: corepag = getcore(bufpagf);
719: } else {
720: bufpagf = NULL;
721: corepag = NULL;
722: }
723: free(pagfname);
724:
725: /* misc. setup */
726: crcinit();
727: written = 0;
728: prevp = FRESH;
729: DEBUG(("dbminit: succeeded\n"));
730: return(0);
731: }
732:
733: /*
734: - enstring - concatenate two strings into a malloced area
735: */
736: static char * /* NULL if malloc fails */
737: enstring(s1, s2)
738: char *s1;
739: char *s2;
740: {
741: register char *p;
742:
743: p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1);
744: if (p != NULL) {
745: (void) strcpy(p, s1);
746: (void) strcat(p, s2);
747: } else {
748: DEBUG(("enstring(%s, %s) out of memory\n", s1, s2));
749: }
750: return(p);
751: }
752:
753: /*
754: - dbmclose - close a database
755: */
756: int
757: dbmclose()
758: {
759: register int ret = 0;
760:
761: if (pagf == NULL) {
762: DEBUG(("dbmclose: not opened!\n"));
763: return(-1);
764: }
765:
766: if (fclose(pagf) == EOF) {
767: DEBUG(("dbmclose: fclose(pagf) failed\n"));
768: ret = -1;
769: }
770: pagf = basef; /* ensure valid pointer; dbzsync checks it */
771: if (dbzsync() < 0)
772: ret = -1;
773: if (bufpagf != NULL && fclose(bufpagf) == EOF) {
774: DEBUG(("dbmclose: fclose(bufpagf) failed\n"));
775: ret = -1;
776: }
777: if (corepag != NULL)
778: free((char *)corepag);
779: corepag = NULL;
780: if (fclose(basef) == EOF) {
781: DEBUG(("dbmclose: fclose(basef) failed\n"));
782: ret = -1;
783: }
784: if (basefname != NULL)
785: free(basefname);
786: basef = NULL;
787: pagf = NULL;
788: if (fclose(dirf) == EOF) {
789: DEBUG(("dbmclose: fclose(dirf) failed\n"));
790: ret = -1;
791: }
792:
793: DEBUG(("dbmclose: %s\n", (ret == 0) ? "succeeded" : "failed"));
794: return(ret);
795: }
796:
797: /*
798: - dbzsync - push all in-core data out to disk
799: */
800: int
801: dbzsync()
802: {
803: register int ret = 0;
804:
805: if (pagf == NULL) {
806: DEBUG(("dbzsync: not opened!\n"));
807: return(-1);
808: }
809: if (!written)
810: return(0);
811:
812: if (corepag != NULL) {
813: if (putcore(corepag, bufpagf) < 0) {
814: DEBUG(("dbzsync: putcore failed\n"));
815: ret = -1;
816: }
817: }
818: if (!conf.olddbz)
819: if (putconf(dirf, &conf) < 0)
820: ret = -1;
821:
822: DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed"));
823: return(ret);
824: }
825:
826: /*
827: - dbzcancel - cancel writing of in-core data
828: * Mostly for use from child processes.
829: * Note that we don't need to futz around with stdio buffers, because we
830: * always fflush them immediately anyway and so they never have stale data.
831: */
832: int
833: dbzcancel()
834: {
835: if (pagf == NULL) {
836: DEBUG(("dbzcancel: not opened!\n"));
837: return(-1);
838: }
839:
840: written = 0;
841: return(0);
842: }
843:
844: /*
845: - dbzfetch - fetch() with case mapping built in
846: */
847: datum
848: dbzfetch(key)
849: datum key;
850: {
851: char buffer[DBZMAXKEY + 1];
852: datum mappedkey;
853: register size_t keysize;
854:
855: DEBUG(("dbzfetch: (%s)\n", key.dptr));
856:
857: /* Key is supposed to be less than DBZMAXKEY */
858: keysize = key.dsize;
859: if (keysize >= DBZMAXKEY) {
860: keysize = DBZMAXKEY;
861: DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
862: }
863:
864: mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
865: buffer[keysize] = '\0'; /* just a debug aid */
866: mappedkey.dsize = keysize;
867:
868: return(fetch(mappedkey));
869: }
870:
871: /*
872: - fetch - get an entry from the database
873: *
874: * Disgusting fine point, in the name of backward compatibility: if the
875: * last character of "key" is a NUL, that character is (effectively) not
876: * part of the comparison against the stored keys.
877: */
878: datum /* dptr NULL, dsize 0 means failure */
879: fetch(key)
880: datum key;
881: {
882: char buffer[DBZMAXKEY + 1];
883: static of_t key_ptr; /* return value points here */
884: datum output;
885: register size_t keysize;
886: register size_t cmplen;
887: register char *sepp;
888:
889: DEBUG(("fetch: (%s)\n", key.dptr));
890: output.dptr = NULL;
891: output.dsize = 0;
892: prevp = FRESH;
893:
894: /* Key is supposed to be less than DBZMAXKEY */
895: keysize = key.dsize;
896: if (keysize >= DBZMAXKEY) {
897: keysize = DBZMAXKEY;
898: DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
899: }
900:
901: if (pagf == NULL) {
902: DEBUG(("fetch: database not open!\n"));
903: return(output);
904: } else if (basef == NULL) { /* basef didn't exist yet */
905: basef = latebase();
906: if (basef == NULL)
907: return(output);
908: }
909:
910: cmplen = keysize;
911: sepp = &conf.fieldsep;
912: if (key.dptr[keysize-1] == '\0') {
913: cmplen--;
914: sepp = &buffer[keysize-1];
915: }
916: start(&srch, &key, FRESH);
917: while ((key_ptr = search(&srch)) != NOTFOUND) {
918: DEBUG(("got 0x%lx\n", key_ptr));
919:
920: /* fetch the key */
921: if (fseek(basef, key_ptr, SEEK_SET) != 0) {
922: DEBUG(("fetch: seek failed\n"));
923: return(output);
924: }
925: if (fread(buffer, 1, keysize, basef) != keysize) {
926: DEBUG(("fetch: read failed\n"));
927: return(output);
928: }
929:
930: /* try it */
931: buffer[keysize] = '\0'; /* terminated for DEBUG */
932: (void) mapcase(buffer, buffer, keysize);
933: DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n",
934: buffer, key.dptr, keysize));
935: if (memcmp(key.dptr, buffer, cmplen) == 0 &&
936: (*sepp == conf.fieldsep || *sepp == '\0')) {
937: /* we found it */
938: output.dptr = (char *)&key_ptr;
939: output.dsize = SOF;
940: DEBUG(("fetch: successful\n"));
941: return(output);
942: }
943: }
944:
945: /* we didn't find it */
946: DEBUG(("fetch: failed\n"));
947: prevp = &srch; /* remember where we stopped */
948: return(output);
949: }
950:
951: /*
952: - latebase - try to open a base file that wasn't there at the start
953: */
954: static FILE *
955: latebase()
956: {
957: register FILE *it;
958:
959: if (basefname == NULL) {
960: DEBUG(("latebase: name foulup\n"));
961: return(NULL);
962: }
963: it = fopen(basefname, "r");
964: if (it == NULL) {
965: DEBUG(("latebase: still can't open base\n"));
966: } else {
967: DEBUG(("latebase: late open succeeded\n"));
968: free(basefname);
969: basefname = NULL;
970: #ifdef _IOFBF
971: (void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf));
972: #endif
973: }
974: return(it);
975: }
976:
977: /*
978: - dbzstore - store() with case mapping built in
979: */
980: int
981: dbzstore(key, data)
982: datum key;
983: datum data;
984: {
985: char buffer[DBZMAXKEY + 1];
986: datum mappedkey;
987: register size_t keysize;
988:
989: DEBUG(("dbzstore: (%s)\n", key.dptr));
990:
991: /* Key is supposed to be less than DBZMAXKEY */
992: keysize = key.dsize;
993: if (keysize >= DBZMAXKEY) {
994: DEBUG(("dbzstore: key size too big (%d)\n", key.dsize));
995: return(-1);
996: }
997:
998: mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
999: buffer[keysize] = '\0'; /* just a debug aid */
1000: mappedkey.dsize = keysize;
1001:
1002: return(store(mappedkey, data));
1003: }
1004:
1005: /*
1006: - store - add an entry to the database
1007: */
1008: int /* 0 success, -1 failure */
1009: store(key, data)
1010: datum key;
1011: datum data;
1012: {
1013: of_t value;
1014:
1015: if (pagf == NULL) {
1016: DEBUG(("store: database not open!\n"));
1017: return(-1);
1018: } else if (basef == NULL) { /* basef didn't exist yet */
1019: basef = latebase();
1020: if (basef == NULL)
1021: return(-1);
1022: }
1023: if (pagronly) {
1024: DEBUG(("store: database open read-only\n"));
1025: return(-1);
1026: }
1027: if (data.dsize != SOF) {
1028: DEBUG(("store: value size wrong (%d)\n", data.dsize));
1029: return(-1);
1030: }
1031: if (key.dsize >= DBZMAXKEY) {
1032: DEBUG(("store: key size too big (%d)\n", key.dsize));
1033: return(-1);
1034: }
1035:
1036: /* copy the value in to ensure alignment */
1037: (void) memcpy((char *)&value, data.dptr, SOF);
1038: DEBUG(("store: (%s, %ld)\n", key.dptr, (long)value));
1039: if (!okayvalue(value)) {
1040: DEBUG(("store: reserved bit or overflow in 0x%lx\n", value));
1041: return(-1);
1042: }
1043:
1044: /* find the place, exploiting previous search if possible */
1045: start(&srch, &key, prevp);
1046: while (search(&srch) != NOTFOUND)
1047: continue;
1048:
1049: prevp = FRESH;
1050: conf.used[0]++;
1051: DEBUG(("store: used count %ld\n", conf.used[0]));
1052: written = 1;
1053: return(set(&srch, value));
1054: }
1055:
1056: /*
1057: - dbzincore - control attempts to keep .pag file in core
1058: */
1059: int /* old setting */
1060: dbzincore(value)
1061: int value;
1062: {
1063: register int old = incore;
1064:
1065: incore = value;
1066: return(old);
1067: }
1068:
1069: /*
1070: - getconf - get configuration from .dir file
1071: */
1072: static int /* 0 success, -1 failure */
1073: getconf(df, pf, cp)
1074: register FILE *df; /* NULL means just give me the default */
1075: register FILE *pf; /* NULL means don't care about .pag */
1076: register struct dbzconfig *cp;
1077: {
1078: register int c;
1079: register int i;
1080: int err = 0;
1081:
1082: c = (df != NULL) ? getc(df) : EOF;
1083: if (c == EOF) { /* empty file, no configuration known */
1084: cp->olddbz = 0;
1085: if (df != NULL && pf != NULL && getc(pf) != EOF)
1086: cp->olddbz = 1;
1087: cp->tsize = DEFSIZE;
1088: cp->fieldsep = '\t';
1089: for (i = 0; i < NUSEDS; i++)
1090: cp->used[i] = 0;
1091: cp->valuesize = SOF;
1092: mybytemap(cp->bytemap);
1093: cp->casemap = DEFCASE;
1094: cp->tagenb = TAGENB;
1095: cp->tagmask = TAGMASK;
1096: cp->tagshift = TAGSHIFT;
1097: DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
1098: cp->tsize, cp->casemap, cp->tagenb,
1099: cp->tagmask, cp->tagshift));
1100: return(0);
1101: }
1102: (void) ungetc(c, df);
1103:
1104: /* first line, the vital stuff */
1105: if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z')
1106: err = -1;
1107: if (getno(df, &err) != dbzversion)
1108: err = -1;
1109: cp->tsize = getno(df, &err);
1110: cp->fieldsep = getno(df, &err);
1111: while ((c = getc(df)) == ' ')
1112: continue;
1113: cp->casemap = c;
1114: cp->tagenb = getno(df, &err);
1115: cp->tagmask = getno(df, &err);
1116: cp->tagshift = getno(df, &err);
1117: cp->valuesize = getno(df, &err);
1118: if (cp->valuesize != SOF) {
1119: DEBUG(("getconf: wrong of_t size (%d)\n", cp->valuesize));
1120: err = -1;
1121: cp->valuesize = SOF; /* to protect the loops below */
1122: }
1123: for (i = 0; i < cp->valuesize; i++)
1124: cp->bytemap[i] = getno(df, &err);
1125: if (getc(df) != '\n')
1126: err = -1;
1127: DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize,
1128: cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask,
1129: cp->tagshift));
1130: DEBUG(("bytemap (%d)", cp->valuesize));
1131: for (i = 0; i < cp->valuesize; i++) {
1132: DEBUG((" %d", cp->bytemap[i]));
1133: }
1134: DEBUG(("\n"));
1135:
1136: /* second line, the usages */
1137: for (i = 0; i < NUSEDS; i++)
1138: cp->used[i] = getno(df, &err);
1139: if (getc(df) != '\n')
1140: err = -1;
1141: DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2]));
1142:
1143: if (err < 0) {
1144: DEBUG(("getconf error\n"));
1145: return(-1);
1146: }
1147: return(0);
1148: }
1149:
1150: /*
1151: - getno - get a long
1152: */
1153: static long
1154: getno(f, ep)
1155: FILE *f;
1156: int *ep;
1157: {
1158: register char *p;
1159: # define MAXN 50
1160: char getbuf[MAXN];
1161: register int c;
1162:
1163: while ((c = getc(f)) == ' ')
1164: continue;
1165: if (c == EOF || c == '\n') {
1166: DEBUG(("getno: missing number\n"));
1167: *ep = -1;
1168: return(0);
1169: }
1170: p = getbuf;
1171: *p++ = c;
1172: while ((c = getc(f)) != EOF && c != '\n' && c != ' ')
1173: if (p < &getbuf[MAXN-1])
1174: *p++ = c;
1175: if (c == EOF) {
1176: DEBUG(("getno: EOF\n"));
1177: *ep = -1;
1178: } else
1179: (void) ungetc(c, f);
1180: *p = '\0';
1181:
1182: if (strspn(getbuf, "-1234567890") != strlen(getbuf)) {
1183: DEBUG(("getno: `%s' non-numeric\n", getbuf));
1184: *ep = -1;
1185: }
1186: return(atol(getbuf));
1187: }
1188:
1189: /*
1190: - putconf - write configuration to .dir file
1191: */
1192: static int /* 0 success, -1 failure */
1193: putconf(f, cp)
1194: register FILE *f;
1195: register struct dbzconfig *cp;
1196: {
1197: register int i;
1198: register int ret = 0;
1199:
1200: if (fseek(f, (of_t)0, SEEK_SET) != 0) {
1201: DEBUG(("fseek failure in putconf\n"));
1202: ret = -1;
1203: }
1204: fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion, cp->tsize,
1205: cp->fieldsep, cp->casemap, cp->tagenb,
1206: cp->tagmask, cp->tagshift, cp->valuesize);
1207: for (i = 0; i < cp->valuesize; i++)
1208: fprintf(f, " %d", cp->bytemap[i]);
1209: fprintf(f, "\n");
1210: for (i = 0; i < NUSEDS; i++)
1211: fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
1212:
1213: (void) fflush(f);
1214: if (ferror(f))
1215: ret = -1;
1216:
1217: DEBUG(("putconf status %d\n", ret));
1218: return(ret);
1219: }
1220:
1221: /*
1222: - getcore - try to set up an in-core copy of .pag file
1223: */
1224: static of_t * /* pointer to copy, or NULL */
1225: getcore(f)
1226: FILE *f;
1227: {
1228: register of_t *p;
1229: register size_t i;
1230: register size_t nread;
1231: register char *it;
1232:
1233: it = malloc((size_t)conf.tsize * SOF);
1234: if (it == NULL) {
1235: DEBUG(("getcore: malloc failed\n"));
1236: return(NULL);
1237: }
1238:
1239: nread = fread(it, SOF, (size_t)conf.tsize, f);
1240: if (ferror(f)) {
1241: DEBUG(("getcore: read failed\n"));
1242: free(it);
1243: return(NULL);
1244: }
1245:
1246: p = (of_t *)it + nread;
1247: i = (size_t)conf.tsize - nread;
1248: while (i-- > 0)
1249: *p++ = VACANT;
1250: return((of_t *)it);
1251: }
1252:
1253: /*
1254: - putcore - try to rewrite an in-core table
1255: */
1256: static int /* 0 okay, -1 fail */
1257: putcore(tab, f)
1258: of_t *tab;
1259: FILE *f;
1260: {
1261: if (fseek(f, (of_t)0, SEEK_SET) != 0) {
1262: DEBUG(("fseek failure in putcore\n"));
1263: return(-1);
1264: }
1265: (void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f);
1266: (void) fflush(f);
1267: return((ferror(f)) ? -1 : 0);
1268: }
1269:
1270: /*
1271: - start - set up to start or restart a search
1272: */
1273: static void
1274: start(sp, kp, osp)
1275: register struct searcher *sp;
1276: register datum *kp;
1277: register struct searcher *osp; /* may be FRESH, i.e. NULL */
1278: {
1279: register long h;
1280:
1281: h = hash(kp->dptr, kp->dsize);
1282: if (osp != FRESH && osp->hash == h) {
1283: if (sp != osp)
1284: *sp = *osp;
1285: DEBUG(("search restarted\n"));
1286: } else {
1287: sp->hash = h;
1288: sp->tag = MKTAG(h / conf.tsize);
1289: DEBUG(("tag 0x%lx\n", sp->tag));
1290: sp->place = h % conf.tsize;
1291: sp->tabno = 0;
1292: sp->run = (conf.olddbz) ? conf.tsize : MAXRUN;
1293: sp->aborted = 0;
1294: }
1295: sp->seen = 0;
1296: }
1297:
1298: /*
1299: - search - conduct part of a search
1300: */
1301: static of_t /* NOTFOUND if we hit VACANT or error */
1302: search(sp)
1303: register struct searcher *sp;
1304: {
1305: register of_t dest;
1306: register of_t value;
1307: of_t val; /* buffer for value (can't fread register) */
1308: register of_t place;
1309:
1310: if (sp->aborted)
1311: return(NOTFOUND);
1312:
1313: for (;;) {
1314: /* determine location to be examined */
1315: place = sp->place;
1316: if (sp->seen) {
1317: /* go to next location */
1318: if (--sp->run <= 0) {
1319: sp->tabno++;
1320: sp->run = MAXRUN;
1321: }
1322: place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
1323: sp->place = place;
1324: } else
1325: sp->seen = 1; /* now looking at current location */
1326: DEBUG(("search @ %ld\n", place));
1327:
1328: /* get the tagged value */
1329: if (corepag != NULL && place < conf.tsize) {
1330: DEBUG(("search: in core\n"));
1331: value = MAPIN(corepag[place]);
1332: } else {
1333: /* seek, if necessary */
1334: dest = place * SOF;
1335: if (pagpos != dest) {
1336: if (fseek(pagf, dest, SEEK_SET) != 0) {
1337: DEBUG(("search: seek failed\n"));
1338: pagpos = -1;
1339: sp->aborted = 1;
1340: return(NOTFOUND);
1341: }
1342: pagpos = dest;
1343: }
1344:
1345: /* read it */
1346: if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
1347: value = MAPIN(val);
1348: else if (ferror(pagf)) {
1349: DEBUG(("search: read failed\n"));
1350: pagpos = -1;
1351: sp->aborted = 1;
1352: return(NOTFOUND);
1353: } else
1354: value = VACANT;
1355:
1356: /* and finish up */
1357: pagpos += sizeof(val);
1358: }
1359:
1360: /* vacant slot is always cause to return */
1361: if (value == VACANT) {
1362: DEBUG(("search: empty slot\n"));
1363: return(NOTFOUND);
1364: };
1365:
1366: /* check the tag */
1367: value = UNBIAS(value);
1368: DEBUG(("got 0x%lx\n", value));
1369: if (!HASTAG(value)) {
1370: DEBUG(("tagless\n"));
1371: return(value);
1372: } else if (TAG(value) == sp->tag) {
1373: DEBUG(("match\n"));
1374: return(NOTAG(value));
1375: } else {
1376: DEBUG(("mismatch 0x%lx\n", TAG(value)));
1377: }
1378: }
1379: /* NOTREACHED */
1380: }
1381:
1382: /*
1383: - okayvalue - check that a value can be stored
1384: */
1385: static int /* predicate */
1386: okayvalue(value)
1387: of_t value;
1388: {
1389: if (HASTAG(value))
1390: return(0);
1391: #ifdef OVERFLOW
1392: if (value == LONG_MAX) /* BIAS() and UNBIAS() will overflow */
1393: return(0);
1394: #endif
1395: return(1);
1396: }
1397:
1398: /*
1399: - set - store a value into a location previously found by search
1400: */
1401: static int /* 0 success, -1 failure */
1402: set(sp, value)
1403: register struct searcher *sp;
1404: of_t value;
1405: {
1406: register of_t place = sp->place;
1407: register of_t v = value;
1408:
1409: if (sp->aborted)
1410: return(-1);
1411:
1412: if (CANTAG(v) && !conf.olddbz) {
1413: v |= sp->tag | taghere;
1414: if (v != UNBIAS(VACANT)) /* BIAS(v) won't look VACANT */
1415: #ifdef OVERFLOW
1416: if (v != LONG_MAX) /* and it won't overflow */
1417: #endif
1418: value = v;
1419: }
1420: DEBUG(("tagged value is 0x%lx\n", value));
1421: value = BIAS(value);
1422: value = MAPOUT(value);
1423:
1424: /* If we have the index file in memory, use it */
1425: if (corepag != NULL && place < conf.tsize) {
1426: corepag[place] = value;
1427: DEBUG(("set: incore\n"));
1428: return(0);
1429: }
1430:
1431: /* seek to spot */
1432: pagpos = -1; /* invalidate position memory */
1433: if (fseek(pagf, place * SOF, SEEK_SET) != 0) {
1434: DEBUG(("set: seek failed\n"));
1435: sp->aborted = 1;
1436: return(-1);
1437: }
1438:
1439: /* write in data */
1440: if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
1441: DEBUG(("set: write failed\n"));
1442: sp->aborted = 1;
1443: return(-1);
1444: }
1445: /* fflush improves robustness, and buffer re-use is rare anyway */
1446: if (fflush(pagf) == EOF) {
1447: DEBUG(("set: fflush failed\n"));
1448: sp->aborted = 1;
1449: return(-1);
1450: }
1451:
1452: DEBUG(("set: succeeded\n"));
1453: return(0);
1454: }
1455:
1456: /*
1457: - mybytemap - determine this machine's byte map
1458: *
1459: * A byte map is an array of ints, sizeof(of_t) of them. The 0th int
1460: * is the byte number of the high-order byte in my of_t, and so forth.
1461: */
1462: static void
1463: mybytemap(map)
1464: int map[]; /* -> int[SOF] */
1465: {
1466: union {
1467: of_t o;
1468: char c[SOF];
1469: } u;
1470: register int *mp = &map[SOF];
1471: register int ntodo;
1472: register int i;
1473:
1474: u.o = 1;
1475: for (ntodo = (int)SOF; ntodo > 0; ntodo--) {
1476: for (i = 0; i < SOF; i++)
1477: if (u.c[i] != 0)
1478: break;
1479: if (i == SOF) {
1480: /* trouble -- set it to *something* consistent */
1481: DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo));
1482: for (i = 0; i < SOF; i++)
1483: map[i] = i;
1484: return;
1485: }
1486: DEBUG(("mybytemap: byte %d\n", i));
1487: *--mp = i;
1488: while (u.c[i] != 0)
1489: u.o <<= 1;
1490: }
1491: }
1492:
1493: /*
1494: - bytemap - transform an of_t from byte ordering map1 to map2
1495: */
1496: static of_t /* transformed result */
1497: bytemap(ino, map1, map2)
1498: of_t ino;
1499: int *map1;
1500: int *map2;
1501: {
1502: union oc {
1503: of_t o;
1504: char c[SOF];
1505: };
1506: union oc in;
1507: union oc out;
1508: register int i;
1509:
1510: in.o = ino;
1511: for (i = 0; i < SOF; i++)
1512: out.c[map2[i]] = in.c[map1[i]];
1513: return(out.o);
1514: }
1515:
1516: /*
1517: * This is a simplified version of the pathalias hashing function.
1518: * Thanks to Steve Belovin and Peter Honeyman
1519: *
1520: * hash a string into a long int. 31 bit crc (from andrew appel).
1521: * the crc table is computed at run time by crcinit() -- we could
1522: * precompute, but it takes 1 clock tick on a 750.
1523: *
1524: * This fast table calculation works only if POLY is a prime polynomial
1525: * in the field of integers modulo 2. Since the coefficients of a
1526: * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
1527: * implicit. IT MUST ALSO BE THE CASE that the coefficients of orders
1528: * 31 down to 25 are zero. Happily, we have candidates, from
1529: * E. J. Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
1530: * x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
1531: * x^31 + x^3 + x^0
1532: *
1533: * We reverse the bits to get:
1534: * 111101010000000000000000000000001 but drop the last 1
1535: * f 5 0 0 0 0 0 0
1536: * 010010000000000000000000000000001 ditto, for 31-bit crc
1537: * 4 8 0 0 0 0 0 0
1538: */
1539:
1540: #define POLY 0x48000000L /* 31-bit polynomial (avoids sign problems) */
1541:
1542: static long CrcTable[128];
1543:
1544: /*
1545: - crcinit - initialize tables for hash function
1546: */
1547: static void
1548: crcinit()
1549: {
1550: register int i, j;
1551: register long sum;
1552:
1553: for (i = 0; i < 128; ++i) {
1554: sum = 0L;
1555: for (j = 7 - 1; j >= 0; --j)
1556: if (i & (1 << j))
1557: sum ^= POLY >> j;
1558: CrcTable[i] = sum;
1559: }
1560: DEBUG(("crcinit: done\n"));
1561: }
1562:
1563: /*
1564: - hash - Honeyman's nice hashing function
1565: */
1566: static long
1567: hash(name, size)
1568: register char *name;
1569: register int size;
1570: {
1571: register long sum = 0L;
1572:
1573: while (size--) {
1574: sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
1575: }
1576: DEBUG(("hash: returns (%ld)\n", sum));
1577: return(sum);
1578: }
1579:
1580: /*
1581: * case-mapping stuff
1582: *
1583: * Borrowed from C News, by permission of the authors. Somewhat modified.
1584: *
1585: * We exploit the fact that we are dealing only with headers here, and
1586: * headers are limited to the ASCII characters by RFC822. It is barely
1587: * possible that we might be dealing with a translation into another
1588: * character set, but in particular it's very unlikely for a header
1589: * character to be outside -128..255.
1590: *
1591: * Life would be a whole lot simpler if tolower() could safely and portably
1592: * be applied to any char.
1593: */
1594:
1595: #define OFFSET 128 /* avoid trouble with negative chars */
1596:
1597: /* must call casencmp before invoking TOLOW... */
1598: #define TOLOW(c) (cmap[(c)+OFFSET])
1599:
1600: /* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
1601: /* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
1602: #define CISTREQN(a, b, n) \
1603: (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)
1604:
1605: #define MAPSIZE (256+OFFSET)
1606: static char cmap[MAPSIZE]; /* relies on init to '\0' */
1607: static int mprimed = 0; /* has cmap been set up? */
1608:
1609: /*
1610: - mapprime - set up case-mapping stuff
1611: */
1612: static void
1613: mapprime()
1614: {
1615: register char *lp;
1616: register char *up;
1617: register int c;
1618: register int i;
1619: static char lower[] = "abcdefghijklmnopqrstuvwxyz";
1620: static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1621:
1622: for (lp = lower, up = upper; *lp != '\0'; lp++, up++) {
1623: c = *lp;
1624: cmap[c+OFFSET] = c;
1625: cmap[*up+OFFSET] = c;
1626: }
1627: for (i = 0; i < MAPSIZE; i++)
1628: if (cmap[i] == '\0')
1629: cmap[i] = (char)(i-OFFSET);
1630: mprimed = 1;
1631: }
1632:
1633: /*
1634: - casencmp - case-independent strncmp
1635: */
1636: static int /* < == > 0 */
1637: casencmp(s1, s2, len)
1638: char *s1;
1639: char *s2;
1640: int len;
1641: {
1642: register char *p1;
1643: register char *p2;
1644: register int n;
1645:
1646: if (!mprimed)
1647: mapprime();
1648:
1649: p1 = s1;
1650: p2 = s2;
1651: n = len;
1652: while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) {
1653: p1++;
1654: p2++;
1655: }
1656: if (n < 0)
1657: return(0);
1658:
1659: /*
1660: * The following case analysis is necessary so that characters
1661: * which look negative collate low against normal characters but
1662: * high against the end-of-string NUL.
1663: */
1664: if (*p1 == '\0' && *p2 == '\0')
1665: return(0);
1666: else if (*p1 == '\0')
1667: return(-1);
1668: else if (*p2 == '\0')
1669: return(1);
1670: else
1671: return(TOLOW(*p1) - TOLOW(*p2));
1672: }
1673:
1674: /*
1675: - mapcase - do case-mapped copy
1676: */
1677: static char * /* returns src or dst */
1678: mapcase(dst, src, siz)
1679: char *dst; /* destination, used only if mapping needed */
1680: char *src; /* source; src == dst is legal */
1681: size_t siz;
1682: {
1683: register char *s;
1684: register char *d;
1685: register char *c; /* case break */
1686: register char *e; /* end of source */
1687:
1688:
1689: c = cipoint(src, siz);
1690: if (c == NULL)
1691: return(src);
1692:
1693: if (!mprimed)
1694: mapprime();
1695: s = src;
1696: e = s + siz;
1697: d = dst;
1698:
1699: while (s < c)
1700: *d++ = *s++;
1701: while (s < e)
1702: *d++ = TOLOW(*s++);
1703:
1704: return(dst);
1705: }
1706:
1707: /*
1708: - cipoint - where in this message-ID does it become case-insensitive?
1709: *
1710: * The RFC822 code is not quite complete. Absolute, total, full RFC822
1711: * compliance requires a horrible parsing job, because of the arcane
1712: * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
1713: * for example. There are three or four things that might occur in the
1714: * domain part of a message-id that are case-sensitive. They don't seem
1715: * to ever occur in real news, thank Cthulhu. (What? You were expecting
1716: * a merciful and forgiving deity to be invoked in connection with RFC822?
1717: * Forget it; none of them would come near it.)
1718: */
1719: static char * /* pointer into s, or NULL for "nowhere" */
1720: cipoint(s, siz)
1721: char *s;
1722: size_t siz;
1723: {
1724: register char *p;
1725: static char post[] = "postmaster";
1726: static int plen = sizeof(post)-1;
1727:
1728: switch (conf.casemap) {
1729: case '0': /* unmapped, sensible */
1730: return(NULL);
1731: break;
1732: case 'C': /* C News, RFC 822 conformant (approx.) */
1733: p = memchr(s, '@', siz);
1734: if (p == NULL) /* no local/domain split */
1735: return(NULL); /* assume all local */
1736: else if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) {
1737: /* crazy -- "postmaster" is case-insensitive */
1738: return(s);
1739: } else
1740: return(p);
1741: break;
1742: case '=': /* 2.11, neither sensible nor conformant */
1743: return(s); /* all case-insensitive */
1744: break;
1745: }
1746:
1747: DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap));
1748: return(NULL); /* just leave it alone */
1749: }
1750:
1751: /*
1752: - dbzdebug - control dbz debugging at run time
1753: */
1754: int /* old value */
1755: dbzdebug(value)
1756: int value;
1757: {
1758: #ifdef DBZDEBUG
1759: register int old = debug;
1760:
1761: debug = value;
1762: return(old);
1763: #else
1764: return(-1);
1765: #endif
1766: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.