1 /*
2 
3 dbz.c  V3.4
4 
5 Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
6 You can use this code in any manner, as long as you leave my name on it
7 and don't hold me responsible for any problems with it.
8 
9 Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun  5 00:27:08 CDT 1988
10 
11 Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
12 
13 Major reworking by Henry Spencer as part of the C News project.
14 
15 These routines replace dbm as used by the usenet news software
16 (it's not a full dbm replacement by any means).  It's fast and
17 simple.  It contains no AT&T code.
18 
19 In general, dbz's files are 1/20 the size of dbm's.  Lookup performance
20 is somewhat better, while file creation is spectacularly faster, especially
21 if the incore facility is used.
22 
23 If you are seized by the urge to hack mmap() calls into this stuff, please
24 try to resist it.  Instead, spend that energy beating on your system supplier
25 to fix his read() and write() calls to exploit the same memory-mapping
26 tricks mmap() uses.  That way, all kinds of software can get more efficient
27 *without* having non-portable grunge like mmap() hacked into it.
28 
29 */
30 
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <errno.h>
37 #ifndef __STDC__
38 extern int errno;
39 #endif
40 #include <dbz.h>
41 
42 /*
43  * #ifdef index.  "LIA" = "leave it alone unless you know what you're doing".
44  *
45  * FUNNYSEEKS	SEEK_SET is not 0, get it from <unistd.h>
46  * INDEX_SIZE	backward compatibility with old dbz; avoid using this
47  * NMEMORY	number of days of memory for use in sizing new table (LIA)
48  * INCORE	backward compatibility with old dbz; use dbzincore() instead
49  * DBZDEBUG	enable debugging
50  * DEFSIZE	default table size (not as critical as in old dbz)
51  * OLDBNEWS	default case mapping as in old B News; set NOBUFFER
52  * BNEWS	default case mapping as in current B News; set NOBUFFER
53  * DEFCASE	default case-map algorithm selector
54  * NOTAGS	fseek offsets are strange, do not do tagging (see below)
55  * NPAGBUF	size of .pag buffer, in longs (LIA)
56  * SHISTBUF	size of ASCII-file buffer, in bytes (LIA)
57  * MAXRUN	length of run which shifts to next table (see below) (LIA)
58  * OVERFLOW	long-int arithmetic overflow must be avoided, will trap
59  * NOBUFFER	do not buffer hash-table i/o, B News locking is defective
60  */
61 
62 #ifdef FUNNYSEEKS
63 #include <unistd.h>
64 #else
65 #define	SEEK_SET	0
66 #endif
67 #ifdef OVERFLOW
68 #include <limits.h>
69 #endif
70 
71 static int dbzversion = 3;	/* for validating .dir file format */
72 
73 /*
74  * The dbz database exploits the fact that when news stores a <key,value>
75  * tuple, the `value' part is a seek offset into a text file, pointing to
76  * a copy of the `key' part.  This avoids the need to store a copy of
77  * the key in the dbz files.  However, the text file *must* exist and be
78  * consistent with the dbz files, or things will fail.
79  *
80  * The basic format of the database is a simple hash table containing the
81  * values.  A value is stored by indexing into the table using a hash value
82  * computed from the key; collisions are resolved by linear probing (just
83  * search forward for an empty slot, wrapping around to the beginning of
84  * the table if necessary).  Linear probing is a performance disaster when
85  * the table starts to get full, so a complication is introduced.  The
86  * database is actually one *or more* tables, stored sequentially in the
87  * .pag file, and the length of linear-probe sequences is limited.  The
88  * search (for an existing item or an empty slot) always starts in the
89  * first table, and whenever MAXRUN probes have been done in table N,
90  * probing continues in table N+1.  This behaves reasonably well even in
91  * cases of massive overflow.  There are some other small complications
92  * added, see comments below.
93  *
94  * The table size is fixed for any particular database, but is determined
95  * dynamically when a database is rebuilt.  The strategy is to try to pick
96  * the size so the first table will be no more than 2/3 full, that being
97  * slightly before the point where performance starts to degrade.  (It is
98  * desirable to be a bit conservative because the overflow strategy tends
99  * to produce files with holes in them, which is a nuisance.)
100  */
101 
102 /*
103  * The following is for backward compatibility.
104  */
105 #ifdef INDEX_SIZE
106 #define	DEFSIZE	INDEX_SIZE
107 #endif
108 
109 /*
110  * ANSI C says an offset into a file is a long, not an off_t, for some
111  * reason.  This actually does simplify life a bit, but it's still nice
112  * to have a distinctive name for it.  Beware, this is just for readability,
113  * don't try to change this.
114  */
115 #define	of_t	long
116 #define	SOF	(sizeof(of_t))
117 
118 /*
119  * We assume that unused areas of a binary file are zeros, and that the
120  * bit pattern of `(of_t)0' is all zeros.  The alternative is rather
121  * painful file initialization.  Note that okayvalue(), if OVERFLOW is
122  * defined, knows what value of an offset would cause overflow.
123  */
124 #define	VACANT		((of_t)0)
125 #define	BIAS(o)		((o)+1)		/* make any valid of_t non-VACANT */
126 #define	UNBIAS(o)	((o)-1)		/* reverse BIAS() effect */
127 
128 /*
129  * In a Unix implementation, or indeed any in which an of_t is a byte
130  * count, there are a bunch of high bits free in an of_t.  There is a
131  * use for them.  Checking a possible hit by looking it up in the base
132  * file is relatively expensive, and the cost can be dramatically reduced
133  * by using some of those high bits to tag the value with a few more bits
134  * of the key's hash.  This detects most false hits without the overhead of
135  * seek+read+strcmp.  We use the top bit to indicate whether the value is
136  * tagged or not, and don't tag a value which is using the tag bits itself.
137  * We're in trouble if the of_t representation wants to use the top bit.
138  * The actual bitmasks and offset come from the configuration stuff,
139  * which permits fiddling with them as necessary, and also suppressing
140  * them completely (by defining the masks to 0).  We build pre-shifted
141  * versions of the masks for efficiency.
142  */
143 static of_t tagbits;		/* pre-shifted tag mask */
144 static of_t taghere;		/* pre-shifted tag-enable bit */
145 static of_t tagboth;		/* tagbits|taghere */
146 #define	HASTAG(o)	((o)&taghere)
147 #define	TAG(o)		((o)&tagbits)
148 #define	NOTAG(o)	((o)&~tagboth)
149 #define	CANTAG(o)	(((o)&tagboth) == 0)
150 #define	MKTAG(v)	(((v)<<conf.tagshift)&tagbits)
151 
152 /*
153  * A new, from-scratch database, not built as a rebuild of an old one,
154  * needs to know table size, casemap algorithm, and tagging.  Normally
155  * the user supplies this info, but there have to be defaults.
156  */
157 #ifndef DEFSIZE
158 #define	DEFSIZE	300007
159 #endif
160 #ifdef OLDBNEWS
161 #define	DEFCASE	'0'		/* B2.10 -- no mapping */
162 #define	NOBUFFER		/* B News locking is defective */
163 #endif
164 #ifdef BNEWS
165 #define	DEFCASE	'='		/* B2.11 -- all mapped */
166 #define	NOBUFFER		/* B News locking is defective */
167 #endif
168 #ifndef DEFCASE			/* C News compatibility is the default */
169 #define	DEFCASE	'C'		/* C News -- RFC822 mapping */
170 #endif
171 #ifndef NOTAGS
172 #define	TAGENB	0x80L		/* tag enable is top bit, tag is next 5 */
173 #define	TAGMASK	0x7c
174 #define	TAGSHIFT	24
175 #else
176 #define	TAGENB	0L		/* no tags */
177 #define	TAGMASK	0
178 #define	TAGSHIFT	0
179 #endif
180 
181 /*
182  * We read configuration info from the .dir file into this structure,
183  * so we can avoid wired-in assumptions for an existing database.
184  *
185  * Among the info is a record of recent peak usages, so that a new table
186  * size can be chosen intelligently when rebuilding.  10 is a good
187  * number of usages to keep, since news displays marked fluctuations
188  * in volume on a 7-day cycle.
189  */
190 struct dbzconfig {
191 	int olddbz;		/* .dir file empty but .pag not? */
192 	of_t tsize;		/* table size */
193 #	ifndef NMEMORY
194 #	define	NMEMORY	10	/* # days of use info to remember */
195 #	endif
196 #	define	NUSEDS	(1+NMEMORY)
197 	of_t used[NUSEDS];	/* entries used today, yesterday, ... */
198 	int valuesize;		/* size of table values, == SOF */
199 	int bytemap[SOF];	/* byte-order map */
200 	char casemap;		/* case-mapping algorithm (see cipoint()) */
201 	char fieldsep;		/* field separator in base file, if any */
202 	of_t tagenb;		/* unshifted tag-enable bit */
203 	of_t tagmask;		/* unshifted tag mask */
204 	int tagshift;		/* shift count for tagmask and tagenb */
205 	of_t ntagless[NUSEDS];	/* how many entries went tagless today, ... */
206 };
207 static struct dbzconfig conf;
208 static int getconf();
209 static long getno();
210 static int putconf();
211 static void mybytemap();
212 static of_t bytemap();
213 
214 /*
215  * For a program that makes many, many references to the database, it
216  * is a large performance win to keep the table in core, if it will fit.
217  * Note that this does hurt robustness in the event of crashes, and
218  * dbzdbmclose() *must* be called to flush the in-core database to disk.
219  * The code is prepared to deal with the possibility that there isn't
220  * enough memory.  There *is* an assumption that a size_t is big enough
221  * to hold the size (in bytes) of one table, so dbzdbminit() tries to figure
222  * out whether this is possible first.
223  *
224  * The preferred way to ask for an in-core table is to do dbzincore(1)
225  * before dbzdbminit().  The default is not to do it, although -DINCORE
226  * overrides this for backward compatibility with old dbz.
227  *
228  * We keep only the first table in core.  This greatly simplifies the
229  * code, and bounds memory demand.  Furthermore, doing this is a large
230  * performance win even in the event of massive overflow.
231  */
232 #ifdef INCORE
233 static int incore = 1;
234 #else
235 static int incore = 0;
236 #endif
237 
238 /*
239  * Making the in-core table write-through is a win in some odd situations
240  * where one is doing much reading and very little writing.
241  */
242 static int writethrough = 0;
243 
244 /*
245  * Stdio buffer for .pag reads.  Buffering more than about 16 does not help
246  * significantly at the densities we try to maintain, and the much larger
247  * buffers that most stdios default to are much more expensive to fill.
248  * With small buffers, stdio is performance-competitive with raw read(),
249  * and it's much more portable.
250  */
251 #ifndef NPAGBUF
252 #define	NPAGBUF	16
253 #endif
254 #ifndef NOBUFFER
255 #ifdef _IOFBF
256 static of_t pagbuf[NPAGBUF];	/* only needed if !NOBUFFER && _IOFBF */
257 #endif
258 #endif
259 
260 /*
261  * Stdio buffer for base-file reads.  Message-IDs (all news ever needs to
262  * read) are essentially never longer than 64 bytes, and the typical stdio
263  * buffer is so much larger that it is much more expensive to fill.
264  */
265 #ifndef SHISTBUF
266 #define	SHISTBUF	64
267 #endif
268 #ifdef _IOFBF
269 static char basebuf[SHISTBUF];		/* only needed if _IOFBF exists */
270 #endif
271 
272 /*
273  * Callback routine for performing obscene acts on file descriptors.
274  * This gives the user some control when dbz opens a file descriptor with
275  * the intention of keeping it open, and lets him (e.g.) set close-on-exec
276  * bits on the descriptor.  The callback routine gets a FILE * as its
277  * parameter, and must not mess with it in dbz-visible ways.
278  */
279 static void (*callback)() = NULL;
280 
281 /*
282  * Data structure for recording info about searches.
283  */
284 struct searcher {
285 	of_t place;		/* current location in file */
286 	int tabno;		/* which table we're in */
287 	int run;		/* how long we'll stay in this table */
288 #		ifndef MAXRUN
289 #		define	MAXRUN	100
290 #		endif
291 	long hash;		/* the key's hash code (for optimization) */
292 	of_t tag;		/* tag we are looking for */
293 	int seen;		/* have we examined current location? */
294 	int aborted;		/* has i/o error aborted search? */
295 };
296 static void start();
297 #define	FRESH	((struct searcher *)NULL)
298 static of_t search();
299 #define	NOTFOUND	((of_t)-1)
300 static int okayvalue();
301 static int set();
302 
303 /*
304  * Arguably the searcher struct for a given routine ought to be local to
305  * it, but a fetch() is very often immediately followed by a store(), and
306  * in some circumstances it is a useful performance win to remember where
307  * the fetch() completed.  So we use a global struct and remember whether
308  * it is current.
309  */
310 static struct searcher srch;
311 static struct searcher *prevp;	/* &srch or FRESH */
312 
313 /* byte-ordering stuff */
314 static int mybmap[SOF];			/* my byte order (see mybytemap()) */
315 static int bytesame;			/* is database order same as mine? */
316 #define	MAPIN(o)	((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
317 #define	MAPOUT(o)	((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
318 
319 /*
320  * The double parentheses needed to make this work are ugly, but the
321  * alternative (under most compilers) is to pack around 2K of unused
322  * strings -- there's just no way to get rid of them.  The "else" is
323  * likewise ugly, but swallows the following ";" and keeps it from
324  * making syntactic trouble.
325  */
326 #ifdef DBZDEBUG
327 #define DEBUG(args) if (debug) { (void) printf args ; } else
328 static int debug;			/* controlled by dbzdebug() */
329 #else
330 #define	DEBUG(args)	;
331 #endif
332 
333 /* misc. forwards */
334 static long hash();
335 static void crcinit();
336 static char *cipoint();
337 static char *mapcase();
338 static int isprime();
339 static FILE *latebase();
340 
341 /* file-naming stuff */
342 static char dir[] = ".dir";
343 static char pag[] = ".pag";
344 static char *enstring();
345 
346 /* central data structures */
347 static FILE *basef;		/* descriptor for base file */
348 static char *basefname;		/* name for not-yet-opened base file */
349 static FILE *dirf;		/* descriptor for .dir file */
350 static int dirronly;		/* dirf open read-only? */
351 static FILE *pagf = NULL;	/* descriptor for .pag file */
352 static of_t pagpos;		/* posn in pagf; only search may set != -1 */
353 static int pagronly;		/* pagf open read-only? */
354 static of_t *corepag;		/* incore version of .pag file, if any */
355 static FILE *bufpagf;		/* well-buffered pagf, for incore rewrite */
356 static of_t *getcore();
357 static int putcore();
358 static int written;		/* has a store() been done? */
359 
360 /*
361  - dbzfresh - set up a new database, no historical info
362  */
363 int				/* 0 success, -1 failure */
dbzfresh(name,size,fs,cmap,tagmask)364 dbzfresh(name, size, fs, cmap, tagmask)
365 char *name;			/* base name; .dir and .pag must exist */
366 long size;			/* table size (0 means default) */
367 int fs;				/* field-separator character in base file */
368 int cmap;			/* case-map algorithm (0 means default) */
369 of_t tagmask;			/* 0 default, 1 no tags */
370 {
371 	register char *fn;
372 	struct dbzconfig c;
373 	register of_t m;
374 	register FILE *f;
375 
376 	if (pagf != NULL) {
377 		DEBUG(("dbzfresh: database already open\n"));
378 		return(-1);
379 	}
380 	if (size != 0 && size < 2) {
381 		DEBUG(("dbzfresh: preposterous size (%ld)\n", size));
382 		return(-1);
383 	}
384 
385 	/* get default configuration */
386 	if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)
387 		return(-1);	/* "can't happen" */
388 
389 	/* and mess with it as specified */
390 	if (size != 0)
391 		c.tsize = size;
392 	c.fieldsep = fs;
393 	switch (cmap) {
394 	case 0:
395 	case '0':
396 	case 'B':		/* 2.10 compat */
397 		c.casemap = '0';	/* '\0' nicer, but '0' printable! */
398 		break;
399 	case '=':
400 	case 'b':		/* 2.11 compat */
401 		c.casemap = '=';
402 		break;
403 	case 'C':
404 		c.casemap = 'C';
405 		break;
406 	case '?':
407 		c.casemap = DEFCASE;
408 		break;
409 	default:
410 		DEBUG(("dbzfresh case map `%c' unknown\n", cmap));
411 		return(-1);
412 		break;
413 	}
414 	switch (tagmask) {
415 	case 0:			/* default */
416 		break;
417 	case 1:			/* no tags */
418 		c.tagshift = 0;
419 		c.tagmask = 0;
420 		c.tagenb = 0;
421 		break;
422 	default:
423 		m = tagmask;
424 		c.tagshift = 0;
425 		while (!(m&01)) {
426 			m >>= 1;
427 			c.tagshift++;
428 		}
429 		c.tagmask = m;
430 		c.tagenb = (m << 1) & ~m;
431 		break;
432 	}
433 
434 	/* write it out */
435 	fn = enstring(name, dir);
436 	if (fn == NULL)
437 		return(-1);
438 	f = fopen(fn, "w");
439 	free(fn);
440 	if (f == NULL) {
441 		DEBUG(("dbzfresh: unable to write config\n"));
442 		return(-1);
443 	}
444 	if (putconf(f, &c) < 0) {
445 		(void) fclose(f);
446 		return(-1);
447 	}
448 	if (fclose(f) == EOF) {
449 		DEBUG(("dbzfresh: fclose failure\n"));
450 		return(-1);
451 	}
452 
453 	/* create/truncate .pag */
454 	fn = enstring(name, pag);
455 	if (fn == NULL)
456 		return(-1);
457 	f = fopen(fn, "w");
458 	free(fn);
459 	if (f == NULL) {
460 		DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
461 		return(-1);
462 	} else
463 		(void) fclose(f);
464 
465 	/* and punt to dbzdbminit for the hard work */
466 	return(dbzdbminit(name));
467 }
468 
469 /*
470  - dbzsize - what's a good table size to hold this many entries?
471  */
472 long
dbzsize(contents)473 dbzsize(contents)
474 long contents;			/* 0 means what's the default */
475 {
476 	register long n;
477 
478 	if (contents <= 0) {	/* foulup or default inquiry */
479 		DEBUG(("dbzsize: preposterous input (%ld)\n", contents));
480 		return(DEFSIZE);
481 	}
482 	n = (contents/2)*3;	/* try to keep table at most 2/3 full */
483 	if (!(n&01))		/* make it odd */
484 		n++;
485 	DEBUG(("dbzsize: tentative size %ld\n", n));
486 	while (!isprime(n))	/* and look for a prime */
487 		n += 2;
488 	DEBUG(("dbzsize: final size %ld\n", n));
489 
490 	return(n);
491 }
492 
493 /*
494  - isprime - is a number prime?
495  *
496  * This is not a terribly efficient approach.
497  */
498 static int			/* predicate */
isprime(x)499 isprime(x)
500 register long x;
501 {
502 	static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
503 	register int *ip;
504 	register long div;
505 	register long stop;
506 
507 	/* hit the first few primes quickly to eliminate easy ones */
508 	/* this incidentally prevents ridiculously small tables */
509 	for (ip = quick; (div = *ip) != 0; ip++)
510 		if (x%div == 0) {
511 			DEBUG(("isprime: quick result on %ld\n", x));
512 			return(0);
513 		}
514 
515 	/* approximate square root of x */
516 	for (stop = x; x/stop < stop; stop >>= 1)
517 		continue;
518 	stop <<= 1;
519 
520 	/* try odd numbers up to stop */
521 	for (div = *--ip + 2; div < stop; div += 2)
522 		if (x%div == 0)
523 			return(0);
524 
525 	return(1);
526 }
527 
528 /*
529  - dbztagmask - what's a good tag mask for a file this size?
530  *
531  * This assumes that TAGENB is the high-order bit of a long.
532  */
533 long
dbztagmask(filesize)534 dbztagmask(filesize)
535 register long filesize;
536 {
537 	register long m;
538 	register int nbits;
539 
540 	/*
541 	 * First, see how many bits are unused.  We cut it off at 7 on the
542 	 * theory that a 1-in-128 rate of false probes should be okay, given
543 	 * that the .pag file gets resized to limit the length of runs anyway.
544 	 */
545 	m = TAGENB << (TAGSHIFT-1);
546 	nbits = 1;
547 	while ((m&filesize) == 0 && nbits < 7) {	/* extend downward */
548 		m |= m >> 1;
549 		nbits++;
550 	}
551 
552 	if ((m&filesize) == 0)		/* reached max bits and still fine */
553 		return(m);
554 
555 	/* We now have a mask that is one bit too big. */
556 	m &= m << 1;			/* shorten it */
557 
558 	if (m == 0)			/* file is just too big */
559 		return(1);
560 
561 	return(m);
562 }
563 
564 /*
565  - dbzagain - set up a new database to be a rebuild of an old one
566  */
567 int				/* 0 success, -1 failure */
dbzagain(name,oldname)568 dbzagain(name, oldname)
569 const char *name;			/* base name; .dir and .pag must exist */
570 const char *oldname;			/* base name; all must exist */
571 {
572 	register char *fn;
573 	struct dbzconfig c;
574 	register int i;
575 	register long top;
576 	register FILE *f;
577 	register int newtable;
578 	register long newsize;
579 
580 	if (pagf != NULL) {
581 		DEBUG(("dbzagain: database already open\n"));
582 		return(-1);
583 	}
584 
585 	/* pick up the old configuration */
586 	fn = enstring(oldname, dir);
587 	if (fn == NULL)
588 		return(-1);
589 	f = fopen(fn, "r");
590 	free(fn);
591 	if (f == NULL) {
592 		DEBUG(("dbzagain: cannot open old .dir file\n"));
593 		return(-1);
594 	}
595 	i = getconf(f, (FILE *)NULL, &c);
596 	(void) fclose(f);
597 	if (i < 0) {
598 		DEBUG(("dbzagain: getconf failed\n"));
599 		return(-1);
600 	}
601 
602 	/* tinker with it */
603 	/* first, find peak of usage history, if there's enough history */
604 	top = 0;
605 	newtable = 0;
606 	for (i = 0; i < NUSEDS; i++) {
607 		if (top < c.used[i])
608 			top = c.used[i];
609 		if (c.used[i] == 0)
610 			newtable = 1;	/* hasn't got full usage history yet */
611 	}
612 	if (top == 0) {
613 		DEBUG(("dbzagain: old table has no contents!\n"));
614 		newtable = 1;
615 	}
616 
617 	/* shift the history, and decide on table size */
618 	for (i = NUSEDS-1; i > 0; i--) {
619 		c.used[i] = c.used[i-1];
620 		c.ntagless[i] = c.ntagless[i-1];
621 	}
622 	c.used[0] = 0;
623 	c.ntagless[0] = 0;
624 	newsize = dbzsize(top);
625 	if (!newtable || newsize > c.tsize)	/* don't shrink new table */
626 		c.tsize = newsize;
627 
628 	/* decide whether the tag needs changing */
629 	if (c.ntagless[1] != 0) {
630 		c.tagmask &= c.tagmask << 1;	/* clear bottom bit */
631 		if (c.tagmask == 0)		/* oops, no more room... */
632 			c.tagenb = 0;		/* just can't tag at all */
633 	}
634 	/*
635 	 * You might think that you'd want to adaptively grow the tag
636 	 * as well as shrinking it.  It's very hard to do that well,
637 	 * so for now we don't try.
638 	 */
639 
640 	/* write it out */
641 	fn = enstring(name, dir);
642 	if (fn == NULL)
643 		return(-1);
644 	f = fopen(fn, "w");
645 	free(fn);
646 	if (f == NULL) {
647 		DEBUG(("dbzagain: unable to write new .dir\n"));
648 		return(-1);
649 	}
650 	i = putconf(f, &c);
651 	(void) fclose(f);
652 	if (i < 0) {
653 		DEBUG(("dbzagain: putconf failed\n"));
654 		return(-1);
655 	}
656 
657 	/* create/truncate .pag */
658 	fn = enstring(name, pag);
659 	if (fn == NULL)
660 		return(-1);
661 	f = fopen(fn, "w");
662 	free(fn);
663 	if (f == NULL) {
664 		DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
665 		return(-1);
666 	} else
667 		(void) fclose(f);
668 
669 	/* and let dbzdbminit do the work */
670 	return(dbzdbminit(name));
671 }
672 
673 /*
674  - dbzdbminit - open a database, creating it (using defaults) if necessary
675  *
676  * We try to leave errno set plausibly, to the extent that underlying
677  * functions permit this, since many people consult it if dbzdbminit() fails.
678  */
679 int 				/* 0 success, -1 failure */
dbzdbminit(name)680 dbzdbminit(name)
681 const char *name;
682 {
683 	register int i;
684 	register size_t s;
685 	register char *dirfname;
686 	register char *pagfname;
687 
688 	if (pagf != NULL) {
689 		DEBUG(("dbzdbminit: dbzdbminit already called once\n"));
690 		errno = 0;
691 		return(-1);
692 	}
693 
694 	/* open the .dir file */
695 	dirfname = enstring(name, dir);
696 	if (dirfname == NULL)
697 		return(-1);
698 	dirf = fopen(dirfname, "r+");
699 	if (dirf == NULL) {
700 		dirf = fopen(dirfname, "r");
701 		dirronly = 1;
702 	} else
703 		dirronly = 0;
704 	free(dirfname);
705 	if (dirf == NULL) {
706 		DEBUG(("dbzdbminit: can't open .dir file\n"));
707 		return(-1);
708 	}
709 	if (callback != NULL)
710 		(*callback)(dirf);
711 
712 	/* open the .pag file */
713 	pagfname = enstring(name, pag);
714 	if (pagfname == NULL) {
715 		(void) fclose(dirf);
716 		return(-1);
717 	}
718 	pagf = fopen(pagfname, "r+b");
719 	if (pagf == NULL) {
720 		pagf = fopen(pagfname, "rb");
721 		if (pagf == NULL) {
722 			DEBUG(("dbzdbminit: .pag open failed\n"));
723 			(void) fclose(dirf);
724 			free(pagfname);
725 			return(-1);
726 		}
727 		pagronly = 1;
728 	} else if (dirronly)
729 		pagronly = 1;
730 	else
731 		pagronly = 0;
732 #ifdef NOBUFFER
733 	/*
734 	 * B News does not do adequate locking on its database accesses.
735 	 * Why it doesn't get into trouble using dbm is a mystery.  In any
736 	 * case, doing unbuffered i/o does not cure the problem, but does
737 	 * enormously reduce its incidence.
738 	 */
739 	(void) setbuf(pagf, (char *)NULL);
740 #else
741 #ifdef _IOFBF
742 	(void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf));
743 #endif
744 #endif
745 	pagpos = -1;
746 	/* don't free pagfname, need it below */
747 	if (callback != NULL)
748 		(*callback)(pagf);
749 
750 	/* open the base file */
751 	basef = fopen(name, "r");
752 	if (basef == NULL) {
753 		DEBUG(("dbzdbminit: basefile open failed\n"));
754 		basefname = enstring(name, "");
755 		if (basefname == NULL) {
756 			(void) fclose(pagf);
757 			(void) fclose(dirf);
758 			free(pagfname);
759 			pagf = NULL;
760 			return(-1);
761 		}
762 	} else
763 		basefname = NULL;
764 #ifdef _IOFBF
765 	if (basef != NULL)
766 		(void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf));
767 #endif
768 	if (callback != NULL)
769 		(*callback)(basef);
770 
771 	/* pick up configuration */
772 	if (getconf(dirf, pagf, &conf) < 0) {
773 		DEBUG(("dbzdbminit: getconf failure\n"));
774 		(void) fclose(basef);
775 		(void) fclose(pagf);
776 		(void) fclose(dirf);
777 		free(pagfname);
778 		pagf = NULL;
779 		errno = EDOM;	/* kind of a kludge, but very portable */
780 		return(-1);
781 	}
782 	tagbits = conf.tagmask << conf.tagshift;
783 	taghere = conf.tagenb << conf.tagshift;
784 	tagboth = tagbits | taghere;
785 	mybytemap(mybmap);
786 	bytesame = 1;
787 	for (i = 0; i < SOF; i++)
788 		if (mybmap[i] != conf.bytemap[i])
789 			bytesame = 0;
790 
791 	/* get first table into core, if it looks desirable and feasible */
792 	s = (size_t)conf.tsize * SOF;
793 	if (incore && (of_t)(s/SOF) == conf.tsize) {
794 		bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b");
795 		if (bufpagf != NULL) {
796 			corepag = getcore(bufpagf);
797 			if (callback != NULL)
798 				(*callback)(bufpagf);
799 		}
800 	} else {
801 		bufpagf = NULL;
802 		corepag = NULL;
803 	}
804 	free(pagfname);
805 
806 	/* misc. setup */
807 	crcinit();
808 	written = 0;
809 	prevp = FRESH;
810 	DEBUG(("dbzdbminit: succeeded\n"));
811 	return(0);
812 }
813 
814 /*
815  - enstring - concatenate two strings into a malloced area
816  */
817 static char *			/* NULL if malloc fails */
enstring(s1,s2)818 enstring(s1, s2)
819 char *s1;
820 char *s2;
821 {
822 	register char *p;
823 
824 	p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1);
825 	if (p != NULL) {
826 		(void) strcpy(p, s1);
827 		(void) strcat(p, s2);
828 	} else {
829 		DEBUG(("enstring(%s, %s) out of memory\n", s1, s2));
830 	}
831 	return(p);
832 }
833 
834 /*
835  - dbzdbmclose - close a database
836  */
837 int
dbzdbmclose()838 dbzdbmclose()
839 {
840 	register int ret = 0;
841 
842 	if (pagf == NULL) {
843 		DEBUG(("dbzdbmclose: not opened!\n"));
844 		return(-1);
845 	}
846 
847 	if (fclose(pagf) == EOF) {
848 		DEBUG(("dbzdbmclose: fclose(pagf) failed\n"));
849 		ret = -1;
850 	}
851 	pagf = basef;		/* ensure valid pointer; dbzsync checks it */
852 	if (dbzsync() < 0)
853 		ret = -1;
854 	if (bufpagf != NULL && fclose(bufpagf) == EOF) {
855 		DEBUG(("dbzdbmclose: fclose(bufpagf) failed\n"));
856 		ret = -1;
857 	}
858 	if (corepag != NULL)
859 		free((char *)corepag);
860 	corepag = NULL;
861 	if (fclose(basef) == EOF) {
862 		DEBUG(("dbzdbmclose: fclose(basef) failed\n"));
863 		ret = -1;
864 	}
865 	if (basefname != NULL)
866 		free(basefname);
867 	basef = NULL;
868 	pagf = NULL;
869 	if (fclose(dirf) == EOF) {
870 		DEBUG(("dbzdbmclose: fclose(dirf) failed\n"));
871 		ret = -1;
872 	}
873 
874 	DEBUG(("dbzdbmclose: %s\n", (ret == 0) ? "succeeded" : "failed"));
875 	return(ret);
876 }
877 
878 /*
879  - dbzsync - push all in-core data out to disk
880  */
881 int
dbzsync()882 dbzsync()
883 {
884 	register int ret = 0;
885 
886 	if (pagf == NULL) {
887 		DEBUG(("dbzsync: not opened!\n"));
888 		return(-1);
889 	}
890 	if (!written)
891 		return(0);
892 
893 	if (corepag != NULL && !writethrough) {
894 		if (putcore(corepag, bufpagf) < 0) {
895 			DEBUG(("dbzsync: putcore failed\n"));
896 			ret = -1;
897 		}
898 	}
899 	if (!conf.olddbz)
900 		if (putconf(dirf, &conf) < 0)
901 			ret = -1;
902 
903 	DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed"));
904 	return(ret);
905 }
906 
907 /*
908  - dbzcancel - cancel writing of in-core data
909  * Mostly for use from child processes.
910  * Note that we don't need to futz around with stdio buffers, because we
911  * always fflush them immediately anyway and so they never have stale data.
912  */
913 int
dbzcancel()914 dbzcancel()
915 {
916 	if (pagf == NULL) {
917 		DEBUG(("dbzcancel: not opened!\n"));
918 		return(-1);
919 	}
920 
921 	written = 0;
922 	return(0);
923 }
924 
925 /*
926  - dbzfetch - fetch() with case mapping built in
927  */
928 datum
dbzfetch(key)929 dbzfetch(key)
930 datum key;
931 {
932 	char buffer[DBZMAXKEY + 1];
933 	datum mappedkey;
934 	register size_t keysize;
935 
936 	DEBUG(("dbzfetch: (%s)\n", key.dptr));
937 
938 	/* Key is supposed to be less than DBZMAXKEY */
939 	keysize = key.dsize;
940 	if (keysize >= DBZMAXKEY) {
941 		keysize = DBZMAXKEY;
942 		DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
943 	}
944 
945 	mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
946 	buffer[keysize] = '\0';	/* just a debug aid */
947 	mappedkey.dsize = keysize;
948 
949 	return(dbzdbmfetch(mappedkey));
950 }
951 
952 /*
953  - dbzdbmfetch - get an entry from the database
954  *
955  * Disgusting fine point, in the name of backward compatibility:  if the
956  * last character of "key" is a NUL, that character is (effectively) not
957  * part of the comparison against the stored keys.
958  */
959 datum				/* dptr NULL, dsize 0 means failure */
dbzdbmfetch(key)960 dbzdbmfetch(key)
961 datum key;
962 {
963 	char buffer[DBZMAXKEY + 1];
964 	static of_t key_ptr;		/* return value points here */
965 	datum output;
966 	register size_t keysize;
967 	register size_t cmplen;
968 	register char *sepp;
969 
970 	DEBUG(("dbzdbmfetch: (%s)\n", key.dptr));
971 	output.dptr = NULL;
972 	output.dsize = 0;
973 	prevp = FRESH;
974 
975 	/* Key is supposed to be less than DBZMAXKEY */
976 	keysize = key.dsize;
977 	if (keysize >= DBZMAXKEY) {
978 		keysize = DBZMAXKEY;
979 		DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
980 	}
981 
982 	if (pagf == NULL) {
983 		DEBUG(("dbzdbmfetch: database not open!\n"));
984 		return(output);
985 	} else if (basef == NULL) {	/* basef didn't exist yet */
986 		basef = latebase();
987 		if (basef == NULL)
988 			return(output);
989 	}
990 
991 	cmplen = keysize;
992 	sepp = &conf.fieldsep;
993 	if (key.dptr[keysize-1] == '\0') {
994 		cmplen--;
995 		sepp = &buffer[keysize-1];
996 	}
997 	start(&srch, &key, FRESH);
998 	while ((key_ptr = search(&srch)) != NOTFOUND) {
999 		DEBUG(("got 0x%lx\n", (long)key_ptr));
1000 
1001 		/* fetch the key */
1002 		if (fseek(basef, key_ptr, SEEK_SET) != 0) {
1003 			DEBUG(("dbzdbmfetch: seek failed\n"));
1004 			return(output);
1005 		}
1006 		if (fread(buffer, 1, keysize, basef) != keysize) {
1007 			DEBUG(("dbzdbmfetch: read failed\n"));
1008 			return(output);
1009 		}
1010 
1011 		/* try it */
1012 		buffer[keysize] = '\0';		/* terminated for DEBUG */
1013 		(void) mapcase(buffer, buffer, keysize);
1014 		DEBUG(("dbzdbmfetch: buffer (%s) looking for (%s) size = %ld\n",
1015 						buffer, key.dptr, keysize));
1016 		if (memcmp(key.dptr, buffer, cmplen) == 0 &&
1017 				(*sepp == conf.fieldsep || *sepp == '\0')) {
1018 			/* we found it */
1019 			output.dptr = (char *)&key_ptr;
1020 			output.dsize = SOF;
1021 			DEBUG(("dbzdbmfetch: successful\n"));
1022 			return(output);
1023 		}
1024 	}
1025 
1026 	/* we didn't find it */
1027 	DEBUG(("dbzdbmfetch: failed\n"));
1028 	prevp = &srch;			/* remember where we stopped */
1029 	return(output);
1030 }
1031 
1032 /*
1033  - latebase - try to open a base file that wasn't there at the start
1034  */
1035 static FILE *
latebase()1036 latebase()
1037 {
1038 	register FILE *it;
1039 
1040 	if (basefname == NULL) {
1041 		DEBUG(("latebase: name foulup\n"));
1042 		return(NULL);
1043 	}
1044 	it = fopen(basefname, "r");
1045 	if (it == NULL) {
1046 		DEBUG(("latebase: still can't open base\n"));
1047 	} else {
1048 		DEBUG(("latebase: late open succeeded\n"));
1049 		free(basefname);
1050 		basefname = NULL;
1051 #ifdef _IOFBF
1052 		(void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf));
1053 #endif
1054 		if (callback != NULL)
1055 			(*callback)(it);
1056 	}
1057 	return(it);
1058 }
1059 
1060 /*
1061  - dbzstore - store() with case mapping built in
1062  */
1063 int
dbzstore(key,data)1064 dbzstore(key, data)
1065 datum key;
1066 datum data;
1067 {
1068 	char buffer[DBZMAXKEY + 1];
1069 	datum mappedkey;
1070 	register size_t keysize;
1071 
1072 	DEBUG(("dbzstore: (%s)\n", key.dptr));
1073 
1074 	/* Key is supposed to be less than DBZMAXKEY */
1075 	keysize = key.dsize;
1076 	if (keysize >= DBZMAXKEY) {
1077 		DEBUG(("dbzstore: key size too big (%d)\n", key.dsize));
1078 		return(-1);
1079 	}
1080 
1081 	mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
1082 	buffer[keysize] = '\0';	/* just a debug aid */
1083 	mappedkey.dsize = keysize;
1084 
1085 	return(dbzdbmstore(mappedkey, data));
1086 }
1087 
1088 /*
1089  - dbzdbmstore - add an entry to the database
1090  */
1091 int				/* 0 success, -1 failure */
dbzdbmstore(key,data)1092 dbzdbmstore(key, data)
1093 datum key;
1094 datum data;
1095 {
1096 	of_t value;
1097 
1098 	if (pagf == NULL) {
1099 		DEBUG(("dbzdbmstore: database not open!\n"));
1100 		return(-1);
1101 	} else if (basef == NULL) {	/* basef didn't exist yet */
1102 		basef = latebase();
1103 		if (basef == NULL)
1104 			return(-1);
1105 	}
1106 	if (pagronly) {
1107 		DEBUG(("dbzdbmstore: database open read-only\n"));
1108 		return(-1);
1109 	}
1110 	if (data.dsize != SOF) {
1111 		DEBUG(("dbzdbmstore: value size wrong (%d)\n", data.dsize));
1112 		return(-1);
1113 	}
1114 	if (key.dsize >= DBZMAXKEY) {
1115 		DEBUG(("dbzdbmstore: key size too big (%d)\n", key.dsize));
1116 		return(-1);
1117 	}
1118 
1119 	/* copy the value in to ensure alignment */
1120 	(void) memcpy((char *)&value, data.dptr, SOF);
1121 	DEBUG(("dbzdbmstore: (%s, %ld)\n", key.dptr, (long)value));
1122 	if (!okayvalue(value)) {
1123 		DEBUG(("dbzdbmstore: reserved bit or overflow in 0x%lx\n", value));
1124 		return(-1);
1125 	}
1126 
1127 	/* find the place, exploiting previous search if possible */
1128 	start(&srch, &key, prevp);
1129 	while (search(&srch) != NOTFOUND)
1130 		continue;
1131 
1132 	prevp = FRESH;
1133 	conf.used[0]++;
1134 	DEBUG(("dbzdbmstore: used count %ld\n", conf.used[0]));
1135 	written = 1;
1136 	return(set(&srch, value));
1137 }
1138 
1139 /*
1140  - dbzincore - control attempts to keep .pag file in core
1141  */
1142 int				/* old setting */
dbzincore(value)1143 dbzincore(value)
1144 int value;
1145 {
1146 	register int old = incore;
1147 
1148 	incore = value;
1149 	return(old);
1150 }
1151 
1152 /*
1153  - dbzwritethrough - enable/disable immediate writing to disk of in-core writes
1154  */
1155 int				/* old setting */
dbzwritethrough(value)1156 dbzwritethrough(value)
1157 int value;
1158 {
1159 	register int old = writethrough;
1160 
1161 	writethrough = value;
1162 	return(old);
1163 }
1164 
1165 /*
1166  - dbzfiledesc - provide hook for doing obscene things to file descriptors
1167  */
1168 typedef void (*fp)();
1169 fp
1170 dbzfiledesc(diddler)
1171 void (*diddler)();		/* NULL means do nothing */
1172 {
1173 	register void (*old)() = callback;
1174 
1175 	callback = diddler;
1176 	return(old);
1177 }
1178 
1179 /*
1180  - getconf - get configuration from .dir file
1181  */
1182 static int			/* 0 success, -1 failure */
getconf(df,pf,cp)1183 getconf(df, pf, cp)
1184 register FILE *df;		/* NULL means just give me the default */
1185 register FILE *pf;		/* NULL means don't care about .pag */
1186 register struct dbzconfig *cp;
1187 {
1188 	register int c;
1189 	register int i;
1190 	int err = 0;
1191 
1192 	c = (df != NULL) ? getc(df) : EOF;
1193 	if (c == EOF) {		/* empty file, no configuration known */
1194 		cp->olddbz = 0;
1195 		if (df != NULL && pf != NULL && getc(pf) != EOF)
1196 			cp->olddbz = 1;
1197 		cp->tsize = DEFSIZE;
1198 		cp->fieldsep = '\t';
1199 		for (i = 0; i < NUSEDS; i++) {
1200 			cp->used[i] = 0;
1201 			cp->ntagless[i] = 0;
1202 		}
1203 		cp->valuesize = SOF;
1204 		mybytemap(cp->bytemap);
1205 		cp->casemap = DEFCASE;
1206 		cp->tagenb = TAGENB;
1207 		cp->tagmask = TAGMASK;
1208 		cp->tagshift = TAGSHIFT;
1209 		DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
1210 			cp->tsize, cp->casemap, cp->tagenb,
1211 			cp->tagmask, cp->tagshift));
1212 		return(0);
1213 	}
1214 	(void) ungetc(c, df);
1215 
1216 	/* first line, the vital stuff */
1217 	if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z')
1218 		err = -1;
1219 	if (getno(df, &err) != dbzversion)
1220 		err = -1;
1221 	cp->tsize = getno(df, &err);
1222 	cp->fieldsep = (char)getno(df, &err);
1223 	while ((c = getc(df)) == ' ')
1224 		continue;
1225 	cp->casemap = c;
1226 	cp->tagenb = getno(df, &err);
1227 	cp->tagmask = getno(df, &err);
1228 	cp->tagshift = getno(df, &err);
1229 	cp->valuesize = getno(df, &err);
1230 	if (cp->valuesize != SOF) {
1231 		DEBUG(("getconf: wrong of_t size (%d)\n", cp->valuesize));
1232 		err = -1;
1233 		cp->valuesize = SOF;	/* to protect the loops below */
1234 	}
1235 	for (i = 0; i < cp->valuesize; i++)
1236 		cp->bytemap[i] = getno(df, &err);
1237 	if (getc(df) != '\n')
1238 		err = -1;
1239 	DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize,
1240 			cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask,
1241 			cp->tagshift));
1242 	DEBUG(("bytemap (%d)", cp->valuesize));
1243 #ifdef DBZDEBUG
1244 	for (i = 0; i < cp->valuesize; i++) {
1245 		DEBUG((" %d", cp->bytemap[i]));
1246 	}
1247 #endif
1248 	DEBUG(("\n"));
1249 
1250 	/* second line, the usages */
1251 	for (i = 0; i < NUSEDS; i++)
1252 		cp->used[i] = getno(df, &err);
1253 	if (getc(df) != '\n')
1254 		err = -1;
1255 	DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2]));
1256 
1257 	/* third line, currently only the tagless count */
1258 	if ((c = getc(df)) != EOF) {
1259 		(void) ungetc(c, df);
1260 		for (i = 0; i < NUSEDS; i++)
1261 			cp->ntagless[i] = getno(df, &err);
1262 		if (getc(df) != '\n')
1263 			err = -1;
1264 	} else
1265 		for (i = 0; i < NUSEDS; i++)
1266 			cp->ntagless[i] = 0;
1267 
1268 	if (err < 0) {
1269 		DEBUG(("getconf error\n"));
1270 		return(-1);
1271 	}
1272 	return(0);
1273 }
1274 
1275 /*
1276  - getno - get a long
1277  */
1278 static long
getno(f,ep)1279 getno(f, ep)
1280 FILE *f;
1281 int *ep;
1282 {
1283 	register char *p;
1284 #	define	MAXN	50
1285 	char getbuf[MAXN];
1286 	register int c;
1287 
1288 	while ((c = getc(f)) == ' ')
1289 		continue;
1290 	if (c == EOF || c == '\n') {
1291 		DEBUG(("getno: missing number\n"));
1292 		*ep = -1;
1293 		return(0);
1294 	}
1295 	p = getbuf;
1296 	*p++ = c;
1297 	while ((c = getc(f)) != EOF && c != '\n' && c != ' ')
1298 		if (p < &getbuf[MAXN-1])
1299 			*p++ = c;
1300 	if (c == EOF) {
1301 		DEBUG(("getno: EOF\n"));
1302 		*ep = -1;
1303 	} else
1304 		(void) ungetc(c, f);
1305 	*p = '\0';
1306 
1307 	if (strspn(getbuf, "-1234567890") != strlen(getbuf)) {
1308 		DEBUG(("getno: `%s' non-numeric\n", getbuf));
1309 		*ep = -1;
1310 	}
1311 	return(atol(getbuf));
1312 }
1313 
1314 /*
1315  - putconf - write configuration to .dir file
1316  */
1317 static int			/* 0 success, -1 failure */
putconf(f,cp)1318 putconf(f, cp)
1319 register FILE *f;
1320 register struct dbzconfig *cp;
1321 {
1322 	register int i;
1323 	register int ret = 0;
1324 
1325 	if (fseek(f, (of_t)0, SEEK_SET) != 0) {
1326 		DEBUG(("fseek failure in putconf\n"));
1327 		ret = -1;
1328 	}
1329 	fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion, cp->tsize,
1330 				cp->fieldsep, cp->casemap, cp->tagenb,
1331 				cp->tagmask, cp->tagshift, cp->valuesize);
1332 	for (i = 0; i < cp->valuesize; i++)
1333 		fprintf(f, " %d", cp->bytemap[i]);
1334 	fprintf(f, "\n");
1335 	for (i = 0; i < NUSEDS; i++)
1336 		fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
1337 	for (i = 0; i < NUSEDS; i++)
1338 		fprintf(f, "%ld%c", cp->ntagless[i], (i < NUSEDS-1) ? ' ' : '\n');
1339 
1340 	(void) fflush(f);
1341 	if (ferror(f))
1342 		ret = -1;
1343 
1344 	DEBUG(("putconf status %d\n", ret));
1345 	return(ret);
1346 }
1347 
1348 /*
1349  - getcore - try to set up an in-core copy of .pag file
1350  */
1351 static of_t *			/* pointer to copy, or NULL */
getcore(f)1352 getcore(f)
1353 FILE *f;
1354 {
1355 	register of_t *p;
1356 	register size_t i;
1357 	register size_t nread;
1358 	register of_t *it;
1359 
1360 	it = (of_t *)malloc((size_t)conf.tsize * SOF);
1361 	if (it == NULL) {
1362 		DEBUG(("getcore: malloc failed\n"));
1363 		return(NULL);
1364 	}
1365 
1366 	nread = fread((char *)it, SOF, (size_t)conf.tsize, f);
1367 	if (ferror(f)) {
1368 		DEBUG(("getcore: read failed\n"));
1369 		free((char *)it);
1370 		return(NULL);
1371 	}
1372 
1373 	p = it + nread;
1374 	i = (size_t)conf.tsize - nread;
1375 	while (i-- > 0)
1376 		*p++ = VACANT;
1377 
1378 	return(it);
1379 }
1380 
1381 /*
1382  - putcore - try to rewrite an in-core table
1383  */
1384 static int			/* 0 okay, -1 fail */
putcore(tab,f)1385 putcore(tab, f)
1386 of_t *tab;
1387 FILE *f;
1388 {
1389 	if (fseek(f, (of_t)0, SEEK_SET) != 0) {
1390 		DEBUG(("fseek failure in putcore\n"));
1391 		return(-1);
1392 	}
1393 	(void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f);
1394 	(void) fflush(f);
1395 	return((ferror(f)) ? -1 : 0);
1396 }
1397 
1398 /*
1399  - start - set up to start or restart a search
1400  */
1401 static void
start(sp,kp,osp)1402 start(sp, kp, osp)
1403 register struct searcher *sp;
1404 register datum *kp;
1405 register struct searcher *osp;		/* may be FRESH, i.e. NULL */
1406 {
1407 	register long h;
1408 
1409 	h = hash(kp->dptr, kp->dsize);
1410 	if (osp != FRESH && osp->hash == h) {
1411 		if (sp != osp)
1412 			*sp = *osp;
1413 		DEBUG(("search restarted\n"));
1414 	} else {
1415 		sp->hash = h;
1416 		sp->tag = MKTAG(h / conf.tsize);
1417 		DEBUG(("tag 0x%lx\n", sp->tag));
1418 		sp->place = h % conf.tsize;
1419 		sp->tabno = 0;
1420 		sp->run = (conf.olddbz) ? conf.tsize : MAXRUN;
1421 		sp->aborted = 0;
1422 	}
1423 	sp->seen = 0;
1424 }
1425 
1426 /*
1427  - search - conduct part of a search
1428  */
1429 static of_t			/* NOTFOUND if we hit VACANT or error */
search(sp)1430 search(sp)
1431 register struct searcher *sp;
1432 {
1433 	register of_t dest;
1434 	register of_t value;
1435 	of_t val;		/* buffer for value (can't fread register) */
1436 	register of_t place;
1437 
1438 	if (sp->aborted)
1439 		return(NOTFOUND);
1440 
1441 	for (;;) {
1442 		/* determine location to be examined */
1443 		place = sp->place;
1444 		if (sp->seen) {
1445 			/* go to next location */
1446 			if (--sp->run <= 0) {
1447 				sp->tabno++;
1448 				sp->run = MAXRUN;
1449 			}
1450 			place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
1451 			sp->place = place;
1452 		} else
1453 			sp->seen = 1;	/* now looking at current location */
1454 		DEBUG(("search @ %ld\n", place));
1455 
1456 		/* get the tagged value */
1457 		if (corepag != NULL && place < conf.tsize) {
1458 			DEBUG(("search: in core\n"));
1459 			value = MAPIN(corepag[place]);
1460 		} else {
1461 			/* seek, if necessary */
1462 			dest = place * SOF;
1463 			if (pagpos != dest) {
1464 				if (fseek(pagf, dest, SEEK_SET) != 0) {
1465 					DEBUG(("search: seek failed\n"));
1466 					pagpos = -1;
1467 					sp->aborted = 1;
1468 					return(NOTFOUND);
1469 				}
1470 				pagpos = dest;
1471 			}
1472 
1473 			/* read it */
1474 			if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
1475 				value = MAPIN(val);
1476 			else if (ferror(pagf)) {
1477 				DEBUG(("search: read failed\n"));
1478 				pagpos = -1;
1479 				sp->aborted = 1;
1480 				return(NOTFOUND);
1481 			} else
1482 				value = VACANT;
1483 
1484 			/* and finish up */
1485 			pagpos += sizeof(val);
1486 		}
1487 
1488 		/* vacant slot is always cause to return */
1489 		if (value == VACANT) {
1490 			DEBUG(("search: empty slot\n"));
1491 			return(NOTFOUND);
1492 		};
1493 
1494 		/* check the tag */
1495 		value = UNBIAS(value);
1496 		DEBUG(("got 0x%lx\n", value));
1497 		if (!HASTAG(value)) {
1498 			DEBUG(("tagless\n"));
1499 			return(value);
1500 		} else if (TAG(value) == sp->tag) {
1501 			DEBUG(("match\n"));
1502 			return(NOTAG(value));
1503 		} else {
1504 			DEBUG(("mismatch 0x%lx\n", TAG(value)));
1505 		}
1506 	}
1507 	/* NOTREACHED */
1508 }
1509 
1510 /*
1511  - okayvalue - check that a value can be stored
1512  */
1513 static int			/* predicate */
okayvalue(value)1514 okayvalue(value)
1515 of_t value;
1516 {
1517 	if (HASTAG(value))
1518 		return(0);
1519 #ifdef OVERFLOW
1520 	if (value == LONG_MAX)	/* BIAS() and UNBIAS() will overflow */
1521 		return(0);
1522 #endif
1523 	return(1);
1524 }
1525 
1526 /*
1527  - set - store a value into a location previously found by search
1528  */
1529 static int			/* 0 success, -1 failure */
set(sp,value)1530 set(sp, value)
1531 register struct searcher *sp;
1532 of_t value;
1533 {
1534 	register of_t place = sp->place;
1535 	register of_t v = value;
1536 
1537 	if (sp->aborted)
1538 		return(-1);
1539 
1540 	if (CANTAG(v) && !conf.olddbz) {
1541 		v |= sp->tag | taghere;
1542 		if (v != UNBIAS(VACANT))	/* BIAS(v) won't look VACANT */
1543 #ifdef OVERFLOW
1544 			if (v != LONG_MAX)	/* and it won't overflow */
1545 #endif
1546 			value = v;
1547 	} else if (!conf.olddbz)
1548 		conf.ntagless[0]++;
1549 	DEBUG(("tagged value is 0x%lx\n", value));
1550 	value = BIAS(value);
1551 	value = MAPOUT(value);
1552 
1553 	/* If we have the index file in memory, use it */
1554 	if (corepag != NULL && place < conf.tsize) {
1555 		corepag[place] = value;
1556 		DEBUG(("set: incore\n"));
1557 		if (!writethrough)
1558 			return(0);
1559 	}
1560 
1561 	/* seek to spot */
1562 	pagpos = -1;		/* invalidate position memory */
1563 	if (fseek(pagf, place * (of_t)SOF, SEEK_SET) != 0) {
1564 		DEBUG(("set: seek failed\n"));
1565 		sp->aborted = 1;
1566 		return(-1);
1567 	}
1568 
1569 	/* write in data */
1570 	if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
1571 		DEBUG(("set: write failed\n"));
1572 		sp->aborted = 1;
1573 		return(-1);
1574 	}
1575 	/* fflush improves robustness, and buffer re-use is rare anyway */
1576 	if (fflush(pagf) == EOF) {
1577 		DEBUG(("set: fflush failed\n"));
1578 		sp->aborted = 1;
1579 		return(-1);
1580 	}
1581 
1582 	DEBUG(("set: succeeded\n"));
1583 	return(0);
1584 }
1585 
1586 /*
1587  - mybytemap - determine this machine's byte map
1588  *
1589  * A byte map is an array of ints, sizeof(of_t) of them.  The 0th int
1590  * is the byte number of the high-order byte in my of_t, and so forth.
1591  */
1592 static void
mybytemap(map)1593 mybytemap(map)
1594 int map[];			/* -> int[SOF] */
1595 {
1596 	union {
1597 		of_t o;
1598 		char c[SOF];
1599 	} u;
1600 	register int *mp = &map[SOF];
1601 	register int ntodo;
1602 	register int i;
1603 
1604 	u.o = 1;
1605 	for (ntodo = (int)SOF; ntodo > 0; ntodo--) {
1606 		for (i = 0; i < SOF; i++)
1607 			if (u.c[i] != 0)
1608 				break;
1609 		if (i == SOF) {
1610 			/* trouble -- set it to *something* consistent */
1611 			DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo));
1612 			for (i = 0; i < SOF; i++)
1613 				map[i] = i;
1614 			return;
1615 		}
1616 		DEBUG(("mybytemap: byte %d\n", i));
1617 		*--mp = i;
1618 		while (u.c[i] != 0)
1619 			u.o <<= 1;
1620 	}
1621 }
1622 
1623 /*
1624  - bytemap - transform an of_t from byte ordering map1 to map2
1625  */
1626 static of_t			/* transformed result */
bytemap(ino,map1,map2)1627 bytemap(ino, map1, map2)
1628 of_t ino;
1629 int *map1;
1630 int *map2;
1631 {
1632 	union oc {
1633 		of_t o;
1634 		char c[SOF];
1635 	};
1636 	union oc in;
1637 	union oc out;
1638 	register int i;
1639 
1640 	in.o = ino;
1641 	for (i = 0; i < SOF; i++)
1642 		out.c[map2[i]] = in.c[map1[i]];
1643 	return(out.o);
1644 }
1645 
1646 /*
1647  * This is a simplified version of the pathalias hashing function.
1648  * Thanks to Steve Belovin and Peter Honeyman
1649  *
1650  * hash a string into a long int.  31 bit crc (from andrew appel).
1651  * the crc table is computed at run time by crcinit() -- we could
1652  * precompute, but it takes 1 clock tick on a 750.
1653  *
1654  * This fast table calculation works only if POLY is a prime polynomial
1655  * in the field of integers modulo 2.  Since the coefficients of a
1656  * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
1657  * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
1658  * 31 down to 25 are zero.  Happily, we have candidates, from
1659  * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
1660  *	x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
1661  *	x^31 + x^3 + x^0
1662  *
1663  * We reverse the bits to get:
1664  *	111101010000000000000000000000001 but drop the last 1
1665  *         f   5   0   0   0   0   0   0
1666  *	010010000000000000000000000000001 ditto, for 31-bit crc
1667  *	   4   8   0   0   0   0   0   0
1668  */
1669 
1670 #define POLY 0x48000000L	/* 31-bit polynomial (avoids sign problems) */
1671 
1672 static long CrcTable[128];
1673 
1674 /*
1675  - crcinit - initialize tables for hash function
1676  */
1677 static void
crcinit()1678 crcinit()
1679 {
1680 	register int i, j;
1681 	register long sum;
1682 
1683 	for (i = 0; i < 128; ++i) {
1684 		sum = 0L;
1685 		for (j = 7 - 1; j >= 0; --j)
1686 			if (i & (1 << j))
1687 				sum ^= POLY >> j;
1688 		CrcTable[i] = sum;
1689 	}
1690 	DEBUG(("crcinit: done\n"));
1691 }
1692 
1693 /*
1694  - hash - Honeyman's nice hashing function
1695  */
1696 static long
hash(name,size)1697 hash(name, size)
1698 register char *name;
1699 register int size;
1700 {
1701 	register long sum = 0L;
1702 
1703 	while (size--) {
1704 		sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
1705 	}
1706 	DEBUG(("hash: returns (%ld)\n", sum));
1707 	return(sum);
1708 }
1709 
1710 /*
1711  * case-mapping stuff
1712  *
1713  * Borrowed from C News, by permission of the authors.  Somewhat modified.
1714  *
1715  * We exploit the fact that we are dealing only with headers here, and
1716  * headers are limited to the ASCII characters by RFC822.  It is barely
1717  * possible that we might be dealing with a translation into another
1718  * character set, but in particular it's very unlikely for a header
1719  * character to be outside -128..255.
1720  *
1721  * Life would be a whole lot simpler if tolower() could safely and portably
1722  * be applied to any char.
1723  */
1724 
1725 #define	OFFSET	128		/* avoid trouble with negative chars */
1726 
1727 /* must call casencmp before invoking TOLOW... */
1728 #define	TOLOW(c)	(cmap[(c)+OFFSET])
1729 
1730 /* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
1731 /* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
1732 #define CISTREQN(a, b, n) \
1733 	(TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)
1734 
1735 #define	MAPSIZE	(256+OFFSET)
1736 static char cmap[MAPSIZE];	/* relies on init to '\0' */
1737 static int mprimed = 0;		/* has cmap been set up? */
1738 
1739 /*
1740  - mapprime - set up case-mapping stuff
1741  */
1742 static void
mapprime()1743 mapprime()
1744 {
1745 	register char *lp;
1746 	register char *up;
1747 	register int c;
1748 	register int i;
1749 	static char lower[] = "abcdefghijklmnopqrstuvwxyz";
1750 	static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1751 
1752 	for (lp = lower, up = upper; *lp != '\0'; lp++, up++) {
1753 		c = *lp;
1754 		cmap[c+OFFSET] = c;
1755 		cmap[*up+OFFSET] = c;
1756 	}
1757 	for (i = 0; i < MAPSIZE; i++)
1758 		if (cmap[i] == '\0')
1759 			cmap[i] = (char)(i-OFFSET);
1760 	mprimed = 1;
1761 }
1762 
1763 /*
1764  - casencmp - case-independent strncmp
1765  */
1766 static int			/* < == > 0 */
casencmp(s1,s2,len)1767 casencmp(s1, s2, len)
1768 char *s1;
1769 char *s2;
1770 int len;
1771 {
1772 	register char *p1;
1773 	register char *p2;
1774 	register int n;
1775 
1776 	if (!mprimed)
1777 		mapprime();
1778 
1779 	p1 = s1;
1780 	p2 = s2;
1781 	n = len;
1782 	while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) {
1783 		p1++;
1784 		p2++;
1785 	}
1786 	if (n < 0)
1787 		return(0);
1788 
1789 	/*
1790 	 * The following case analysis is necessary so that characters
1791 	 * which look negative collate low against normal characters but
1792 	 * high against the end-of-string NUL.
1793 	 */
1794 	if (*p1 == '\0' && *p2 == '\0')
1795 		return(0);
1796 	else if (*p1 == '\0')
1797 		return(-1);
1798 	else if (*p2 == '\0')
1799 		return(1);
1800 	else
1801 		return(TOLOW(*p1) - TOLOW(*p2));
1802 }
1803 
1804 /*
1805  - mapcase - do case-mapped copy
1806  */
1807 static char *			/* returns src or dst */
mapcase(dst,src,siz)1808 mapcase(dst, src, siz)
1809 char *dst;			/* destination, used only if mapping needed */
1810 char *src;			/* source; src == dst is legal */
1811 size_t siz;
1812 {
1813 	register char *s;
1814 	register char *d;
1815 	register char *c;	/* case break */
1816 	register char *e;	/* end of source */
1817 
1818 
1819 	c = cipoint(src, siz);
1820 	if (c == NULL)
1821 		return(src);
1822 
1823 	if (!mprimed)
1824 		mapprime();
1825 	s = src;
1826 	e = s + siz;
1827 	d = dst;
1828 
1829 	while (s < c)
1830 		*d++ = *s++;
1831 	while (s < e)
1832 		*d++ = TOLOW(*s++);
1833 
1834 	return(dst);
1835 }
1836 
1837 /*
1838  - cipoint - where in this message-ID does it become case-insensitive?
1839  *
1840  * The RFC822 code is not quite complete.  Absolute, total, full RFC822
1841  * compliance requires a horrible parsing job, because of the arcane
1842  * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
1843  * for example.  There are three or four things that might occur in the
1844  * domain part of a message-id that are case-sensitive.  They don't seem
1845  * to ever occur in real news, thank Cthulhu.  (What?  You were expecting
1846  * a merciful and forgiving deity to be invoked in connection with RFC822?
1847  * Forget it; none of them would come near it.)
1848  */
1849 static char *			/* pointer into s, or NULL for "nowhere" */
cipoint(s,siz)1850 cipoint(s, siz)
1851 char *s;
1852 size_t siz;
1853 {
1854 	register char *p;
1855 	static char post[] = "postmaster";
1856 	static int plen = sizeof(post)-1;
1857 
1858 	switch (conf.casemap) {
1859 	case '0':		/* unmapped, sensible */
1860 		return(NULL);
1861 		break;
1862 	case 'C':		/* C News, RFC 822 conformant (approx.) */
1863 		p = memchr(s, '@', siz);
1864 		if (p == NULL)			/* no local/domain split */
1865 			return(NULL);		/* assume all local */
1866 		if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) {
1867 			/* crazy -- "postmaster" is case-insensitive */
1868 			return(s);
1869 		}
1870 		return(p);
1871 		break;
1872 	case '=':		/* 2.11, neither sensible nor conformant */
1873 		return(s);	/* all case-insensitive */
1874 		break;
1875 	}
1876 
1877 	DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap));
1878 	return(NULL);		/* just leave it alone */
1879 }
1880 
1881 /*
1882  - dbzdebug - control dbz debugging at run time
1883  */
1884 int				/* old value */
dbzdebug(value)1885 dbzdebug(value)
1886 int value;
1887 {
1888 #ifdef DBZDEBUG
1889 	register int old = debug;
1890 
1891 	debug = value;
1892 	return(old);
1893 #else
1894 	return(-1);
1895 #endif
1896 }
1897