1 /*
2
3 dbz.c V3.4
4
5 Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
6 You can use this code in any manner, as long as you leave my name on it
7 and don't hold me responsible for any problems with it.
8
9 Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun 5 00:27:08 CDT 1988
10
11 Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
12
13 Major reworking by Henry Spencer as part of the C News project.
14
15 These routines replace dbm as used by the usenet news software
16 (it's not a full dbm replacement by any means). It's fast and
17 simple. It contains no AT&T code.
18
19 In general, dbz's files are 1/20 the size of dbm's. Lookup performance
20 is somewhat better, while file creation is spectacularly faster, especially
21 if the incore facility is used.
22
23 If you are seized by the urge to hack mmap() calls into this stuff, please
24 try to resist it. Instead, spend that energy beating on your system supplier
25 to fix his read() and write() calls to exploit the same memory-mapping
26 tricks mmap() uses. That way, all kinds of software can get more efficient
27 *without* having non-portable grunge like mmap() hacked into it.
28
29 */
30
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <errno.h>
37 #ifndef __STDC__
38 extern int errno;
39 #endif
40 #include <dbz.h>
41
42 /*
43 * #ifdef index. "LIA" = "leave it alone unless you know what you're doing".
44 *
45 * FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h>
46 * INDEX_SIZE backward compatibility with old dbz; avoid using this
47 * NMEMORY number of days of memory for use in sizing new table (LIA)
48 * INCORE backward compatibility with old dbz; use dbzincore() instead
49 * DBZDEBUG enable debugging
50 * DEFSIZE default table size (not as critical as in old dbz)
51 * OLDBNEWS default case mapping as in old B News; set NOBUFFER
52 * BNEWS default case mapping as in current B News; set NOBUFFER
53 * DEFCASE default case-map algorithm selector
54 * NOTAGS fseek offsets are strange, do not do tagging (see below)
55 * NPAGBUF size of .pag buffer, in longs (LIA)
56 * SHISTBUF size of ASCII-file buffer, in bytes (LIA)
57 * MAXRUN length of run which shifts to next table (see below) (LIA)
58 * OVERFLOW long-int arithmetic overflow must be avoided, will trap
59 * NOBUFFER do not buffer hash-table i/o, B News locking is defective
60 */
61
62 #ifdef FUNNYSEEKS
63 #include <unistd.h>
64 #else
65 #define SEEK_SET 0
66 #endif
67 #ifdef OVERFLOW
68 #include <limits.h>
69 #endif
70
71 static int dbzversion = 3; /* for validating .dir file format */
72
73 /*
74 * The dbz database exploits the fact that when news stores a <key,value>
75 * tuple, the `value' part is a seek offset into a text file, pointing to
76 * a copy of the `key' part. This avoids the need to store a copy of
77 * the key in the dbz files. However, the text file *must* exist and be
78 * consistent with the dbz files, or things will fail.
79 *
80 * The basic format of the database is a simple hash table containing the
81 * values. A value is stored by indexing into the table using a hash value
82 * computed from the key; collisions are resolved by linear probing (just
83 * search forward for an empty slot, wrapping around to the beginning of
84 * the table if necessary). Linear probing is a performance disaster when
85 * the table starts to get full, so a complication is introduced. The
86 * database is actually one *or more* tables, stored sequentially in the
87 * .pag file, and the length of linear-probe sequences is limited. The
88 * search (for an existing item or an empty slot) always starts in the
89 * first table, and whenever MAXRUN probes have been done in table N,
90 * probing continues in table N+1. This behaves reasonably well even in
91 * cases of massive overflow. There are some other small complications
92 * added, see comments below.
93 *
94 * The table size is fixed for any particular database, but is determined
95 * dynamically when a database is rebuilt. The strategy is to try to pick
96 * the size so the first table will be no more than 2/3 full, that being
97 * slightly before the point where performance starts to degrade. (It is
98 * desirable to be a bit conservative because the overflow strategy tends
99 * to produce files with holes in them, which is a nuisance.)
100 */
101
102 /*
103 * The following is for backward compatibility.
104 */
105 #ifdef INDEX_SIZE
106 #define DEFSIZE INDEX_SIZE
107 #endif
108
109 /*
110 * ANSI C says an offset into a file is a long, not an off_t, for some
111 * reason. This actually does simplify life a bit, but it's still nice
112 * to have a distinctive name for it. Beware, this is just for readability,
113 * don't try to change this.
114 */
115 #define of_t long
116 #define SOF (sizeof(of_t))
117
118 /*
119 * We assume that unused areas of a binary file are zeros, and that the
120 * bit pattern of `(of_t)0' is all zeros. The alternative is rather
121 * painful file initialization. Note that okayvalue(), if OVERFLOW is
122 * defined, knows what value of an offset would cause overflow.
123 */
124 #define VACANT ((of_t)0)
125 #define BIAS(o) ((o)+1) /* make any valid of_t non-VACANT */
126 #define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */
127
128 /*
129 * In a Unix implementation, or indeed any in which an of_t is a byte
130 * count, there are a bunch of high bits free in an of_t. There is a
131 * use for them. Checking a possible hit by looking it up in the base
132 * file is relatively expensive, and the cost can be dramatically reduced
133 * by using some of those high bits to tag the value with a few more bits
134 * of the key's hash. This detects most false hits without the overhead of
135 * seek+read+strcmp. We use the top bit to indicate whether the value is
136 * tagged or not, and don't tag a value which is using the tag bits itself.
137 * We're in trouble if the of_t representation wants to use the top bit.
138 * The actual bitmasks and offset come from the configuration stuff,
139 * which permits fiddling with them as necessary, and also suppressing
140 * them completely (by defining the masks to 0). We build pre-shifted
141 * versions of the masks for efficiency.
142 */
143 static of_t tagbits; /* pre-shifted tag mask */
144 static of_t taghere; /* pre-shifted tag-enable bit */
145 static of_t tagboth; /* tagbits|taghere */
146 #define HASTAG(o) ((o)&taghere)
147 #define TAG(o) ((o)&tagbits)
148 #define NOTAG(o) ((o)&~tagboth)
149 #define CANTAG(o) (((o)&tagboth) == 0)
150 #define MKTAG(v) (((v)<<conf.tagshift)&tagbits)
151
152 /*
153 * A new, from-scratch database, not built as a rebuild of an old one,
154 * needs to know table size, casemap algorithm, and tagging. Normally
155 * the user supplies this info, but there have to be defaults.
156 */
157 #ifndef DEFSIZE
158 #define DEFSIZE 300007
159 #endif
160 #ifdef OLDBNEWS
161 #define DEFCASE '0' /* B2.10 -- no mapping */
162 #define NOBUFFER /* B News locking is defective */
163 #endif
164 #ifdef BNEWS
165 #define DEFCASE '=' /* B2.11 -- all mapped */
166 #define NOBUFFER /* B News locking is defective */
167 #endif
168 #ifndef DEFCASE /* C News compatibility is the default */
169 #define DEFCASE 'C' /* C News -- RFC822 mapping */
170 #endif
171 #ifndef NOTAGS
172 #define TAGENB 0x80L /* tag enable is top bit, tag is next 5 */
173 #define TAGMASK 0x7c
174 #define TAGSHIFT 24
175 #else
176 #define TAGENB 0L /* no tags */
177 #define TAGMASK 0
178 #define TAGSHIFT 0
179 #endif
180
181 /*
182 * We read configuration info from the .dir file into this structure,
183 * so we can avoid wired-in assumptions for an existing database.
184 *
185 * Among the info is a record of recent peak usages, so that a new table
186 * size can be chosen intelligently when rebuilding. 10 is a good
187 * number of usages to keep, since news displays marked fluctuations
188 * in volume on a 7-day cycle.
189 */
190 struct dbzconfig {
191 int olddbz; /* .dir file empty but .pag not? */
192 of_t tsize; /* table size */
193 # ifndef NMEMORY
194 # define NMEMORY 10 /* # days of use info to remember */
195 # endif
196 # define NUSEDS (1+NMEMORY)
197 of_t used[NUSEDS]; /* entries used today, yesterday, ... */
198 int valuesize; /* size of table values, == SOF */
199 int bytemap[SOF]; /* byte-order map */
200 char casemap; /* case-mapping algorithm (see cipoint()) */
201 char fieldsep; /* field separator in base file, if any */
202 of_t tagenb; /* unshifted tag-enable bit */
203 of_t tagmask; /* unshifted tag mask */
204 int tagshift; /* shift count for tagmask and tagenb */
205 of_t ntagless[NUSEDS]; /* how many entries went tagless today, ... */
206 };
207 static struct dbzconfig conf;
208 static int getconf();
209 static long getno();
210 static int putconf();
211 static void mybytemap();
212 static of_t bytemap();
213
214 /*
215 * For a program that makes many, many references to the database, it
216 * is a large performance win to keep the table in core, if it will fit.
217 * Note that this does hurt robustness in the event of crashes, and
218 * dbzdbmclose() *must* be called to flush the in-core database to disk.
219 * The code is prepared to deal with the possibility that there isn't
220 * enough memory. There *is* an assumption that a size_t is big enough
221 * to hold the size (in bytes) of one table, so dbzdbminit() tries to figure
222 * out whether this is possible first.
223 *
224 * The preferred way to ask for an in-core table is to do dbzincore(1)
225 * before dbzdbminit(). The default is not to do it, although -DINCORE
226 * overrides this for backward compatibility with old dbz.
227 *
228 * We keep only the first table in core. This greatly simplifies the
229 * code, and bounds memory demand. Furthermore, doing this is a large
230 * performance win even in the event of massive overflow.
231 */
232 #ifdef INCORE
233 static int incore = 1;
234 #else
235 static int incore = 0;
236 #endif
237
238 /*
239 * Making the in-core table write-through is a win in some odd situations
240 * where one is doing much reading and very little writing.
241 */
242 static int writethrough = 0;
243
244 /*
245 * Stdio buffer for .pag reads. Buffering more than about 16 does not help
246 * significantly at the densities we try to maintain, and the much larger
247 * buffers that most stdios default to are much more expensive to fill.
248 * With small buffers, stdio is performance-competitive with raw read(),
249 * and it's much more portable.
250 */
251 #ifndef NPAGBUF
252 #define NPAGBUF 16
253 #endif
254 #ifndef NOBUFFER
255 #ifdef _IOFBF
256 static of_t pagbuf[NPAGBUF]; /* only needed if !NOBUFFER && _IOFBF */
257 #endif
258 #endif
259
260 /*
261 * Stdio buffer for base-file reads. Message-IDs (all news ever needs to
262 * read) are essentially never longer than 64 bytes, and the typical stdio
263 * buffer is so much larger that it is much more expensive to fill.
264 */
265 #ifndef SHISTBUF
266 #define SHISTBUF 64
267 #endif
268 #ifdef _IOFBF
269 static char basebuf[SHISTBUF]; /* only needed if _IOFBF exists */
270 #endif
271
272 /*
273 * Callback routine for performing obscene acts on file descriptors.
274 * This gives the user some control when dbz opens a file descriptor with
275 * the intention of keeping it open, and lets him (e.g.) set close-on-exec
276 * bits on the descriptor. The callback routine gets a FILE * as its
277 * parameter, and must not mess with it in dbz-visible ways.
278 */
279 static void (*callback)() = NULL;
280
281 /*
282 * Data structure for recording info about searches.
283 */
284 struct searcher {
285 of_t place; /* current location in file */
286 int tabno; /* which table we're in */
287 int run; /* how long we'll stay in this table */
288 # ifndef MAXRUN
289 # define MAXRUN 100
290 # endif
291 long hash; /* the key's hash code (for optimization) */
292 of_t tag; /* tag we are looking for */
293 int seen; /* have we examined current location? */
294 int aborted; /* has i/o error aborted search? */
295 };
296 static void start();
297 #define FRESH ((struct searcher *)NULL)
298 static of_t search();
299 #define NOTFOUND ((of_t)-1)
300 static int okayvalue();
301 static int set();
302
303 /*
304 * Arguably the searcher struct for a given routine ought to be local to
305 * it, but a fetch() is very often immediately followed by a store(), and
306 * in some circumstances it is a useful performance win to remember where
307 * the fetch() completed. So we use a global struct and remember whether
308 * it is current.
309 */
310 static struct searcher srch;
311 static struct searcher *prevp; /* &srch or FRESH */
312
313 /* byte-ordering stuff */
314 static int mybmap[SOF]; /* my byte order (see mybytemap()) */
315 static int bytesame; /* is database order same as mine? */
316 #define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
317 #define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
318
319 /*
320 * The double parentheses needed to make this work are ugly, but the
321 * alternative (under most compilers) is to pack around 2K of unused
322 * strings -- there's just no way to get rid of them. The "else" is
323 * likewise ugly, but swallows the following ";" and keeps it from
324 * making syntactic trouble.
325 */
326 #ifdef DBZDEBUG
327 #define DEBUG(args) if (debug) { (void) printf args ; } else
328 static int debug; /* controlled by dbzdebug() */
329 #else
330 #define DEBUG(args) ;
331 #endif
332
333 /* misc. forwards */
334 static long hash();
335 static void crcinit();
336 static char *cipoint();
337 static char *mapcase();
338 static int isprime();
339 static FILE *latebase();
340
341 /* file-naming stuff */
342 static char dir[] = ".dir";
343 static char pag[] = ".pag";
344 static char *enstring();
345
346 /* central data structures */
347 static FILE *basef; /* descriptor for base file */
348 static char *basefname; /* name for not-yet-opened base file */
349 static FILE *dirf; /* descriptor for .dir file */
350 static int dirronly; /* dirf open read-only? */
351 static FILE *pagf = NULL; /* descriptor for .pag file */
352 static of_t pagpos; /* posn in pagf; only search may set != -1 */
353 static int pagronly; /* pagf open read-only? */
354 static of_t *corepag; /* incore version of .pag file, if any */
355 static FILE *bufpagf; /* well-buffered pagf, for incore rewrite */
356 static of_t *getcore();
357 static int putcore();
358 static int written; /* has a store() been done? */
359
360 /*
361 - dbzfresh - set up a new database, no historical info
362 */
363 int /* 0 success, -1 failure */
dbzfresh(name,size,fs,cmap,tagmask)364 dbzfresh(name, size, fs, cmap, tagmask)
365 char *name; /* base name; .dir and .pag must exist */
366 long size; /* table size (0 means default) */
367 int fs; /* field-separator character in base file */
368 int cmap; /* case-map algorithm (0 means default) */
369 of_t tagmask; /* 0 default, 1 no tags */
370 {
371 register char *fn;
372 struct dbzconfig c;
373 register of_t m;
374 register FILE *f;
375
376 if (pagf != NULL) {
377 DEBUG(("dbzfresh: database already open\n"));
378 return(-1);
379 }
380 if (size != 0 && size < 2) {
381 DEBUG(("dbzfresh: preposterous size (%ld)\n", size));
382 return(-1);
383 }
384
385 /* get default configuration */
386 if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)
387 return(-1); /* "can't happen" */
388
389 /* and mess with it as specified */
390 if (size != 0)
391 c.tsize = size;
392 c.fieldsep = fs;
393 switch (cmap) {
394 case 0:
395 case '0':
396 case 'B': /* 2.10 compat */
397 c.casemap = '0'; /* '\0' nicer, but '0' printable! */
398 break;
399 case '=':
400 case 'b': /* 2.11 compat */
401 c.casemap = '=';
402 break;
403 case 'C':
404 c.casemap = 'C';
405 break;
406 case '?':
407 c.casemap = DEFCASE;
408 break;
409 default:
410 DEBUG(("dbzfresh case map `%c' unknown\n", cmap));
411 return(-1);
412 break;
413 }
414 switch (tagmask) {
415 case 0: /* default */
416 break;
417 case 1: /* no tags */
418 c.tagshift = 0;
419 c.tagmask = 0;
420 c.tagenb = 0;
421 break;
422 default:
423 m = tagmask;
424 c.tagshift = 0;
425 while (!(m&01)) {
426 m >>= 1;
427 c.tagshift++;
428 }
429 c.tagmask = m;
430 c.tagenb = (m << 1) & ~m;
431 break;
432 }
433
434 /* write it out */
435 fn = enstring(name, dir);
436 if (fn == NULL)
437 return(-1);
438 f = fopen(fn, "w");
439 free(fn);
440 if (f == NULL) {
441 DEBUG(("dbzfresh: unable to write config\n"));
442 return(-1);
443 }
444 if (putconf(f, &c) < 0) {
445 (void) fclose(f);
446 return(-1);
447 }
448 if (fclose(f) == EOF) {
449 DEBUG(("dbzfresh: fclose failure\n"));
450 return(-1);
451 }
452
453 /* create/truncate .pag */
454 fn = enstring(name, pag);
455 if (fn == NULL)
456 return(-1);
457 f = fopen(fn, "w");
458 free(fn);
459 if (f == NULL) {
460 DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
461 return(-1);
462 } else
463 (void) fclose(f);
464
465 /* and punt to dbzdbminit for the hard work */
466 return(dbzdbminit(name));
467 }
468
469 /*
470 - dbzsize - what's a good table size to hold this many entries?
471 */
472 long
dbzsize(contents)473 dbzsize(contents)
474 long contents; /* 0 means what's the default */
475 {
476 register long n;
477
478 if (contents <= 0) { /* foulup or default inquiry */
479 DEBUG(("dbzsize: preposterous input (%ld)\n", contents));
480 return(DEFSIZE);
481 }
482 n = (contents/2)*3; /* try to keep table at most 2/3 full */
483 if (!(n&01)) /* make it odd */
484 n++;
485 DEBUG(("dbzsize: tentative size %ld\n", n));
486 while (!isprime(n)) /* and look for a prime */
487 n += 2;
488 DEBUG(("dbzsize: final size %ld\n", n));
489
490 return(n);
491 }
492
493 /*
494 - isprime - is a number prime?
495 *
496 * This is not a terribly efficient approach.
497 */
498 static int /* predicate */
isprime(x)499 isprime(x)
500 register long x;
501 {
502 static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
503 register int *ip;
504 register long div;
505 register long stop;
506
507 /* hit the first few primes quickly to eliminate easy ones */
508 /* this incidentally prevents ridiculously small tables */
509 for (ip = quick; (div = *ip) != 0; ip++)
510 if (x%div == 0) {
511 DEBUG(("isprime: quick result on %ld\n", x));
512 return(0);
513 }
514
515 /* approximate square root of x */
516 for (stop = x; x/stop < stop; stop >>= 1)
517 continue;
518 stop <<= 1;
519
520 /* try odd numbers up to stop */
521 for (div = *--ip + 2; div < stop; div += 2)
522 if (x%div == 0)
523 return(0);
524
525 return(1);
526 }
527
528 /*
529 - dbztagmask - what's a good tag mask for a file this size?
530 *
531 * This assumes that TAGENB is the high-order bit of a long.
532 */
533 long
dbztagmask(filesize)534 dbztagmask(filesize)
535 register long filesize;
536 {
537 register long m;
538 register int nbits;
539
540 /*
541 * First, see how many bits are unused. We cut it off at 7 on the
542 * theory that a 1-in-128 rate of false probes should be okay, given
543 * that the .pag file gets resized to limit the length of runs anyway.
544 */
545 m = TAGENB << (TAGSHIFT-1);
546 nbits = 1;
547 while ((m&filesize) == 0 && nbits < 7) { /* extend downward */
548 m |= m >> 1;
549 nbits++;
550 }
551
552 if ((m&filesize) == 0) /* reached max bits and still fine */
553 return(m);
554
555 /* We now have a mask that is one bit too big. */
556 m &= m << 1; /* shorten it */
557
558 if (m == 0) /* file is just too big */
559 return(1);
560
561 return(m);
562 }
563
564 /*
565 - dbzagain - set up a new database to be a rebuild of an old one
566 */
567 int /* 0 success, -1 failure */
dbzagain(name,oldname)568 dbzagain(name, oldname)
569 const char *name; /* base name; .dir and .pag must exist */
570 const char *oldname; /* base name; all must exist */
571 {
572 register char *fn;
573 struct dbzconfig c;
574 register int i;
575 register long top;
576 register FILE *f;
577 register int newtable;
578 register long newsize;
579
580 if (pagf != NULL) {
581 DEBUG(("dbzagain: database already open\n"));
582 return(-1);
583 }
584
585 /* pick up the old configuration */
586 fn = enstring(oldname, dir);
587 if (fn == NULL)
588 return(-1);
589 f = fopen(fn, "r");
590 free(fn);
591 if (f == NULL) {
592 DEBUG(("dbzagain: cannot open old .dir file\n"));
593 return(-1);
594 }
595 i = getconf(f, (FILE *)NULL, &c);
596 (void) fclose(f);
597 if (i < 0) {
598 DEBUG(("dbzagain: getconf failed\n"));
599 return(-1);
600 }
601
602 /* tinker with it */
603 /* first, find peak of usage history, if there's enough history */
604 top = 0;
605 newtable = 0;
606 for (i = 0; i < NUSEDS; i++) {
607 if (top < c.used[i])
608 top = c.used[i];
609 if (c.used[i] == 0)
610 newtable = 1; /* hasn't got full usage history yet */
611 }
612 if (top == 0) {
613 DEBUG(("dbzagain: old table has no contents!\n"));
614 newtable = 1;
615 }
616
617 /* shift the history, and decide on table size */
618 for (i = NUSEDS-1; i > 0; i--) {
619 c.used[i] = c.used[i-1];
620 c.ntagless[i] = c.ntagless[i-1];
621 }
622 c.used[0] = 0;
623 c.ntagless[0] = 0;
624 newsize = dbzsize(top);
625 if (!newtable || newsize > c.tsize) /* don't shrink new table */
626 c.tsize = newsize;
627
628 /* decide whether the tag needs changing */
629 if (c.ntagless[1] != 0) {
630 c.tagmask &= c.tagmask << 1; /* clear bottom bit */
631 if (c.tagmask == 0) /* oops, no more room... */
632 c.tagenb = 0; /* just can't tag at all */
633 }
634 /*
635 * You might think that you'd want to adaptively grow the tag
636 * as well as shrinking it. It's very hard to do that well,
637 * so for now we don't try.
638 */
639
640 /* write it out */
641 fn = enstring(name, dir);
642 if (fn == NULL)
643 return(-1);
644 f = fopen(fn, "w");
645 free(fn);
646 if (f == NULL) {
647 DEBUG(("dbzagain: unable to write new .dir\n"));
648 return(-1);
649 }
650 i = putconf(f, &c);
651 (void) fclose(f);
652 if (i < 0) {
653 DEBUG(("dbzagain: putconf failed\n"));
654 return(-1);
655 }
656
657 /* create/truncate .pag */
658 fn = enstring(name, pag);
659 if (fn == NULL)
660 return(-1);
661 f = fopen(fn, "w");
662 free(fn);
663 if (f == NULL) {
664 DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
665 return(-1);
666 } else
667 (void) fclose(f);
668
669 /* and let dbzdbminit do the work */
670 return(dbzdbminit(name));
671 }
672
673 /*
674 - dbzdbminit - open a database, creating it (using defaults) if necessary
675 *
676 * We try to leave errno set plausibly, to the extent that underlying
677 * functions permit this, since many people consult it if dbzdbminit() fails.
678 */
679 int /* 0 success, -1 failure */
dbzdbminit(name)680 dbzdbminit(name)
681 const char *name;
682 {
683 register int i;
684 register size_t s;
685 register char *dirfname;
686 register char *pagfname;
687
688 if (pagf != NULL) {
689 DEBUG(("dbzdbminit: dbzdbminit already called once\n"));
690 errno = 0;
691 return(-1);
692 }
693
694 /* open the .dir file */
695 dirfname = enstring(name, dir);
696 if (dirfname == NULL)
697 return(-1);
698 dirf = fopen(dirfname, "r+");
699 if (dirf == NULL) {
700 dirf = fopen(dirfname, "r");
701 dirronly = 1;
702 } else
703 dirronly = 0;
704 free(dirfname);
705 if (dirf == NULL) {
706 DEBUG(("dbzdbminit: can't open .dir file\n"));
707 return(-1);
708 }
709 if (callback != NULL)
710 (*callback)(dirf);
711
712 /* open the .pag file */
713 pagfname = enstring(name, pag);
714 if (pagfname == NULL) {
715 (void) fclose(dirf);
716 return(-1);
717 }
718 pagf = fopen(pagfname, "r+b");
719 if (pagf == NULL) {
720 pagf = fopen(pagfname, "rb");
721 if (pagf == NULL) {
722 DEBUG(("dbzdbminit: .pag open failed\n"));
723 (void) fclose(dirf);
724 free(pagfname);
725 return(-1);
726 }
727 pagronly = 1;
728 } else if (dirronly)
729 pagronly = 1;
730 else
731 pagronly = 0;
732 #ifdef NOBUFFER
733 /*
734 * B News does not do adequate locking on its database accesses.
735 * Why it doesn't get into trouble using dbm is a mystery. In any
736 * case, doing unbuffered i/o does not cure the problem, but does
737 * enormously reduce its incidence.
738 */
739 (void) setbuf(pagf, (char *)NULL);
740 #else
741 #ifdef _IOFBF
742 (void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf));
743 #endif
744 #endif
745 pagpos = -1;
746 /* don't free pagfname, need it below */
747 if (callback != NULL)
748 (*callback)(pagf);
749
750 /* open the base file */
751 basef = fopen(name, "r");
752 if (basef == NULL) {
753 DEBUG(("dbzdbminit: basefile open failed\n"));
754 basefname = enstring(name, "");
755 if (basefname == NULL) {
756 (void) fclose(pagf);
757 (void) fclose(dirf);
758 free(pagfname);
759 pagf = NULL;
760 return(-1);
761 }
762 } else
763 basefname = NULL;
764 #ifdef _IOFBF
765 if (basef != NULL)
766 (void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf));
767 #endif
768 if (callback != NULL)
769 (*callback)(basef);
770
771 /* pick up configuration */
772 if (getconf(dirf, pagf, &conf) < 0) {
773 DEBUG(("dbzdbminit: getconf failure\n"));
774 (void) fclose(basef);
775 (void) fclose(pagf);
776 (void) fclose(dirf);
777 free(pagfname);
778 pagf = NULL;
779 errno = EDOM; /* kind of a kludge, but very portable */
780 return(-1);
781 }
782 tagbits = conf.tagmask << conf.tagshift;
783 taghere = conf.tagenb << conf.tagshift;
784 tagboth = tagbits | taghere;
785 mybytemap(mybmap);
786 bytesame = 1;
787 for (i = 0; i < SOF; i++)
788 if (mybmap[i] != conf.bytemap[i])
789 bytesame = 0;
790
791 /* get first table into core, if it looks desirable and feasible */
792 s = (size_t)conf.tsize * SOF;
793 if (incore && (of_t)(s/SOF) == conf.tsize) {
794 bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b");
795 if (bufpagf != NULL) {
796 corepag = getcore(bufpagf);
797 if (callback != NULL)
798 (*callback)(bufpagf);
799 }
800 } else {
801 bufpagf = NULL;
802 corepag = NULL;
803 }
804 free(pagfname);
805
806 /* misc. setup */
807 crcinit();
808 written = 0;
809 prevp = FRESH;
810 DEBUG(("dbzdbminit: succeeded\n"));
811 return(0);
812 }
813
814 /*
815 - enstring - concatenate two strings into a malloced area
816 */
817 static char * /* NULL if malloc fails */
enstring(s1,s2)818 enstring(s1, s2)
819 char *s1;
820 char *s2;
821 {
822 register char *p;
823
824 p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1);
825 if (p != NULL) {
826 (void) strcpy(p, s1);
827 (void) strcat(p, s2);
828 } else {
829 DEBUG(("enstring(%s, %s) out of memory\n", s1, s2));
830 }
831 return(p);
832 }
833
834 /*
835 - dbzdbmclose - close a database
836 */
837 int
dbzdbmclose()838 dbzdbmclose()
839 {
840 register int ret = 0;
841
842 if (pagf == NULL) {
843 DEBUG(("dbzdbmclose: not opened!\n"));
844 return(-1);
845 }
846
847 if (fclose(pagf) == EOF) {
848 DEBUG(("dbzdbmclose: fclose(pagf) failed\n"));
849 ret = -1;
850 }
851 pagf = basef; /* ensure valid pointer; dbzsync checks it */
852 if (dbzsync() < 0)
853 ret = -1;
854 if (bufpagf != NULL && fclose(bufpagf) == EOF) {
855 DEBUG(("dbzdbmclose: fclose(bufpagf) failed\n"));
856 ret = -1;
857 }
858 if (corepag != NULL)
859 free((char *)corepag);
860 corepag = NULL;
861 if (fclose(basef) == EOF) {
862 DEBUG(("dbzdbmclose: fclose(basef) failed\n"));
863 ret = -1;
864 }
865 if (basefname != NULL)
866 free(basefname);
867 basef = NULL;
868 pagf = NULL;
869 if (fclose(dirf) == EOF) {
870 DEBUG(("dbzdbmclose: fclose(dirf) failed\n"));
871 ret = -1;
872 }
873
874 DEBUG(("dbzdbmclose: %s\n", (ret == 0) ? "succeeded" : "failed"));
875 return(ret);
876 }
877
878 /*
879 - dbzsync - push all in-core data out to disk
880 */
881 int
dbzsync()882 dbzsync()
883 {
884 register int ret = 0;
885
886 if (pagf == NULL) {
887 DEBUG(("dbzsync: not opened!\n"));
888 return(-1);
889 }
890 if (!written)
891 return(0);
892
893 if (corepag != NULL && !writethrough) {
894 if (putcore(corepag, bufpagf) < 0) {
895 DEBUG(("dbzsync: putcore failed\n"));
896 ret = -1;
897 }
898 }
899 if (!conf.olddbz)
900 if (putconf(dirf, &conf) < 0)
901 ret = -1;
902
903 DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed"));
904 return(ret);
905 }
906
907 /*
908 - dbzcancel - cancel writing of in-core data
909 * Mostly for use from child processes.
910 * Note that we don't need to futz around with stdio buffers, because we
911 * always fflush them immediately anyway and so they never have stale data.
912 */
913 int
dbzcancel()914 dbzcancel()
915 {
916 if (pagf == NULL) {
917 DEBUG(("dbzcancel: not opened!\n"));
918 return(-1);
919 }
920
921 written = 0;
922 return(0);
923 }
924
925 /*
926 - dbzfetch - fetch() with case mapping built in
927 */
928 datum
dbzfetch(key)929 dbzfetch(key)
930 datum key;
931 {
932 char buffer[DBZMAXKEY + 1];
933 datum mappedkey;
934 register size_t keysize;
935
936 DEBUG(("dbzfetch: (%s)\n", key.dptr));
937
938 /* Key is supposed to be less than DBZMAXKEY */
939 keysize = key.dsize;
940 if (keysize >= DBZMAXKEY) {
941 keysize = DBZMAXKEY;
942 DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
943 }
944
945 mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
946 buffer[keysize] = '\0'; /* just a debug aid */
947 mappedkey.dsize = keysize;
948
949 return(dbzdbmfetch(mappedkey));
950 }
951
952 /*
953 - dbzdbmfetch - get an entry from the database
954 *
955 * Disgusting fine point, in the name of backward compatibility: if the
956 * last character of "key" is a NUL, that character is (effectively) not
957 * part of the comparison against the stored keys.
958 */
959 datum /* dptr NULL, dsize 0 means failure */
dbzdbmfetch(key)960 dbzdbmfetch(key)
961 datum key;
962 {
963 char buffer[DBZMAXKEY + 1];
964 static of_t key_ptr; /* return value points here */
965 datum output;
966 register size_t keysize;
967 register size_t cmplen;
968 register char *sepp;
969
970 DEBUG(("dbzdbmfetch: (%s)\n", key.dptr));
971 output.dptr = NULL;
972 output.dsize = 0;
973 prevp = FRESH;
974
975 /* Key is supposed to be less than DBZMAXKEY */
976 keysize = key.dsize;
977 if (keysize >= DBZMAXKEY) {
978 keysize = DBZMAXKEY;
979 DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
980 }
981
982 if (pagf == NULL) {
983 DEBUG(("dbzdbmfetch: database not open!\n"));
984 return(output);
985 } else if (basef == NULL) { /* basef didn't exist yet */
986 basef = latebase();
987 if (basef == NULL)
988 return(output);
989 }
990
991 cmplen = keysize;
992 sepp = &conf.fieldsep;
993 if (key.dptr[keysize-1] == '\0') {
994 cmplen--;
995 sepp = &buffer[keysize-1];
996 }
997 start(&srch, &key, FRESH);
998 while ((key_ptr = search(&srch)) != NOTFOUND) {
999 DEBUG(("got 0x%lx\n", (long)key_ptr));
1000
1001 /* fetch the key */
1002 if (fseek(basef, key_ptr, SEEK_SET) != 0) {
1003 DEBUG(("dbzdbmfetch: seek failed\n"));
1004 return(output);
1005 }
1006 if (fread(buffer, 1, keysize, basef) != keysize) {
1007 DEBUG(("dbzdbmfetch: read failed\n"));
1008 return(output);
1009 }
1010
1011 /* try it */
1012 buffer[keysize] = '\0'; /* terminated for DEBUG */
1013 (void) mapcase(buffer, buffer, keysize);
1014 DEBUG(("dbzdbmfetch: buffer (%s) looking for (%s) size = %ld\n",
1015 buffer, key.dptr, keysize));
1016 if (memcmp(key.dptr, buffer, cmplen) == 0 &&
1017 (*sepp == conf.fieldsep || *sepp == '\0')) {
1018 /* we found it */
1019 output.dptr = (char *)&key_ptr;
1020 output.dsize = SOF;
1021 DEBUG(("dbzdbmfetch: successful\n"));
1022 return(output);
1023 }
1024 }
1025
1026 /* we didn't find it */
1027 DEBUG(("dbzdbmfetch: failed\n"));
1028 prevp = &srch; /* remember where we stopped */
1029 return(output);
1030 }
1031
1032 /*
1033 - latebase - try to open a base file that wasn't there at the start
1034 */
1035 static FILE *
latebase()1036 latebase()
1037 {
1038 register FILE *it;
1039
1040 if (basefname == NULL) {
1041 DEBUG(("latebase: name foulup\n"));
1042 return(NULL);
1043 }
1044 it = fopen(basefname, "r");
1045 if (it == NULL) {
1046 DEBUG(("latebase: still can't open base\n"));
1047 } else {
1048 DEBUG(("latebase: late open succeeded\n"));
1049 free(basefname);
1050 basefname = NULL;
1051 #ifdef _IOFBF
1052 (void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf));
1053 #endif
1054 if (callback != NULL)
1055 (*callback)(it);
1056 }
1057 return(it);
1058 }
1059
1060 /*
1061 - dbzstore - store() with case mapping built in
1062 */
1063 int
dbzstore(key,data)1064 dbzstore(key, data)
1065 datum key;
1066 datum data;
1067 {
1068 char buffer[DBZMAXKEY + 1];
1069 datum mappedkey;
1070 register size_t keysize;
1071
1072 DEBUG(("dbzstore: (%s)\n", key.dptr));
1073
1074 /* Key is supposed to be less than DBZMAXKEY */
1075 keysize = key.dsize;
1076 if (keysize >= DBZMAXKEY) {
1077 DEBUG(("dbzstore: key size too big (%d)\n", key.dsize));
1078 return(-1);
1079 }
1080
1081 mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
1082 buffer[keysize] = '\0'; /* just a debug aid */
1083 mappedkey.dsize = keysize;
1084
1085 return(dbzdbmstore(mappedkey, data));
1086 }
1087
1088 /*
1089 - dbzdbmstore - add an entry to the database
1090 */
1091 int /* 0 success, -1 failure */
dbzdbmstore(key,data)1092 dbzdbmstore(key, data)
1093 datum key;
1094 datum data;
1095 {
1096 of_t value;
1097
1098 if (pagf == NULL) {
1099 DEBUG(("dbzdbmstore: database not open!\n"));
1100 return(-1);
1101 } else if (basef == NULL) { /* basef didn't exist yet */
1102 basef = latebase();
1103 if (basef == NULL)
1104 return(-1);
1105 }
1106 if (pagronly) {
1107 DEBUG(("dbzdbmstore: database open read-only\n"));
1108 return(-1);
1109 }
1110 if (data.dsize != SOF) {
1111 DEBUG(("dbzdbmstore: value size wrong (%d)\n", data.dsize));
1112 return(-1);
1113 }
1114 if (key.dsize >= DBZMAXKEY) {
1115 DEBUG(("dbzdbmstore: key size too big (%d)\n", key.dsize));
1116 return(-1);
1117 }
1118
1119 /* copy the value in to ensure alignment */
1120 (void) memcpy((char *)&value, data.dptr, SOF);
1121 DEBUG(("dbzdbmstore: (%s, %ld)\n", key.dptr, (long)value));
1122 if (!okayvalue(value)) {
1123 DEBUG(("dbzdbmstore: reserved bit or overflow in 0x%lx\n", value));
1124 return(-1);
1125 }
1126
1127 /* find the place, exploiting previous search if possible */
1128 start(&srch, &key, prevp);
1129 while (search(&srch) != NOTFOUND)
1130 continue;
1131
1132 prevp = FRESH;
1133 conf.used[0]++;
1134 DEBUG(("dbzdbmstore: used count %ld\n", conf.used[0]));
1135 written = 1;
1136 return(set(&srch, value));
1137 }
1138
1139 /*
1140 - dbzincore - control attempts to keep .pag file in core
1141 */
1142 int /* old setting */
dbzincore(value)1143 dbzincore(value)
1144 int value;
1145 {
1146 register int old = incore;
1147
1148 incore = value;
1149 return(old);
1150 }
1151
1152 /*
1153 - dbzwritethrough - enable/disable immediate writing to disk of in-core writes
1154 */
1155 int /* old setting */
dbzwritethrough(value)1156 dbzwritethrough(value)
1157 int value;
1158 {
1159 register int old = writethrough;
1160
1161 writethrough = value;
1162 return(old);
1163 }
1164
1165 /*
1166 - dbzfiledesc - provide hook for doing obscene things to file descriptors
1167 */
1168 typedef void (*fp)();
1169 fp
1170 dbzfiledesc(diddler)
1171 void (*diddler)(); /* NULL means do nothing */
1172 {
1173 register void (*old)() = callback;
1174
1175 callback = diddler;
1176 return(old);
1177 }
1178
1179 /*
1180 - getconf - get configuration from .dir file
1181 */
1182 static int /* 0 success, -1 failure */
getconf(df,pf,cp)1183 getconf(df, pf, cp)
1184 register FILE *df; /* NULL means just give me the default */
1185 register FILE *pf; /* NULL means don't care about .pag */
1186 register struct dbzconfig *cp;
1187 {
1188 register int c;
1189 register int i;
1190 int err = 0;
1191
1192 c = (df != NULL) ? getc(df) : EOF;
1193 if (c == EOF) { /* empty file, no configuration known */
1194 cp->olddbz = 0;
1195 if (df != NULL && pf != NULL && getc(pf) != EOF)
1196 cp->olddbz = 1;
1197 cp->tsize = DEFSIZE;
1198 cp->fieldsep = '\t';
1199 for (i = 0; i < NUSEDS; i++) {
1200 cp->used[i] = 0;
1201 cp->ntagless[i] = 0;
1202 }
1203 cp->valuesize = SOF;
1204 mybytemap(cp->bytemap);
1205 cp->casemap = DEFCASE;
1206 cp->tagenb = TAGENB;
1207 cp->tagmask = TAGMASK;
1208 cp->tagshift = TAGSHIFT;
1209 DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
1210 cp->tsize, cp->casemap, cp->tagenb,
1211 cp->tagmask, cp->tagshift));
1212 return(0);
1213 }
1214 (void) ungetc(c, df);
1215
1216 /* first line, the vital stuff */
1217 if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z')
1218 err = -1;
1219 if (getno(df, &err) != dbzversion)
1220 err = -1;
1221 cp->tsize = getno(df, &err);
1222 cp->fieldsep = (char)getno(df, &err);
1223 while ((c = getc(df)) == ' ')
1224 continue;
1225 cp->casemap = c;
1226 cp->tagenb = getno(df, &err);
1227 cp->tagmask = getno(df, &err);
1228 cp->tagshift = getno(df, &err);
1229 cp->valuesize = getno(df, &err);
1230 if (cp->valuesize != SOF) {
1231 DEBUG(("getconf: wrong of_t size (%d)\n", cp->valuesize));
1232 err = -1;
1233 cp->valuesize = SOF; /* to protect the loops below */
1234 }
1235 for (i = 0; i < cp->valuesize; i++)
1236 cp->bytemap[i] = getno(df, &err);
1237 if (getc(df) != '\n')
1238 err = -1;
1239 DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize,
1240 cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask,
1241 cp->tagshift));
1242 DEBUG(("bytemap (%d)", cp->valuesize));
1243 #ifdef DBZDEBUG
1244 for (i = 0; i < cp->valuesize; i++) {
1245 DEBUG((" %d", cp->bytemap[i]));
1246 }
1247 #endif
1248 DEBUG(("\n"));
1249
1250 /* second line, the usages */
1251 for (i = 0; i < NUSEDS; i++)
1252 cp->used[i] = getno(df, &err);
1253 if (getc(df) != '\n')
1254 err = -1;
1255 DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2]));
1256
1257 /* third line, currently only the tagless count */
1258 if ((c = getc(df)) != EOF) {
1259 (void) ungetc(c, df);
1260 for (i = 0; i < NUSEDS; i++)
1261 cp->ntagless[i] = getno(df, &err);
1262 if (getc(df) != '\n')
1263 err = -1;
1264 } else
1265 for (i = 0; i < NUSEDS; i++)
1266 cp->ntagless[i] = 0;
1267
1268 if (err < 0) {
1269 DEBUG(("getconf error\n"));
1270 return(-1);
1271 }
1272 return(0);
1273 }
1274
1275 /*
1276 - getno - get a long
1277 */
1278 static long
getno(f,ep)1279 getno(f, ep)
1280 FILE *f;
1281 int *ep;
1282 {
1283 register char *p;
1284 # define MAXN 50
1285 char getbuf[MAXN];
1286 register int c;
1287
1288 while ((c = getc(f)) == ' ')
1289 continue;
1290 if (c == EOF || c == '\n') {
1291 DEBUG(("getno: missing number\n"));
1292 *ep = -1;
1293 return(0);
1294 }
1295 p = getbuf;
1296 *p++ = c;
1297 while ((c = getc(f)) != EOF && c != '\n' && c != ' ')
1298 if (p < &getbuf[MAXN-1])
1299 *p++ = c;
1300 if (c == EOF) {
1301 DEBUG(("getno: EOF\n"));
1302 *ep = -1;
1303 } else
1304 (void) ungetc(c, f);
1305 *p = '\0';
1306
1307 if (strspn(getbuf, "-1234567890") != strlen(getbuf)) {
1308 DEBUG(("getno: `%s' non-numeric\n", getbuf));
1309 *ep = -1;
1310 }
1311 return(atol(getbuf));
1312 }
1313
1314 /*
1315 - putconf - write configuration to .dir file
1316 */
1317 static int /* 0 success, -1 failure */
putconf(f,cp)1318 putconf(f, cp)
1319 register FILE *f;
1320 register struct dbzconfig *cp;
1321 {
1322 register int i;
1323 register int ret = 0;
1324
1325 if (fseek(f, (of_t)0, SEEK_SET) != 0) {
1326 DEBUG(("fseek failure in putconf\n"));
1327 ret = -1;
1328 }
1329 fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion, cp->tsize,
1330 cp->fieldsep, cp->casemap, cp->tagenb,
1331 cp->tagmask, cp->tagshift, cp->valuesize);
1332 for (i = 0; i < cp->valuesize; i++)
1333 fprintf(f, " %d", cp->bytemap[i]);
1334 fprintf(f, "\n");
1335 for (i = 0; i < NUSEDS; i++)
1336 fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
1337 for (i = 0; i < NUSEDS; i++)
1338 fprintf(f, "%ld%c", cp->ntagless[i], (i < NUSEDS-1) ? ' ' : '\n');
1339
1340 (void) fflush(f);
1341 if (ferror(f))
1342 ret = -1;
1343
1344 DEBUG(("putconf status %d\n", ret));
1345 return(ret);
1346 }
1347
1348 /*
1349 - getcore - try to set up an in-core copy of .pag file
1350 */
1351 static of_t * /* pointer to copy, or NULL */
getcore(f)1352 getcore(f)
1353 FILE *f;
1354 {
1355 register of_t *p;
1356 register size_t i;
1357 register size_t nread;
1358 register of_t *it;
1359
1360 it = (of_t *)malloc((size_t)conf.tsize * SOF);
1361 if (it == NULL) {
1362 DEBUG(("getcore: malloc failed\n"));
1363 return(NULL);
1364 }
1365
1366 nread = fread((char *)it, SOF, (size_t)conf.tsize, f);
1367 if (ferror(f)) {
1368 DEBUG(("getcore: read failed\n"));
1369 free((char *)it);
1370 return(NULL);
1371 }
1372
1373 p = it + nread;
1374 i = (size_t)conf.tsize - nread;
1375 while (i-- > 0)
1376 *p++ = VACANT;
1377
1378 return(it);
1379 }
1380
1381 /*
1382 - putcore - try to rewrite an in-core table
1383 */
1384 static int /* 0 okay, -1 fail */
putcore(tab,f)1385 putcore(tab, f)
1386 of_t *tab;
1387 FILE *f;
1388 {
1389 if (fseek(f, (of_t)0, SEEK_SET) != 0) {
1390 DEBUG(("fseek failure in putcore\n"));
1391 return(-1);
1392 }
1393 (void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f);
1394 (void) fflush(f);
1395 return((ferror(f)) ? -1 : 0);
1396 }
1397
1398 /*
1399 - start - set up to start or restart a search
1400 */
1401 static void
start(sp,kp,osp)1402 start(sp, kp, osp)
1403 register struct searcher *sp;
1404 register datum *kp;
1405 register struct searcher *osp; /* may be FRESH, i.e. NULL */
1406 {
1407 register long h;
1408
1409 h = hash(kp->dptr, kp->dsize);
1410 if (osp != FRESH && osp->hash == h) {
1411 if (sp != osp)
1412 *sp = *osp;
1413 DEBUG(("search restarted\n"));
1414 } else {
1415 sp->hash = h;
1416 sp->tag = MKTAG(h / conf.tsize);
1417 DEBUG(("tag 0x%lx\n", sp->tag));
1418 sp->place = h % conf.tsize;
1419 sp->tabno = 0;
1420 sp->run = (conf.olddbz) ? conf.tsize : MAXRUN;
1421 sp->aborted = 0;
1422 }
1423 sp->seen = 0;
1424 }
1425
1426 /*
1427 - search - conduct part of a search
1428 */
1429 static of_t /* NOTFOUND if we hit VACANT or error */
search(sp)1430 search(sp)
1431 register struct searcher *sp;
1432 {
1433 register of_t dest;
1434 register of_t value;
1435 of_t val; /* buffer for value (can't fread register) */
1436 register of_t place;
1437
1438 if (sp->aborted)
1439 return(NOTFOUND);
1440
1441 for (;;) {
1442 /* determine location to be examined */
1443 place = sp->place;
1444 if (sp->seen) {
1445 /* go to next location */
1446 if (--sp->run <= 0) {
1447 sp->tabno++;
1448 sp->run = MAXRUN;
1449 }
1450 place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
1451 sp->place = place;
1452 } else
1453 sp->seen = 1; /* now looking at current location */
1454 DEBUG(("search @ %ld\n", place));
1455
1456 /* get the tagged value */
1457 if (corepag != NULL && place < conf.tsize) {
1458 DEBUG(("search: in core\n"));
1459 value = MAPIN(corepag[place]);
1460 } else {
1461 /* seek, if necessary */
1462 dest = place * SOF;
1463 if (pagpos != dest) {
1464 if (fseek(pagf, dest, SEEK_SET) != 0) {
1465 DEBUG(("search: seek failed\n"));
1466 pagpos = -1;
1467 sp->aborted = 1;
1468 return(NOTFOUND);
1469 }
1470 pagpos = dest;
1471 }
1472
1473 /* read it */
1474 if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
1475 value = MAPIN(val);
1476 else if (ferror(pagf)) {
1477 DEBUG(("search: read failed\n"));
1478 pagpos = -1;
1479 sp->aborted = 1;
1480 return(NOTFOUND);
1481 } else
1482 value = VACANT;
1483
1484 /* and finish up */
1485 pagpos += sizeof(val);
1486 }
1487
1488 /* vacant slot is always cause to return */
1489 if (value == VACANT) {
1490 DEBUG(("search: empty slot\n"));
1491 return(NOTFOUND);
1492 };
1493
1494 /* check the tag */
1495 value = UNBIAS(value);
1496 DEBUG(("got 0x%lx\n", value));
1497 if (!HASTAG(value)) {
1498 DEBUG(("tagless\n"));
1499 return(value);
1500 } else if (TAG(value) == sp->tag) {
1501 DEBUG(("match\n"));
1502 return(NOTAG(value));
1503 } else {
1504 DEBUG(("mismatch 0x%lx\n", TAG(value)));
1505 }
1506 }
1507 /* NOTREACHED */
1508 }
1509
1510 /*
1511 - okayvalue - check that a value can be stored
1512 */
1513 static int /* predicate */
okayvalue(value)1514 okayvalue(value)
1515 of_t value;
1516 {
1517 if (HASTAG(value))
1518 return(0);
1519 #ifdef OVERFLOW
1520 if (value == LONG_MAX) /* BIAS() and UNBIAS() will overflow */
1521 return(0);
1522 #endif
1523 return(1);
1524 }
1525
1526 /*
1527 - set - store a value into a location previously found by search
1528 */
1529 static int /* 0 success, -1 failure */
set(sp,value)1530 set(sp, value)
1531 register struct searcher *sp;
1532 of_t value;
1533 {
1534 register of_t place = sp->place;
1535 register of_t v = value;
1536
1537 if (sp->aborted)
1538 return(-1);
1539
1540 if (CANTAG(v) && !conf.olddbz) {
1541 v |= sp->tag | taghere;
1542 if (v != UNBIAS(VACANT)) /* BIAS(v) won't look VACANT */
1543 #ifdef OVERFLOW
1544 if (v != LONG_MAX) /* and it won't overflow */
1545 #endif
1546 value = v;
1547 } else if (!conf.olddbz)
1548 conf.ntagless[0]++;
1549 DEBUG(("tagged value is 0x%lx\n", value));
1550 value = BIAS(value);
1551 value = MAPOUT(value);
1552
1553 /* If we have the index file in memory, use it */
1554 if (corepag != NULL && place < conf.tsize) {
1555 corepag[place] = value;
1556 DEBUG(("set: incore\n"));
1557 if (!writethrough)
1558 return(0);
1559 }
1560
1561 /* seek to spot */
1562 pagpos = -1; /* invalidate position memory */
1563 if (fseek(pagf, place * (of_t)SOF, SEEK_SET) != 0) {
1564 DEBUG(("set: seek failed\n"));
1565 sp->aborted = 1;
1566 return(-1);
1567 }
1568
1569 /* write in data */
1570 if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
1571 DEBUG(("set: write failed\n"));
1572 sp->aborted = 1;
1573 return(-1);
1574 }
1575 /* fflush improves robustness, and buffer re-use is rare anyway */
1576 if (fflush(pagf) == EOF) {
1577 DEBUG(("set: fflush failed\n"));
1578 sp->aborted = 1;
1579 return(-1);
1580 }
1581
1582 DEBUG(("set: succeeded\n"));
1583 return(0);
1584 }
1585
1586 /*
1587 - mybytemap - determine this machine's byte map
1588 *
1589 * A byte map is an array of ints, sizeof(of_t) of them. The 0th int
1590 * is the byte number of the high-order byte in my of_t, and so forth.
1591 */
1592 static void
mybytemap(map)1593 mybytemap(map)
1594 int map[]; /* -> int[SOF] */
1595 {
1596 union {
1597 of_t o;
1598 char c[SOF];
1599 } u;
1600 register int *mp = &map[SOF];
1601 register int ntodo;
1602 register int i;
1603
1604 u.o = 1;
1605 for (ntodo = (int)SOF; ntodo > 0; ntodo--) {
1606 for (i = 0; i < SOF; i++)
1607 if (u.c[i] != 0)
1608 break;
1609 if (i == SOF) {
1610 /* trouble -- set it to *something* consistent */
1611 DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo));
1612 for (i = 0; i < SOF; i++)
1613 map[i] = i;
1614 return;
1615 }
1616 DEBUG(("mybytemap: byte %d\n", i));
1617 *--mp = i;
1618 while (u.c[i] != 0)
1619 u.o <<= 1;
1620 }
1621 }
1622
1623 /*
1624 - bytemap - transform an of_t from byte ordering map1 to map2
1625 */
1626 static of_t /* transformed result */
bytemap(ino,map1,map2)1627 bytemap(ino, map1, map2)
1628 of_t ino;
1629 int *map1;
1630 int *map2;
1631 {
1632 union oc {
1633 of_t o;
1634 char c[SOF];
1635 };
1636 union oc in;
1637 union oc out;
1638 register int i;
1639
1640 in.o = ino;
1641 for (i = 0; i < SOF; i++)
1642 out.c[map2[i]] = in.c[map1[i]];
1643 return(out.o);
1644 }
1645
1646 /*
1647 * This is a simplified version of the pathalias hashing function.
1648 * Thanks to Steve Belovin and Peter Honeyman
1649 *
1650 * hash a string into a long int. 31 bit crc (from andrew appel).
1651 * the crc table is computed at run time by crcinit() -- we could
1652 * precompute, but it takes 1 clock tick on a 750.
1653 *
1654 * This fast table calculation works only if POLY is a prime polynomial
1655 * in the field of integers modulo 2. Since the coefficients of a
1656 * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
1657 * implicit. IT MUST ALSO BE THE CASE that the coefficients of orders
1658 * 31 down to 25 are zero. Happily, we have candidates, from
1659 * E. J. Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
1660 * x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
1661 * x^31 + x^3 + x^0
1662 *
1663 * We reverse the bits to get:
1664 * 111101010000000000000000000000001 but drop the last 1
1665 * f 5 0 0 0 0 0 0
1666 * 010010000000000000000000000000001 ditto, for 31-bit crc
1667 * 4 8 0 0 0 0 0 0
1668 */
1669
1670 #define POLY 0x48000000L /* 31-bit polynomial (avoids sign problems) */
1671
1672 static long CrcTable[128];
1673
1674 /*
1675 - crcinit - initialize tables for hash function
1676 */
1677 static void
crcinit()1678 crcinit()
1679 {
1680 register int i, j;
1681 register long sum;
1682
1683 for (i = 0; i < 128; ++i) {
1684 sum = 0L;
1685 for (j = 7 - 1; j >= 0; --j)
1686 if (i & (1 << j))
1687 sum ^= POLY >> j;
1688 CrcTable[i] = sum;
1689 }
1690 DEBUG(("crcinit: done\n"));
1691 }
1692
1693 /*
1694 - hash - Honeyman's nice hashing function
1695 */
1696 static long
hash(name,size)1697 hash(name, size)
1698 register char *name;
1699 register int size;
1700 {
1701 register long sum = 0L;
1702
1703 while (size--) {
1704 sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
1705 }
1706 DEBUG(("hash: returns (%ld)\n", sum));
1707 return(sum);
1708 }
1709
1710 /*
1711 * case-mapping stuff
1712 *
1713 * Borrowed from C News, by permission of the authors. Somewhat modified.
1714 *
1715 * We exploit the fact that we are dealing only with headers here, and
1716 * headers are limited to the ASCII characters by RFC822. It is barely
1717 * possible that we might be dealing with a translation into another
1718 * character set, but in particular it's very unlikely for a header
1719 * character to be outside -128..255.
1720 *
1721 * Life would be a whole lot simpler if tolower() could safely and portably
1722 * be applied to any char.
1723 */
1724
1725 #define OFFSET 128 /* avoid trouble with negative chars */
1726
1727 /* must call casencmp before invoking TOLOW... */
1728 #define TOLOW(c) (cmap[(c)+OFFSET])
1729
1730 /* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
1731 /* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
1732 #define CISTREQN(a, b, n) \
1733 (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)
1734
1735 #define MAPSIZE (256+OFFSET)
1736 static char cmap[MAPSIZE]; /* relies on init to '\0' */
1737 static int mprimed = 0; /* has cmap been set up? */
1738
1739 /*
1740 - mapprime - set up case-mapping stuff
1741 */
1742 static void
mapprime()1743 mapprime()
1744 {
1745 register char *lp;
1746 register char *up;
1747 register int c;
1748 register int i;
1749 static char lower[] = "abcdefghijklmnopqrstuvwxyz";
1750 static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1751
1752 for (lp = lower, up = upper; *lp != '\0'; lp++, up++) {
1753 c = *lp;
1754 cmap[c+OFFSET] = c;
1755 cmap[*up+OFFSET] = c;
1756 }
1757 for (i = 0; i < MAPSIZE; i++)
1758 if (cmap[i] == '\0')
1759 cmap[i] = (char)(i-OFFSET);
1760 mprimed = 1;
1761 }
1762
1763 /*
1764 - casencmp - case-independent strncmp
1765 */
1766 static int /* < == > 0 */
casencmp(s1,s2,len)1767 casencmp(s1, s2, len)
1768 char *s1;
1769 char *s2;
1770 int len;
1771 {
1772 register char *p1;
1773 register char *p2;
1774 register int n;
1775
1776 if (!mprimed)
1777 mapprime();
1778
1779 p1 = s1;
1780 p2 = s2;
1781 n = len;
1782 while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) {
1783 p1++;
1784 p2++;
1785 }
1786 if (n < 0)
1787 return(0);
1788
1789 /*
1790 * The following case analysis is necessary so that characters
1791 * which look negative collate low against normal characters but
1792 * high against the end-of-string NUL.
1793 */
1794 if (*p1 == '\0' && *p2 == '\0')
1795 return(0);
1796 else if (*p1 == '\0')
1797 return(-1);
1798 else if (*p2 == '\0')
1799 return(1);
1800 else
1801 return(TOLOW(*p1) - TOLOW(*p2));
1802 }
1803
1804 /*
1805 - mapcase - do case-mapped copy
1806 */
1807 static char * /* returns src or dst */
mapcase(dst,src,siz)1808 mapcase(dst, src, siz)
1809 char *dst; /* destination, used only if mapping needed */
1810 char *src; /* source; src == dst is legal */
1811 size_t siz;
1812 {
1813 register char *s;
1814 register char *d;
1815 register char *c; /* case break */
1816 register char *e; /* end of source */
1817
1818
1819 c = cipoint(src, siz);
1820 if (c == NULL)
1821 return(src);
1822
1823 if (!mprimed)
1824 mapprime();
1825 s = src;
1826 e = s + siz;
1827 d = dst;
1828
1829 while (s < c)
1830 *d++ = *s++;
1831 while (s < e)
1832 *d++ = TOLOW(*s++);
1833
1834 return(dst);
1835 }
1836
1837 /*
1838 - cipoint - where in this message-ID does it become case-insensitive?
1839 *
1840 * The RFC822 code is not quite complete. Absolute, total, full RFC822
1841 * compliance requires a horrible parsing job, because of the arcane
1842 * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
1843 * for example. There are three or four things that might occur in the
1844 * domain part of a message-id that are case-sensitive. They don't seem
1845 * to ever occur in real news, thank Cthulhu. (What? You were expecting
1846 * a merciful and forgiving deity to be invoked in connection with RFC822?
1847 * Forget it; none of them would come near it.)
1848 */
1849 static char * /* pointer into s, or NULL for "nowhere" */
cipoint(s,siz)1850 cipoint(s, siz)
1851 char *s;
1852 size_t siz;
1853 {
1854 register char *p;
1855 static char post[] = "postmaster";
1856 static int plen = sizeof(post)-1;
1857
1858 switch (conf.casemap) {
1859 case '0': /* unmapped, sensible */
1860 return(NULL);
1861 break;
1862 case 'C': /* C News, RFC 822 conformant (approx.) */
1863 p = memchr(s, '@', siz);
1864 if (p == NULL) /* no local/domain split */
1865 return(NULL); /* assume all local */
1866 if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) {
1867 /* crazy -- "postmaster" is case-insensitive */
1868 return(s);
1869 }
1870 return(p);
1871 break;
1872 case '=': /* 2.11, neither sensible nor conformant */
1873 return(s); /* all case-insensitive */
1874 break;
1875 }
1876
1877 DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap));
1878 return(NULL); /* just leave it alone */
1879 }
1880
1881 /*
1882 - dbzdebug - control dbz debugging at run time
1883 */
1884 int /* old value */
dbzdebug(value)1885 dbzdebug(value)
1886 int value;
1887 {
1888 #ifdef DBZDEBUG
1889 register int old = debug;
1890
1891 debug = value;
1892 return(old);
1893 #else
1894 return(-1);
1895 #endif
1896 }
1897