xref: /dragonfly/lib/libc/db/hash/hash_page.c (revision 17287016)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Margo Seltzer.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * @(#)hash_page.c	8.7 (Berkeley) 8/16/94
33  * $FreeBSD: head/lib/libc/db/hash/hash_page.c 190500 2009-03-28 07:44:08Z delphij $
34  */
35 
36 /*
37  * PACKAGE:  hashing
38  *
39  * DESCRIPTION:
40  *	Page manipulation for hashing package.
41  *
42  * ROUTINES:
43  *
44  * External
45  *	__get_page
46  *	__add_ovflpage
47  * Internal
48  *	overflow_page
49  *	open_temp
50  */
51 
52 #include "namespace.h"
53 #include <sys/param.h>
54 #include <sys/file.h>
55 
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <signal.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63 #ifdef DEBUG
64 #include <assert.h>
65 #endif
66 #include "un-namespace.h"
67 
68 #include <db.h>
69 #include "hash.h"
70 #include "page.h"
71 #include "extern.h"
72 
73 static uint32_t	*fetch_bitmap(HTAB *, int);
74 static uint32_t	 first_free(uint32_t);
75 static int	 open_temp(HTAB *);
76 static uint16_t	 overflow_page(HTAB *);
77 static void	 putpair(char *, const DBT *, const DBT *);
78 static void	 squeeze_key(uint16_t *, const DBT *, const DBT *);
79 static int	 ugly_split(HTAB *, uint32_t, BUFHEAD *, BUFHEAD *, int, int);
80 
81 #define	PAGE_INIT(P) {							\
82 	((uint16_t *)(P))[0] = 0;					\
83 	((uint16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(uint16_t);	\
84 	((uint16_t *)(P))[2] = hashp->BSIZE;				\
85 	}
86 
87 /*
88  * This is called AFTER we have verified that there is room on the page for
89  * the pair (PAIRFITS has returned true) so we go right ahead and start moving
90  * stuff on.
91  */
92 static void
93 putpair(char *p, const DBT *key, const DBT *val)
94 {
95 	uint16_t *bp, n, off;
96 
97 	bp = (uint16_t *)p;
98 
99 	/* Enter the key first. */
100 	n = bp[0];
101 
102 	off = OFFSET(bp) - key->size;
103 	memmove(p + off, key->data, key->size);
104 	bp[++n] = off;
105 
106 	/* Now the data. */
107 	off -= val->size;
108 	memmove(p + off, val->data, val->size);
109 	bp[++n] = off;
110 
111 	/* Adjust page info. */
112 	bp[0] = n;
113 	bp[n + 1] = off - ((n + 3) * sizeof(uint16_t));
114 	bp[n + 2] = off;
115 }
116 
117 /*
118  * Returns:
119  *	 0 OK
120  *	-1 error
121  */
122 int
123 __delpair(HTAB *hashp, BUFHEAD *bufp, int ndx)
124 {
125 	uint16_t *bp, newoff, pairlen;
126 	int n;
127 
128 	bp = (uint16_t *)bufp->page;
129 	n = bp[0];
130 
131 	if (bp[ndx + 1] < REAL_KEY)
132 		return (__big_delete(hashp, bufp));
133 	if (ndx != 1)
134 		newoff = bp[ndx - 1];
135 	else
136 		newoff = hashp->BSIZE;
137 	pairlen = newoff - bp[ndx + 1];
138 
139 	if (ndx != (n - 1)) {
140 		/* Hard Case -- need to shuffle keys */
141 		int i;
142 		char *src = bufp->page + (int)OFFSET(bp);
143 		char *dst = src + (int)pairlen;
144 		memmove(dst, src, bp[ndx + 1] - OFFSET(bp));
145 
146 		/* Now adjust the pointers */
147 		for (i = ndx + 2; i <= n; i += 2) {
148 			if (bp[i + 1] == OVFLPAGE) {
149 				bp[i - 2] = bp[i];
150 				bp[i - 1] = bp[i + 1];
151 			} else {
152 				bp[i - 2] = bp[i] + pairlen;
153 				bp[i - 1] = bp[i + 1] + pairlen;
154 			}
155 		}
156 		if (ndx == hashp->cndx) {
157 			/*
158 			 * We just removed pair we were "pointing" to.
159 			 * By moving back the cndx we ensure subsequent
160 			 * hash_seq() calls won't skip over any entries.
161 			 */
162 			hashp->cndx -= 2;
163 		}
164 	}
165 	/* Finally adjust the page data */
166 	bp[n] = OFFSET(bp) + pairlen;
167 	bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(uint16_t);
168 	bp[0] = n - 2;
169 	hashp->NKEYS--;
170 
171 	bufp->flags |= BUF_MOD;
172 	return (0);
173 }
174 /*
175  * Returns:
176  *	 0 ==> OK
177  *	-1 ==> Error
178  */
179 int
180 __split_page(HTAB *hashp, uint32_t obucket, uint32_t nbucket)
181 {
182 	BUFHEAD *new_bufp, *old_bufp;
183 	uint16_t *ino;
184 	char *np;
185 	DBT key, val;
186 	int n, ndx, retval;
187 	uint16_t copyto, diff, off, moved;
188 	char *op;
189 
190 	copyto = (uint16_t)hashp->BSIZE;
191 	off = (uint16_t)hashp->BSIZE;
192 	old_bufp = __get_buf(hashp, obucket, NULL, 0);
193 	if (old_bufp == NULL)
194 		return (-1);
195 	new_bufp = __get_buf(hashp, nbucket, NULL, 0);
196 	if (new_bufp == NULL)
197 		return (-1);
198 
199 	old_bufp->flags |= (BUF_MOD | BUF_PIN);
200 	new_bufp->flags |= (BUF_MOD | BUF_PIN);
201 
202 	ino = (uint16_t *)(op = old_bufp->page);
203 	np = new_bufp->page;
204 
205 	moved = 0;
206 
207 	for (n = 1, ndx = 1; n < ino[0]; n += 2) {
208 		if (ino[n + 1] < REAL_KEY) {
209 			retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
210 			    (int)copyto, (int)moved);
211 			old_bufp->flags &= ~BUF_PIN;
212 			new_bufp->flags &= ~BUF_PIN;
213 			return (retval);
214 
215 		}
216 		key.data = (unsigned char *)op + ino[n];
217 		key.size = off - ino[n];
218 
219 		if (__call_hash(hashp, key.data, key.size) == obucket) {
220 			/* Don't switch page */
221 			diff = copyto - off;
222 			if (diff) {
223 				copyto = ino[n + 1] + diff;
224 				memmove(op + copyto, op + ino[n + 1],
225 				    off - ino[n + 1]);
226 				ino[ndx] = copyto + ino[n] - ino[n + 1];
227 				ino[ndx + 1] = copyto;
228 			} else
229 				copyto = ino[n + 1];
230 			ndx += 2;
231 		} else {
232 			/* Switch page */
233 			val.data = (unsigned char *)op + ino[n + 1];
234 			val.size = ino[n] - ino[n + 1];
235 			putpair(np, &key, &val);
236 			moved += 2;
237 		}
238 
239 		off = ino[n + 1];
240 	}
241 
242 	/* Now clean up the page */
243 	ino[0] -= moved;
244 	FREESPACE(ino) = copyto - sizeof(uint16_t) * (ino[0] + 3);
245 	OFFSET(ino) = copyto;
246 
247 #ifdef DEBUG3
248 	fprintf(stderr, "split %d/%d\n",
249 	    ((uint16_t *)np)[0] / 2,
250 	    ((uint16_t *)op)[0] / 2);
251 #endif
252 	/* unpin both pages */
253 	old_bufp->flags &= ~BUF_PIN;
254 	new_bufp->flags &= ~BUF_PIN;
255 	return (0);
256 }
257 
258 /*
259  * Called when we encounter an overflow or big key/data page during split
260  * handling.  This is special cased since we have to begin checking whether
261  * the key/data pairs fit on their respective pages and because we may need
262  * overflow pages for both the old and new pages.
263  *
264  * The first page might be a page with regular key/data pairs in which case
265  * we have a regular overflow condition and just need to go on to the next
266  * page or it might be a big key/data pair in which case we need to fix the
267  * big key/data pair.
268  *
269  * Returns:
270  *	 0 ==> success
271  *	-1 ==> failure
272  */
273 static int
274 ugly_split(HTAB *hashp,
275     uint32_t obucket,	/* Same as __split_page. */
276     BUFHEAD *old_bufp,
277     BUFHEAD *new_bufp,
278     int copyto,		/* First byte on page which contains key/data values. */
279     int moved)		/* Number of pairs moved to new page. */
280 {
281 	BUFHEAD *bufp;	/* Buffer header for ino */
282 	uint16_t *ino;	/* Page keys come off of */
283 	uint16_t *np;	/* New page */
284 	uint16_t *op;	/* Page keys go on to if they aren't moving */
285 
286 	BUFHEAD *last_bfp;	/* Last buf header OVFL needing to be freed */
287 	DBT key, val;
288 	SPLIT_RETURN ret;
289 	uint16_t n, off, ov_addr, scopyto;
290 	char *cino;		/* Character value of ino */
291 
292 	bufp = old_bufp;
293 	ino = (uint16_t *)old_bufp->page;
294 	np = (uint16_t *)new_bufp->page;
295 	op = (uint16_t *)old_bufp->page;
296 	last_bfp = NULL;
297 	scopyto = (uint16_t)copyto;	/* ANSI */
298 
299 	n = ino[0] - 1;
300 	while (n < ino[0]) {
301 		if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
302 			if (__big_split(hashp, old_bufp,
303 			    new_bufp, bufp, bufp->addr, obucket, &ret))
304 				return (-1);
305 			old_bufp = ret.oldp;
306 			if (!old_bufp)
307 				return (-1);
308 			op = (uint16_t *)old_bufp->page;
309 			new_bufp = ret.newp;
310 			if (!new_bufp)
311 				return (-1);
312 			np = (uint16_t *)new_bufp->page;
313 			bufp = ret.nextp;
314 			if (!bufp)
315 				return (0);
316 			cino = (char *)bufp->page;
317 			ino = (uint16_t *)cino;
318 			last_bfp = ret.nextp;
319 		} else if (ino[n + 1] == OVFLPAGE) {
320 			ov_addr = ino[n];
321 			/*
322 			 * Fix up the old page -- the extra 2 are the fields
323 			 * which contained the overflow information.
324 			 */
325 			ino[0] -= (moved + 2);
326 			FREESPACE(ino) =
327 			    scopyto - sizeof(uint16_t) * (ino[0] + 3);
328 			OFFSET(ino) = scopyto;
329 
330 			bufp = __get_buf(hashp, ov_addr, bufp, 0);
331 			if (!bufp)
332 				return (-1);
333 
334 			ino = (uint16_t *)bufp->page;
335 			n = 1;
336 			scopyto = hashp->BSIZE;
337 			moved = 0;
338 
339 			if (last_bfp)
340 				__free_ovflpage(hashp, last_bfp);
341 			last_bfp = bufp;
342 		}
343 		/* Move regular sized pairs of there are any */
344 		off = hashp->BSIZE;
345 		for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
346 			cino = (char *)ino;
347 			key.data = (unsigned char *)cino + ino[n];
348 			key.size = off - ino[n];
349 			val.data = (unsigned char *)cino + ino[n + 1];
350 			val.size = ino[n] - ino[n + 1];
351 			off = ino[n + 1];
352 
353 			if (__call_hash(hashp, key.data, key.size) == obucket) {
354 				/* Keep on old page */
355 				if (PAIRFITS(op, (&key), (&val)))
356 					putpair((char *)op, &key, &val);
357 				else {
358 					old_bufp =
359 					    __add_ovflpage(hashp, old_bufp);
360 					if (!old_bufp)
361 						return (-1);
362 					op = (uint16_t *)old_bufp->page;
363 					putpair((char *)op, &key, &val);
364 				}
365 				old_bufp->flags |= BUF_MOD;
366 			} else {
367 				/* Move to new page */
368 				if (PAIRFITS(np, (&key), (&val)))
369 					putpair((char *)np, &key, &val);
370 				else {
371 					new_bufp =
372 					    __add_ovflpage(hashp, new_bufp);
373 					if (!new_bufp)
374 						return (-1);
375 					np = (uint16_t *)new_bufp->page;
376 					putpair((char *)np, &key, &val);
377 				}
378 				new_bufp->flags |= BUF_MOD;
379 			}
380 		}
381 	}
382 	if (last_bfp)
383 		__free_ovflpage(hashp, last_bfp);
384 	return (0);
385 }
386 
387 /*
388  * Add the given pair to the page
389  *
390  * Returns:
391  *	0 ==> OK
392  *	1 ==> failure
393  */
394 int
395 __addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
396 {
397 	uint16_t *bp, *sop;
398 	int do_expand;
399 
400 	bp = (uint16_t *)bufp->page;
401 	do_expand = 0;
402 	while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
403 		/* Exception case */
404 		if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
405 			/* This is the last page of a big key/data pair
406 			   and we need to add another page */
407 			break;
408 		else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
409 			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
410 			if (!bufp)
411 				return (-1);
412 			bp = (uint16_t *)bufp->page;
413 		} else if (bp[bp[0]] != OVFLPAGE) {
414 			/* Short key/data pairs, no more pages */
415 			break;
416 		} else {
417 			/* Try to squeeze key on this page */
418 			if (bp[2] >= REAL_KEY &&
419 			    FREESPACE(bp) >= PAIRSIZE(key, val)) {
420 				squeeze_key(bp, key, val);
421 				goto stats;
422 			} else {
423 				bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
424 				if (!bufp)
425 					return (-1);
426 				bp = (uint16_t *)bufp->page;
427 			}
428 		}
429 
430 	if (PAIRFITS(bp, key, val))
431 		putpair(bufp->page, key, val);
432 	else {
433 		do_expand = 1;
434 		bufp = __add_ovflpage(hashp, bufp);
435 		if (!bufp)
436 			return (-1);
437 		sop = (uint16_t *)bufp->page;
438 
439 		if (PAIRFITS(sop, key, val))
440 			putpair((char *)sop, key, val);
441 		else
442 			if (__big_insert(hashp, bufp, key, val))
443 				return (-1);
444 	}
445 stats:
446 	bufp->flags |= BUF_MOD;
447 	/*
448 	 * If the average number of keys per bucket exceeds the fill factor,
449 	 * expand the table.
450 	 */
451 	hashp->NKEYS++;
452 	if (do_expand ||
453 	    (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
454 		return (__expand_table(hashp));
455 	return (0);
456 }
457 
458 /*
459  *
460  * Returns:
461  *	pointer on success
462  *	NULL on error
463  */
464 BUFHEAD *
465 __add_ovflpage(HTAB *hashp, BUFHEAD *bufp)
466 {
467 	uint16_t *sp, ndx, ovfl_num;
468 #ifdef DEBUG1
469 	int tmp1, tmp2;
470 #endif
471 	sp = (uint16_t *)bufp->page;
472 
473 	/* Check if we are dynamically determining the fill factor */
474 	if (hashp->FFACTOR == DEF_FFACTOR) {
475 		hashp->FFACTOR = sp[0] >> 1;
476 		if (hashp->FFACTOR < MIN_FFACTOR)
477 			hashp->FFACTOR = MIN_FFACTOR;
478 	}
479 	bufp->flags |= BUF_MOD;
480 	ovfl_num = overflow_page(hashp);
481 #ifdef DEBUG1
482 	tmp1 = bufp->addr;
483 	tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
484 #endif
485 	if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1)))
486 		return (NULL);
487 	bufp->ovfl->flags |= BUF_MOD;
488 #ifdef DEBUG1
489 	fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
490 	    tmp1, tmp2, bufp->ovfl->addr);
491 #endif
492 	ndx = sp[0];
493 	/*
494 	 * Since a pair is allocated on a page only if there's room to add
495 	 * an overflow page, we know that the OVFL information will fit on
496 	 * the page.
497 	 */
498 	sp[ndx + 4] = OFFSET(sp);
499 	sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE;
500 	sp[ndx + 1] = ovfl_num;
501 	sp[ndx + 2] = OVFLPAGE;
502 	sp[0] = ndx + 2;
503 #ifdef HASH_STATISTICS
504 	hash_overflows++;
505 #endif
506 	return (bufp->ovfl);
507 }
508 
509 /*
510  * Returns:
511  *	 0 indicates SUCCESS
512  *	-1 indicates FAILURE
513  */
514 int
515 __get_page(HTAB *hashp, char *p, uint32_t bucket, int is_bucket, int is_disk,
516     int is_bitmap)
517 {
518 	int fd, page, size, rsize;
519 	uint16_t *bp;
520 
521 	fd = hashp->fp;
522 	size = hashp->BSIZE;
523 
524 	if ((fd == -1) || !is_disk) {
525 		PAGE_INIT(p);
526 		return (0);
527 	}
528 	if (is_bucket)
529 		page = BUCKET_TO_PAGE(bucket);
530 	else
531 		page = OADDR_TO_PAGE(bucket);
532 	if ((rsize = pread(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1)
533 		return (-1);
534 	bp = (uint16_t *)p;
535 	if (!rsize)
536 		bp[0] = 0;	/* We hit the EOF, so initialize a new page */
537 	else
538 		if (rsize != size) {
539 			errno = EFTYPE;
540 			return (-1);
541 		}
542 	if (!is_bitmap && !bp[0]) {
543 		PAGE_INIT(p);
544 	} else
545 		if (hashp->LORDER != BYTE_ORDER) {
546 			int i, max;
547 
548 			if (is_bitmap) {
549 				max = hashp->BSIZE >> 2; /* divide by 4 */
550 				for (i = 0; i < max; i++)
551 					M_32_SWAP(((int *)p)[i]);
552 			} else {
553 				M_16_SWAP(bp[0]);
554 				max = bp[0] + 2;
555 				for (i = 1; i <= max; i++)
556 					M_16_SWAP(bp[i]);
557 			}
558 		}
559 	return (0);
560 }
561 
562 /*
563  * Write page p to disk
564  *
565  * Returns:
566  *	 0 ==> OK
567  *	-1 ==>failure
568  */
569 int
570 __put_page(HTAB *hashp, char *p, uint32_t bucket, int is_bucket, int is_bitmap)
571 {
572 	int fd, page, size, wsize;
573 
574 	size = hashp->BSIZE;
575 	if ((hashp->fp == -1) && open_temp(hashp))
576 		return (-1);
577 	fd = hashp->fp;
578 
579 	if (hashp->LORDER != BYTE_ORDER) {
580 		int i, max;
581 
582 		if (is_bitmap) {
583 			max = hashp->BSIZE >> 2;	/* divide by 4 */
584 			for (i = 0; i < max; i++)
585 				M_32_SWAP(((int *)p)[i]);
586 		} else {
587 			max = ((uint16_t *)p)[0] + 2;
588 			for (i = 0; i <= max; i++)
589 				M_16_SWAP(((uint16_t *)p)[i]);
590 		}
591 	}
592 	if (is_bucket)
593 		page = BUCKET_TO_PAGE(bucket);
594 	else
595 		page = OADDR_TO_PAGE(bucket);
596 	if ((wsize = pwrite(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1)
597 		/* Errno is set */
598 		return (-1);
599 	if (wsize != size) {
600 		errno = EFTYPE;
601 		return (-1);
602 	}
603 	return (0);
604 }
605 
606 #define BYTE_MASK	((1 << INT_BYTE_SHIFT) -1)
607 /*
608  * Initialize a new bitmap page.  Bitmap pages are left in memory
609  * once they are read in.
610  */
611 int
612 __ibitmap(HTAB *hashp, int pnum, int nbits, int ndx)
613 {
614 	uint32_t *ip;
615 	int clearbytes, clearints;
616 
617 	if ((ip = (uint32_t *)malloc(hashp->BSIZE)) == NULL)
618 		return (1);
619 	hashp->nmaps++;
620 	clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
621 	clearbytes = clearints << INT_TO_BYTE;
622 	memset((char *)ip, 0, clearbytes);
623 	memset(((char *)ip) + clearbytes, 0xFF,
624 	    hashp->BSIZE - clearbytes);
625 	ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
626 	SETBIT(ip, 0);
627 	hashp->BITMAPS[ndx] = (uint16_t)pnum;
628 	hashp->mapp[ndx] = ip;
629 	return (0);
630 }
631 
632 static uint32_t
633 first_free(uint32_t map)
634 {
635 	uint32_t i, mask;
636 
637 	mask = 0x1;
638 	for (i = 0; i < BITS_PER_MAP; i++) {
639 		if (!(mask & map))
640 			return (i);
641 		mask = mask << 1;
642 	}
643 	return (i);
644 }
645 
646 static uint16_t
647 overflow_page(HTAB *hashp)
648 {
649 	uint32_t *freep;
650 	int max_free, offset, splitnum;
651 	uint16_t addr;
652 	int bit, first_page, free_bit, free_page, i, in_use_bits, j;
653 #ifdef DEBUG2
654 	int tmp1, tmp2;
655 #endif
656 	splitnum = hashp->OVFL_POINT;
657 	max_free = hashp->SPARES[splitnum];
658 
659 	free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
660 	free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
661 
662 	/* Look through all the free maps to find the first free block */
663 	first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
664 	for ( i = first_page; i <= free_page; i++ ) {
665 		if (!(freep = (uint32_t *)hashp->mapp[i]) &&
666 		    !(freep = fetch_bitmap(hashp, i)))
667 			return (0);
668 		if (i == free_page)
669 			in_use_bits = free_bit;
670 		else
671 			in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
672 
673 		if (i == first_page) {
674 			bit = hashp->LAST_FREED &
675 			    ((hashp->BSIZE << BYTE_SHIFT) - 1);
676 			j = bit / BITS_PER_MAP;
677 			bit = bit & ~(BITS_PER_MAP - 1);
678 		} else {
679 			bit = 0;
680 			j = 0;
681 		}
682 		for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
683 			if (freep[j] != ALL_SET)
684 				goto found;
685 	}
686 
687 	/* No Free Page Found */
688 	hashp->LAST_FREED = hashp->SPARES[splitnum];
689 	hashp->SPARES[splitnum]++;
690 	offset = hashp->SPARES[splitnum] -
691 	    (splitnum ? hashp->SPARES[splitnum - 1] : 0);
692 
693 #define	OVMSG	"HASH: Out of overflow pages.  Increase page size\n"
694 	if (offset > SPLITMASK) {
695 		if (++splitnum >= NCACHED) {
696 			_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
697 			errno = EFBIG;
698 			return (0);
699 		}
700 		hashp->OVFL_POINT = splitnum;
701 		hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
702 		hashp->SPARES[splitnum-1]--;
703 		offset = 1;
704 	}
705 
706 	/* Check if we need to allocate a new bitmap page */
707 	if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
708 		free_page++;
709 		if (free_page >= NCACHED) {
710 			_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
711 			errno = EFBIG;
712 			return (0);
713 		}
714 		/*
715 		 * This is tricky.  The 1 indicates that you want the new page
716 		 * allocated with 1 clear bit.  Actually, you are going to
717 		 * allocate 2 pages from this map.  The first is going to be
718 		 * the map page, the second is the overflow page we were
719 		 * looking for.  The init_bitmap routine automatically, sets
720 		 * the first bit of itself to indicate that the bitmap itself
721 		 * is in use.  We would explicitly set the second bit, but
722 		 * don't have to if we tell init_bitmap not to leave it clear
723 		 * in the first place.
724 		 */
725 		if (__ibitmap(hashp,
726 		    (int)OADDR_OF(splitnum, offset), 1, free_page))
727 			return (0);
728 		hashp->SPARES[splitnum]++;
729 #ifdef DEBUG2
730 		free_bit = 2;
731 #endif
732 		offset++;
733 		if (offset > SPLITMASK) {
734 			if (++splitnum >= NCACHED) {
735 				_write(STDERR_FILENO, OVMSG,
736 				    sizeof(OVMSG) - 1);
737 				errno = EFBIG;
738 				return (0);
739 			}
740 			hashp->OVFL_POINT = splitnum;
741 			hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
742 			hashp->SPARES[splitnum-1]--;
743 			offset = 0;
744 		}
745 	} else {
746 		/*
747 		 * Free_bit addresses the last used bit.  Bump it to address
748 		 * the first available bit.
749 		 */
750 		free_bit++;
751 		SETBIT(freep, free_bit);
752 	}
753 
754 	/* Calculate address of the new overflow page */
755 	addr = OADDR_OF(splitnum, offset);
756 #ifdef DEBUG2
757 	fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
758 	    addr, free_bit, free_page);
759 #endif
760 	return (addr);
761 
762 found:
763 	bit = bit + first_free(freep[j]);
764 	SETBIT(freep, bit);
765 #ifdef DEBUG2
766 	tmp1 = bit;
767 	tmp2 = i;
768 #endif
769 	/*
770 	 * Bits are addressed starting with 0, but overflow pages are addressed
771 	 * beginning at 1. Bit is a bit addressnumber, so we need to increment
772 	 * it to convert it to a page number.
773 	 */
774 	bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
775 	if (bit >= hashp->LAST_FREED)
776 		hashp->LAST_FREED = bit - 1;
777 
778 	/* Calculate the split number for this page */
779 	for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
780 	offset = (i ? bit - hashp->SPARES[i - 1] : bit);
781 	if (offset >= SPLITMASK) {
782 		_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
783 		errno = EFBIG;
784 		return (0);	/* Out of overflow pages */
785 	}
786 	addr = OADDR_OF(i, offset);
787 #ifdef DEBUG2
788 	fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
789 	    addr, tmp1, tmp2);
790 #endif
791 
792 	/* Allocate and return the overflow page */
793 	return (addr);
794 }
795 
796 /*
797  * Mark this overflow page as free.
798  */
799 void
800 __free_ovflpage(HTAB *hashp, BUFHEAD *obufp)
801 {
802 	uint16_t addr;
803 	uint32_t *freep;
804 	int bit_address, free_page, free_bit;
805 	uint16_t ndx;
806 
807 	addr = obufp->addr;
808 #ifdef DEBUG1
809 	fprintf(stderr, "Freeing %d\n", addr);
810 #endif
811 	ndx = (((uint16_t)addr) >> SPLITSHIFT);
812 	bit_address =
813 	    (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
814 	 if (bit_address < hashp->LAST_FREED)
815 		hashp->LAST_FREED = bit_address;
816 	free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
817 	free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
818 
819 	if (!(freep = hashp->mapp[free_page]))
820 		freep = fetch_bitmap(hashp, free_page);
821 #ifdef DEBUG
822 	/*
823 	 * This had better never happen.  It means we tried to read a bitmap
824 	 * that has already had overflow pages allocated off it, and we
825 	 * failed to read it from the file.
826 	 */
827 	if (!freep)
828 		assert(0);
829 #endif
830 	CLRBIT(freep, free_bit);
831 #ifdef DEBUG2
832 	fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
833 	    obufp->addr, free_bit, free_page);
834 #endif
835 	__reclaim_buf(hashp, obufp);
836 }
837 
838 /*
839  * Returns:
840  *	 0 success
841  *	-1 failure
842  */
843 static int
844 open_temp(HTAB *hashp)
845 {
846 	sigset_t set, oset;
847 	int len;
848 	char *envtmp = NULL;
849 	char path[MAXPATHLEN];
850 
851 	if (issetugid() == 0)
852 		envtmp = getenv("TMPDIR");
853 	len = snprintf(path,
854 	    sizeof(path), "%s/_hash.XXXXXX", envtmp ? envtmp : "/tmp");
855 	if (len < 0 || len >= (int)sizeof(path)) {
856 		errno = ENAMETOOLONG;
857 		return (-1);
858 	}
859 
860 	/* Block signals; make sure file goes away at process exit. */
861 	sigfillset(&set);
862 	_sigprocmask(SIG_BLOCK, &set, &oset);
863 	if ((hashp->fp = mkostemp(path, O_CLOEXEC)) != -1) {
864 		_unlink(path);
865 		_fcntl(hashp->fp, F_SETFD, 1);
866 	}
867 	_sigprocmask(SIG_SETMASK, &oset, NULL);
868 	return (hashp->fp != -1 ? 0 : -1);
869 }
870 
871 /*
872  * We have to know that the key will fit, but the last entry on the page is
873  * an overflow pair, so we need to shift things.
874  */
875 static void
876 squeeze_key(uint16_t *sp, const DBT *key, const DBT *val)
877 {
878 	char *p;
879 	uint16_t free_space, n, off, pageno;
880 
881 	p = (char *)sp;
882 	n = sp[0];
883 	free_space = FREESPACE(sp);
884 	off = OFFSET(sp);
885 
886 	pageno = sp[n - 1];
887 	off -= key->size;
888 	sp[n - 1] = off;
889 	memmove(p + off, key->data, key->size);
890 	off -= val->size;
891 	sp[n] = off;
892 	memmove(p + off, val->data, val->size);
893 	sp[0] = n + 2;
894 	sp[n + 1] = pageno;
895 	sp[n + 2] = OVFLPAGE;
896 	FREESPACE(sp) = free_space - PAIRSIZE(key, val);
897 	OFFSET(sp) = off;
898 }
899 
900 static uint32_t *
901 fetch_bitmap(HTAB *hashp, int ndx)
902 {
903 	if (ndx >= hashp->nmaps)
904 		return (NULL);
905 	if ((hashp->mapp[ndx] = (uint32_t *)malloc(hashp->BSIZE)) == NULL)
906 		return (NULL);
907 	if (__get_page(hashp,
908 	    (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) {
909 		free(hashp->mapp[ndx]);
910 		return (NULL);
911 	}
912 	return (hashp->mapp[ndx]);
913 }
914 
915 #ifdef DEBUG4
916 int
917 print_chain(int addr)
918 {
919 	BUFHEAD *bufp;
920 	short *bp, oaddr;
921 
922 	fprintf(stderr, "%d ", addr);
923 	bufp = __get_buf(hashp, addr, NULL, 0);
924 	bp = (short *)bufp->page;
925 	while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
926 		((bp[0] > 2) && bp[2] < REAL_KEY))) {
927 		oaddr = bp[bp[0] - 1];
928 		fprintf(stderr, "%d ", (int)oaddr);
929 		bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
930 		bp = (short *)bufp->page;
931 	}
932 	fprintf(stderr, "\n");
933 }
934 #endif
935