1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/param.h>
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94";
40 #endif /* LIBC_SCCS and not lint */
41 #include <sys/cdefs.h>
42
43 /* Macros for min/max. */
44 #define MIN(a,b) (((a)<(b))?(a):(b))
45 #define MAX(a,b) (((a)>(b))?(a):(b))
46
47 /*
48 * PACKAGE: hash
49 * DESCRIPTION:
50 * Big key/data handling for the hashing package.
51 *
52 * ROUTINES:
53 * External
54 * __big_keydata
55 * __big_split
56 * __big_insert
57 * __big_return
58 * __big_delete
59 * __find_last_page
60 * Internal
61 * collect_key
62 * collect_data
63 */
64
65 #include <sys/param.h>
66
67 #include <errno.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71
72 #ifdef DEBUG
73 #include <assert.h>
74 #endif
75
76 #include "db_local.h"
77 #include "hash.h"
78 #include "page.h"
79 #include "extern.h"
80
81 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int);
82 static int collect_data(HTAB *, BUFHEAD *, int, int);
83
84 /*
85 * Big_insert
86 *
87 * You need to do an insert and the key/data pair is too big
88 *
89 * Returns:
90 * 0 ==> OK
91 *-1 ==> ERROR
92 */
93 extern int
94 __big_insert(hashp, bufp, key, val)
95 HTAB *hashp;
96 BUFHEAD *bufp;
97 const DBT *key, *val;
98 {
99 __uint16_t *p;
100 int key_size, n, val_size;
101 __uint16_t space, move_bytes, off;
102 char *cp, *key_data, *val_data;
103
104 cp = bufp->page; /* Character pointer of p. */
105 p = (__uint16_t *)cp;
106
107 key_data = (char *)key->data;
108 key_size = key->size;
109 val_data = (char *)val->data;
110 val_size = val->size;
111
112 /* First move the Key */
113 for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
114 space = FREESPACE(p) - BIGOVERHEAD) {
115 move_bytes = MIN(space, key_size);
116 off = OFFSET(p) - move_bytes;
117 memmove(cp + off, key_data, move_bytes);
118 key_size -= move_bytes;
119 key_data += move_bytes;
120 n = p[0];
121 p[++n] = off;
122 p[0] = ++n;
123 FREESPACE(p) = off - PAGE_META(n);
124 OFFSET(p) = off;
125 p[n] = PARTIAL_KEY;
126 bufp = __add_ovflpage(hashp, bufp);
127 if (!bufp)
128 return (-1);
129 n = p[0];
130 if (!key_size)
131 if (FREESPACE(p)) {
132 move_bytes = MIN(FREESPACE(p), val_size);
133 off = OFFSET(p) - move_bytes;
134 p[n] = off;
135 memmove(cp + off, val_data, move_bytes);
136 val_data += move_bytes;
137 val_size -= move_bytes;
138 p[n - 2] = FULL_KEY_DATA;
139 FREESPACE(p) = FREESPACE(p) - move_bytes;
140 OFFSET(p) = off;
141 } else
142 p[n - 2] = FULL_KEY;
143 p = (__uint16_t *)bufp->page;
144 cp = bufp->page;
145 bufp->flags |= BUF_MOD;
146 }
147
148 /* Now move the data */
149 for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
150 space = FREESPACE(p) - BIGOVERHEAD) {
151 move_bytes = MIN(space, val_size);
152 /*
153 * Here's the hack to make sure that if the data ends on the
154 * same page as the key ends, FREESPACE is at least one.
155 */
156 if (space == val_size && val_size == val->size)
157 move_bytes--;
158 off = OFFSET(p) - move_bytes;
159 memmove(cp + off, val_data, move_bytes);
160 val_size -= move_bytes;
161 val_data += move_bytes;
162 n = p[0];
163 p[++n] = off;
164 p[0] = ++n;
165 FREESPACE(p) = off - PAGE_META(n);
166 OFFSET(p) = off;
167 if (val_size) {
168 p[n] = FULL_KEY;
169 bufp = __add_ovflpage(hashp, bufp);
170 if (!bufp)
171 return (-1);
172 cp = bufp->page;
173 p = (__uint16_t *)cp;
174 } else
175 p[n] = FULL_KEY_DATA;
176 bufp->flags |= BUF_MOD;
177 }
178 return (0);
179 }
180
181 /*
182 * Called when bufp's page contains a partial key (index should be 1)
183 *
184 * All pages in the big key/data pair except bufp are freed. We cannot
185 * free bufp because the page pointing to it is lost and we can't get rid
186 * of its pointer.
187 *
188 * Returns:
189 * 0 => OK
190 *-1 => ERROR
191 */
192 extern int
193 __big_delete(hashp, bufp)
194 HTAB *hashp;
195 BUFHEAD *bufp;
196 {
197 BUFHEAD *last_bfp, *rbufp;
198 __uint16_t *bp, pageno;
199 int key_done, n;
200
201 rbufp = bufp;
202 last_bfp = NULL;
203 bp = (__uint16_t *)bufp->page;
204 pageno = 0;
205 key_done = 0;
206
207 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
208 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
209 key_done = 1;
210
211 /*
212 * If there is freespace left on a FULL_KEY_DATA page, then
213 * the data is short and fits entirely on this page, and this
214 * is the last page.
215 */
216 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
217 break;
218 pageno = bp[bp[0] - 1];
219 rbufp->flags |= BUF_MOD;
220 rbufp = __get_buf(hashp, pageno, rbufp, 0);
221 if (last_bfp)
222 __free_ovflpage(hashp, last_bfp);
223 last_bfp = rbufp;
224 if (!rbufp)
225 return (-1); /* Error. */
226 bp = (__uint16_t *)rbufp->page;
227 }
228
229 /*
230 * If we get here then rbufp points to the last page of the big
231 * key/data pair. Bufp points to the first one -- it should now be
232 * empty pointing to the next page after this pair. Can't free it
233 * because we don't have the page pointing to it.
234 */
235
236 /* This is information from the last page of the pair. */
237 n = bp[0];
238 pageno = bp[n - 1];
239
240 /* Now, bp is the first page of the pair. */
241 bp = (__uint16_t *)bufp->page;
242 if (n > 2) {
243 /* There is an overflow page. */
244 bp[1] = pageno;
245 bp[2] = OVFLPAGE;
246 bufp->ovfl = rbufp->ovfl;
247 } else
248 /* This is the last page. */
249 bufp->ovfl = NULL;
250 n -= 2;
251 bp[0] = n;
252 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
253 OFFSET(bp) = hashp->BSIZE - 1;
254
255 bufp->flags |= BUF_MOD;
256 if (rbufp)
257 __free_ovflpage(hashp, rbufp);
258 if (last_bfp != rbufp)
259 __free_ovflpage(hashp, last_bfp);
260
261 hashp->NKEYS--;
262 return (0);
263 }
264 /*
265 * Returns:
266 * 0 = key not found
267 * -1 = get next overflow page
268 * -2 means key not found and this is big key/data
269 * -3 error
270 */
271 extern int
272 __find_bigpair(hashp, bufp, ndx, key, size)
273 HTAB *hashp;
274 BUFHEAD *bufp;
275 int ndx;
276 char *key;
277 int size;
278 {
279 __uint16_t *bp;
280 char *p;
281 int ksize;
282 __uint16_t bytes;
283 char *kkey;
284
285 bp = (__uint16_t *)bufp->page;
286 p = bufp->page;
287 ksize = size;
288 kkey = key;
289
290 for (bytes = hashp->BSIZE - bp[ndx];
291 bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
292 bytes = hashp->BSIZE - bp[ndx]) {
293 if (memcmp(p + bp[ndx], kkey, bytes))
294 return (-2);
295 kkey += bytes;
296 ksize -= bytes;
297 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
298 if (!bufp)
299 return (-3);
300 p = bufp->page;
301 bp = (__uint16_t *)p;
302 ndx = 1;
303 }
304
305 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
306 #ifdef HASH_STATISTICS
307 ++hash_collisions;
308 #endif
309 return (-2);
310 } else
311 return (ndx);
312 }
313
314 /*
315 * Given the buffer pointer of the first overflow page of a big pair,
316 * find the end of the big pair
317 *
318 * This will set bpp to the buffer header of the last page of the big pair.
319 * It will return the pageno of the overflow page following the last page
320 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
321 * bucket)
322 */
323 extern __uint16_t
324 __find_last_page(hashp, bpp)
325 HTAB *hashp;
326 BUFHEAD **bpp;
327 {
328 BUFHEAD *bufp;
329 __uint16_t *bp, pageno;
330 int n;
331
332 bufp = *bpp;
333 bp = (__uint16_t *)bufp->page;
334 for (;;) {
335 n = bp[0];
336
337 /*
338 * This is the last page if: the tag is FULL_KEY_DATA and
339 * either only 2 entries OVFLPAGE marker is explicit there
340 * is freespace on the page.
341 */
342 if (bp[2] == FULL_KEY_DATA &&
343 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
344 break;
345
346 pageno = bp[n - 1];
347 bufp = __get_buf(hashp, pageno, bufp, 0);
348 if (!bufp)
349 return (0); /* Need to indicate an error! */
350 bp = (__uint16_t *)bufp->page;
351 }
352
353 *bpp = bufp;
354 if (bp[0] > 2)
355 return (bp[3]);
356 else
357 return (0);
358 }
359
360 /*
361 * Return the data for the key/data pair that begins on this page at this
362 * index (index should always be 1).
363 */
364 extern int
365 __big_return(hashp, bufp, ndx, val, set_current)
366 HTAB *hashp;
367 BUFHEAD *bufp;
368 int ndx;
369 DBT *val;
370 int set_current;
371 {
372 BUFHEAD *save_p;
373 __uint16_t *bp, len, off, save_addr;
374 char *tp;
375
376 bp = (__uint16_t *)bufp->page;
377 while (bp[ndx + 1] == PARTIAL_KEY) {
378 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
379 if (!bufp)
380 return (-1);
381 bp = (__uint16_t *)bufp->page;
382 ndx = 1;
383 }
384
385 if (bp[ndx + 1] == FULL_KEY) {
386 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
387 if (!bufp)
388 return (-1);
389 bp = (__uint16_t *)bufp->page;
390 save_p = bufp;
391 save_addr = save_p->addr;
392 off = bp[1];
393 len = 0;
394 } else
395 if (!FREESPACE(bp)) {
396 /*
397 * This is a hack. We can't distinguish between
398 * FULL_KEY_DATA that contains complete data or
399 * incomplete data, so we require that if the data
400 * is complete, there is at least 1 byte of free
401 * space left.
402 */
403 off = bp[bp[0]];
404 len = bp[1] - off;
405 save_p = bufp;
406 save_addr = bufp->addr;
407 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
408 if (!bufp)
409 return (-1);
410 bp = (__uint16_t *)bufp->page;
411 } else {
412 /* The data is all on one page. */
413 tp = (char *)bp;
414 off = bp[bp[0]];
415 val->data = (u_char *)tp + off;
416 val->size = bp[1] - off;
417 if (set_current) {
418 if (bp[0] == 2) { /* No more buckets in
419 * chain */
420 hashp->cpage = NULL;
421 hashp->cbucket++;
422 hashp->cndx = 1;
423 } else {
424 hashp->cpage = __get_buf(hashp,
425 bp[bp[0] - 1], bufp, 0);
426 if (!hashp->cpage)
427 return (-1);
428 hashp->cndx = 1;
429 if (!((__uint16_t *)
430 hashp->cpage->page)[0]) {
431 hashp->cbucket++;
432 hashp->cpage = NULL;
433 }
434 }
435 }
436 return (0);
437 }
438
439 val->size = collect_data(hashp, bufp, (int)len, set_current);
440 if (val->size == -1)
441 return (-1);
442 if (save_p->addr != save_addr) {
443 /* We are pretty short on buffers. */
444 errno = EINVAL; /* OUT OF BUFFERS */
445 return (-1);
446 }
447 memmove(hashp->tmp_buf, (save_p->page) + off, len);
448 val->data = (u_char *)hashp->tmp_buf;
449 return (0);
450 }
451 /*
452 * Count how big the total datasize is by recursing through the pages. Then
453 * allocate a buffer and copy the data as you recurse up.
454 */
455 static int
collect_data(hashp,bufp,len,set)456 collect_data(hashp, bufp, len, set)
457 HTAB *hashp;
458 BUFHEAD *bufp;
459 int len, set;
460 {
461 __uint16_t *bp;
462 char *p;
463 BUFHEAD *xbp;
464 __uint16_t save_addr;
465 int mylen, totlen;
466
467 p = bufp->page;
468 bp = (__uint16_t *)p;
469 mylen = hashp->BSIZE - bp[1];
470 save_addr = bufp->addr;
471
472 if (bp[2] == FULL_KEY_DATA) { /* End of Data */
473 totlen = len + mylen;
474 if (hashp->tmp_buf)
475 free(hashp->tmp_buf);
476 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
477 return (-1);
478 if (set) {
479 hashp->cndx = 1;
480 if (bp[0] == 2) { /* No more buckets in chain */
481 hashp->cpage = NULL;
482 hashp->cbucket++;
483 } else {
484 hashp->cpage =
485 __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
486 if (!hashp->cpage)
487 return (-1);
488 else if (!((__uint16_t *)hashp->cpage->page)[0]) {
489 hashp->cbucket++;
490 hashp->cpage = NULL;
491 }
492 }
493 }
494 } else {
495 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
496 if (!xbp || ((totlen =
497 collect_data(hashp, xbp, len + mylen, set)) < 1))
498 return (-1);
499 }
500 if (bufp->addr != save_addr) {
501 errno = EINVAL; /* Out of buffers. */
502 return (-1);
503 }
504 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
505 return (totlen);
506 }
507
508 /*
509 * Fill in the key and data for this big pair.
510 */
511 extern int
512 __big_keydata(hashp, bufp, key, val, set)
513 HTAB *hashp;
514 BUFHEAD *bufp;
515 DBT *key, *val;
516 int set;
517 {
518 key->size = collect_key(hashp, bufp, 0, val, set);
519 if (key->size == -1)
520 return (-1);
521 key->data = (u_char *)hashp->tmp_key;
522 return (0);
523 }
524
525 /*
526 * Count how big the total key size is by recursing through the pages. Then
527 * collect the data, allocate a buffer and copy the key as you recurse up.
528 */
529 static int
collect_key(hashp,bufp,len,val,set)530 collect_key(hashp, bufp, len, val, set)
531 HTAB *hashp;
532 BUFHEAD *bufp;
533 int len;
534 DBT *val;
535 int set;
536 {
537 BUFHEAD *xbp;
538 char *p;
539 int mylen, totlen;
540 __uint16_t *bp, save_addr;
541
542 p = bufp->page;
543 bp = (__uint16_t *)p;
544 mylen = hashp->BSIZE - bp[1];
545
546 save_addr = bufp->addr;
547 totlen = len + mylen;
548 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
549 if (hashp->tmp_key != NULL)
550 free(hashp->tmp_key);
551 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
552 return (-1);
553 if (__big_return(hashp, bufp, 1, val, set))
554 return (-1);
555 } else {
556 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
557 if (!xbp || ((totlen =
558 collect_key(hashp, xbp, totlen, val, set)) < 1))
559 return (-1);
560 }
561 if (bufp->addr != save_addr) {
562 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
563 return (-1);
564 }
565 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
566 return (totlen);
567 }
568
569 /*
570 * Returns:
571 * 0 => OK
572 * -1 => error
573 */
574 extern int
575 __big_split(hashp, op, np, big_keyp, addr, obucket, ret)
576 HTAB *hashp;
577 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */
578 BUFHEAD *np; /* Pointer to new bucket page */
579 /* Pointer to first page containing the big key/data */
580 BUFHEAD *big_keyp;
581 int addr; /* Address of big_keyp */
582 __uint32_t obucket;/* Old Bucket */
583 SPLIT_RETURN *ret;
584 {
585 BUFHEAD *tmpp;
586 __uint16_t *tp;
587 BUFHEAD *bp;
588 DBT key, val;
589 __uint32_t change;
590 __uint16_t free_space, n, off;
591
592 bp = big_keyp;
593
594 /* Now figure out where the big key/data goes */
595 if (__big_keydata(hashp, big_keyp, &key, &val, 0))
596 return (-1);
597 change = (__call_hash(hashp, key.data, key.size) != obucket);
598
599 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
600 if (!(ret->nextp =
601 __get_buf(hashp, ret->next_addr, big_keyp, 0)))
602 return (-1);;
603 } else
604 ret->nextp = NULL;
605
606 /* Now make one of np/op point to the big key/data pair */
607 #ifdef DEBUG
608 assert(np->ovfl == NULL);
609 #endif
610 if (change)
611 tmpp = np;
612 else
613 tmpp = op;
614
615 tmpp->flags |= BUF_MOD;
616 #ifdef DEBUG1
617 (void)fprintf(stderr,
618 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
619 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
620 #endif
621 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
622 tp = (__uint16_t *)tmpp->page;
623 #ifdef DEBUG
624 assert(FREESPACE(tp) >= OVFLSIZE);
625 #endif
626 n = tp[0];
627 off = OFFSET(tp);
628 free_space = FREESPACE(tp);
629 tp[++n] = (__uint16_t)addr;
630 tp[++n] = OVFLPAGE;
631 tp[0] = n;
632 OFFSET(tp) = off;
633 FREESPACE(tp) = free_space - OVFLSIZE;
634
635 /*
636 * Finally, set the new and old return values. BIG_KEYP contains a
637 * pointer to the last page of the big key_data pair. Make sure that
638 * big_keyp has no following page (2 elements) or create an empty
639 * following page.
640 */
641
642 ret->newp = np;
643 ret->oldp = op;
644
645 tp = (__uint16_t *)big_keyp->page;
646 big_keyp->flags |= BUF_MOD;
647 if (tp[0] > 2) {
648 /*
649 * There may be either one or two offsets on this page. If
650 * there is one, then the overflow page is linked on normally
651 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
652 * the second offset and needs to get stuffed in after the
653 * next overflow page is added.
654 */
655 n = tp[4];
656 free_space = FREESPACE(tp);
657 off = OFFSET(tp);
658 tp[0] -= 2;
659 FREESPACE(tp) = free_space + OVFLSIZE;
660 OFFSET(tp) = off;
661 tmpp = __add_ovflpage(hashp, big_keyp);
662 if (!tmpp)
663 return (-1);
664 tp[4] = n;
665 } else
666 tmpp = big_keyp;
667
668 if (change)
669 ret->newp = tmpp;
670 else
671 ret->oldp = tmpp;
672 return (0);
673 }
674