1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94
33 * $FreeBSD: head/lib/libc/db/hash/hash_bigkey.c 190494 2009-03-28 06:47:05Z delphij $
34 */
35
36 /*
37 * PACKAGE: hash
38 * DESCRIPTION:
39 * Big key/data handling for the hashing package.
40 *
41 * ROUTINES:
42 * External
43 * __big_keydata
44 * __big_split
45 * __big_insert
46 * __big_return
47 * __big_delete
48 * __find_last_page
49 * Internal
50 * collect_key
51 * collect_data
52 */
53
54 #include <sys/param.h>
55
56 #include <errno.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60
61 #ifdef DEBUG
62 #include <assert.h>
63 #endif
64
65 #include <db.h>
66 #include "hash.h"
67 #include "page.h"
68 #include "extern.h"
69
70 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int);
71 static int collect_data(HTAB *, BUFHEAD *, int, int);
72
73 /*
74 * Big_insert
75 *
76 * You need to do an insert and the key/data pair is too big
77 *
78 * Returns:
79 * 0 ==> OK
80 *-1 ==> ERROR
81 */
82 int
__big_insert(HTAB * hashp,BUFHEAD * bufp,const DBT * key,const DBT * val)83 __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
84 {
85 uint16_t *p;
86 int key_size, n;
87 unsigned int val_size;
88 uint16_t space, move_bytes, off;
89 char *cp, *key_data, *val_data;
90
91 cp = bufp->page; /* Character pointer of p. */
92 p = (uint16_t *)cp;
93
94 key_data = (char *)key->data;
95 key_size = key->size;
96 val_data = (char *)val->data;
97 val_size = val->size;
98
99 /* First move the Key */
100 for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
101 space = FREESPACE(p) - BIGOVERHEAD) {
102 move_bytes = MIN(space, key_size);
103 off = OFFSET(p) - move_bytes;
104 memmove(cp + off, key_data, move_bytes);
105 key_size -= move_bytes;
106 key_data += move_bytes;
107 n = p[0];
108 p[++n] = off;
109 p[0] = ++n;
110 FREESPACE(p) = off - PAGE_META(n);
111 OFFSET(p) = off;
112 p[n] = PARTIAL_KEY;
113 bufp = __add_ovflpage(hashp, bufp);
114 if (!bufp)
115 return (-1);
116 n = p[0];
117 if (!key_size) {
118 space = FREESPACE(p);
119 if (space) {
120 move_bytes = MIN(space, val_size);
121 /*
122 * If the data would fit exactly in the
123 * remaining space, we must overflow it to the
124 * next page; otherwise the invariant that the
125 * data must end on a page with FREESPACE
126 * non-zero would fail.
127 */
128 if (space == val_size && val_size == val->size)
129 goto toolarge;
130 off = OFFSET(p) - move_bytes;
131 memmove(cp + off, val_data, move_bytes);
132 val_data += move_bytes;
133 val_size -= move_bytes;
134 p[n] = off;
135 p[n - 2] = FULL_KEY_DATA;
136 FREESPACE(p) = FREESPACE(p) - move_bytes;
137 OFFSET(p) = off;
138 } else {
139 toolarge:
140 p[n - 2] = FULL_KEY;
141 }
142 }
143 p = (uint16_t *)bufp->page;
144 cp = bufp->page;
145 bufp->flags |= BUF_MOD;
146 }
147
148 /* Now move the data */
149 for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
150 space = FREESPACE(p) - BIGOVERHEAD) {
151 move_bytes = MIN(space, val_size);
152 /*
153 * Here's the hack to make sure that if the data ends on the
154 * same page as the key ends, FREESPACE is at least one.
155 */
156 if (space == val_size && val_size == val->size)
157 move_bytes--;
158 off = OFFSET(p) - move_bytes;
159 memmove(cp + off, val_data, move_bytes);
160 val_size -= move_bytes;
161 val_data += move_bytes;
162 n = p[0];
163 p[++n] = off;
164 p[0] = ++n;
165 FREESPACE(p) = off - PAGE_META(n);
166 OFFSET(p) = off;
167 if (val_size) {
168 p[n] = FULL_KEY;
169 bufp = __add_ovflpage(hashp, bufp);
170 if (!bufp)
171 return (-1);
172 cp = bufp->page;
173 p = (uint16_t *)cp;
174 } else
175 p[n] = FULL_KEY_DATA;
176 bufp->flags |= BUF_MOD;
177 }
178 return (0);
179 }
180
181 /*
182 * Called when bufp's page contains a partial key (index should be 1)
183 *
184 * All pages in the big key/data pair except bufp are freed. We cannot
185 * free bufp because the page pointing to it is lost and we can't get rid
186 * of its pointer.
187 *
188 * Returns:
189 * 0 => OK
190 *-1 => ERROR
191 */
192 int
__big_delete(HTAB * hashp,BUFHEAD * bufp)193 __big_delete(HTAB *hashp, BUFHEAD *bufp)
194 {
195 BUFHEAD *last_bfp, *rbufp;
196 uint16_t *bp, pageno;
197 int key_done, n;
198
199 rbufp = bufp;
200 last_bfp = NULL;
201 bp = (uint16_t *)bufp->page;
202 pageno = 0;
203 key_done = 0;
204
205 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
206 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
207 key_done = 1;
208
209 /*
210 * If there is freespace left on a FULL_KEY_DATA page, then
211 * the data is short and fits entirely on this page, and this
212 * is the last page.
213 */
214 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
215 break;
216 pageno = bp[bp[0] - 1];
217 rbufp->flags |= BUF_MOD;
218 rbufp = __get_buf(hashp, pageno, rbufp, 0);
219 if (last_bfp)
220 __free_ovflpage(hashp, last_bfp);
221 last_bfp = rbufp;
222 if (!rbufp)
223 return (-1); /* Error. */
224 bp = (uint16_t *)rbufp->page;
225 }
226
227 /*
228 * If we get here then rbufp points to the last page of the big
229 * key/data pair. Bufp points to the first one -- it should now be
230 * empty pointing to the next page after this pair. Can't free it
231 * because we don't have the page pointing to it.
232 */
233
234 /* This is information from the last page of the pair. */
235 n = bp[0];
236 pageno = bp[n - 1];
237
238 /* Now, bp is the first page of the pair. */
239 bp = (uint16_t *)bufp->page;
240 if (n > 2) {
241 /* There is an overflow page. */
242 bp[1] = pageno;
243 bp[2] = OVFLPAGE;
244 bufp->ovfl = rbufp->ovfl;
245 } else
246 /* This is the last page. */
247 bufp->ovfl = NULL;
248 n -= 2;
249 bp[0] = n;
250 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
251 OFFSET(bp) = hashp->BSIZE;
252
253 bufp->flags |= BUF_MOD;
254 if (rbufp)
255 __free_ovflpage(hashp, rbufp);
256 if (last_bfp && last_bfp != rbufp)
257 __free_ovflpage(hashp, last_bfp);
258
259 hashp->NKEYS--;
260 return (0);
261 }
262 /*
263 * Returns:
264 * 0 = key not found
265 * -1 = get next overflow page
266 * -2 means key not found and this is big key/data
267 * -3 error
268 */
269 int
__find_bigpair(HTAB * hashp,BUFHEAD * bufp,int ndx,char * key,int size)270 __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size)
271 {
272 uint16_t *bp;
273 char *p;
274 int ksize;
275 uint16_t bytes;
276 char *kkey;
277
278 bp = (uint16_t *)bufp->page;
279 p = bufp->page;
280 ksize = size;
281 kkey = key;
282
283 for (bytes = hashp->BSIZE - bp[ndx];
284 bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
285 bytes = hashp->BSIZE - bp[ndx]) {
286 if (memcmp(p + bp[ndx], kkey, bytes))
287 return (-2);
288 kkey += bytes;
289 ksize -= bytes;
290 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
291 if (!bufp)
292 return (-3);
293 p = bufp->page;
294 bp = (uint16_t *)p;
295 ndx = 1;
296 }
297
298 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
299 #ifdef HASH_STATISTICS
300 ++hash_collisions;
301 #endif
302 return (-2);
303 } else
304 return (ndx);
305 }
306
307 /*
308 * Given the buffer pointer of the first overflow page of a big pair,
309 * find the end of the big pair
310 *
311 * This will set bpp to the buffer header of the last page of the big pair.
312 * It will return the pageno of the overflow page following the last page
313 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
314 * bucket)
315 */
316 uint16_t
__find_last_page(HTAB * hashp,BUFHEAD ** bpp)317 __find_last_page(HTAB *hashp, BUFHEAD **bpp)
318 {
319 BUFHEAD *bufp;
320 uint16_t *bp, pageno;
321 int n;
322
323 bufp = *bpp;
324 bp = (uint16_t *)bufp->page;
325 for (;;) {
326 n = bp[0];
327
328 /*
329 * This is the last page if: the tag is FULL_KEY_DATA and
330 * either only 2 entries OVFLPAGE marker is explicit there
331 * is freespace on the page.
332 */
333 if (bp[2] == FULL_KEY_DATA &&
334 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
335 break;
336
337 pageno = bp[n - 1];
338 bufp = __get_buf(hashp, pageno, bufp, 0);
339 if (!bufp)
340 return (0); /* Need to indicate an error! */
341 bp = (uint16_t *)bufp->page;
342 }
343
344 *bpp = bufp;
345 if (bp[0] > 2)
346 return (bp[3]);
347 else
348 return (0);
349 }
350
351 /*
352 * Return the data for the key/data pair that begins on this page at this
353 * index (index should always be 1).
354 */
355 int
__big_return(HTAB * hashp,BUFHEAD * bufp,int ndx,DBT * val,int set_current)356 __big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current)
357 {
358 BUFHEAD *save_p;
359 uint16_t *bp, len, off, save_addr;
360 char *tp;
361
362 bp = (uint16_t *)bufp->page;
363 while (bp[ndx + 1] == PARTIAL_KEY) {
364 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
365 if (!bufp)
366 return (-1);
367 bp = (uint16_t *)bufp->page;
368 ndx = 1;
369 }
370
371 if (bp[ndx + 1] == FULL_KEY) {
372 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
373 if (!bufp)
374 return (-1);
375 bp = (uint16_t *)bufp->page;
376 save_p = bufp;
377 save_addr = save_p->addr;
378 off = bp[1];
379 len = 0;
380 } else
381 if (!FREESPACE(bp)) {
382 /*
383 * This is a hack. We can't distinguish between
384 * FULL_KEY_DATA that contains complete data or
385 * incomplete data, so we require that if the data
386 * is complete, there is at least 1 byte of free
387 * space left.
388 */
389 off = bp[bp[0]];
390 len = bp[1] - off;
391 save_p = bufp;
392 save_addr = bufp->addr;
393 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
394 if (!bufp)
395 return (-1);
396 bp = (uint16_t *)bufp->page;
397 } else {
398 /* The data is all on one page. */
399 tp = (char *)bp;
400 off = bp[bp[0]];
401 val->data = (unsigned char *)tp + off;
402 val->size = bp[1] - off;
403 if (set_current) {
404 if (bp[0] == 2) { /* No more buckets in
405 * chain */
406 hashp->cpage = NULL;
407 hashp->cbucket++;
408 hashp->cndx = 1;
409 } else {
410 hashp->cpage = __get_buf(hashp,
411 bp[bp[0] - 1], bufp, 0);
412 if (!hashp->cpage)
413 return (-1);
414 hashp->cndx = 1;
415 if (!((uint16_t *)
416 hashp->cpage->page)[0]) {
417 hashp->cbucket++;
418 hashp->cpage = NULL;
419 }
420 }
421 }
422 return (0);
423 }
424
425 val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current);
426 if (val->size == (size_t)-1)
427 return (-1);
428 if (save_p->addr != save_addr) {
429 /* We are pretty short on buffers. */
430 errno = EINVAL; /* OUT OF BUFFERS */
431 return (-1);
432 }
433 memmove(hashp->tmp_buf, (save_p->page) + off, len);
434 val->data = (unsigned char *)hashp->tmp_buf;
435 return (0);
436 }
437 /*
438 * Count how big the total datasize is by recursing through the pages. Then
439 * allocate a buffer and copy the data as you recurse up.
440 */
441 static int
collect_data(HTAB * hashp,BUFHEAD * bufp,int len,int set)442 collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set)
443 {
444 uint16_t *bp;
445 char *p;
446 BUFHEAD *xbp;
447 uint16_t save_addr;
448 int mylen, totlen;
449
450 p = bufp->page;
451 bp = (uint16_t *)p;
452 mylen = hashp->BSIZE - bp[1];
453 save_addr = bufp->addr;
454
455 if (bp[2] == FULL_KEY_DATA) { /* End of Data */
456 totlen = len + mylen;
457 if (hashp->tmp_buf)
458 free(hashp->tmp_buf);
459 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
460 return (-1);
461 if (set) {
462 hashp->cndx = 1;
463 if (bp[0] == 2) { /* No more buckets in chain */
464 hashp->cpage = NULL;
465 hashp->cbucket++;
466 } else {
467 hashp->cpage =
468 __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
469 if (!hashp->cpage)
470 return (-1);
471 else if (!((uint16_t *)hashp->cpage->page)[0]) {
472 hashp->cbucket++;
473 hashp->cpage = NULL;
474 }
475 }
476 }
477 } else {
478 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
479 if (!xbp || ((totlen =
480 collect_data(hashp, xbp, len + mylen, set)) < 1))
481 return (-1);
482 }
483 if (bufp->addr != save_addr) {
484 errno = EINVAL; /* Out of buffers. */
485 return (-1);
486 }
487 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
488 return (totlen);
489 }
490
491 /*
492 * Fill in the key and data for this big pair.
493 */
494 int
__big_keydata(HTAB * hashp,BUFHEAD * bufp,DBT * key,DBT * val,int set)495 __big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set)
496 {
497 key->size = (size_t)collect_key(hashp, bufp, 0, val, set);
498 if (key->size == (size_t)-1)
499 return (-1);
500 key->data = (unsigned char *)hashp->tmp_key;
501 return (0);
502 }
503
504 /*
505 * Count how big the total key size is by recursing through the pages. Then
506 * collect the data, allocate a buffer and copy the key as you recurse up.
507 */
508 static int
collect_key(HTAB * hashp,BUFHEAD * bufp,int len,DBT * val,int set)509 collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set)
510 {
511 BUFHEAD *xbp;
512 char *p;
513 int mylen, totlen;
514 uint16_t *bp, save_addr;
515
516 p = bufp->page;
517 bp = (uint16_t *)p;
518 mylen = hashp->BSIZE - bp[1];
519
520 save_addr = bufp->addr;
521 totlen = len + mylen;
522 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
523 if (hashp->tmp_key != NULL)
524 free(hashp->tmp_key);
525 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
526 return (-1);
527 if (__big_return(hashp, bufp, 1, val, set))
528 return (-1);
529 } else {
530 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
531 if (!xbp || ((totlen =
532 collect_key(hashp, xbp, totlen, val, set)) < 1))
533 return (-1);
534 }
535 if (bufp->addr != save_addr) {
536 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
537 return (-1);
538 }
539 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
540 return (totlen);
541 }
542
543 /*
544 * Returns:
545 * 0 => OK
546 * -1 => error
547 */
548 int
__big_split(HTAB * hashp,BUFHEAD * op,BUFHEAD * np,BUFHEAD * big_keyp,int addr,uint32_t obucket,SPLIT_RETURN * ret)549 __big_split(HTAB *hashp,
550 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */
551 BUFHEAD *np, /* Pointer to new bucket page */
552 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */
553 int addr, /* Address of big_keyp */
554 uint32_t obucket, /* Old Bucket */
555 SPLIT_RETURN *ret)
556 {
557 BUFHEAD *bp, *tmpp;
558 DBT key, val;
559 uint32_t change;
560 uint16_t free_space, n, off, *tp;
561
562 bp = big_keyp;
563
564 /* Now figure out where the big key/data goes */
565 if (__big_keydata(hashp, big_keyp, &key, &val, 0))
566 return (-1);
567 change = (__call_hash(hashp, key.data, key.size) != obucket);
568
569 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
570 if (!(ret->nextp =
571 __get_buf(hashp, ret->next_addr, big_keyp, 0)))
572 return (-1);
573 } else
574 ret->nextp = NULL;
575
576 /* Now make one of np/op point to the big key/data pair */
577 #ifdef DEBUG
578 assert(np->ovfl == NULL);
579 #endif
580 if (change)
581 tmpp = np;
582 else
583 tmpp = op;
584
585 tmpp->flags |= BUF_MOD;
586 #ifdef DEBUG1
587 fprintf(stderr,
588 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
589 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
590 #endif
591 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
592 tp = (uint16_t *)tmpp->page;
593 #ifdef DEBUG
594 assert(FREESPACE(tp) >= OVFLSIZE);
595 #endif
596 n = tp[0];
597 off = OFFSET(tp);
598 free_space = FREESPACE(tp);
599 tp[++n] = (uint16_t)addr;
600 tp[++n] = OVFLPAGE;
601 tp[0] = n;
602 OFFSET(tp) = off;
603 FREESPACE(tp) = free_space - OVFLSIZE;
604
605 /*
606 * Finally, set the new and old return values. BIG_KEYP contains a
607 * pointer to the last page of the big key_data pair. Make sure that
608 * big_keyp has no following page (2 elements) or create an empty
609 * following page.
610 */
611
612 ret->newp = np;
613 ret->oldp = op;
614
615 tp = (uint16_t *)big_keyp->page;
616 big_keyp->flags |= BUF_MOD;
617 if (tp[0] > 2) {
618 /*
619 * There may be either one or two offsets on this page. If
620 * there is one, then the overflow page is linked on normally
621 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
622 * the second offset and needs to get stuffed in after the
623 * next overflow page is added.
624 */
625 n = tp[4];
626 free_space = FREESPACE(tp);
627 off = OFFSET(tp);
628 tp[0] -= 2;
629 FREESPACE(tp) = free_space + OVFLSIZE;
630 OFFSET(tp) = off;
631 tmpp = __add_ovflpage(hashp, big_keyp);
632 if (!tmpp)
633 return (-1);
634 tp[4] = n;
635 } else
636 tmpp = big_keyp;
637
638 if (change)
639 ret->newp = tmpp;
640 else
641 ret->oldp = tmpp;
642 return (0);
643 }
644