1 /* $NetBSD: hash_bigkey.c,v 1.14 1999/07/29 07:48:03 mycroft Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Margo Seltzer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #if defined(LIBC_SCCS) && !defined(lint) 41 #if 0 42 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 43 #else 44 __RCSID("$NetBSD: hash_bigkey.c,v 1.14 1999/07/29 07:48:03 mycroft Exp $"); 45 #endif 46 #endif /* LIBC_SCCS and not lint */ 47 48 /* 49 * PACKAGE: hash 50 * DESCRIPTION: 51 * Big key/data handling for the hashing package. 52 * 53 * ROUTINES: 54 * External 55 * __big_keydata 56 * __big_split 57 * __big_insert 58 * __big_return 59 * __big_delete 60 * __find_last_page 61 * Internal 62 * collect_key 63 * collect_data 64 */ 65 66 #include <sys/param.h> 67 68 #include <errno.h> 69 #include <stdio.h> 70 #include <stdlib.h> 71 #include <string.h> 72 73 #ifdef DEBUG 74 #include <assert.h> 75 #endif 76 77 #include <db.h> 78 #include "hash.h" 79 #include "page.h" 80 #include "extern.h" 81 82 static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int)); 83 static int collect_data __P((HTAB *, BUFHEAD *, int, int)); 84 85 /* 86 * Big_insert 87 * 88 * You need to do an insert and the key/data pair is too big 89 * 90 * Returns: 91 * 0 ==> OK 92 *-1 ==> ERROR 93 */ 94 extern int 95 __big_insert(hashp, bufp, key, val) 96 HTAB *hashp; 97 BUFHEAD *bufp; 98 const DBT *key, *val; 99 { 100 register u_int16_t *p; 101 int key_size, n, val_size; 102 u_int16_t space, move_bytes, off; 103 char *cp, *key_data, *val_data; 104 105 cp = bufp->page; /* Character pointer of p. */ 106 p = (u_int16_t *)(void *)cp; 107 108 key_data = (char *)key->data; 109 key_size = key->size; 110 val_data = (char *)val->data; 111 val_size = val->size; 112 113 /* First move the Key */ 114 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 115 space = FREESPACE(p) - BIGOVERHEAD) { 116 move_bytes = MIN(space, key_size); 117 off = OFFSET(p) - move_bytes; 118 memmove(cp + off, key_data, (size_t)move_bytes); 119 key_size -= move_bytes; 120 key_data += move_bytes; 121 n = p[0]; 122 p[++n] = off; 123 p[0] = ++n; 124 FREESPACE(p) = off - PAGE_META(n); 125 OFFSET(p) = off; 126 p[n] = PARTIAL_KEY; 127 bufp = __add_ovflpage(hashp, bufp); 128 if (!bufp) 129 return (-1); 130 n = p[0]; 131 if (!key_size) { 132 space = FREESPACE(p); 133 if (space) { 134 move_bytes = MIN(space, val_size); 135 /* 136 * If the data would fit exactly in the 137 * remaining space, we must overflow it to the 138 * next page; otherwise the invariant that the 139 * data must end on a page with FREESPACE 140 * non-zero would fail. 141 */ 142 if (space == val_size && val_size == val->size) 143 goto toolarge; 144 off = OFFSET(p) - move_bytes; 145 memmove(cp + off, val_data, (size_t)move_bytes); 146 val_data += move_bytes; 147 val_size -= move_bytes; 148 p[n] = off; 149 p[n - 2] = FULL_KEY_DATA; 150 FREESPACE(p) = FREESPACE(p) - move_bytes; 151 OFFSET(p) = off; 152 } else { 153 toolarge: 154 p[n - 2] = FULL_KEY; 155 } 156 } 157 p = (u_int16_t *)(void *)bufp->page; 158 cp = bufp->page; 159 bufp->flags |= BUF_MOD; 160 } 161 162 /* Now move the data */ 163 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 164 space = FREESPACE(p) - BIGOVERHEAD) { 165 move_bytes = MIN(space, val_size); 166 /* 167 * Here's the hack to make sure that if the data ends on the 168 * same page as the key ends, FREESPACE is at least one. 169 */ 170 if (space == val_size && val_size == val->size) 171 move_bytes--; 172 off = OFFSET(p) - move_bytes; 173 memmove(cp + off, val_data, (size_t)move_bytes); 174 val_size -= move_bytes; 175 val_data += move_bytes; 176 n = p[0]; 177 p[++n] = off; 178 p[0] = ++n; 179 FREESPACE(p) = off - PAGE_META(n); 180 OFFSET(p) = off; 181 if (val_size) { 182 p[n] = FULL_KEY; 183 bufp = __add_ovflpage(hashp, bufp); 184 if (!bufp) 185 return (-1); 186 cp = bufp->page; 187 p = (u_int16_t *)(void *)cp; 188 } else 189 p[n] = FULL_KEY_DATA; 190 bufp->flags |= BUF_MOD; 191 } 192 return (0); 193 } 194 195 /* 196 * Called when bufp's page contains a partial key (index should be 1) 197 * 198 * All pages in the big key/data pair except bufp are freed. We cannot 199 * free bufp because the page pointing to it is lost and we can't get rid 200 * of its pointer. 201 * 202 * Returns: 203 * 0 => OK 204 *-1 => ERROR 205 */ 206 extern int 207 __big_delete(hashp, bufp) 208 HTAB *hashp; 209 BUFHEAD *bufp; 210 { 211 register BUFHEAD *last_bfp, *rbufp; 212 u_int16_t *bp, pageno; 213 int key_done, n; 214 215 rbufp = bufp; 216 last_bfp = NULL; 217 bp = (u_int16_t *)(void *)bufp->page; 218 pageno = 0; 219 key_done = 0; 220 221 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 222 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 223 key_done = 1; 224 225 /* 226 * If there is freespace left on a FULL_KEY_DATA page, then 227 * the data is short and fits entirely on this page, and this 228 * is the last page. 229 */ 230 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 231 break; 232 pageno = bp[bp[0] - 1]; 233 rbufp->flags |= BUF_MOD; 234 rbufp = __get_buf(hashp, (u_int32_t)pageno, rbufp, 0); 235 if (last_bfp) 236 __free_ovflpage(hashp, last_bfp); 237 last_bfp = rbufp; 238 if (!rbufp) 239 return (-1); /* Error. */ 240 bp = (u_int16_t *)(void *)rbufp->page; 241 } 242 243 /* 244 * If we get here then rbufp points to the last page of the big 245 * key/data pair. Bufp points to the first one -- it should now be 246 * empty pointing to the next page after this pair. Can't free it 247 * because we don't have the page pointing to it. 248 */ 249 250 /* This is information from the last page of the pair. */ 251 n = bp[0]; 252 pageno = bp[n - 1]; 253 254 /* Now, bp is the first page of the pair. */ 255 bp = (u_int16_t *)(void *)bufp->page; 256 if (n > 2) { 257 /* There is an overflow page. */ 258 bp[1] = pageno; 259 bp[2] = OVFLPAGE; 260 bufp->ovfl = rbufp->ovfl; 261 } else 262 /* This is the last page. */ 263 bufp->ovfl = NULL; 264 n -= 2; 265 bp[0] = n; 266 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 267 OFFSET(bp) = hashp->BSIZE; 268 269 bufp->flags |= BUF_MOD; 270 if (rbufp) 271 __free_ovflpage(hashp, rbufp); 272 if (last_bfp != rbufp) 273 __free_ovflpage(hashp, last_bfp); 274 275 hashp->NKEYS--; 276 return (0); 277 } 278 /* 279 * Returns: 280 * 0 = key not found 281 * -1 = get next overflow page 282 * -2 means key not found and this is big key/data 283 * -3 error 284 */ 285 extern int 286 __find_bigpair(hashp, bufp, ndx, key, size) 287 HTAB *hashp; 288 BUFHEAD *bufp; 289 int ndx; 290 char *key; 291 int size; 292 { 293 register u_int16_t *bp; 294 register char *p; 295 int ksize; 296 u_int16_t bytes; 297 char *kkey; 298 299 bp = (u_int16_t *)(void *)bufp->page; 300 p = bufp->page; 301 ksize = size; 302 kkey = key; 303 304 for (bytes = hashp->BSIZE - bp[ndx]; 305 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 306 bytes = hashp->BSIZE - bp[ndx]) { 307 if (memcmp(p + bp[ndx], kkey, (size_t)bytes)) 308 return (-2); 309 kkey += bytes; 310 ksize -= bytes; 311 bufp = __get_buf(hashp, (u_int32_t)bp[ndx + 2], bufp, 0); 312 if (!bufp) 313 return (-3); 314 p = bufp->page; 315 bp = (u_int16_t *)(void *)p; 316 ndx = 1; 317 } 318 319 if (bytes != ksize || memcmp(p + bp[ndx], kkey, (size_t)bytes)) { 320 #ifdef HASH_STATISTICS 321 ++hash_collisions; 322 #endif 323 return (-2); 324 } else 325 return (ndx); 326 } 327 328 /* 329 * Given the buffer pointer of the first overflow page of a big pair, 330 * find the end of the big pair 331 * 332 * This will set bpp to the buffer header of the last page of the big pair. 333 * It will return the pageno of the overflow page following the last page 334 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 335 * bucket) 336 */ 337 extern u_int16_t 338 __find_last_page(hashp, bpp) 339 HTAB *hashp; 340 BUFHEAD **bpp; 341 { 342 BUFHEAD *bufp; 343 u_int16_t *bp, pageno; 344 int n; 345 346 bufp = *bpp; 347 bp = (u_int16_t *)(void *)bufp->page; 348 for (;;) { 349 n = bp[0]; 350 351 /* 352 * This is the last page if: the tag is FULL_KEY_DATA and 353 * either only 2 entries OVFLPAGE marker is explicit there 354 * is freespace on the page. 355 */ 356 if (bp[2] == FULL_KEY_DATA && 357 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 358 break; 359 360 pageno = bp[n - 1]; 361 bufp = __get_buf(hashp, (u_int32_t)pageno, bufp, 0); 362 if (!bufp) 363 return (0); /* Need to indicate an error! */ 364 bp = (u_int16_t *)(void *)bufp->page; 365 } 366 367 *bpp = bufp; 368 if (bp[0] > 2) 369 return (bp[3]); 370 else 371 return (0); 372 } 373 374 /* 375 * Return the data for the key/data pair that begins on this page at this 376 * index (index should always be 1). 377 */ 378 extern int 379 __big_return(hashp, bufp, ndx, val, set_current) 380 HTAB *hashp; 381 BUFHEAD *bufp; 382 int ndx; 383 DBT *val; 384 int set_current; 385 { 386 BUFHEAD *save_p; 387 u_int16_t *bp, len, off, save_addr; 388 char *tp; 389 390 bp = (u_int16_t *)(void *)bufp->page; 391 while (bp[ndx + 1] == PARTIAL_KEY) { 392 bufp = __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], bufp, 0); 393 if (!bufp) 394 return (-1); 395 bp = (u_int16_t *)(void *)bufp->page; 396 ndx = 1; 397 } 398 399 if (bp[ndx + 1] == FULL_KEY) { 400 bufp = __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], bufp, 0); 401 if (!bufp) 402 return (-1); 403 bp = (u_int16_t *)(void *)bufp->page; 404 save_p = bufp; 405 save_addr = save_p->addr; 406 off = bp[1]; 407 len = 0; 408 } else 409 if (!FREESPACE(bp)) { 410 /* 411 * This is a hack. We can't distinguish between 412 * FULL_KEY_DATA that contains complete data or 413 * incomplete data, so we require that if the data 414 * is complete, there is at least 1 byte of free 415 * space left. 416 */ 417 off = bp[bp[0]]; 418 len = bp[1] - off; 419 save_p = bufp; 420 save_addr = bufp->addr; 421 bufp = __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], bufp, 422 0); 423 if (!bufp) 424 return (-1); 425 bp = (u_int16_t *)(void *)bufp->page; 426 } else { 427 /* The data is all on one page. */ 428 tp = (char *)(void *)bp; 429 off = bp[bp[0]]; 430 val->data = (u_char *)tp + off; 431 val->size = bp[1] - off; 432 if (set_current) { 433 if (bp[0] == 2) { /* No more buckets in 434 * chain */ 435 hashp->cpage = NULL; 436 hashp->cbucket++; 437 hashp->cndx = 1; 438 } else { 439 hashp->cpage = __get_buf(hashp, 440 (u_int32_t)bp[bp[0] - 1], bufp, 0); 441 if (!hashp->cpage) 442 return (-1); 443 hashp->cndx = 1; 444 if (!((u_int16_t *)(void *) 445 hashp->cpage->page)[0]) { 446 hashp->cbucket++; 447 hashp->cpage = NULL; 448 } 449 } 450 } 451 return (0); 452 } 453 454 val->size = collect_data(hashp, bufp, (int)len, set_current); 455 if (val->size == (size_t)-1) 456 return (-1); 457 if (save_p->addr != save_addr) { 458 /* We are pretty short on buffers. */ 459 errno = EINVAL; /* OUT OF BUFFERS */ 460 return (-1); 461 } 462 memmove(hashp->tmp_buf, (save_p->page) + off, (size_t)len); 463 val->data = (u_char *)hashp->tmp_buf; 464 return (0); 465 } 466 /* 467 * Count how big the total datasize is by recursing through the pages. Then 468 * allocate a buffer and copy the data as you recurse up. 469 */ 470 static int 471 collect_data(hashp, bufp, len, set) 472 HTAB *hashp; 473 BUFHEAD *bufp; 474 int len, set; 475 { 476 register u_int16_t *bp; 477 register char *p; 478 BUFHEAD *xbp; 479 u_int16_t save_addr; 480 int mylen, totlen; 481 482 p = bufp->page; 483 bp = (u_int16_t *)(void *)p; 484 mylen = hashp->BSIZE - bp[1]; 485 save_addr = bufp->addr; 486 487 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 488 totlen = len + mylen; 489 if (hashp->tmp_buf) 490 free(hashp->tmp_buf); 491 if ((hashp->tmp_buf = (char *)malloc((size_t)totlen)) == NULL) 492 return (-1); 493 if (set) { 494 hashp->cndx = 1; 495 if (bp[0] == 2) { /* No more buckets in chain */ 496 hashp->cpage = NULL; 497 hashp->cbucket++; 498 } else { 499 hashp->cpage = 500 __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], 501 bufp, 0); 502 if (!hashp->cpage) 503 return (-1); 504 else if (!((u_int16_t *)(void *)hashp->cpage->page)[0]) { 505 hashp->cbucket++; 506 hashp->cpage = NULL; 507 } 508 } 509 } 510 } else { 511 xbp = __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], bufp, 0); 512 if (!xbp || ((totlen = 513 collect_data(hashp, xbp, len + mylen, set)) < 1)) 514 return (-1); 515 } 516 if (bufp->addr != save_addr) { 517 errno = EINVAL; /* Out of buffers. */ 518 return (-1); 519 } 520 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], (size_t)mylen); 521 return (totlen); 522 } 523 524 /* 525 * Fill in the key and data for this big pair. 526 */ 527 extern int 528 __big_keydata(hashp, bufp, key, val, set) 529 HTAB *hashp; 530 BUFHEAD *bufp; 531 DBT *key, *val; 532 int set; 533 { 534 key->size = collect_key(hashp, bufp, 0, val, set); 535 if (key->size == (size_t)-1) 536 return (-1); 537 key->data = (u_char *)hashp->tmp_key; 538 return (0); 539 } 540 541 /* 542 * Count how big the total key size is by recursing through the pages. Then 543 * collect the data, allocate a buffer and copy the key as you recurse up. 544 */ 545 static int 546 collect_key(hashp, bufp, len, val, set) 547 HTAB *hashp; 548 BUFHEAD *bufp; 549 int len; 550 DBT *val; 551 int set; 552 { 553 BUFHEAD *xbp; 554 char *p; 555 int mylen, totlen; 556 u_int16_t *bp, save_addr; 557 558 p = bufp->page; 559 bp = (u_int16_t *)(void *)p; 560 mylen = hashp->BSIZE - bp[1]; 561 562 save_addr = bufp->addr; 563 totlen = len + mylen; 564 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 565 if (hashp->tmp_key != NULL) 566 free(hashp->tmp_key); 567 if ((hashp->tmp_key = (char *)malloc((size_t)totlen)) == NULL) 568 return (-1); 569 if (__big_return(hashp, bufp, 1, val, set)) 570 return (-1); 571 } else { 572 xbp = __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], bufp, 0); 573 if (!xbp || ((totlen = 574 collect_key(hashp, xbp, totlen, val, set)) < 1)) 575 return (-1); 576 } 577 if (bufp->addr != save_addr) { 578 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 579 return (-1); 580 } 581 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], (size_t)mylen); 582 return (totlen); 583 } 584 585 /* 586 * Returns: 587 * 0 => OK 588 * -1 => error 589 */ 590 extern int 591 __big_split(hashp, op, np, big_keyp, addr, obucket, ret) 592 HTAB *hashp; 593 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 594 BUFHEAD *np; /* Pointer to new bucket page */ 595 /* Pointer to first page containing the big key/data */ 596 BUFHEAD *big_keyp; 597 int addr; /* Address of big_keyp */ 598 u_int32_t obucket;/* Old Bucket */ 599 SPLIT_RETURN *ret; 600 { 601 register BUFHEAD *tmpp; 602 register u_int16_t *tp; 603 BUFHEAD *bp; 604 DBT key, val; 605 u_int32_t change; 606 u_int16_t free_space, n, off; 607 608 bp = big_keyp; 609 610 /* Now figure out where the big key/data goes */ 611 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 612 return (-1); 613 change = (__call_hash(hashp, key.data, (int)key.size) != obucket); 614 615 if ((ret->next_addr = __find_last_page(hashp, &big_keyp)) != 0) { 616 if (!(ret->nextp = 617 __get_buf(hashp, (u_int32_t)ret->next_addr, big_keyp, 0))) 618 return (-1);; 619 } else 620 ret->nextp = NULL; 621 622 /* Now make one of np/op point to the big key/data pair */ 623 #ifdef DEBUG 624 assert(np->ovfl == NULL); 625 #endif 626 if (change) 627 tmpp = np; 628 else 629 tmpp = op; 630 631 tmpp->flags |= BUF_MOD; 632 #ifdef DEBUG1 633 (void)fprintf(stderr, 634 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 635 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 636 #endif 637 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 638 tp = (u_int16_t *)(void *)tmpp->page; 639 #ifdef DEBUG 640 assert(FREESPACE(tp) >= OVFLSIZE); 641 #endif 642 n = tp[0]; 643 off = OFFSET(tp); 644 free_space = FREESPACE(tp); 645 tp[++n] = (u_int16_t)addr; 646 tp[++n] = OVFLPAGE; 647 tp[0] = n; 648 OFFSET(tp) = off; 649 FREESPACE(tp) = free_space - OVFLSIZE; 650 651 /* 652 * Finally, set the new and old return values. BIG_KEYP contains a 653 * pointer to the last page of the big key_data pair. Make sure that 654 * big_keyp has no following page (2 elements) or create an empty 655 * following page. 656 */ 657 658 ret->newp = np; 659 ret->oldp = op; 660 661 tp = (u_int16_t *)(void *)big_keyp->page; 662 big_keyp->flags |= BUF_MOD; 663 if (tp[0] > 2) { 664 /* 665 * There may be either one or two offsets on this page. If 666 * there is one, then the overflow page is linked on normally 667 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 668 * the second offset and needs to get stuffed in after the 669 * next overflow page is added. 670 */ 671 n = tp[4]; 672 free_space = FREESPACE(tp); 673 off = OFFSET(tp); 674 tp[0] -= 2; 675 FREESPACE(tp) = free_space + OVFLSIZE; 676 OFFSET(tp) = off; 677 tmpp = __add_ovflpage(hashp, big_keyp); 678 if (!tmpp) 679 return (-1); 680 tp[4] = n; 681 } else 682 tmpp = big_keyp; 683 684 if (change) 685 ret->newp = tmpp; 686 else 687 ret->oldp = tmpp; 688 return (0); 689 } 690