1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94 33 * $DragonFly: src/lib/libc/db/hash/hash_bigkey.c,v 1.7 2005/09/19 09:20:37 asmodai Exp $ 34 */ 35 36 /* 37 * PACKAGE: hash 38 * DESCRIPTION: 39 * Big key/data handling for the hashing package. 40 * 41 * ROUTINES: 42 * External 43 * __big_keydata 44 * __big_split 45 * __big_insert 46 * __big_return 47 * __big_delete 48 * __find_last_page 49 * Internal 50 * collect_key 51 * collect_data 52 */ 53 54 #include <sys/param.h> 55 56 #include <errno.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 61 #ifdef DEBUG 62 #include <assert.h> 63 #endif 64 65 #include <db.h> 66 #include "hash.h" 67 #include "page.h" 68 #include "extern.h" 69 70 static int collect_key (HTAB *, BUFHEAD *, int, DBT *, int); 71 static int collect_data (HTAB *, BUFHEAD *, int, int); 72 73 /* 74 * Big_insert 75 * 76 * You need to do an insert and the key/data pair is too big 77 * 78 * Returns: 79 * 0 ==> OK 80 *-1 ==> ERROR 81 */ 82 extern int 83 __big_insert(hashp, bufp, key, val) 84 HTAB *hashp; 85 BUFHEAD *bufp; 86 const DBT *key, *val; 87 { 88 u_int16_t *p; 89 int key_size, n, val_size; 90 u_int16_t space, move_bytes, off; 91 char *cp, *key_data, *val_data; 92 93 cp = bufp->page; /* Character pointer of p. */ 94 p = (u_int16_t *)cp; 95 96 key_data = (char *)key->data; 97 key_size = key->size; 98 val_data = (char *)val->data; 99 val_size = val->size; 100 101 /* First move the Key */ 102 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 103 space = FREESPACE(p) - BIGOVERHEAD) { 104 move_bytes = MIN(space, key_size); 105 off = OFFSET(p) - move_bytes; 106 memmove(cp + off, key_data, move_bytes); 107 key_size -= move_bytes; 108 key_data += move_bytes; 109 n = p[0]; 110 p[++n] = off; 111 p[0] = ++n; 112 FREESPACE(p) = off - PAGE_META(n); 113 OFFSET(p) = off; 114 p[n] = PARTIAL_KEY; 115 bufp = __add_ovflpage(hashp, bufp); 116 if (!bufp) 117 return (-1); 118 n = p[0]; 119 if (!key_size) 120 if (FREESPACE(p)) { 121 move_bytes = MIN(FREESPACE(p), val_size); 122 off = OFFSET(p) - move_bytes; 123 p[n] = off; 124 memmove(cp + off, val_data, move_bytes); 125 val_data += move_bytes; 126 val_size -= move_bytes; 127 p[n - 2] = FULL_KEY_DATA; 128 FREESPACE(p) = FREESPACE(p) - move_bytes; 129 OFFSET(p) = off; 130 } else 131 p[n - 2] = FULL_KEY; 132 p = (u_int16_t *)bufp->page; 133 cp = bufp->page; 134 bufp->flags |= BUF_MOD; 135 } 136 137 /* Now move the data */ 138 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 139 space = FREESPACE(p) - BIGOVERHEAD) { 140 move_bytes = MIN(space, val_size); 141 /* 142 * Here's the hack to make sure that if the data ends on the 143 * same page as the key ends, FREESPACE is at least one. 144 */ 145 if (space == val_size && val_size == val->size) 146 move_bytes--; 147 off = OFFSET(p) - move_bytes; 148 memmove(cp + off, val_data, move_bytes); 149 val_size -= move_bytes; 150 val_data += move_bytes; 151 n = p[0]; 152 p[++n] = off; 153 p[0] = ++n; 154 FREESPACE(p) = off - PAGE_META(n); 155 OFFSET(p) = off; 156 if (val_size) { 157 p[n] = FULL_KEY; 158 bufp = __add_ovflpage(hashp, bufp); 159 if (!bufp) 160 return (-1); 161 cp = bufp->page; 162 p = (u_int16_t *)cp; 163 } else 164 p[n] = FULL_KEY_DATA; 165 bufp->flags |= BUF_MOD; 166 } 167 return (0); 168 } 169 170 /* 171 * Called when bufp's page contains a partial key (index should be 1) 172 * 173 * All pages in the big key/data pair except bufp are freed. We cannot 174 * free bufp because the page pointing to it is lost and we can't get rid 175 * of its pointer. 176 * 177 * Returns: 178 * 0 => OK 179 *-1 => ERROR 180 */ 181 extern int 182 __big_delete(hashp, bufp) 183 HTAB *hashp; 184 BUFHEAD *bufp; 185 { 186 BUFHEAD *last_bfp, *rbufp; 187 u_int16_t *bp, pageno; 188 int key_done, n; 189 190 rbufp = bufp; 191 last_bfp = NULL; 192 bp = (u_int16_t *)bufp->page; 193 pageno = 0; 194 key_done = 0; 195 196 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 197 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 198 key_done = 1; 199 200 /* 201 * If there is freespace left on a FULL_KEY_DATA page, then 202 * the data is short and fits entirely on this page, and this 203 * is the last page. 204 */ 205 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 206 break; 207 pageno = bp[bp[0] - 1]; 208 rbufp->flags |= BUF_MOD; 209 rbufp = __get_buf(hashp, pageno, rbufp, 0); 210 if (last_bfp) 211 __free_ovflpage(hashp, last_bfp); 212 last_bfp = rbufp; 213 if (!rbufp) 214 return (-1); /* Error. */ 215 bp = (u_int16_t *)rbufp->page; 216 } 217 218 /* 219 * If we get here then rbufp points to the last page of the big 220 * key/data pair. Bufp points to the first one -- it should now be 221 * empty pointing to the next page after this pair. Can't free it 222 * because we don't have the page pointing to it. 223 */ 224 225 /* This is information from the last page of the pair. */ 226 n = bp[0]; 227 pageno = bp[n - 1]; 228 229 /* Now, bp is the first page of the pair. */ 230 bp = (u_int16_t *)bufp->page; 231 if (n > 2) { 232 /* There is an overflow page. */ 233 bp[1] = pageno; 234 bp[2] = OVFLPAGE; 235 bufp->ovfl = rbufp->ovfl; 236 } else 237 /* This is the last page. */ 238 bufp->ovfl = NULL; 239 n -= 2; 240 bp[0] = n; 241 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 242 OFFSET(bp) = hashp->BSIZE - 1; 243 244 bufp->flags |= BUF_MOD; 245 if (rbufp) 246 __free_ovflpage(hashp, rbufp); 247 if (last_bfp != rbufp) 248 __free_ovflpage(hashp, last_bfp); 249 250 hashp->NKEYS--; 251 return (0); 252 } 253 /* 254 * Returns: 255 * 0 = key not found 256 * -1 = get next overflow page 257 * -2 means key not found and this is big key/data 258 * -3 error 259 */ 260 extern int 261 __find_bigpair(hashp, bufp, ndx, key, size) 262 HTAB *hashp; 263 BUFHEAD *bufp; 264 int ndx; 265 const char *key; 266 int size; 267 { 268 u_int16_t *bp; 269 char *p; 270 int ksize; 271 u_int16_t bytes; 272 const char *kkey; 273 274 bp = (u_int16_t *)bufp->page; 275 p = bufp->page; 276 ksize = size; 277 kkey = key; 278 279 for (bytes = hashp->BSIZE - bp[ndx]; 280 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 281 bytes = hashp->BSIZE - bp[ndx]) { 282 if (memcmp(p + bp[ndx], kkey, bytes)) 283 return (-2); 284 kkey += bytes; 285 ksize -= bytes; 286 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 287 if (!bufp) 288 return (-3); 289 p = bufp->page; 290 bp = (u_int16_t *)p; 291 ndx = 1; 292 } 293 294 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 295 #ifdef HASH_STATISTICS 296 ++hash_collisions; 297 #endif 298 return (-2); 299 } else 300 return (ndx); 301 } 302 303 /* 304 * Given the buffer pointer of the first overflow page of a big pair, 305 * find the end of the big pair 306 * 307 * This will set bpp to the buffer header of the last page of the big pair. 308 * It will return the pageno of the overflow page following the last page 309 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 310 * bucket) 311 */ 312 extern u_int16_t 313 __find_last_page(hashp, bpp) 314 HTAB *hashp; 315 BUFHEAD **bpp; 316 { 317 BUFHEAD *bufp; 318 u_int16_t *bp, pageno; 319 int n; 320 321 bufp = *bpp; 322 bp = (u_int16_t *)bufp->page; 323 for (;;) { 324 n = bp[0]; 325 326 /* 327 * This is the last page if: the tag is FULL_KEY_DATA and 328 * either only 2 entries OVFLPAGE marker is explicit there 329 * is freespace on the page. 330 */ 331 if (bp[2] == FULL_KEY_DATA && 332 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 333 break; 334 335 pageno = bp[n - 1]; 336 bufp = __get_buf(hashp, pageno, bufp, 0); 337 if (!bufp) 338 return (0); /* Need to indicate an error! */ 339 bp = (u_int16_t *)bufp->page; 340 } 341 342 *bpp = bufp; 343 if (bp[0] > 2) 344 return (bp[3]); 345 else 346 return (0); 347 } 348 349 /* 350 * Return the data for the key/data pair that begins on this page at this 351 * index (index should always be 1). 352 */ 353 extern int 354 __big_return(hashp, bufp, ndx, val, set_current) 355 HTAB *hashp; 356 BUFHEAD *bufp; 357 int ndx; 358 DBT *val; 359 int set_current; 360 { 361 BUFHEAD *save_p; 362 u_int16_t *bp, len, off, save_addr; 363 char *tp; 364 365 bp = (u_int16_t *)bufp->page; 366 while (bp[ndx + 1] == PARTIAL_KEY) { 367 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 368 if (!bufp) 369 return (-1); 370 bp = (u_int16_t *)bufp->page; 371 ndx = 1; 372 } 373 374 if (bp[ndx + 1] == FULL_KEY) { 375 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 376 if (!bufp) 377 return (-1); 378 bp = (u_int16_t *)bufp->page; 379 save_p = bufp; 380 save_addr = save_p->addr; 381 off = bp[1]; 382 len = 0; 383 } else 384 if (!FREESPACE(bp)) { 385 /* 386 * This is a hack. We can't distinguish between 387 * FULL_KEY_DATA that contains complete data or 388 * incomplete data, so we require that if the data 389 * is complete, there is at least 1 byte of free 390 * space left. 391 */ 392 off = bp[bp[0]]; 393 len = bp[1] - off; 394 save_p = bufp; 395 save_addr = bufp->addr; 396 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 397 if (!bufp) 398 return (-1); 399 bp = (u_int16_t *)bufp->page; 400 } else { 401 /* The data is all on one page. */ 402 tp = (char *)bp; 403 off = bp[bp[0]]; 404 val->data = (u_char *)tp + off; 405 val->size = bp[1] - off; 406 if (set_current) { 407 if (bp[0] == 2) { /* No more buckets in 408 * chain */ 409 hashp->cpage = NULL; 410 hashp->cbucket++; 411 hashp->cndx = 1; 412 } else { 413 hashp->cpage = __get_buf(hashp, 414 bp[bp[0] - 1], bufp, 0); 415 if (!hashp->cpage) 416 return (-1); 417 hashp->cndx = 1; 418 if (!((u_int16_t *) 419 hashp->cpage->page)[0]) { 420 hashp->cbucket++; 421 hashp->cpage = NULL; 422 } 423 } 424 } 425 return (0); 426 } 427 428 val->size = collect_data(hashp, bufp, (int)len, set_current); 429 if (val->size == -1) 430 return (-1); 431 if (save_p->addr != save_addr) { 432 /* We are pretty short on buffers. */ 433 errno = EINVAL; /* OUT OF BUFFERS */ 434 return (-1); 435 } 436 memmove(hashp->tmp_buf, (save_p->page) + off, len); 437 val->data = (u_char *)hashp->tmp_buf; 438 return (0); 439 } 440 /* 441 * Count how big the total datasize is by recursing through the pages. Then 442 * allocate a buffer and copy the data as you recurse up. 443 */ 444 static int 445 collect_data(hashp, bufp, len, set) 446 HTAB *hashp; 447 BUFHEAD *bufp; 448 int len, set; 449 { 450 u_int16_t *bp; 451 char *p; 452 BUFHEAD *xbp; 453 u_int16_t save_addr; 454 int mylen, totlen; 455 456 p = bufp->page; 457 bp = (u_int16_t *)p; 458 mylen = hashp->BSIZE - bp[1]; 459 save_addr = bufp->addr; 460 461 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 462 totlen = len + mylen; 463 if (hashp->tmp_buf) 464 free(hashp->tmp_buf); 465 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 466 return (-1); 467 if (set) { 468 hashp->cndx = 1; 469 if (bp[0] == 2) { /* No more buckets in chain */ 470 hashp->cpage = NULL; 471 hashp->cbucket++; 472 } else { 473 hashp->cpage = 474 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 475 if (!hashp->cpage) 476 return (-1); 477 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 478 hashp->cbucket++; 479 hashp->cpage = NULL; 480 } 481 } 482 } 483 } else { 484 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 485 if (!xbp || ((totlen = 486 collect_data(hashp, xbp, len + mylen, set)) < 1)) 487 return (-1); 488 } 489 if (bufp->addr != save_addr) { 490 errno = EINVAL; /* Out of buffers. */ 491 return (-1); 492 } 493 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 494 return (totlen); 495 } 496 497 /* 498 * Fill in the key and data for this big pair. 499 */ 500 extern int 501 __big_keydata(hashp, bufp, key, val, set) 502 HTAB *hashp; 503 BUFHEAD *bufp; 504 DBT *key, *val; 505 int set; 506 { 507 key->size = collect_key(hashp, bufp, 0, val, set); 508 if (key->size == -1) 509 return (-1); 510 key->data = (u_char *)hashp->tmp_key; 511 return (0); 512 } 513 514 /* 515 * Count how big the total key size is by recursing through the pages. Then 516 * collect the data, allocate a buffer and copy the key as you recurse up. 517 */ 518 static int 519 collect_key(hashp, bufp, len, val, set) 520 HTAB *hashp; 521 BUFHEAD *bufp; 522 int len; 523 DBT *val; 524 int set; 525 { 526 BUFHEAD *xbp; 527 char *p; 528 int mylen, totlen; 529 u_int16_t *bp, save_addr; 530 531 p = bufp->page; 532 bp = (u_int16_t *)p; 533 mylen = hashp->BSIZE - bp[1]; 534 535 save_addr = bufp->addr; 536 totlen = len + mylen; 537 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 538 if (hashp->tmp_key != NULL) 539 free(hashp->tmp_key); 540 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 541 return (-1); 542 if (__big_return(hashp, bufp, 1, val, set)) 543 return (-1); 544 } else { 545 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 546 if (!xbp || ((totlen = 547 collect_key(hashp, xbp, totlen, val, set)) < 1)) 548 return (-1); 549 } 550 if (bufp->addr != save_addr) { 551 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 552 return (-1); 553 } 554 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 555 return (totlen); 556 } 557 558 /* 559 * Returns: 560 * 0 => OK 561 * -1 => error 562 */ 563 extern int 564 __big_split(hashp, op, np, big_keyp, addr, obucket, ret) 565 HTAB *hashp; 566 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 567 BUFHEAD *np; /* Pointer to new bucket page */ 568 /* Pointer to first page containing the big key/data */ 569 BUFHEAD *big_keyp; 570 int addr; /* Address of big_keyp */ 571 u_int32_t obucket;/* Old Bucket */ 572 SPLIT_RETURN *ret; 573 { 574 BUFHEAD *tmpp; 575 u_int16_t *tp; 576 BUFHEAD *bp; 577 DBT key, val; 578 u_int32_t change; 579 u_int16_t free_space, n, off; 580 581 bp = big_keyp; 582 583 /* Now figure out where the big key/data goes */ 584 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 585 return (-1); 586 change = (__call_hash(hashp, key.data, key.size) != obucket); 587 588 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 589 if (!(ret->nextp = 590 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 591 return (-1);; 592 } else 593 ret->nextp = NULL; 594 595 /* Now make one of np/op point to the big key/data pair */ 596 #ifdef DEBUG 597 assert(np->ovfl == NULL); 598 #endif 599 if (change) 600 tmpp = np; 601 else 602 tmpp = op; 603 604 tmpp->flags |= BUF_MOD; 605 #ifdef DEBUG1 606 (void)fprintf(stderr, 607 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 608 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 609 #endif 610 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 611 tp = (u_int16_t *)tmpp->page; 612 #ifdef DEBUG 613 assert(FREESPACE(tp) >= OVFLSIZE); 614 #endif 615 n = tp[0]; 616 off = OFFSET(tp); 617 free_space = FREESPACE(tp); 618 tp[++n] = (u_int16_t)addr; 619 tp[++n] = OVFLPAGE; 620 tp[0] = n; 621 OFFSET(tp) = off; 622 FREESPACE(tp) = free_space - OVFLSIZE; 623 624 /* 625 * Finally, set the new and old return values. BIG_KEYP contains a 626 * pointer to the last page of the big key_data pair. Make sure that 627 * big_keyp has no following page (2 elements) or create an empty 628 * following page. 629 */ 630 631 ret->newp = np; 632 ret->oldp = op; 633 634 tp = (u_int16_t *)big_keyp->page; 635 big_keyp->flags |= BUF_MOD; 636 if (tp[0] > 2) { 637 /* 638 * There may be either one or two offsets on this page. If 639 * there is one, then the overflow page is linked on normally 640 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 641 * the second offset and needs to get stuffed in after the 642 * next overflow page is added. 643 */ 644 n = tp[4]; 645 free_space = FREESPACE(tp); 646 off = OFFSET(tp); 647 tp[0] -= 2; 648 FREESPACE(tp) = free_space + OVFLSIZE; 649 OFFSET(tp) = off; 650 tmpp = __add_ovflpage(hashp, big_keyp); 651 if (!tmpp) 652 return (-1); 653 tp[4] = n; 654 } else 655 tmpp = big_keyp; 656 657 if (change) 658 ret->newp = tmpp; 659 else 660 ret->oldp = tmpp; 661 return (0); 662 } 663