1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94 33 * $FreeBSD: head/lib/libc/db/hash/hash_bigkey.c 190494 2009-03-28 06:47:05Z delphij $ 34 */ 35 36 /* 37 * PACKAGE: hash 38 * DESCRIPTION: 39 * Big key/data handling for the hashing package. 40 * 41 * ROUTINES: 42 * External 43 * __big_keydata 44 * __big_split 45 * __big_insert 46 * __big_return 47 * __big_delete 48 * __find_last_page 49 * Internal 50 * collect_key 51 * collect_data 52 */ 53 54 #include <sys/param.h> 55 56 #include <errno.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 61 #ifdef DEBUG 62 #include <assert.h> 63 #endif 64 65 #include <db.h> 66 #include "hash.h" 67 #include "page.h" 68 #include "extern.h" 69 70 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 71 static int collect_data(HTAB *, BUFHEAD *, int, int); 72 73 /* 74 * Big_insert 75 * 76 * You need to do an insert and the key/data pair is too big 77 * 78 * Returns: 79 * 0 ==> OK 80 *-1 ==> ERROR 81 */ 82 int 83 __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 84 { 85 uint16_t *p; 86 int key_size, n; 87 unsigned int val_size; 88 uint16_t space, move_bytes, off; 89 char *cp, *key_data, *val_data; 90 91 cp = bufp->page; /* Character pointer of p. */ 92 p = (uint16_t *)cp; 93 94 key_data = (char *)key->data; 95 key_size = key->size; 96 val_data = (char *)val->data; 97 val_size = val->size; 98 99 /* First move the Key */ 100 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 101 space = FREESPACE(p) - BIGOVERHEAD) { 102 move_bytes = MIN(space, key_size); 103 off = OFFSET(p) - move_bytes; 104 memmove(cp + off, key_data, move_bytes); 105 key_size -= move_bytes; 106 key_data += move_bytes; 107 n = p[0]; 108 p[++n] = off; 109 p[0] = ++n; 110 FREESPACE(p) = off - PAGE_META(n); 111 OFFSET(p) = off; 112 p[n] = PARTIAL_KEY; 113 bufp = __add_ovflpage(hashp, bufp); 114 if (!bufp) 115 return (-1); 116 n = p[0]; 117 if (!key_size) { 118 space = FREESPACE(p); 119 if (space) { 120 move_bytes = MIN(space, val_size); 121 /* 122 * If the data would fit exactly in the 123 * remaining space, we must overflow it to the 124 * next page; otherwise the invariant that the 125 * data must end on a page with FREESPACE 126 * non-zero would fail. 127 */ 128 if (space == val_size && val_size == val->size) 129 goto toolarge; 130 off = OFFSET(p) - move_bytes; 131 memmove(cp + off, val_data, move_bytes); 132 val_data += move_bytes; 133 val_size -= move_bytes; 134 p[n] = off; 135 p[n - 2] = FULL_KEY_DATA; 136 FREESPACE(p) = FREESPACE(p) - move_bytes; 137 OFFSET(p) = off; 138 } else { 139 toolarge: 140 p[n - 2] = FULL_KEY; 141 } 142 } 143 p = (uint16_t *)bufp->page; 144 cp = bufp->page; 145 bufp->flags |= BUF_MOD; 146 } 147 148 /* Now move the data */ 149 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 150 space = FREESPACE(p) - BIGOVERHEAD) { 151 move_bytes = MIN(space, val_size); 152 /* 153 * Here's the hack to make sure that if the data ends on the 154 * same page as the key ends, FREESPACE is at least one. 155 */ 156 if (space == val_size && val_size == val->size) 157 move_bytes--; 158 off = OFFSET(p) - move_bytes; 159 memmove(cp + off, val_data, move_bytes); 160 val_size -= move_bytes; 161 val_data += move_bytes; 162 n = p[0]; 163 p[++n] = off; 164 p[0] = ++n; 165 FREESPACE(p) = off - PAGE_META(n); 166 OFFSET(p) = off; 167 if (val_size) { 168 p[n] = FULL_KEY; 169 bufp = __add_ovflpage(hashp, bufp); 170 if (!bufp) 171 return (-1); 172 cp = bufp->page; 173 p = (uint16_t *)cp; 174 } else 175 p[n] = FULL_KEY_DATA; 176 bufp->flags |= BUF_MOD; 177 } 178 return (0); 179 } 180 181 /* 182 * Called when bufp's page contains a partial key (index should be 1) 183 * 184 * All pages in the big key/data pair except bufp are freed. We cannot 185 * free bufp because the page pointing to it is lost and we can't get rid 186 * of its pointer. 187 * 188 * Returns: 189 * 0 => OK 190 *-1 => ERROR 191 */ 192 int 193 __big_delete(HTAB *hashp, BUFHEAD *bufp) 194 { 195 BUFHEAD *last_bfp, *rbufp; 196 uint16_t *bp, pageno; 197 int key_done, n; 198 199 rbufp = bufp; 200 last_bfp = NULL; 201 bp = (uint16_t *)bufp->page; 202 pageno = 0; 203 key_done = 0; 204 205 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 206 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 207 key_done = 1; 208 209 /* 210 * If there is freespace left on a FULL_KEY_DATA page, then 211 * the data is short and fits entirely on this page, and this 212 * is the last page. 213 */ 214 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 215 break; 216 pageno = bp[bp[0] - 1]; 217 rbufp->flags |= BUF_MOD; 218 rbufp = __get_buf(hashp, pageno, rbufp, 0); 219 if (last_bfp) 220 __free_ovflpage(hashp, last_bfp); 221 last_bfp = rbufp; 222 if (!rbufp) 223 return (-1); /* Error. */ 224 bp = (uint16_t *)rbufp->page; 225 } 226 227 /* 228 * If we get here then rbufp points to the last page of the big 229 * key/data pair. Bufp points to the first one -- it should now be 230 * empty pointing to the next page after this pair. Can't free it 231 * because we don't have the page pointing to it. 232 */ 233 234 /* This is information from the last page of the pair. */ 235 n = bp[0]; 236 pageno = bp[n - 1]; 237 238 /* Now, bp is the first page of the pair. */ 239 bp = (uint16_t *)bufp->page; 240 if (n > 2) { 241 /* There is an overflow page. */ 242 bp[1] = pageno; 243 bp[2] = OVFLPAGE; 244 bufp->ovfl = rbufp->ovfl; 245 } else 246 /* This is the last page. */ 247 bufp->ovfl = NULL; 248 n -= 2; 249 bp[0] = n; 250 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 251 OFFSET(bp) = hashp->BSIZE; 252 253 bufp->flags |= BUF_MOD; 254 if (rbufp) 255 __free_ovflpage(hashp, rbufp); 256 if (last_bfp && last_bfp != rbufp) 257 __free_ovflpage(hashp, last_bfp); 258 259 hashp->NKEYS--; 260 return (0); 261 } 262 /* 263 * Returns: 264 * 0 = key not found 265 * -1 = get next overflow page 266 * -2 means key not found and this is big key/data 267 * -3 error 268 */ 269 int 270 __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) 271 { 272 uint16_t *bp; 273 char *p; 274 int ksize; 275 uint16_t bytes; 276 char *kkey; 277 278 bp = (uint16_t *)bufp->page; 279 p = bufp->page; 280 ksize = size; 281 kkey = key; 282 283 for (bytes = hashp->BSIZE - bp[ndx]; 284 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 285 bytes = hashp->BSIZE - bp[ndx]) { 286 if (memcmp(p + bp[ndx], kkey, bytes)) 287 return (-2); 288 kkey += bytes; 289 ksize -= bytes; 290 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 291 if (!bufp) 292 return (-3); 293 p = bufp->page; 294 bp = (uint16_t *)p; 295 ndx = 1; 296 } 297 298 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 299 #ifdef HASH_STATISTICS 300 ++hash_collisions; 301 #endif 302 return (-2); 303 } else 304 return (ndx); 305 } 306 307 /* 308 * Given the buffer pointer of the first overflow page of a big pair, 309 * find the end of the big pair 310 * 311 * This will set bpp to the buffer header of the last page of the big pair. 312 * It will return the pageno of the overflow page following the last page 313 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 314 * bucket) 315 */ 316 uint16_t 317 __find_last_page(HTAB *hashp, BUFHEAD **bpp) 318 { 319 BUFHEAD *bufp; 320 uint16_t *bp, pageno; 321 int n; 322 323 bufp = *bpp; 324 bp = (uint16_t *)bufp->page; 325 for (;;) { 326 n = bp[0]; 327 328 /* 329 * This is the last page if: the tag is FULL_KEY_DATA and 330 * either only 2 entries OVFLPAGE marker is explicit there 331 * is freespace on the page. 332 */ 333 if (bp[2] == FULL_KEY_DATA && 334 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 335 break; 336 337 pageno = bp[n - 1]; 338 bufp = __get_buf(hashp, pageno, bufp, 0); 339 if (!bufp) 340 return (0); /* Need to indicate an error! */ 341 bp = (uint16_t *)bufp->page; 342 } 343 344 *bpp = bufp; 345 if (bp[0] > 2) 346 return (bp[3]); 347 else 348 return (0); 349 } 350 351 /* 352 * Return the data for the key/data pair that begins on this page at this 353 * index (index should always be 1). 354 */ 355 int 356 __big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) 357 { 358 BUFHEAD *save_p; 359 uint16_t *bp, len, off, save_addr; 360 char *tp; 361 362 bp = (uint16_t *)bufp->page; 363 while (bp[ndx + 1] == PARTIAL_KEY) { 364 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 365 if (!bufp) 366 return (-1); 367 bp = (uint16_t *)bufp->page; 368 ndx = 1; 369 } 370 371 if (bp[ndx + 1] == FULL_KEY) { 372 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 373 if (!bufp) 374 return (-1); 375 bp = (uint16_t *)bufp->page; 376 save_p = bufp; 377 save_addr = save_p->addr; 378 off = bp[1]; 379 len = 0; 380 } else 381 if (!FREESPACE(bp)) { 382 /* 383 * This is a hack. We can't distinguish between 384 * FULL_KEY_DATA that contains complete data or 385 * incomplete data, so we require that if the data 386 * is complete, there is at least 1 byte of free 387 * space left. 388 */ 389 off = bp[bp[0]]; 390 len = bp[1] - off; 391 save_p = bufp; 392 save_addr = bufp->addr; 393 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 394 if (!bufp) 395 return (-1); 396 bp = (uint16_t *)bufp->page; 397 } else { 398 /* The data is all on one page. */ 399 tp = (char *)bp; 400 off = bp[bp[0]]; 401 val->data = (unsigned char *)tp + off; 402 val->size = bp[1] - off; 403 if (set_current) { 404 if (bp[0] == 2) { /* No more buckets in 405 * chain */ 406 hashp->cpage = NULL; 407 hashp->cbucket++; 408 hashp->cndx = 1; 409 } else { 410 hashp->cpage = __get_buf(hashp, 411 bp[bp[0] - 1], bufp, 0); 412 if (!hashp->cpage) 413 return (-1); 414 hashp->cndx = 1; 415 if (!((uint16_t *) 416 hashp->cpage->page)[0]) { 417 hashp->cbucket++; 418 hashp->cpage = NULL; 419 } 420 } 421 } 422 return (0); 423 } 424 425 val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current); 426 if (val->size == (size_t)-1) 427 return (-1); 428 if (save_p->addr != save_addr) { 429 /* We are pretty short on buffers. */ 430 errno = EINVAL; /* OUT OF BUFFERS */ 431 return (-1); 432 } 433 memmove(hashp->tmp_buf, (save_p->page) + off, len); 434 val->data = (unsigned char *)hashp->tmp_buf; 435 return (0); 436 } 437 /* 438 * Count how big the total datasize is by recursing through the pages. Then 439 * allocate a buffer and copy the data as you recurse up. 440 */ 441 static int 442 collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set) 443 { 444 uint16_t *bp; 445 char *p; 446 BUFHEAD *xbp; 447 uint16_t save_addr; 448 int mylen, totlen; 449 450 p = bufp->page; 451 bp = (uint16_t *)p; 452 mylen = hashp->BSIZE - bp[1]; 453 save_addr = bufp->addr; 454 455 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 456 totlen = len + mylen; 457 if (hashp->tmp_buf) 458 free(hashp->tmp_buf); 459 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 460 return (-1); 461 if (set) { 462 hashp->cndx = 1; 463 if (bp[0] == 2) { /* No more buckets in chain */ 464 hashp->cpage = NULL; 465 hashp->cbucket++; 466 } else { 467 hashp->cpage = 468 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 469 if (!hashp->cpage) 470 return (-1); 471 else if (!((uint16_t *)hashp->cpage->page)[0]) { 472 hashp->cbucket++; 473 hashp->cpage = NULL; 474 } 475 } 476 } 477 } else { 478 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 479 if (!xbp || ((totlen = 480 collect_data(hashp, xbp, len + mylen, set)) < 1)) 481 return (-1); 482 } 483 if (bufp->addr != save_addr) { 484 errno = EINVAL; /* Out of buffers. */ 485 return (-1); 486 } 487 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 488 return (totlen); 489 } 490 491 /* 492 * Fill in the key and data for this big pair. 493 */ 494 int 495 __big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) 496 { 497 key->size = (size_t)collect_key(hashp, bufp, 0, val, set); 498 if (key->size == (size_t)-1) 499 return (-1); 500 key->data = (unsigned char *)hashp->tmp_key; 501 return (0); 502 } 503 504 /* 505 * Count how big the total key size is by recursing through the pages. Then 506 * collect the data, allocate a buffer and copy the key as you recurse up. 507 */ 508 static int 509 collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) 510 { 511 BUFHEAD *xbp; 512 char *p; 513 int mylen, totlen; 514 uint16_t *bp, save_addr; 515 516 p = bufp->page; 517 bp = (uint16_t *)p; 518 mylen = hashp->BSIZE - bp[1]; 519 520 save_addr = bufp->addr; 521 totlen = len + mylen; 522 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 523 if (hashp->tmp_key != NULL) 524 free(hashp->tmp_key); 525 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 526 return (-1); 527 if (__big_return(hashp, bufp, 1, val, set)) 528 return (-1); 529 } else { 530 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 531 if (!xbp || ((totlen = 532 collect_key(hashp, xbp, totlen, val, set)) < 1)) 533 return (-1); 534 } 535 if (bufp->addr != save_addr) { 536 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 537 return (-1); 538 } 539 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 540 return (totlen); 541 } 542 543 /* 544 * Returns: 545 * 0 => OK 546 * -1 => error 547 */ 548 int 549 __big_split(HTAB *hashp, 550 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ 551 BUFHEAD *np, /* Pointer to new bucket page */ 552 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */ 553 int addr, /* Address of big_keyp */ 554 uint32_t obucket, /* Old Bucket */ 555 SPLIT_RETURN *ret) 556 { 557 BUFHEAD *bp, *tmpp; 558 DBT key, val; 559 uint32_t change; 560 uint16_t free_space, n, off, *tp; 561 562 bp = big_keyp; 563 564 /* Now figure out where the big key/data goes */ 565 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 566 return (-1); 567 change = (__call_hash(hashp, key.data, key.size) != obucket); 568 569 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 570 if (!(ret->nextp = 571 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 572 return (-1); 573 } else 574 ret->nextp = NULL; 575 576 /* Now make one of np/op point to the big key/data pair */ 577 #ifdef DEBUG 578 assert(np->ovfl == NULL); 579 #endif 580 if (change) 581 tmpp = np; 582 else 583 tmpp = op; 584 585 tmpp->flags |= BUF_MOD; 586 #ifdef DEBUG1 587 fprintf(stderr, 588 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 589 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 590 #endif 591 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 592 tp = (uint16_t *)tmpp->page; 593 #ifdef DEBUG 594 assert(FREESPACE(tp) >= OVFLSIZE); 595 #endif 596 n = tp[0]; 597 off = OFFSET(tp); 598 free_space = FREESPACE(tp); 599 tp[++n] = (uint16_t)addr; 600 tp[++n] = OVFLPAGE; 601 tp[0] = n; 602 OFFSET(tp) = off; 603 FREESPACE(tp) = free_space - OVFLSIZE; 604 605 /* 606 * Finally, set the new and old return values. BIG_KEYP contains a 607 * pointer to the last page of the big key_data pair. Make sure that 608 * big_keyp has no following page (2 elements) or create an empty 609 * following page. 610 */ 611 612 ret->newp = np; 613 ret->oldp = op; 614 615 tp = (uint16_t *)big_keyp->page; 616 big_keyp->flags |= BUF_MOD; 617 if (tp[0] > 2) { 618 /* 619 * There may be either one or two offsets on this page. If 620 * there is one, then the overflow page is linked on normally 621 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 622 * the second offset and needs to get stuffed in after the 623 * next overflow page is added. 624 */ 625 n = tp[4]; 626 free_space = FREESPACE(tp); 627 off = OFFSET(tp); 628 tp[0] -= 2; 629 FREESPACE(tp) = free_space + OVFLSIZE; 630 OFFSET(tp) = off; 631 tmpp = __add_ovflpage(hashp, big_keyp); 632 if (!tmpp) 633 return (-1); 634 tp[4] = n; 635 } else 636 tmpp = big_keyp; 637 638 if (change) 639 ret->newp = tmpp; 640 else 641 ret->oldp = tmpp; 642 return (0); 643 } 644