1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94 33 * $DragonFly: src/lib/libc/db/hash/hash_bigkey.c,v 1.9 2005/11/19 20:46:32 swildner Exp $ 34 */ 35 36 /* 37 * PACKAGE: hash 38 * DESCRIPTION: 39 * Big key/data handling for the hashing package. 40 * 41 * ROUTINES: 42 * External 43 * __big_keydata 44 * __big_split 45 * __big_insert 46 * __big_return 47 * __big_delete 48 * __find_last_page 49 * Internal 50 * collect_key 51 * collect_data 52 */ 53 54 #include <sys/param.h> 55 56 #include <errno.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 61 #ifdef DEBUG 62 #include <assert.h> 63 #endif 64 65 #include <db.h> 66 #include "hash.h" 67 #include "page.h" 68 #include "extern.h" 69 70 static int collect_key (HTAB *, BUFHEAD *, int, DBT *, int); 71 static int collect_data (HTAB *, BUFHEAD *, int, int); 72 73 /* 74 * Big_insert 75 * 76 * You need to do an insert and the key/data pair is too big 77 * 78 * Returns: 79 * 0 ==> OK 80 *-1 ==> ERROR 81 */ 82 extern int 83 __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 84 { 85 u_int16_t *p; 86 int key_size, n, val_size; 87 u_int16_t space, move_bytes, off; 88 char *cp, *key_data, *val_data; 89 90 cp = bufp->page; /* Character pointer of p. */ 91 p = (u_int16_t *)cp; 92 93 key_data = (char *)key->data; 94 key_size = key->size; 95 val_data = (char *)val->data; 96 val_size = val->size; 97 98 /* First move the Key */ 99 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 100 space = FREESPACE(p) - BIGOVERHEAD) { 101 move_bytes = MIN(space, key_size); 102 off = OFFSET(p) - move_bytes; 103 memmove(cp + off, key_data, move_bytes); 104 key_size -= move_bytes; 105 key_data += move_bytes; 106 n = p[0]; 107 p[++n] = off; 108 p[0] = ++n; 109 FREESPACE(p) = off - PAGE_META(n); 110 OFFSET(p) = off; 111 p[n] = PARTIAL_KEY; 112 bufp = __add_ovflpage(hashp, bufp); 113 if (!bufp) 114 return (-1); 115 n = p[0]; 116 if (!key_size) { 117 if (FREESPACE(p)) { 118 move_bytes = MIN(FREESPACE(p), val_size); 119 off = OFFSET(p) - move_bytes; 120 p[n] = off; 121 memmove(cp + off, val_data, move_bytes); 122 val_data += move_bytes; 123 val_size -= move_bytes; 124 p[n - 2] = FULL_KEY_DATA; 125 FREESPACE(p) = FREESPACE(p) - move_bytes; 126 OFFSET(p) = off; 127 } else 128 p[n - 2] = FULL_KEY; 129 } 130 p = (u_int16_t *)bufp->page; 131 cp = bufp->page; 132 bufp->flags |= BUF_MOD; 133 } 134 135 /* Now move the data */ 136 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 137 space = FREESPACE(p) - BIGOVERHEAD) { 138 move_bytes = MIN(space, val_size); 139 /* 140 * Here's the hack to make sure that if the data ends on the 141 * same page as the key ends, FREESPACE is at least one. 142 */ 143 if (space == val_size && val_size == val->size) 144 move_bytes--; 145 off = OFFSET(p) - move_bytes; 146 memmove(cp + off, val_data, move_bytes); 147 val_size -= move_bytes; 148 val_data += move_bytes; 149 n = p[0]; 150 p[++n] = off; 151 p[0] = ++n; 152 FREESPACE(p) = off - PAGE_META(n); 153 OFFSET(p) = off; 154 if (val_size) { 155 p[n] = FULL_KEY; 156 bufp = __add_ovflpage(hashp, bufp); 157 if (!bufp) 158 return (-1); 159 cp = bufp->page; 160 p = (u_int16_t *)cp; 161 } else 162 p[n] = FULL_KEY_DATA; 163 bufp->flags |= BUF_MOD; 164 } 165 return (0); 166 } 167 168 /* 169 * Called when bufp's page contains a partial key (index should be 1) 170 * 171 * All pages in the big key/data pair except bufp are freed. We cannot 172 * free bufp because the page pointing to it is lost and we can't get rid 173 * of its pointer. 174 * 175 * Returns: 176 * 0 => OK 177 *-1 => ERROR 178 */ 179 extern int 180 __big_delete(HTAB *hashp, BUFHEAD *bufp) 181 { 182 BUFHEAD *last_bfp, *rbufp; 183 u_int16_t *bp, pageno; 184 int key_done, n; 185 186 rbufp = bufp; 187 last_bfp = NULL; 188 bp = (u_int16_t *)bufp->page; 189 pageno = 0; 190 key_done = 0; 191 192 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 193 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 194 key_done = 1; 195 196 /* 197 * If there is freespace left on a FULL_KEY_DATA page, then 198 * the data is short and fits entirely on this page, and this 199 * is the last page. 200 */ 201 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 202 break; 203 pageno = bp[bp[0] - 1]; 204 rbufp->flags |= BUF_MOD; 205 rbufp = __get_buf(hashp, pageno, rbufp, 0); 206 if (last_bfp) 207 __free_ovflpage(hashp, last_bfp); 208 last_bfp = rbufp; 209 if (!rbufp) 210 return (-1); /* Error. */ 211 bp = (u_int16_t *)rbufp->page; 212 } 213 214 /* 215 * If we get here then rbufp points to the last page of the big 216 * key/data pair. Bufp points to the first one -- it should now be 217 * empty pointing to the next page after this pair. Can't free it 218 * because we don't have the page pointing to it. 219 */ 220 221 /* This is information from the last page of the pair. */ 222 n = bp[0]; 223 pageno = bp[n - 1]; 224 225 /* Now, bp is the first page of the pair. */ 226 bp = (u_int16_t *)bufp->page; 227 if (n > 2) { 228 /* There is an overflow page. */ 229 bp[1] = pageno; 230 bp[2] = OVFLPAGE; 231 bufp->ovfl = rbufp->ovfl; 232 } else 233 /* This is the last page. */ 234 bufp->ovfl = NULL; 235 n -= 2; 236 bp[0] = n; 237 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 238 OFFSET(bp) = hashp->BSIZE - 1; 239 240 bufp->flags |= BUF_MOD; 241 if (rbufp) 242 __free_ovflpage(hashp, rbufp); 243 if (last_bfp != rbufp) 244 __free_ovflpage(hashp, last_bfp); 245 246 hashp->NKEYS--; 247 return (0); 248 } 249 /* 250 * Returns: 251 * 0 = key not found 252 * -1 = get next overflow page 253 * -2 means key not found and this is big key/data 254 * -3 error 255 */ 256 extern int 257 __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, const char *key, int size) 258 { 259 u_int16_t *bp; 260 char *p; 261 int ksize; 262 u_int16_t bytes; 263 const char *kkey; 264 265 bp = (u_int16_t *)bufp->page; 266 p = bufp->page; 267 ksize = size; 268 kkey = key; 269 270 for (bytes = hashp->BSIZE - bp[ndx]; 271 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 272 bytes = hashp->BSIZE - bp[ndx]) { 273 if (memcmp(p + bp[ndx], kkey, bytes)) 274 return (-2); 275 kkey += bytes; 276 ksize -= bytes; 277 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 278 if (!bufp) 279 return (-3); 280 p = bufp->page; 281 bp = (u_int16_t *)p; 282 ndx = 1; 283 } 284 285 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 286 #ifdef HASH_STATISTICS 287 ++hash_collisions; 288 #endif 289 return (-2); 290 } else 291 return (ndx); 292 } 293 294 /* 295 * Given the buffer pointer of the first overflow page of a big pair, 296 * find the end of the big pair 297 * 298 * This will set bpp to the buffer header of the last page of the big pair. 299 * It will return the pageno of the overflow page following the last page 300 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 301 * bucket) 302 */ 303 extern u_int16_t 304 __find_last_page(HTAB *hashp, BUFHEAD **bpp) 305 { 306 BUFHEAD *bufp; 307 u_int16_t *bp, pageno; 308 int n; 309 310 bufp = *bpp; 311 bp = (u_int16_t *)bufp->page; 312 for (;;) { 313 n = bp[0]; 314 315 /* 316 * This is the last page if: the tag is FULL_KEY_DATA and 317 * either only 2 entries OVFLPAGE marker is explicit there 318 * is freespace on the page. 319 */ 320 if (bp[2] == FULL_KEY_DATA && 321 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 322 break; 323 324 pageno = bp[n - 1]; 325 bufp = __get_buf(hashp, pageno, bufp, 0); 326 if (!bufp) 327 return (0); /* Need to indicate an error! */ 328 bp = (u_int16_t *)bufp->page; 329 } 330 331 *bpp = bufp; 332 if (bp[0] > 2) 333 return (bp[3]); 334 else 335 return (0); 336 } 337 338 /* 339 * Return the data for the key/data pair that begins on this page at this 340 * index (index should always be 1). 341 */ 342 extern int 343 __big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) 344 { 345 BUFHEAD *save_p; 346 u_int16_t *bp, len, off, save_addr; 347 char *tp; 348 349 bp = (u_int16_t *)bufp->page; 350 while (bp[ndx + 1] == PARTIAL_KEY) { 351 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 352 if (!bufp) 353 return (-1); 354 bp = (u_int16_t *)bufp->page; 355 ndx = 1; 356 } 357 358 if (bp[ndx + 1] == FULL_KEY) { 359 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 360 if (!bufp) 361 return (-1); 362 bp = (u_int16_t *)bufp->page; 363 save_p = bufp; 364 save_addr = save_p->addr; 365 off = bp[1]; 366 len = 0; 367 } else 368 if (!FREESPACE(bp)) { 369 /* 370 * This is a hack. We can't distinguish between 371 * FULL_KEY_DATA that contains complete data or 372 * incomplete data, so we require that if the data 373 * is complete, there is at least 1 byte of free 374 * space left. 375 */ 376 off = bp[bp[0]]; 377 len = bp[1] - off; 378 save_p = bufp; 379 save_addr = bufp->addr; 380 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 381 if (!bufp) 382 return (-1); 383 bp = (u_int16_t *)bufp->page; 384 } else { 385 /* The data is all on one page. */ 386 tp = (char *)bp; 387 off = bp[bp[0]]; 388 val->data = (u_char *)tp + off; 389 val->size = bp[1] - off; 390 if (set_current) { 391 if (bp[0] == 2) { /* No more buckets in 392 * chain */ 393 hashp->cpage = NULL; 394 hashp->cbucket++; 395 hashp->cndx = 1; 396 } else { 397 hashp->cpage = __get_buf(hashp, 398 bp[bp[0] - 1], bufp, 0); 399 if (!hashp->cpage) 400 return (-1); 401 hashp->cndx = 1; 402 if (!((u_int16_t *) 403 hashp->cpage->page)[0]) { 404 hashp->cbucket++; 405 hashp->cpage = NULL; 406 } 407 } 408 } 409 return (0); 410 } 411 412 val->size = collect_data(hashp, bufp, (int)len, set_current); 413 if (val->size == -1) 414 return (-1); 415 if (save_p->addr != save_addr) { 416 /* We are pretty short on buffers. */ 417 errno = EINVAL; /* OUT OF BUFFERS */ 418 return (-1); 419 } 420 memmove(hashp->tmp_buf, (save_p->page) + off, len); 421 val->data = (u_char *)hashp->tmp_buf; 422 return (0); 423 } 424 /* 425 * Count how big the total datasize is by recursing through the pages. Then 426 * allocate a buffer and copy the data as you recurse up. 427 */ 428 static int 429 collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set) 430 { 431 u_int16_t *bp; 432 char *p; 433 BUFHEAD *xbp; 434 u_int16_t save_addr; 435 int mylen, totlen; 436 437 p = bufp->page; 438 bp = (u_int16_t *)p; 439 mylen = hashp->BSIZE - bp[1]; 440 save_addr = bufp->addr; 441 442 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 443 totlen = len + mylen; 444 if (hashp->tmp_buf) 445 free(hashp->tmp_buf); 446 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 447 return (-1); 448 if (set) { 449 hashp->cndx = 1; 450 if (bp[0] == 2) { /* No more buckets in chain */ 451 hashp->cpage = NULL; 452 hashp->cbucket++; 453 } else { 454 hashp->cpage = 455 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 456 if (!hashp->cpage) 457 return (-1); 458 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 459 hashp->cbucket++; 460 hashp->cpage = NULL; 461 } 462 } 463 } 464 } else { 465 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 466 if (!xbp || ((totlen = 467 collect_data(hashp, xbp, len + mylen, set)) < 1)) 468 return (-1); 469 } 470 if (bufp->addr != save_addr) { 471 errno = EINVAL; /* Out of buffers. */ 472 return (-1); 473 } 474 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 475 return (totlen); 476 } 477 478 /* 479 * Fill in the key and data for this big pair. 480 */ 481 extern int 482 __big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) 483 { 484 key->size = collect_key(hashp, bufp, 0, val, set); 485 if (key->size == -1) 486 return (-1); 487 key->data = (u_char *)hashp->tmp_key; 488 return (0); 489 } 490 491 /* 492 * Count how big the total key size is by recursing through the pages. Then 493 * collect the data, allocate a buffer and copy the key as you recurse up. 494 */ 495 static int 496 collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) 497 { 498 BUFHEAD *xbp; 499 char *p; 500 int mylen, totlen; 501 u_int16_t *bp, save_addr; 502 503 p = bufp->page; 504 bp = (u_int16_t *)p; 505 mylen = hashp->BSIZE - bp[1]; 506 507 save_addr = bufp->addr; 508 totlen = len + mylen; 509 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 510 if (hashp->tmp_key != NULL) 511 free(hashp->tmp_key); 512 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 513 return (-1); 514 if (__big_return(hashp, bufp, 1, val, set)) 515 return (-1); 516 } else { 517 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 518 if (!xbp || ((totlen = 519 collect_key(hashp, xbp, totlen, val, set)) < 1)) 520 return (-1); 521 } 522 if (bufp->addr != save_addr) { 523 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 524 return (-1); 525 } 526 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 527 return (totlen); 528 } 529 530 /* 531 * Parameters: 532 * op: Pointer to where to put keys that go in old bucket 533 * np: Pointer to new bucket page 534 * big_keyp: Pointer to first page containing the big key/data 535 * addr: Address of big_keyp 536 * obucket: Old Bucket 537 * Returns: 538 * 0 => OK 539 * -1 => error 540 */ 541 extern int 542 __big_split(HTAB *hashp, BUFHEAD *op, BUFHEAD *np, BUFHEAD *big_keyp, 543 int addr, u_int32_t obucket, SPLIT_RETURN *ret) 544 { 545 BUFHEAD *tmpp; 546 u_int16_t *tp; 547 BUFHEAD *bp; 548 DBT key, val; 549 u_int32_t change; 550 u_int16_t free_space, n, off; 551 552 bp = big_keyp; 553 554 /* Now figure out where the big key/data goes */ 555 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 556 return (-1); 557 change = (__call_hash(hashp, key.data, key.size) != obucket); 558 559 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 560 if (!(ret->nextp = 561 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 562 return (-1);; 563 } else 564 ret->nextp = NULL; 565 566 /* Now make one of np/op point to the big key/data pair */ 567 #ifdef DEBUG 568 assert(np->ovfl == NULL); 569 #endif 570 if (change) 571 tmpp = np; 572 else 573 tmpp = op; 574 575 tmpp->flags |= BUF_MOD; 576 #ifdef DEBUG1 577 fprintf(stderr, 578 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 579 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 580 #endif 581 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 582 tp = (u_int16_t *)tmpp->page; 583 #ifdef DEBUG 584 assert(FREESPACE(tp) >= OVFLSIZE); 585 #endif 586 n = tp[0]; 587 off = OFFSET(tp); 588 free_space = FREESPACE(tp); 589 tp[++n] = (u_int16_t)addr; 590 tp[++n] = OVFLPAGE; 591 tp[0] = n; 592 OFFSET(tp) = off; 593 FREESPACE(tp) = free_space - OVFLSIZE; 594 595 /* 596 * Finally, set the new and old return values. BIG_KEYP contains a 597 * pointer to the last page of the big key_data pair. Make sure that 598 * big_keyp has no following page (2 elements) or create an empty 599 * following page. 600 */ 601 602 ret->newp = np; 603 ret->oldp = op; 604 605 tp = (u_int16_t *)big_keyp->page; 606 big_keyp->flags |= BUF_MOD; 607 if (tp[0] > 2) { 608 /* 609 * There may be either one or two offsets on this page. If 610 * there is one, then the overflow page is linked on normally 611 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 612 * the second offset and needs to get stuffed in after the 613 * next overflow page is added. 614 */ 615 n = tp[4]; 616 free_space = FREESPACE(tp); 617 off = OFFSET(tp); 618 tp[0] -= 2; 619 FREESPACE(tp) = free_space + OVFLSIZE; 620 OFFSET(tp) = off; 621 tmpp = __add_ovflpage(hashp, big_keyp); 622 if (!tmpp) 623 return (-1); 624 tp[4] = n; 625 } else 626 tmpp = big_keyp; 627 628 if (change) 629 ret->newp = tmpp; 630 else 631 ret->oldp = tmpp; 632 return (0); 633 } 634