1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)hash_bigkey.c 8.1 (Berkeley) 06/04/93"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * PACKAGE: hash 17 * DESCRIPTION: 18 * Big key/data handling for the hashing package. 19 * 20 * ROUTINES: 21 * External 22 * __big_keydata 23 * __big_split 24 * __big_insert 25 * __big_return 26 * __big_delete 27 * __find_last_page 28 * Internal 29 * collect_key 30 * collect_data 31 */ 32 33 #include <sys/param.h> 34 35 #include <errno.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 #ifdef DEBUG 41 #include <assert.h> 42 #endif 43 44 #include <db.h> 45 #include "hash.h" 46 #include "page.h" 47 #include "extern.h" 48 49 static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int)); 50 static int collect_data __P((HTAB *, BUFHEAD *, int, int)); 51 52 /* 53 * Big_insert 54 * 55 * You need to do an insert and the key/data pair is too big 56 * 57 * Returns: 58 * 0 ==> OK 59 *-1 ==> ERROR 60 */ 61 extern int 62 __big_insert(hashp, bufp, key, val) 63 HTAB *hashp; 64 BUFHEAD *bufp; 65 const DBT *key, *val; 66 { 67 register u_short *p; 68 int key_size, n, val_size; 69 u_short space, move_bytes, off; 70 char *cp, *key_data, *val_data; 71 72 cp = bufp->page; /* Character pointer of p. */ 73 p = (u_short *)cp; 74 75 key_data = (char *)key->data; 76 key_size = key->size; 77 val_data = (char *)val->data; 78 val_size = val->size; 79 80 /* First move the Key */ 81 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 82 space = FREESPACE(p) - BIGOVERHEAD) { 83 move_bytes = MIN(space, key_size); 84 off = OFFSET(p) - move_bytes; 85 memmove(cp + off, key_data, move_bytes); 86 key_size -= move_bytes; 87 key_data += move_bytes; 88 n = p[0]; 89 p[++n] = off; 90 p[0] = ++n; 91 FREESPACE(p) = off - PAGE_META(n); 92 OFFSET(p) = off; 93 p[n] = PARTIAL_KEY; 94 bufp = __add_ovflpage(hashp, bufp); 95 if (!bufp) 96 return (-1); 97 n = p[0]; 98 if (!key_size) 99 if (FREESPACE(p)) { 100 move_bytes = MIN(FREESPACE(p), val_size); 101 off = OFFSET(p) - move_bytes; 102 p[n] = off; 103 memmove(cp + off, val_data, move_bytes); 104 val_data += move_bytes; 105 val_size -= move_bytes; 106 p[n - 2] = FULL_KEY_DATA; 107 FREESPACE(p) = FREESPACE(p) - move_bytes; 108 OFFSET(p) = off; 109 } else 110 p[n - 2] = FULL_KEY; 111 p = (u_short *)bufp->page; 112 cp = bufp->page; 113 bufp->flags |= BUF_MOD; 114 } 115 116 /* Now move the data */ 117 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 118 space = FREESPACE(p) - BIGOVERHEAD) { 119 move_bytes = MIN(space, val_size); 120 /* 121 * Here's the hack to make sure that if the data ends on the 122 * same page as the key ends, FREESPACE is at least one. 123 */ 124 if (space == val_size && val_size == val->size) 125 move_bytes--; 126 off = OFFSET(p) - move_bytes; 127 memmove(cp + off, val_data, move_bytes); 128 val_size -= move_bytes; 129 val_data += move_bytes; 130 n = p[0]; 131 p[++n] = off; 132 p[0] = ++n; 133 FREESPACE(p) = off - PAGE_META(n); 134 OFFSET(p) = off; 135 if (val_size) { 136 p[n] = FULL_KEY; 137 bufp = __add_ovflpage(hashp, bufp); 138 if (!bufp) 139 return (-1); 140 cp = bufp->page; 141 p = (u_short *)cp; 142 } else 143 p[n] = FULL_KEY_DATA; 144 bufp->flags |= BUF_MOD; 145 } 146 return (0); 147 } 148 149 /* 150 * Called when bufp's page contains a partial key (index should be 1) 151 * 152 * All pages in the big key/data pair except bufp are freed. We cannot 153 * free bufp because the page pointing to it is lost and we can't get rid 154 * of its pointer. 155 * 156 * Returns: 157 * 0 => OK 158 *-1 => ERROR 159 */ 160 extern int 161 __big_delete(hashp, bufp) 162 HTAB *hashp; 163 BUFHEAD *bufp; 164 { 165 register BUFHEAD *last_bfp, *rbufp; 166 u_short *bp, pageno; 167 int key_done, n; 168 169 rbufp = bufp; 170 last_bfp = NULL; 171 bp = (u_short *)bufp->page; 172 pageno = 0; 173 key_done = 0; 174 175 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 176 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 177 key_done = 1; 178 179 /* 180 * If there is freespace left on a FULL_KEY_DATA page, then 181 * the data is short and fits entirely on this page, and this 182 * is the last page. 183 */ 184 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 185 break; 186 pageno = bp[bp[0] - 1]; 187 rbufp->flags |= BUF_MOD; 188 rbufp = __get_buf(hashp, pageno, rbufp, 0); 189 if (last_bfp) 190 __free_ovflpage(hashp, last_bfp); 191 last_bfp = rbufp; 192 if (!rbufp) 193 return (-1); /* Error. */ 194 bp = (u_short *)rbufp->page; 195 } 196 197 /* 198 * If we get here then rbufp points to the last page of the big 199 * key/data pair. Bufp points to the first one -- it should now be 200 * empty pointing to the next page after this pair. Can't free it 201 * because we don't have the page pointing to it. 202 */ 203 204 /* This is information from the last page of the pair. */ 205 n = bp[0]; 206 pageno = bp[n - 1]; 207 208 /* Now, bp is the first page of the pair. */ 209 bp = (u_short *)bufp->page; 210 if (n > 2) { 211 /* There is an overflow page. */ 212 bp[1] = pageno; 213 bp[2] = OVFLPAGE; 214 bufp->ovfl = rbufp->ovfl; 215 } else 216 /* This is the last page. */ 217 bufp->ovfl = NULL; 218 n -= 2; 219 bp[0] = n; 220 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 221 OFFSET(bp) = hashp->BSIZE - 1; 222 223 bufp->flags |= BUF_MOD; 224 if (rbufp) 225 __free_ovflpage(hashp, rbufp); 226 if (last_bfp != rbufp) 227 __free_ovflpage(hashp, last_bfp); 228 229 hashp->NKEYS--; 230 return (0); 231 } 232 /* 233 * Returns: 234 * 0 = key not found 235 * -1 = get next overflow page 236 * -2 means key not found and this is big key/data 237 * -3 error 238 */ 239 extern int 240 __find_bigpair(hashp, bufp, ndx, key, size) 241 HTAB *hashp; 242 BUFHEAD *bufp; 243 int ndx; 244 char *key; 245 int size; 246 { 247 register u_short *bp; 248 register char *p; 249 int ksize; 250 u_short bytes; 251 char *kkey; 252 253 bp = (u_short *)bufp->page; 254 p = bufp->page; 255 ksize = size; 256 kkey = key; 257 258 for (bytes = hashp->BSIZE - bp[ndx]; 259 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 260 bytes = hashp->BSIZE - bp[ndx]) { 261 if (memcmp(p + bp[ndx], kkey, bytes)) 262 return (-2); 263 kkey += bytes; 264 ksize -= bytes; 265 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 266 if (!bufp) 267 return (-3); 268 p = bufp->page; 269 bp = (u_short *)p; 270 ndx = 1; 271 } 272 273 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 274 #ifdef HASH_STATISTICS 275 ++hash_collisions; 276 #endif 277 return (-2); 278 } else 279 return (ndx); 280 } 281 282 /* 283 * Given the buffer pointer of the first overflow page of a big pair, 284 * find the end of the big pair 285 * 286 * This will set bpp to the buffer header of the last page of the big pair. 287 * It will return the pageno of the overflow page following the last page 288 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 289 * bucket) 290 */ 291 extern u_short 292 __find_last_page(hashp, bpp) 293 HTAB *hashp; 294 BUFHEAD **bpp; 295 { 296 BUFHEAD *bufp; 297 u_short *bp, pageno; 298 int n; 299 300 bufp = *bpp; 301 bp = (u_short *)bufp->page; 302 for (;;) { 303 n = bp[0]; 304 305 /* 306 * This is the last page if: the tag is FULL_KEY_DATA and 307 * either only 2 entries OVFLPAGE marker is explicit there 308 * is freespace on the page. 309 */ 310 if (bp[2] == FULL_KEY_DATA && 311 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 312 break; 313 314 pageno = bp[n - 1]; 315 bufp = __get_buf(hashp, pageno, bufp, 0); 316 if (!bufp) 317 return (0); /* Need to indicate an error! */ 318 bp = (u_short *)bufp->page; 319 } 320 321 *bpp = bufp; 322 if (bp[0] > 2) 323 return (bp[3]); 324 else 325 return (0); 326 } 327 328 /* 329 * Return the data for the key/data pair that begins on this page at this 330 * index (index should always be 1). 331 */ 332 extern int 333 __big_return(hashp, bufp, ndx, val, set_current) 334 HTAB *hashp; 335 BUFHEAD *bufp; 336 int ndx; 337 DBT *val; 338 int set_current; 339 { 340 BUFHEAD *save_p; 341 u_short *bp, len, off, save_addr; 342 char *tp; 343 344 bp = (u_short *)bufp->page; 345 while (bp[ndx + 1] == PARTIAL_KEY) { 346 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 347 if (!bufp) 348 return (-1); 349 bp = (u_short *)bufp->page; 350 ndx = 1; 351 } 352 353 if (bp[ndx + 1] == FULL_KEY) { 354 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 355 if (!bufp) 356 return (-1); 357 bp = (u_short *)bufp->page; 358 save_p = bufp; 359 save_addr = save_p->addr; 360 off = bp[1]; 361 len = 0; 362 } else 363 if (!FREESPACE(bp)) { 364 /* 365 * This is a hack. We can't distinguish between 366 * FULL_KEY_DATA that contains complete data or 367 * incomplete data, so we require that if the data 368 * is complete, there is at least 1 byte of free 369 * space left. 370 */ 371 off = bp[bp[0]]; 372 len = bp[1] - off; 373 save_p = bufp; 374 save_addr = bufp->addr; 375 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 376 if (!bufp) 377 return (-1); 378 bp = (u_short *)bufp->page; 379 } else { 380 /* The data is all on one page. */ 381 tp = (char *)bp; 382 off = bp[bp[0]]; 383 val->data = (u_char *)tp + off; 384 val->size = bp[1] - off; 385 if (set_current) { 386 if (bp[0] == 2) { /* No more buckets in 387 * chain */ 388 hashp->cpage = NULL; 389 hashp->cbucket++; 390 hashp->cndx = 1; 391 } else { 392 hashp->cpage = __get_buf(hashp, 393 bp[bp[0] - 1], bufp, 0); 394 if (!hashp->cpage) 395 return (-1); 396 hashp->cndx = 1; 397 if (!((u_short *) 398 hashp->cpage->page)[0]) { 399 hashp->cbucket++; 400 hashp->cpage = NULL; 401 } 402 } 403 } 404 return (0); 405 } 406 407 val->size = collect_data(hashp, bufp, (int)len, set_current); 408 if (val->size == -1) 409 return (-1); 410 if (save_p->addr != save_addr) { 411 /* We are pretty short on buffers. */ 412 errno = EINVAL; /* OUT OF BUFFERS */ 413 return (-1); 414 } 415 memmove(hashp->tmp_buf, (save_p->page) + off, len); 416 val->data = (u_char *)hashp->tmp_buf; 417 return (0); 418 } 419 /* 420 * Count how big the total datasize is by recursing through the pages. Then 421 * allocate a buffer and copy the data as you recurse up. 422 */ 423 static int 424 collect_data(hashp, bufp, len, set) 425 HTAB *hashp; 426 BUFHEAD *bufp; 427 int len, set; 428 { 429 register u_short *bp; 430 register char *p; 431 BUFHEAD *xbp; 432 u_short save_addr; 433 int mylen, totlen; 434 435 p = bufp->page; 436 bp = (u_short *)p; 437 mylen = hashp->BSIZE - bp[1]; 438 save_addr = bufp->addr; 439 440 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 441 totlen = len + mylen; 442 if (hashp->tmp_buf) 443 free(hashp->tmp_buf); 444 hashp->tmp_buf = malloc(totlen); 445 if (!hashp->tmp_buf) 446 return (-1); 447 if (set) { 448 hashp->cndx = 1; 449 if (bp[0] == 2) { /* No more buckets in chain */ 450 hashp->cpage = NULL; 451 hashp->cbucket++; 452 } else { 453 hashp->cpage = 454 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 455 if (!hashp->cpage) 456 return (-1); 457 else if (!((u_short *)hashp->cpage->page)[0]) { 458 hashp->cbucket++; 459 hashp->cpage = NULL; 460 } 461 } 462 } 463 } else { 464 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 465 if (!xbp || ((totlen = 466 collect_data(hashp, xbp, len + mylen, set)) < 1)) 467 return (-1); 468 } 469 if (bufp->addr != save_addr) { 470 errno = EINVAL; /* Out of buffers. */ 471 return (-1); 472 } 473 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 474 return (totlen); 475 } 476 477 /* 478 * Fill in the key and data for this big pair. 479 */ 480 extern int 481 __big_keydata(hashp, bufp, key, val, set) 482 HTAB *hashp; 483 BUFHEAD *bufp; 484 DBT *key, *val; 485 int set; 486 { 487 key->size = collect_key(hashp, bufp, 0, val, set); 488 if (key->size == -1) 489 return (-1); 490 key->data = (u_char *)hashp->tmp_key; 491 return (0); 492 } 493 494 /* 495 * Count how big the total key size is by recursing through the pages. Then 496 * collect the data, allocate a buffer and copy the key as you recurse up. 497 */ 498 static int 499 collect_key(hashp, bufp, len, val, set) 500 HTAB *hashp; 501 BUFHEAD *bufp; 502 int len; 503 DBT *val; 504 int set; 505 { 506 BUFHEAD *xbp; 507 char *p; 508 int mylen, totlen; 509 u_short *bp, save_addr; 510 511 p = bufp->page; 512 bp = (u_short *)p; 513 mylen = hashp->BSIZE - bp[1]; 514 515 save_addr = bufp->addr; 516 totlen = len + mylen; 517 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 518 if (hashp->tmp_key) 519 free(hashp->tmp_key); 520 hashp->tmp_key = malloc(totlen); 521 if (!hashp->tmp_key) 522 return (-1); 523 if (__big_return(hashp, bufp, 1, val, set)) 524 return (-1); 525 } else { 526 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 527 if (!xbp || ((totlen = 528 collect_key(hashp, xbp, totlen, val, set)) < 1)) 529 return (-1); 530 } 531 if (bufp->addr != save_addr) { 532 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 533 return (-1); 534 } 535 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 536 return (totlen); 537 } 538 539 /* 540 * Returns: 541 * 0 => OK 542 * -1 => error 543 */ 544 extern int 545 __big_split(hashp, op, np, big_keyp, addr, obucket, ret) 546 HTAB *hashp; 547 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 548 BUFHEAD *np; /* Pointer to new bucket page */ 549 /* Pointer to first page containing the big key/data */ 550 BUFHEAD *big_keyp; 551 int addr; /* Address of big_keyp */ 552 u_int obucket;/* Old Bucket */ 553 SPLIT_RETURN *ret; 554 { 555 register BUFHEAD *tmpp; 556 register u_short *tp; 557 BUFHEAD *bp; 558 DBT key, val; 559 u_int change; 560 u_short free_space, n, off; 561 562 bp = big_keyp; 563 564 /* Now figure out where the big key/data goes */ 565 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 566 return (-1); 567 change = (__call_hash(hashp, key.data, key.size) != obucket); 568 569 if (ret->next_addr = __find_last_page(hashp, &big_keyp)) { 570 if (!(ret->nextp = 571 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 572 return (-1);; 573 } else 574 ret->nextp = NULL; 575 576 /* Now make one of np/op point to the big key/data pair */ 577 #ifdef DEBUG 578 assert(np->ovfl == NULL); 579 #endif 580 if (change) 581 tmpp = np; 582 else 583 tmpp = op; 584 585 tmpp->flags |= BUF_MOD; 586 #ifdef DEBUG1 587 (void)fprintf(stderr, 588 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 589 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 590 #endif 591 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 592 tp = (u_short *)tmpp->page; 593 #ifdef DEBUG 594 assert(FREESPACE(tp) >= OVFLSIZE); 595 #endif 596 n = tp[0]; 597 off = OFFSET(tp); 598 free_space = FREESPACE(tp); 599 tp[++n] = (u_short)addr; 600 tp[++n] = OVFLPAGE; 601 tp[0] = n; 602 OFFSET(tp) = off; 603 FREESPACE(tp) = free_space - OVFLSIZE; 604 605 /* 606 * Finally, set the new and old return values. BIG_KEYP contains a 607 * pointer to the last page of the big key_data pair. Make sure that 608 * big_keyp has no following page (2 elements) or create an empty 609 * following page. 610 */ 611 612 ret->newp = np; 613 ret->oldp = op; 614 615 tp = (u_short *)big_keyp->page; 616 big_keyp->flags |= BUF_MOD; 617 if (tp[0] > 2) { 618 /* 619 * There may be either one or two offsets on this page. If 620 * there is one, then the overflow page is linked on normally 621 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 622 * the second offset and needs to get stuffed in after the 623 * next overflow page is added. 624 */ 625 n = tp[4]; 626 free_space = FREESPACE(tp); 627 off = OFFSET(tp); 628 tp[0] -= 2; 629 FREESPACE(tp) = free_space + OVFLSIZE; 630 OFFSET(tp) = off; 631 tmpp = __add_ovflpage(hashp, big_keyp); 632 if (!tmpp) 633 return (-1); 634 tp[4] = n; 635 } else 636 tmpp = big_keyp; 637 638 if (change) 639 ret->newp = tmpp; 640 else 641 ret->oldp = tmpp; 642 return (0); 643 } 644