1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)hash_bigkey.c 8.2 (Berkeley) 02/21/94"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * PACKAGE: hash 17 * DESCRIPTION: 18 * Big key/data handling for the hashing package. 19 * 20 * ROUTINES: 21 * External 22 * __big_keydata 23 * __big_split 24 * __big_insert 25 * __big_return 26 * __big_delete 27 * __find_last_page 28 * Internal 29 * collect_key 30 * collect_data 31 */ 32 33 #include <sys/param.h> 34 35 #include <errno.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 #ifdef DEBUG 41 #include <assert.h> 42 #endif 43 44 #include <db.h> 45 #include "hash.h" 46 #include "page.h" 47 #include "extern.h" 48 49 static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int)); 50 static int collect_data __P((HTAB *, BUFHEAD *, int, int)); 51 52 /* 53 * Big_insert 54 * 55 * You need to do an insert and the key/data pair is too big 56 * 57 * Returns: 58 * 0 ==> OK 59 *-1 ==> ERROR 60 */ 61 extern int 62 __big_insert(hashp, bufp, key, val) 63 HTAB *hashp; 64 BUFHEAD *bufp; 65 const DBT *key, *val; 66 { 67 register u_short *p; 68 int key_size, n, val_size; 69 u_short space, move_bytes, off; 70 char *cp, *key_data, *val_data; 71 72 cp = bufp->page; /* Character pointer of p. */ 73 p = (u_short *)cp; 74 75 key_data = (char *)key->data; 76 key_size = key->size; 77 val_data = (char *)val->data; 78 val_size = val->size; 79 80 /* First move the Key */ 81 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 82 space = FREESPACE(p) - BIGOVERHEAD) { 83 move_bytes = MIN(space, key_size); 84 off = OFFSET(p) - move_bytes; 85 memmove(cp + off, key_data, move_bytes); 86 key_size -= move_bytes; 87 key_data += move_bytes; 88 n = p[0]; 89 p[++n] = off; 90 p[0] = ++n; 91 FREESPACE(p) = off - PAGE_META(n); 92 OFFSET(p) = off; 93 p[n] = PARTIAL_KEY; 94 bufp = __add_ovflpage(hashp, bufp); 95 if (!bufp) 96 return (-1); 97 n = p[0]; 98 if (!key_size) 99 if (FREESPACE(p)) { 100 move_bytes = MIN(FREESPACE(p), val_size); 101 off = OFFSET(p) - move_bytes; 102 p[n] = off; 103 memmove(cp + off, val_data, move_bytes); 104 val_data += move_bytes; 105 val_size -= move_bytes; 106 p[n - 2] = FULL_KEY_DATA; 107 FREESPACE(p) = FREESPACE(p) - move_bytes; 108 OFFSET(p) = off; 109 } else 110 p[n - 2] = FULL_KEY; 111 p = (u_short *)bufp->page; 112 cp = bufp->page; 113 bufp->flags |= BUF_MOD; 114 } 115 116 /* Now move the data */ 117 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 118 space = FREESPACE(p) - BIGOVERHEAD) { 119 move_bytes = MIN(space, val_size); 120 /* 121 * Here's the hack to make sure that if the data ends on the 122 * same page as the key ends, FREESPACE is at least one. 123 */ 124 if (space == val_size && val_size == val->size) 125 move_bytes--; 126 off = OFFSET(p) - move_bytes; 127 memmove(cp + off, val_data, move_bytes); 128 val_size -= move_bytes; 129 val_data += move_bytes; 130 n = p[0]; 131 p[++n] = off; 132 p[0] = ++n; 133 FREESPACE(p) = off - PAGE_META(n); 134 OFFSET(p) = off; 135 if (val_size) { 136 p[n] = FULL_KEY; 137 bufp = __add_ovflpage(hashp, bufp); 138 if (!bufp) 139 return (-1); 140 cp = bufp->page; 141 p = (u_short *)cp; 142 } else 143 p[n] = FULL_KEY_DATA; 144 bufp->flags |= BUF_MOD; 145 } 146 return (0); 147 } 148 149 /* 150 * Called when bufp's page contains a partial key (index should be 1) 151 * 152 * All pages in the big key/data pair except bufp are freed. We cannot 153 * free bufp because the page pointing to it is lost and we can't get rid 154 * of its pointer. 155 * 156 * Returns: 157 * 0 => OK 158 *-1 => ERROR 159 */ 160 extern int 161 __big_delete(hashp, bufp) 162 HTAB *hashp; 163 BUFHEAD *bufp; 164 { 165 register BUFHEAD *last_bfp, *rbufp; 166 u_short *bp, pageno; 167 int key_done, n; 168 169 rbufp = bufp; 170 last_bfp = NULL; 171 bp = (u_short *)bufp->page; 172 pageno = 0; 173 key_done = 0; 174 175 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 176 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 177 key_done = 1; 178 179 /* 180 * If there is freespace left on a FULL_KEY_DATA page, then 181 * the data is short and fits entirely on this page, and this 182 * is the last page. 183 */ 184 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 185 break; 186 pageno = bp[bp[0] - 1]; 187 rbufp->flags |= BUF_MOD; 188 rbufp = __get_buf(hashp, pageno, rbufp, 0); 189 if (last_bfp) 190 __free_ovflpage(hashp, last_bfp); 191 last_bfp = rbufp; 192 if (!rbufp) 193 return (-1); /* Error. */ 194 bp = (u_short *)rbufp->page; 195 } 196 197 /* 198 * If we get here then rbufp points to the last page of the big 199 * key/data pair. Bufp points to the first one -- it should now be 200 * empty pointing to the next page after this pair. Can't free it 201 * because we don't have the page pointing to it. 202 */ 203 204 /* This is information from the last page of the pair. */ 205 n = bp[0]; 206 pageno = bp[n - 1]; 207 208 /* Now, bp is the first page of the pair. */ 209 bp = (u_short *)bufp->page; 210 if (n > 2) { 211 /* There is an overflow page. */ 212 bp[1] = pageno; 213 bp[2] = OVFLPAGE; 214 bufp->ovfl = rbufp->ovfl; 215 } else 216 /* This is the last page. */ 217 bufp->ovfl = NULL; 218 n -= 2; 219 bp[0] = n; 220 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 221 OFFSET(bp) = hashp->BSIZE - 1; 222 223 bufp->flags |= BUF_MOD; 224 if (rbufp) 225 __free_ovflpage(hashp, rbufp); 226 if (last_bfp != rbufp) 227 __free_ovflpage(hashp, last_bfp); 228 229 hashp->NKEYS--; 230 return (0); 231 } 232 /* 233 * Returns: 234 * 0 = key not found 235 * -1 = get next overflow page 236 * -2 means key not found and this is big key/data 237 * -3 error 238 */ 239 extern int 240 __find_bigpair(hashp, bufp, ndx, key, size) 241 HTAB *hashp; 242 BUFHEAD *bufp; 243 int ndx; 244 char *key; 245 int size; 246 { 247 register u_short *bp; 248 register char *p; 249 int ksize; 250 u_short bytes; 251 char *kkey; 252 253 bp = (u_short *)bufp->page; 254 p = bufp->page; 255 ksize = size; 256 kkey = key; 257 258 for (bytes = hashp->BSIZE - bp[ndx]; 259 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 260 bytes = hashp->BSIZE - bp[ndx]) { 261 if (memcmp(p + bp[ndx], kkey, bytes)) 262 return (-2); 263 kkey += bytes; 264 ksize -= bytes; 265 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 266 if (!bufp) 267 return (-3); 268 p = bufp->page; 269 bp = (u_short *)p; 270 ndx = 1; 271 } 272 273 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 274 #ifdef HASH_STATISTICS 275 ++hash_collisions; 276 #endif 277 return (-2); 278 } else 279 return (ndx); 280 } 281 282 /* 283 * Given the buffer pointer of the first overflow page of a big pair, 284 * find the end of the big pair 285 * 286 * This will set bpp to the buffer header of the last page of the big pair. 287 * It will return the pageno of the overflow page following the last page 288 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 289 * bucket) 290 */ 291 extern u_short 292 __find_last_page(hashp, bpp) 293 HTAB *hashp; 294 BUFHEAD **bpp; 295 { 296 BUFHEAD *bufp; 297 u_short *bp, pageno; 298 int n; 299 300 bufp = *bpp; 301 bp = (u_short *)bufp->page; 302 for (;;) { 303 n = bp[0]; 304 305 /* 306 * This is the last page if: the tag is FULL_KEY_DATA and 307 * either only 2 entries OVFLPAGE marker is explicit there 308 * is freespace on the page. 309 */ 310 if (bp[2] == FULL_KEY_DATA && 311 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 312 break; 313 314 pageno = bp[n - 1]; 315 bufp = __get_buf(hashp, pageno, bufp, 0); 316 if (!bufp) 317 return (0); /* Need to indicate an error! */ 318 bp = (u_short *)bufp->page; 319 } 320 321 *bpp = bufp; 322 if (bp[0] > 2) 323 return (bp[3]); 324 else 325 return (0); 326 } 327 328 /* 329 * Return the data for the key/data pair that begins on this page at this 330 * index (index should always be 1). 331 */ 332 extern int 333 __big_return(hashp, bufp, ndx, val, set_current) 334 HTAB *hashp; 335 BUFHEAD *bufp; 336 int ndx; 337 DBT *val; 338 int set_current; 339 { 340 BUFHEAD *save_p; 341 u_short *bp, len, off, save_addr; 342 char *tp; 343 344 bp = (u_short *)bufp->page; 345 while (bp[ndx + 1] == PARTIAL_KEY) { 346 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 347 if (!bufp) 348 return (-1); 349 bp = (u_short *)bufp->page; 350 ndx = 1; 351 } 352 353 if (bp[ndx + 1] == FULL_KEY) { 354 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 355 if (!bufp) 356 return (-1); 357 bp = (u_short *)bufp->page; 358 save_p = bufp; 359 save_addr = save_p->addr; 360 off = bp[1]; 361 len = 0; 362 } else 363 if (!FREESPACE(bp)) { 364 /* 365 * This is a hack. We can't distinguish between 366 * FULL_KEY_DATA that contains complete data or 367 * incomplete data, so we require that if the data 368 * is complete, there is at least 1 byte of free 369 * space left. 370 */ 371 off = bp[bp[0]]; 372 len = bp[1] - off; 373 save_p = bufp; 374 save_addr = bufp->addr; 375 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 376 if (!bufp) 377 return (-1); 378 bp = (u_short *)bufp->page; 379 } else { 380 /* The data is all on one page. */ 381 tp = (char *)bp; 382 off = bp[bp[0]]; 383 val->data = (u_char *)tp + off; 384 val->size = bp[1] - off; 385 if (set_current) { 386 if (bp[0] == 2) { /* No more buckets in 387 * chain */ 388 hashp->cpage = NULL; 389 hashp->cbucket++; 390 hashp->cndx = 1; 391 } else { 392 hashp->cpage = __get_buf(hashp, 393 bp[bp[0] - 1], bufp, 0); 394 if (!hashp->cpage) 395 return (-1); 396 hashp->cndx = 1; 397 if (!((u_short *) 398 hashp->cpage->page)[0]) { 399 hashp->cbucket++; 400 hashp->cpage = NULL; 401 } 402 } 403 } 404 return (0); 405 } 406 407 val->size = collect_data(hashp, bufp, (int)len, set_current); 408 if (val->size == -1) 409 return (-1); 410 if (save_p->addr != save_addr) { 411 /* We are pretty short on buffers. */ 412 errno = EINVAL; /* OUT OF BUFFERS */ 413 return (-1); 414 } 415 memmove(hashp->tmp_buf, (save_p->page) + off, len); 416 val->data = (u_char *)hashp->tmp_buf; 417 return (0); 418 } 419 /* 420 * Count how big the total datasize is by recursing through the pages. Then 421 * allocate a buffer and copy the data as you recurse up. 422 */ 423 static int 424 collect_data(hashp, bufp, len, set) 425 HTAB *hashp; 426 BUFHEAD *bufp; 427 int len, set; 428 { 429 register u_short *bp; 430 register char *p; 431 BUFHEAD *xbp; 432 u_short save_addr; 433 int mylen, totlen; 434 435 p = bufp->page; 436 bp = (u_short *)p; 437 mylen = hashp->BSIZE - bp[1]; 438 save_addr = bufp->addr; 439 440 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 441 totlen = len + mylen; 442 if (hashp->tmp_buf) 443 free(hashp->tmp_buf); 444 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 445 return (-1); 446 if (set) { 447 hashp->cndx = 1; 448 if (bp[0] == 2) { /* No more buckets in chain */ 449 hashp->cpage = NULL; 450 hashp->cbucket++; 451 } else { 452 hashp->cpage = 453 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 454 if (!hashp->cpage) 455 return (-1); 456 else if (!((u_short *)hashp->cpage->page)[0]) { 457 hashp->cbucket++; 458 hashp->cpage = NULL; 459 } 460 } 461 } 462 } else { 463 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 464 if (!xbp || ((totlen = 465 collect_data(hashp, xbp, len + mylen, set)) < 1)) 466 return (-1); 467 } 468 if (bufp->addr != save_addr) { 469 errno = EINVAL; /* Out of buffers. */ 470 return (-1); 471 } 472 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 473 return (totlen); 474 } 475 476 /* 477 * Fill in the key and data for this big pair. 478 */ 479 extern int 480 __big_keydata(hashp, bufp, key, val, set) 481 HTAB *hashp; 482 BUFHEAD *bufp; 483 DBT *key, *val; 484 int set; 485 { 486 key->size = collect_key(hashp, bufp, 0, val, set); 487 if (key->size == -1) 488 return (-1); 489 key->data = (u_char *)hashp->tmp_key; 490 return (0); 491 } 492 493 /* 494 * Count how big the total key size is by recursing through the pages. Then 495 * collect the data, allocate a buffer and copy the key as you recurse up. 496 */ 497 static int 498 collect_key(hashp, bufp, len, val, set) 499 HTAB *hashp; 500 BUFHEAD *bufp; 501 int len; 502 DBT *val; 503 int set; 504 { 505 BUFHEAD *xbp; 506 char *p; 507 int mylen, totlen; 508 u_short *bp, save_addr; 509 510 p = bufp->page; 511 bp = (u_short *)p; 512 mylen = hashp->BSIZE - bp[1]; 513 514 save_addr = bufp->addr; 515 totlen = len + mylen; 516 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 517 if (hashp->tmp_key != NULL) 518 free(hashp->tmp_key); 519 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 520 return (-1); 521 if (__big_return(hashp, bufp, 1, val, set)) 522 return (-1); 523 } else { 524 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 525 if (!xbp || ((totlen = 526 collect_key(hashp, xbp, totlen, val, set)) < 1)) 527 return (-1); 528 } 529 if (bufp->addr != save_addr) { 530 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 531 return (-1); 532 } 533 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 534 return (totlen); 535 } 536 537 /* 538 * Returns: 539 * 0 => OK 540 * -1 => error 541 */ 542 extern int 543 __big_split(hashp, op, np, big_keyp, addr, obucket, ret) 544 HTAB *hashp; 545 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 546 BUFHEAD *np; /* Pointer to new bucket page */ 547 /* Pointer to first page containing the big key/data */ 548 BUFHEAD *big_keyp; 549 int addr; /* Address of big_keyp */ 550 u_int obucket;/* Old Bucket */ 551 SPLIT_RETURN *ret; 552 { 553 register BUFHEAD *tmpp; 554 register u_short *tp; 555 BUFHEAD *bp; 556 DBT key, val; 557 u_int change; 558 u_short free_space, n, off; 559 560 bp = big_keyp; 561 562 /* Now figure out where the big key/data goes */ 563 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 564 return (-1); 565 change = (__call_hash(hashp, key.data, key.size) != obucket); 566 567 if (ret->next_addr = __find_last_page(hashp, &big_keyp)) { 568 if (!(ret->nextp = 569 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 570 return (-1);; 571 } else 572 ret->nextp = NULL; 573 574 /* Now make one of np/op point to the big key/data pair */ 575 #ifdef DEBUG 576 assert(np->ovfl == NULL); 577 #endif 578 if (change) 579 tmpp = np; 580 else 581 tmpp = op; 582 583 tmpp->flags |= BUF_MOD; 584 #ifdef DEBUG1 585 (void)fprintf(stderr, 586 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 587 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 588 #endif 589 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 590 tp = (u_short *)tmpp->page; 591 #ifdef DEBUG 592 assert(FREESPACE(tp) >= OVFLSIZE); 593 #endif 594 n = tp[0]; 595 off = OFFSET(tp); 596 free_space = FREESPACE(tp); 597 tp[++n] = (u_short)addr; 598 tp[++n] = OVFLPAGE; 599 tp[0] = n; 600 OFFSET(tp) = off; 601 FREESPACE(tp) = free_space - OVFLSIZE; 602 603 /* 604 * Finally, set the new and old return values. BIG_KEYP contains a 605 * pointer to the last page of the big key_data pair. Make sure that 606 * big_keyp has no following page (2 elements) or create an empty 607 * following page. 608 */ 609 610 ret->newp = np; 611 ret->oldp = op; 612 613 tp = (u_short *)big_keyp->page; 614 big_keyp->flags |= BUF_MOD; 615 if (tp[0] > 2) { 616 /* 617 * There may be either one or two offsets on this page. If 618 * there is one, then the overflow page is linked on normally 619 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 620 * the second offset and needs to get stuffed in after the 621 * next overflow page is added. 622 */ 623 n = tp[4]; 624 free_space = FREESPACE(tp); 625 off = OFFSET(tp); 626 tp[0] -= 2; 627 FREESPACE(tp) = free_space + OVFLSIZE; 628 OFFSET(tp) = off; 629 tmpp = __add_ovflpage(hashp, big_keyp); 630 if (!tmpp) 631 return (-1); 632 tp[4] = n; 633 } else 634 tmpp = big_keyp; 635 636 if (change) 637 ret->newp = tmpp; 638 else 639 ret->oldp = tmpp; 640 return (0); 641 } 642