1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)hash_bigkey.c 5.7 (Berkeley) 09/08/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * PACKAGE: hash 17 * DESCRIPTION: 18 * Big key/data handling for the hashing package. 19 * 20 * ROUTINES: 21 * External 22 * __big_keydata 23 * __big_split 24 * __big_insert 25 * __big_return 26 * __big_delete 27 * __find_last_page 28 * Internal 29 * collect_key 30 * collect_data 31 */ 32 33 #include <sys/param.h> 34 #include <errno.h> 35 #include <db.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #ifdef DEBUG 40 #include <assert.h> 41 #endif 42 #include "hash.h" 43 #include "page.h" 44 #include "extern.h" 45 46 static int collect_key __P((BUFHEAD *, int, DBT *, int)); 47 static int collect_data __P((BUFHEAD *, int, int)); 48 49 /* 50 * Big_insert 51 * 52 * You need to do an insert and the key/data pair is too big 53 * 54 * Returns: 55 * 0 ==> OK 56 *-1 ==> ERROR 57 */ 58 extern int 59 __big_insert(bufp, key, val) 60 BUFHEAD *bufp; 61 const DBT *key, *val; 62 { 63 register u_short *p; 64 int key_size, n, val_size; 65 u_short space, move_bytes, off; 66 char *cp, *key_data, *val_data; 67 68 cp = bufp->page; /* Character pointer of p. */ 69 p = (u_short *)cp; 70 71 key_data = (char *)key->data; 72 key_size = key->size; 73 val_data = (char *)val->data; 74 val_size = val->size; 75 76 /* First move the Key */ 77 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 78 space = FREESPACE(p) - BIGOVERHEAD) { 79 move_bytes = MIN(space, key_size); 80 off = OFFSET(p) - move_bytes; 81 bcopy(key_data, cp + off, move_bytes); 82 key_size -= move_bytes; 83 key_data += move_bytes; 84 n = p[0]; 85 p[++n] = off; 86 p[0] = ++n; 87 FREESPACE(p) = off - PAGE_META(n); 88 OFFSET(p) = off; 89 p[n] = PARTIAL_KEY; 90 bufp = __add_ovflpage(bufp); 91 if (!bufp) 92 return (-1); 93 n = p[0]; 94 if (!key_size) 95 if (FREESPACE(p)) { 96 move_bytes = MIN(FREESPACE(p), val_size); 97 off = OFFSET(p) - move_bytes; 98 p[n] = off; 99 bcopy(val_data, cp + off, move_bytes); 100 val_data += move_bytes; 101 val_size -= move_bytes; 102 p[n - 2] = FULL_KEY_DATA; 103 FREESPACE(p) = FREESPACE(p) - move_bytes; 104 OFFSET(p) = off; 105 } else 106 p[n - 2] = FULL_KEY; 107 p = (u_short *)bufp->page; 108 cp = bufp->page; 109 bufp->flags |= BUF_MOD; 110 } 111 112 /* Now move the data */ 113 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 114 space = FREESPACE(p) - BIGOVERHEAD) { 115 move_bytes = MIN(space, val_size); 116 /* 117 * Here's the hack to make sure that if the data ends on the 118 * same page as the key ends, FREESPACE is at least one. 119 */ 120 if (space == val_size && val_size == val->size) 121 move_bytes--; 122 off = OFFSET(p) - move_bytes; 123 bcopy(val_data, cp + off, move_bytes); 124 val_size -= move_bytes; 125 val_data += move_bytes; 126 n = p[0]; 127 p[++n] = off; 128 p[0] = ++n; 129 FREESPACE(p) = off - PAGE_META(n); 130 OFFSET(p) = off; 131 if (val_size) { 132 p[n] = FULL_KEY; 133 bufp = __add_ovflpage(bufp); 134 if (!bufp) 135 return (-1); 136 cp = bufp->page; 137 p = (u_short *)cp; 138 } else 139 p[n] = FULL_KEY_DATA; 140 bufp->flags |= BUF_MOD; 141 } 142 return (0); 143 } 144 145 /* 146 * Called when bufp's page contains a partial key (index should be 1) 147 * 148 * All pages in the big key/data pair except bufp are freed. We cannot 149 * free bufp because the page pointing to it is lost and we can't get rid 150 * of its pointer. 151 * 152 * Returns: 153 * 0 => OK 154 *-1 => ERROR 155 */ 156 extern int 157 __big_delete(bufp) 158 BUFHEAD *bufp; 159 { 160 register BUFHEAD *last_bfp, *rbufp; 161 u_short *bp, pageno; 162 int key_done, n; 163 164 rbufp = bufp; 165 last_bfp = NULL; 166 bp = (u_short *)bufp->page; 167 pageno = 0; 168 key_done = 0; 169 170 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 171 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 172 key_done = 1; 173 174 /* 175 * If there is freespace left on a FULL_KEY_DATA page, then 176 * the data is short and fits entirely on this page, and this 177 * is the last page. 178 */ 179 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 180 break; 181 pageno = bp[bp[0] - 1]; 182 rbufp->flags |= BUF_MOD; 183 rbufp = __get_buf(pageno, rbufp, 0); 184 if (last_bfp) 185 __free_ovflpage(last_bfp); 186 last_bfp = rbufp; 187 if (!rbufp) 188 return (-1); /* Error. */ 189 bp = (u_short *)rbufp->page; 190 } 191 192 /* 193 * If we get here then rbufp points to the last page of the big 194 * key/data pair. Bufp points to the first one -- it should now be 195 * empty pointing to the next page after this pair. Can't free it 196 * because we don't have the page pointing to it. 197 */ 198 199 /* This is information from the last page of the pair. */ 200 n = bp[0]; 201 pageno = bp[n - 1]; 202 203 /* Now, bp is the first page of the pair. */ 204 bp = (u_short *)bufp->page; 205 if (n > 2) { 206 /* There is an overflow page. */ 207 bp[1] = pageno; 208 bp[2] = OVFLPAGE; 209 bufp->ovfl = rbufp->ovfl; 210 } else 211 /* This is the last page. */ 212 bufp->ovfl = NULL; 213 n -= 2; 214 bp[0] = n; 215 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 216 OFFSET(bp) = hashp->BSIZE - 1; 217 218 bufp->flags |= BUF_MOD; 219 if (rbufp) 220 __free_ovflpage(rbufp); 221 if (last_bfp != rbufp) 222 __free_ovflpage(last_bfp); 223 224 hashp->NKEYS--; 225 return (0); 226 } 227 /* 228 * Returns: 229 * 0 = key not found 230 * -1 = get next overflow page 231 * -2 means key not found and this is big key/data 232 * -3 error 233 */ 234 extern int 235 __find_bigpair(bufp, ndx, key, size) 236 BUFHEAD *bufp; 237 int ndx; 238 char *key; 239 int size; 240 { 241 register u_short *bp; 242 register char *p; 243 int ksize; 244 u_short bytes; 245 char *kkey; 246 247 bp = (u_short *)bufp->page; 248 p = bufp->page; 249 ksize = size; 250 kkey = key; 251 252 for (bytes = hashp->BSIZE - bp[ndx]; 253 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 254 bytes = hashp->BSIZE - bp[ndx]) { 255 if (bcmp(p + bp[ndx], kkey, bytes)) 256 return (-2); 257 kkey += bytes; 258 ksize -= bytes; 259 bufp = __get_buf(bp[ndx + 2], bufp, 0); 260 if (!bufp) 261 return (-3); 262 p = bufp->page; 263 bp = (u_short *)p; 264 ndx = 1; 265 } 266 267 if (bytes != ksize || bcmp(p + bp[ndx], kkey, bytes)) { 268 #ifdef HASH_STATISTICS 269 ++hash_collisions; 270 #endif 271 return (-2); 272 } else 273 return (ndx); 274 } 275 276 /* 277 * Given the buffer pointer of the first overflow page of a big pair, 278 * find the end of the big pair 279 * 280 * This will set bpp to the buffer header of the last page of the big pair. 281 * It will return the pageno of the overflow page following the last page 282 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 283 * bucket) 284 */ 285 extern u_short 286 __find_last_page(bpp) 287 BUFHEAD **bpp; 288 { 289 BUFHEAD *bufp; 290 u_short *bp, pageno; 291 int n; 292 293 bufp = *bpp; 294 bp = (u_short *)bufp->page; 295 for (;;) { 296 n = bp[0]; 297 298 /* 299 * This is the last page if: the tag is FULL_KEY_DATA and 300 * either only 2 entries OVFLPAGE marker is explicit there 301 * is freespace on the page. 302 */ 303 if (bp[2] == FULL_KEY_DATA && 304 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 305 break; 306 307 pageno = bp[n - 1]; 308 bufp = __get_buf(pageno, bufp, 0); 309 if (!bufp) 310 return (0); /* Need to indicate an error! */ 311 bp = (u_short *)bufp->page; 312 } 313 314 *bpp = bufp; 315 if (bp[0] > 2) 316 return (bp[3]); 317 else 318 return (0); 319 } 320 321 /* 322 * Return the data for the key/data pair that begins on this page at this 323 * index (index should always be 1). 324 */ 325 extern int 326 __big_return(bufp, ndx, val, set_current) 327 BUFHEAD *bufp; 328 int ndx; 329 DBT *val; 330 int set_current; 331 { 332 BUFHEAD *save_p; 333 u_short *bp, len, off, save_addr; 334 char *tp; 335 336 bp = (u_short *)bufp->page; 337 while (bp[ndx + 1] == PARTIAL_KEY) { 338 bufp = __get_buf(bp[bp[0] - 1], bufp, 0); 339 if (!bufp) 340 return (-1); 341 bp = (u_short *)bufp->page; 342 ndx = 1; 343 } 344 345 if (bp[ndx + 1] == FULL_KEY) { 346 bufp = __get_buf(bp[bp[0] - 1], bufp, 0); 347 if (!bufp) 348 return (-1); 349 bp = (u_short *)bufp->page; 350 save_p = bufp; 351 save_addr = save_p->addr; 352 off = bp[1]; 353 len = 0; 354 } else 355 if (!FREESPACE(bp)) { 356 /* 357 * This is a hack. We can't distinguish between 358 * FULL_KEY_DATA that contains complete data or 359 * incomplete data, so we require that if the data 360 * is complete, there is at least 1 byte of free 361 * space left. 362 */ 363 off = bp[bp[0]]; 364 len = bp[1] - off; 365 save_p = bufp; 366 save_addr = bufp->addr; 367 bufp = __get_buf(bp[bp[0] - 1], bufp, 0); 368 if (!bufp) 369 return (-1); 370 bp = (u_short *)bufp->page; 371 } else { 372 /* The data is all on one page. */ 373 tp = (char *)bp; 374 off = bp[bp[0]]; 375 val->data = (u_char *)tp + off; 376 val->size = bp[1] - off; 377 if (set_current) { 378 if (bp[0] == 2) { /* No more buckets in 379 * chain */ 380 hashp->cpage = NULL; 381 hashp->cbucket++; 382 hashp->cndx = 1; 383 } else { 384 hashp->cpage = 385 __get_buf(bp[bp[0] - 1], bufp, 0); 386 if (!hashp->cpage) 387 return (-1); 388 hashp->cndx = 1; 389 if (!((u_short *) 390 hashp->cpage->page)[0]) { 391 hashp->cbucket++; 392 hashp->cpage = NULL; 393 } 394 } 395 } 396 return (0); 397 } 398 399 val->size = collect_data(bufp, (int)len, set_current); 400 if (val->size == -1) 401 return (-1); 402 if (save_p->addr != save_addr) { 403 /* We are pretty short on buffers. */ 404 errno = EINVAL; /* OUT OF BUFFERS */ 405 return (-1); 406 } 407 bcopy((save_p->page) + off, hashp->tmp_buf, len); 408 val->data = (u_char *)hashp->tmp_buf; 409 return (0); 410 } 411 /* 412 * Count how big the total datasize is by recursing through the pages. Then 413 * allocate a buffer and copy the data as you recurse up. 414 */ 415 static int 416 collect_data(bufp, len, set) 417 BUFHEAD *bufp; 418 int len, set; 419 { 420 register u_short *bp; 421 register char *p; 422 BUFHEAD *xbp; 423 u_short save_addr; 424 int mylen, totlen; 425 426 p = bufp->page; 427 bp = (u_short *)p; 428 mylen = hashp->BSIZE - bp[1]; 429 save_addr = bufp->addr; 430 431 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 432 totlen = len + mylen; 433 if (hashp->tmp_buf) 434 free(hashp->tmp_buf); 435 hashp->tmp_buf = malloc(totlen); 436 if (!hashp->tmp_buf) 437 return (-1); 438 if (set) { 439 hashp->cndx = 1; 440 if (bp[0] == 2) { /* No more buckets in chain */ 441 hashp->cpage = NULL; 442 hashp->cbucket++; 443 } else { 444 hashp->cpage = 445 __get_buf(bp[bp[0] - 1], bufp, 0); 446 if (!hashp->cpage) 447 return (-1); 448 else if (!((u_short *)hashp->cpage->page)[0]) { 449 hashp->cbucket++; 450 hashp->cpage = NULL; 451 } 452 } 453 } 454 } else { 455 xbp = __get_buf(bp[bp[0] - 1], bufp, 0); 456 if (!xbp || 457 ((totlen = collect_data(xbp, len + mylen, set)) < 1)) 458 return (-1); 459 } 460 if (bufp->addr != save_addr) { 461 errno = EINVAL; /* Out of buffers. */ 462 return (-1); 463 } 464 bcopy((bufp->page) + bp[1], &hashp->tmp_buf[len], mylen); 465 return (totlen); 466 } 467 468 /* 469 * Fill in the key and data for this big pair. 470 */ 471 extern int 472 __big_keydata(bufp, key, val, set) 473 BUFHEAD *bufp; 474 DBT *key, *val; 475 int set; 476 { 477 key->size = collect_key(bufp, 0, val, set); 478 if (key->size == -1) 479 return (-1); 480 key->data = (u_char *)hashp->tmp_key; 481 return (0); 482 } 483 484 /* 485 * Count how big the total key size is by recursing through the pages. Then 486 * collect the data, allocate a buffer and copy the key as you recurse up. 487 */ 488 static int 489 collect_key(bufp, len, val, set) 490 BUFHEAD *bufp; 491 int len; 492 DBT *val; 493 int set; 494 { 495 BUFHEAD *xbp; 496 char *p; 497 int mylen, totlen; 498 u_short *bp, save_addr; 499 500 p = bufp->page; 501 bp = (u_short *)p; 502 mylen = hashp->BSIZE - bp[1]; 503 504 save_addr = bufp->addr; 505 totlen = len + mylen; 506 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 507 if (hashp->tmp_key) 508 free(hashp->tmp_key); 509 hashp->tmp_key = malloc(totlen); 510 if (!hashp->tmp_key) 511 return (-1); 512 if (__big_return(bufp, 1, val, set)) 513 return (-1); 514 } else { 515 xbp = __get_buf(bp[bp[0] - 1], bufp, 0); 516 if (!xbp || 517 ((totlen = collect_key(xbp, totlen, val, set)) < 1)) 518 return (-1); 519 } 520 if (bufp->addr != save_addr) { 521 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 522 return (-1); 523 } 524 bcopy((bufp->page) + bp[1], &hashp->tmp_key[len], mylen); 525 return (totlen); 526 } 527 528 /* 529 * Returns: 530 * 0 => OK 531 * -1 => error 532 */ 533 extern int 534 __big_split(op, np, big_keyp, addr, obucket, ret) 535 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 536 BUFHEAD *np; /* Pointer to new bucket page */ 537 /* Pointer to first page containing the big key/data */ 538 BUFHEAD *big_keyp; 539 int addr; /* Address of big_keyp */ 540 u_int obucket;/* Old Bucket */ 541 SPLIT_RETURN *ret; 542 { 543 register BUFHEAD *tmpp; 544 register u_short *tp; 545 BUFHEAD *bp; 546 DBT key, val; 547 u_int change; 548 u_short free_space, n, off; 549 550 bp = big_keyp; 551 552 /* Now figure out where the big key/data goes */ 553 if (__big_keydata(big_keyp, &key, &val, 0)) 554 return (-1); 555 change = (__call_hash(key.data, key.size) != obucket); 556 557 if (ret->next_addr = __find_last_page(&big_keyp)) { 558 if (!(ret->nextp = __get_buf(ret->next_addr, big_keyp, 0))) 559 return (-1);; 560 } else 561 ret->nextp = NULL; 562 563 /* Now make one of np/op point to the big key/data pair */ 564 #ifdef DEBUG 565 assert(np->ovfl == NULL); 566 #endif 567 if (change) 568 tmpp = np; 569 else 570 tmpp = op; 571 572 tmpp->flags |= BUF_MOD; 573 #ifdef DEBUG1 574 (void)fprintf(stderr, 575 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 576 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 577 #endif 578 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 579 tp = (u_short *)tmpp->page; 580 #ifdef DEBUG 581 assert(FREESPACE(tp) >= OVFLSIZE); 582 #endif 583 n = tp[0]; 584 off = OFFSET(tp); 585 free_space = FREESPACE(tp); 586 tp[++n] = (u_short)addr; 587 tp[++n] = OVFLPAGE; 588 tp[0] = n; 589 OFFSET(tp) = off; 590 FREESPACE(tp) = free_space - OVFLSIZE; 591 592 /* 593 * Finally, set the new and old return values. BIG_KEYP contains a 594 * pointer to the last page of the big key_data pair. Make sure that 595 * big_keyp has no following page (2 elements) or create an empty 596 * following page. 597 */ 598 599 ret->newp = np; 600 ret->oldp = op; 601 602 tp = (u_short *)big_keyp->page; 603 big_keyp->flags |= BUF_MOD; 604 if (tp[0] > 2) { 605 /* 606 * There may be either one or two offsets on this page. If 607 * there is one, then the overflow page is linked on normally 608 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 609 * the second offset and needs to get stuffed in after the 610 * next overflow page is added. 611 */ 612 n = tp[4]; 613 free_space = FREESPACE(tp); 614 off = OFFSET(tp); 615 tp[0] -= 2; 616 FREESPACE(tp) = free_space + OVFLSIZE; 617 OFFSET(tp) = off; 618 tmpp = __add_ovflpage(big_keyp); 619 if (!tmpp) 620 return (-1); 621 tp[4] = n; 622 } else 623 tmpp = big_keyp; 624 625 if (change) 626 ret->newp = tmpp; 627 else 628 ret->oldp = tmpp; 629 return (0); 630 } 631