1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_split.c 5.8 (Berkeley) 11/13/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 17 #define __DBINTERFACE_PRIVATE 18 #include <db.h> 19 #include <limits.h> 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 24 #include "btree.h" 25 26 static int bt_preserve __P((BTREE *, pgno_t)); 27 static PAGE *bt_psplit __P((BTREE *, PAGE *, PAGE *, PAGE *, int *)); 28 static PAGE *bt_page __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 29 static PAGE *bt_root __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 30 static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 31 static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 32 static recno_t rec_total __P((PAGE *)); 33 34 #ifdef STATISTICS 35 u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; 36 #endif 37 38 /* 39 * __BT_SPLIT -- Split the tree. 40 * 41 * Parameters: 42 * t: tree 43 * h: page to split 44 * key: key to insert 45 * data: data to insert 46 * flags: BIGKEY/BIGDATA flags 47 * nbytes: length of insertion 48 * skip: index to leave open 49 * 50 * Returns: 51 * RET_ERROR, RET_SUCCESS 52 */ 53 int 54 __bt_split(t, h, key, data, flags, nbytes, skip) 55 BTREE *t; 56 PAGE *h; 57 const DBT *key, *data; 58 u_long flags; 59 size_t nbytes; 60 int skip; 61 { 62 BINTERNAL *bi; 63 BLEAF *bl; 64 DBT a, b; 65 EPGNO *parent; 66 PAGE *l, *r, *lchild, *rchild; 67 index_t nxtindex; 68 size_t nksize; 69 int nosplit; 70 char *dest; 71 72 /* 73 * Split the page into two pages, l and r. The split routines return 74 * a pointer to the page into which the key should be inserted and skip 75 * set to the offset which should be used. Additionally, l and r are 76 * pinned. 77 */ 78 h = h->pgno == P_ROOT ? 79 bt_root(t, h, &l, &r, &skip) : bt_page(t, h, &l, &r, &skip); 80 if (h == NULL) 81 return (RET_ERROR); 82 83 /* 84 * Grab the space and insert the [rb]leaf structure. Always a [rb]leaf 85 * structure since key inserts always cause a leaf page to split first. 86 */ 87 h->linp[skip] = h->upper -= nbytes; 88 dest = (char *)h + h->upper; 89 if (ISSET(t, BTF_RECNO)) 90 WR_RLEAF(dest, data, flags) 91 else 92 WR_BLEAF(dest, key, data, flags) 93 94 /* 95 * Now we walk the parent page stack -- a LIFO stack of the pages that 96 * were traversed when we searched for the page that split. Each stack 97 * entry is a page number and a page index offset. The offset is for 98 * the page traversed on the search. We've just split a page, so we 99 * have to insert a new key into the parent page. 100 * 101 * If the insert into the parent page causes it to split, may have to 102 * continue splitting all the way up the tree. We stop if the root 103 * splits or the page inserted into didn't have to split to hold the 104 * new key. Some algorithms replace the key for the old page as well 105 * as the new page. We don't, as there's no reason to believe that the 106 * first key on the old page is any better than the key we have, and, 107 * in the case of a key being placed at index 0 causing the split, the 108 * key is unavailable. 109 * 110 * There are a maximum of 5 pages pinned at any time. We keep the left 111 * and right pages pinned while working on the parent. The 5 are the 112 * two children, left parent and right parent (when the parent splits) 113 * and the root page or the overflow key page when calling bt_preserve. 114 * This code must make sure that all pins are released other than the 115 * root page or overflow page which is unlocked elsewhere. 116 */ 117 for (nosplit = 0; (parent = BT_POP(t)) != NULL;) { 118 lchild = l; 119 rchild = r; 120 121 /* Get the parent page. */ 122 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 123 goto err2; 124 125 /* The new key goes ONE AFTER the index. */ 126 skip = parent->index + 1; 127 128 /* 129 * Calculate the space needed on the parent page. 130 * 131 * Space hack when inserting into BINTERNAL pages. Only need to 132 * retain the number of bytes that will distinguish between the 133 * new entry and the LAST entry on the page to its left. If the 134 * keys compare equal, retain the entire key. Note, we don't 135 * touch overflow keys and the entire key must be retained for 136 * the next-to-leftmost key on the leftmost page of each level, 137 * or the search will fail. 138 */ 139 switch (rchild->flags & P_TYPE) { 140 case P_BINTERNAL: 141 bi = GETBINTERNAL(rchild, 0); 142 nbytes = NBINTERNAL(bi->ksize); 143 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 144 !(bi->flags & P_BIGKEY)) { 145 BINTERNAL *tbi; 146 tbi = 147 GETBINTERNAL(lchild, NEXTINDEX(lchild) - 1); 148 a.size = tbi->ksize; 149 a.data = tbi->bytes; 150 b.size = bi->ksize; 151 b.data = bi->bytes; 152 goto prefix; 153 } else 154 nksize = 0; 155 break; 156 case P_BLEAF: 157 bl = GETBLEAF(rchild, 0); 158 nbytes = NBINTERNAL(bl->ksize); 159 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 160 !(bl->flags & P_BIGKEY)) { 161 BLEAF *tbl; 162 size_t n; 163 164 tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); 165 a.size = tbl->ksize; 166 a.data = tbl->bytes; 167 b.size = bl->ksize; 168 b.data = bl->bytes; 169 prefix: nksize = t->bt_pfx(&a, &b); 170 n = NBINTERNAL(nksize); 171 if (n < nbytes) { 172 #ifdef STATISTICS 173 bt_pfxsaved += nbytes - n; 174 #endif 175 nbytes = n; 176 } else 177 nksize = 0; 178 } else 179 nksize = 0; 180 break; 181 case P_RINTERNAL: 182 case P_RLEAF: 183 nbytes = NRINTERNAL; 184 break; 185 default: 186 abort(); 187 } 188 189 /* Split the parent page if necessary or shift the indices. */ 190 if (h->upper - h->lower < nbytes + sizeof(index_t)) { 191 h = h->pgno == P_ROOT ? 192 bt_root(t, h, &l, &r, &skip) : 193 bt_page(t, h, &l, &r, &skip); 194 if (h == NULL) 195 goto err1; 196 } else { 197 if (skip < (nxtindex = NEXTINDEX(h))) 198 bcopy(h->linp + skip, h->linp + skip + 1, 199 (nxtindex - skip) * sizeof(index_t)); 200 h->lower += sizeof(index_t); 201 nosplit = 1; 202 } 203 204 /* Insert the key into the parent page. */ 205 switch(rchild->flags & P_TYPE) { 206 case P_BINTERNAL: 207 h->linp[skip] = h->upper -= nbytes; 208 dest = (char *)h + h->linp[skip]; 209 bcopy(bi, dest, nbytes); 210 if (nksize) 211 ((BINTERNAL *)dest)->ksize = nksize; 212 ((BINTERNAL *)dest)->pgno = rchild->pgno; 213 break; 214 case P_BLEAF: 215 h->linp[skip] = h->upper -= nbytes; 216 dest = (char *)h + h->linp[skip]; 217 WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, 218 rchild->pgno, bl->flags & P_BIGKEY); 219 bcopy(bl->bytes, dest, nksize ? nksize : bl->ksize); 220 if (bl->flags & P_BIGKEY && 221 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 222 goto err1; 223 break; 224 case P_RINTERNAL: 225 /* Update both left and right page counts. */ 226 h->linp[skip] = h->upper -= nbytes; 227 dest = (char *)h + h->linp[skip]; 228 ((RINTERNAL *)dest)->nrecs = rec_total(rchild); 229 ((RINTERNAL *)dest)->pgno = rchild->pgno; 230 dest = (char *)h + h->linp[skip - 1]; 231 ((RINTERNAL *)dest)->nrecs = rec_total(lchild); 232 ((RINTERNAL *)dest)->pgno = lchild->pgno; 233 break; 234 case P_RLEAF: 235 /* Update both left and right page counts. */ 236 h->linp[skip] = h->upper -= nbytes; 237 dest = (char *)h + h->linp[skip]; 238 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); 239 ((RINTERNAL *)dest)->pgno = rchild->pgno; 240 dest = (char *)h + h->linp[skip - 1]; 241 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); 242 ((RINTERNAL *)dest)->pgno = lchild->pgno; 243 break; 244 default: 245 abort(); 246 } 247 248 /* Unpin the held pages. */ 249 if (nosplit) { 250 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 251 break; 252 } 253 mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 254 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 255 } 256 257 /* Unpin the held pages. */ 258 mpool_put(t->bt_mp, l, MPOOL_DIRTY); 259 mpool_put(t->bt_mp, r, MPOOL_DIRTY); 260 261 /* Clear any pages left on the stack. */ 262 BT_CLR(t); 263 return (RET_SUCCESS); 264 265 /* 266 * If something fails in the above loop we were already walking back 267 * up the tree and the tree is now inconsistent. Nothing much we can 268 * do about it but release any memory we're holding. 269 */ 270 err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 271 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 272 273 err2: mpool_put(t->bt_mp, l, 0); 274 mpool_put(t->bt_mp, r, 0); 275 __dbpanic(t->bt_dbp); 276 return (RET_ERROR); 277 } 278 279 /* 280 * BT_PAGE -- Split a non-root page of a btree. 281 * 282 * Parameters: 283 * t: tree 284 * h: root page 285 * lp: pointer to left page pointer 286 * rp: pointer to right page pointer 287 * skip: pointer to index to leave open 288 * 289 * Returns: 290 * Pointer to page in which to insert or NULL on error. 291 */ 292 static PAGE * 293 bt_page(t, h, lp, rp, skip) 294 BTREE *t; 295 PAGE *h, **lp, **rp; 296 int *skip; 297 { 298 PAGE *l, *r, *tp; 299 pgno_t npg; 300 301 #ifdef STATISTICS 302 ++bt_split; 303 #endif 304 /* Put the new right page for the split into place. */ 305 if ((r = __bt_new(t, &npg)) == NULL) 306 return (NULL); 307 r->pgno = npg; 308 r->lower = BTDATAOFF; 309 r->upper = t->bt_psize; 310 r->nextpg = h->nextpg; 311 r->prevpg = h->pgno; 312 r->flags = h->flags & P_TYPE; 313 314 /* 315 * If we're splitting the last page on a level because we're appending 316 * a key to it (skip is NEXTINDEX()), it's likely that the data is 317 * sorted. Adding an empty page on the side of the level is less work 318 * and can push the fill factor much higher than normal. If we're 319 * wrong it's no big deal, we'll just do the split the right way next 320 * time. It may look like it's equally easy to do a similar hack for 321 * reverse sorted data, that is, split the tree left, but it's not. 322 * Don't even try. 323 */ 324 if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { 325 #ifdef STATISTICS 326 ++bt_sortsplit; 327 #endif 328 h->nextpg = r->pgno; 329 r->lower = BTDATAOFF + sizeof(index_t); 330 *skip = 0; 331 *lp = h; 332 *rp = r; 333 return (r); 334 } 335 336 /* Put the new left page for the split into place. */ 337 if ((l = malloc(t->bt_psize)) == NULL) { 338 mpool_put(t->bt_mp, r, 0); 339 return (NULL); 340 } 341 l->pgno = h->pgno; 342 l->nextpg = r->pgno; 343 l->prevpg = h->prevpg; 344 l->lower = BTDATAOFF; 345 l->upper = t->bt_psize; 346 l->flags = h->flags & P_TYPE; 347 348 /* Fix up the previous pointer of the page after the split page. */ 349 if (h->nextpg != P_INVALID) { 350 if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { 351 free(l); 352 /* XXX mpool_free(t->bt_mp, r->pgno); */ 353 return (NULL); 354 } 355 tp->prevpg = r->pgno; 356 mpool_put(t->bt_mp, tp, 0); 357 } 358 359 /* 360 * Split right. The key/data pairs aren't sorted in the btree page so 361 * it's simpler to copy the data from the split page onto two new pages 362 * instead of copying half the data to the right page and compacting 363 * the left page in place. Since the left page can't change, we have 364 * to swap the original and the allocated left page after the split. 365 */ 366 tp = bt_psplit(t, h, l, r, skip); 367 368 /* Move the new left page onto the old left page. */ 369 bcopy(l, h, t->bt_psize); 370 if (tp == l) 371 tp = h; 372 free(l); 373 374 *lp = h; 375 *rp = r; 376 return (tp); 377 } 378 379 /* 380 * BT_ROOT -- Split the root page of a btree. 381 * 382 * Parameters: 383 * t: tree 384 * h: root page 385 * lp: pointer to left page pointer 386 * rp: pointer to right page pointer 387 * skip: pointer to index to leave open 388 * 389 * Returns: 390 * Pointer to page in which to insert or NULL on error. 391 */ 392 static PAGE * 393 bt_root(t, h, lp, rp, skip) 394 BTREE *t; 395 PAGE *h, **lp, **rp; 396 int *skip; 397 { 398 PAGE *l, *r, *tp; 399 pgno_t lnpg, rnpg; 400 401 #ifdef STATISTICS 402 ++bt_split; 403 ++bt_rootsplit; 404 #endif 405 /* Put the new left and right pages for the split into place. */ 406 if ((l = __bt_new(t, &lnpg)) == NULL || 407 (r = __bt_new(t, &rnpg)) == NULL) 408 return (NULL); 409 l->pgno = lnpg; 410 r->pgno = rnpg; 411 l->nextpg = r->pgno; 412 r->prevpg = l->pgno; 413 l->prevpg = r->nextpg = P_INVALID; 414 l->lower = r->lower = BTDATAOFF; 415 l->upper = r->upper = t->bt_psize; 416 l->flags = r->flags = h->flags & P_TYPE; 417 418 /* Split the root page. */ 419 tp = bt_psplit(t, h, l, r, skip); 420 421 /* Make the root page look right. */ 422 if ((ISSET(t, BTF_RECNO) ? 423 bt_rroot(t, h, l, r) : bt_broot(t, h, l, r)) == RET_ERROR) 424 return (NULL); 425 426 *lp = l; 427 *rp = r; 428 return (tp); 429 } 430 431 /* 432 * BT_RROOT -- Fix up the recno root page after the split. 433 * 434 * Parameters: 435 * t: tree 436 * h: root page 437 * 438 * Returns: 439 * RET_ERROR, RET_SUCCESS 440 */ 441 static int 442 bt_rroot(t, h, l, r) 443 BTREE *t; 444 PAGE *h, *l, *r; 445 { 446 char *dest; 447 448 /* Insert the left and right keys, set the header information. */ 449 h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; 450 dest = (char *)h + h->upper; 451 WR_RINTERNAL(dest, 452 l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); 453 454 h->linp[1] = h->upper -= NRINTERNAL; 455 dest = (char *)h + h->upper; 456 WR_RINTERNAL(dest, 457 r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); 458 459 h->lower = BTDATAOFF + 2 * sizeof(index_t); 460 461 /* Unpin the root page, set to recno internal page. */ 462 h->flags &= ~P_TYPE; 463 h->flags |= P_RINTERNAL; 464 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 465 466 return (RET_SUCCESS); 467 } 468 469 /* 470 * BT_BROOT -- Fix up the btree root page after the split. 471 * 472 * Parameters: 473 * t: tree 474 * h: root page 475 * 476 * Returns: 477 * RET_ERROR, RET_SUCCESS 478 */ 479 static int 480 bt_broot(t, h, l, r) 481 BTREE *t; 482 PAGE *h, *l, *r; 483 { 484 BINTERNAL *bi; 485 BLEAF *bl; 486 size_t nbytes; 487 char *dest; 488 489 /* 490 * If the root page was a leaf page, change it into an internal page. 491 * We copy the key we split on (but not the key's data, in the case of 492 * a leaf page) to the new root page. 493 * 494 * The btree comparison code guarantees that the left-most key on any 495 * level of the tree is never used, so it doesn't need to be filled 496 * in. (This is not just convenience -- if the insert index is 0, we 497 * don't *have* a key to fill in.) The right key is available because 498 * the split code guarantees not to split on the skipped index. 499 */ 500 nbytes = NBINTERNAL(0); 501 h->linp[0] = h->upper = t->bt_psize - nbytes; 502 dest = (char *)h + h->upper; 503 WR_BINTERNAL(dest, 0, l->pgno, 0); 504 505 switch(h->flags & P_TYPE) { 506 case P_BLEAF: 507 bl = GETBLEAF(r, 0); 508 nbytes = NBINTERNAL(bl->ksize); 509 h->linp[1] = h->upper -= nbytes; 510 dest = (char *)h + h->upper; 511 WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); 512 bcopy(bl->bytes, dest, bl->ksize); 513 514 /* 515 * If the key is on an overflow page, mark the overflow chain 516 * so it isn't deleted when the leaf copy of the key is deleted. 517 */ 518 if (bl->flags & P_BIGKEY && 519 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 520 return (RET_ERROR); 521 break; 522 case P_BINTERNAL: 523 bi = GETBINTERNAL(r, 0); 524 nbytes = NBINTERNAL(bi->ksize); 525 h->linp[1] = h->upper -= nbytes; 526 dest = (char *)h + h->upper; 527 bcopy(bi, dest, nbytes); 528 ((BINTERNAL *)dest)->pgno = r->pgno; 529 break; 530 default: 531 abort(); 532 } 533 h->lower = BTDATAOFF + 2 * sizeof(index_t); 534 535 /* Unpin the root page, set to btree internal page. */ 536 h->flags &= ~P_TYPE; 537 h->flags |= P_BINTERNAL; 538 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 539 540 return (RET_SUCCESS); 541 } 542 543 /* 544 * BT_PSPLIT -- Do the real work of splitting the page. 545 * 546 * Parameters: 547 * t: tree 548 * h: page to be split 549 * l: page to put lower half of data 550 * r: page to put upper half of data 551 * skip: pointer to index to leave open 552 * 553 * Returns: 554 * Pointer to page in which to insert. 555 */ 556 static PAGE * 557 bt_psplit(t, h, l, r, pskip) 558 BTREE *t; 559 PAGE *h, *l, *r; 560 int *pskip; 561 { 562 BINTERNAL *bi; 563 BLEAF *bl; 564 RLEAF *rl; 565 EPGNO *c; 566 PAGE *rval; 567 index_t half, skip; 568 size_t nbytes; 569 void *src; 570 int bigkeycnt, isbigkey, nxt, off, top; 571 572 /* 573 * Split the data to the left and right pages. Leave the skip index 574 * open and guarantee that the split doesn't happen on that index (the 575 * right key must be available for the parent page). Additionally, 576 * make some effort not to split on an overflow key. This makes it 577 * faster to process internal pages and can save space since overflow 578 * keys used by internal pages are never deleted. 579 */ 580 bigkeycnt = 0; 581 skip = *pskip; 582 half = (t->bt_psize - BTDATAOFF) / 2; 583 for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { 584 if (skip == off) 585 continue; 586 switch (h->flags & P_TYPE) { 587 case P_BINTERNAL: 588 src = bi = GETBINTERNAL(h, nxt); 589 nbytes = NBINTERNAL(bi->ksize); 590 isbigkey = bi->flags & P_BIGKEY; 591 break; 592 case P_BLEAF: 593 src = bl = GETBLEAF(h, nxt); 594 nbytes = NBLEAF(bl); 595 isbigkey = bl->flags & P_BIGKEY; 596 break; 597 case P_RINTERNAL: 598 src = GETRINTERNAL(h, nxt); 599 nbytes = NRINTERNAL; 600 isbigkey = 0; 601 break; 602 case P_RLEAF: 603 src = rl = GETRLEAF(h, nxt); 604 nbytes = NRLEAF(rl); 605 isbigkey = 0; 606 break; 607 default: 608 abort(); 609 } 610 ++nxt; 611 l->linp[off] = l->upper -= nbytes; 612 bcopy(src, (char *)l + l->upper, nbytes); 613 614 /* There's no empirical justification for the '3'. */ 615 if (half < nbytes) { 616 if (skip != off + 1) 617 if (!isbigkey || bigkeycnt == 3) 618 break; 619 else 620 ++bigkeycnt; 621 } else 622 half -= nbytes; 623 } 624 l->lower += (off + 1) * sizeof(index_t); 625 626 /* 627 * If splitting the page that the cursor was on, the cursor has to be 628 * adjusted to point to the same record as before the split. If the 629 * skipped slot and the cursor are both on the left page and the cursor 630 * is on or past the skipped slot, the cursor is incremented by one. 631 * If the skipped slot and the cursor are both on the right page and 632 * the cursor is on or past the skipped slot, the cursor is incremented 633 * by one. If the skipped slot and the cursor aren't on the same page, 634 * the cursor isn't changed. Regardless of the relationship of the 635 * skipped slot and the cursor, if the cursor is on the right page it 636 * is decremented by the number of records split to the left page. 637 * 638 * Don't bother checking for the BTF_SEQINIT flag, the page number will 639 * be P_INVALID. 640 */ 641 c = &t->bt_bcursor; 642 if (c->pgno == h->pgno) 643 if (c->index < off) { /* left page */ 644 c->pgno = l->pgno; 645 if (c->index >= skip) 646 ++c->index; 647 } else { /* right page */ 648 c->pgno = r->pgno; 649 if (c->index >= skip && skip > off) 650 ++c->index; 651 c->index -= off; 652 } 653 654 /* 655 * Decide which page to return, and adjust the skip index if the 656 * to-be-inserted-upon page has changed. 657 */ 658 if (skip > off) { 659 rval = r; 660 *pskip -= off + 1; 661 } else 662 rval = l; 663 664 for (off = 0; nxt < top; ++off) { 665 if (skip == nxt) { 666 skip = 0; 667 continue; 668 } 669 switch (h->flags & P_TYPE) { 670 case P_BINTERNAL: 671 src = bi = GETBINTERNAL(h, nxt); 672 nbytes = NBINTERNAL(bi->ksize); 673 break; 674 case P_BLEAF: 675 src = bl = GETBLEAF(h, nxt); 676 nbytes = NBLEAF(bl); 677 break; 678 case P_RINTERNAL: 679 src = GETRINTERNAL(h, nxt); 680 nbytes = NRINTERNAL; 681 break; 682 case P_RLEAF: 683 src = rl = GETRLEAF(h, nxt); 684 nbytes = NRLEAF(rl); 685 break; 686 default: 687 abort(); 688 } 689 ++nxt; 690 r->linp[off] = r->upper -= nbytes; 691 bcopy(src, (char *)r + r->upper, nbytes); 692 } 693 r->lower += off * sizeof(index_t); 694 695 /* If the key is being appended to the page, adjust the index. */ 696 if (skip == top) 697 r->lower += sizeof(index_t); 698 699 return (rval); 700 } 701 702 /* 703 * BT_PRESERVE -- Mark a chain of pages as used by an internal node. 704 * 705 * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the 706 * record that references them gets deleted. Chains pointed to by internal 707 * pages never get deleted. This routine marks a chain as pointed to by an 708 * internal page. 709 * 710 * Parameters: 711 * t: tree 712 * pg: page number of first page in the chain. 713 * 714 * Returns: 715 * RET_SUCCESS, RET_ERROR. 716 */ 717 static int 718 bt_preserve(t, pg) 719 BTREE *t; 720 pgno_t pg; 721 { 722 PAGE *h; 723 724 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 725 return (RET_ERROR); 726 h->flags |= P_PRESERVE; 727 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 728 return (RET_SUCCESS); 729 } 730 731 /* 732 * REC_TOTAL -- Return the number of recno entries below a page. 733 * 734 * Parameters: 735 * h: page 736 * 737 * Returns: 738 * The number of recno entries below a page. 739 * 740 * XXX 741 * These values could be set by the bt_psplit routine. The problem is that the 742 * entry has to be popped off of the stack etc. or the values have to be passed 743 * all the way back to bt_split/bt_rroot and it's not very clean. 744 */ 745 static recno_t 746 rec_total(h) 747 PAGE *h; 748 { 749 recno_t recs; 750 index_t nxt, top; 751 752 for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) 753 recs += GETRINTERNAL(h, nxt)->nrecs; 754 return (recs); 755 } 756