1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_split.c 5.15 (Berkeley) 02/19/93"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 17 #define __DBINTERFACE_PRIVATE 18 #include <limits.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include <db.h> 24 #include "btree.h" 25 26 static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 27 static PAGE *bt_page 28 __P((BTREE *, PAGE *, PAGE **, PAGE **, u_int *, size_t)); 29 static int bt_preserve __P((BTREE *, pgno_t)); 30 static PAGE *bt_psplit 31 __P((BTREE *, PAGE *, PAGE *, PAGE *, u_int *, size_t)); 32 static PAGE *bt_root 33 __P((BTREE *, PAGE *, PAGE **, PAGE **, u_int *, size_t)); 34 static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 35 static recno_t rec_total __P((PAGE *)); 36 37 #ifdef STATISTICS 38 u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; 39 #endif 40 41 /* 42 * __BT_SPLIT -- Split the tree. 43 * 44 * Parameters: 45 * t: tree 46 * sp: page to split 47 * key: key to insert 48 * data: data to insert 49 * flags: BIGKEY/BIGDATA flags 50 * ilen: insert length 51 * skip: index to leave open 52 * 53 * Returns: 54 * RET_ERROR, RET_SUCCESS 55 */ 56 int 57 __bt_split(t, sp, key, data, flags, ilen, skip) 58 BTREE *t; 59 PAGE *sp; 60 const DBT *key, *data; 61 u_long flags; 62 size_t ilen; 63 u_int skip; 64 { 65 BINTERNAL *bi; 66 BLEAF *bl, *tbl; 67 DBT a, b; 68 EPGNO *parent; 69 PAGE *h, *l, *r, *lchild, *rchild; 70 indx_t nxtindex; 71 size_t n, nbytes, nksize; 72 int nosplit; 73 char *dest; 74 75 /* 76 * Split the page into two pages, l and r. The split routines return 77 * a pointer to the page into which the key should be inserted and with 78 * skip set to the offset which should be used. Additionally, l and r 79 * are pinned. 80 */ 81 h = sp->pgno == P_ROOT ? 82 bt_root(t, sp, &l, &r, &skip, ilen) : 83 bt_page(t, sp, &l, &r, &skip, ilen); 84 if (h == NULL) 85 return (RET_ERROR); 86 87 /* 88 * Insert the new key/data pair into the leaf page. (Key inserts 89 * always cause a leaf page to split first.) 90 */ 91 h->linp[skip] = h->upper -= ilen; 92 dest = (char *)h + h->upper; 93 if (ISSET(t, BTF_RECNO)) 94 WR_RLEAF(dest, data, flags) 95 else 96 WR_BLEAF(dest, key, data, flags) 97 98 /* If the root page was split, make it look right. */ 99 if (sp->pgno == P_ROOT && 100 (ISSET(t, BTF_RECNO) ? 101 bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) 102 goto err2; 103 104 /* 105 * Now we walk the parent page stack -- a LIFO stack of the pages that 106 * were traversed when we searched for the page that split. Each stack 107 * entry is a page number and a page index offset. The offset is for 108 * the page traversed on the search. We've just split a page, so we 109 * have to insert a new key into the parent page. 110 * 111 * If the insert into the parent page causes it to split, may have to 112 * continue splitting all the way up the tree. We stop if the root 113 * splits or the page inserted into didn't have to split to hold the 114 * new key. Some algorithms replace the key for the old page as well 115 * as the new page. We don't, as there's no reason to believe that the 116 * first key on the old page is any better than the key we have, and, 117 * in the case of a key being placed at index 0 causing the split, the 118 * key is unavailable. 119 * 120 * There are a maximum of 5 pages pinned at any time. We keep the left 121 * and right pages pinned while working on the parent. The 5 are the 122 * two children, left parent and right parent (when the parent splits) 123 * and the root page or the overflow key page when calling bt_preserve. 124 * This code must make sure that all pins are released other than the 125 * root page or overflow page which is unlocked elsewhere. 126 */ 127 for (nosplit = 0; (parent = BT_POP(t)) != NULL;) { 128 lchild = l; 129 rchild = r; 130 131 /* Get the parent page. */ 132 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 133 goto err2; 134 135 /* 136 * The new key goes ONE AFTER the index, because the split 137 * was to the right. 138 */ 139 skip = parent->index + 1; 140 141 /* 142 * Calculate the space needed on the parent page. 143 * 144 * Prefix trees: space hack when inserting into BINTERNAL 145 * pages. Retain only what's needed to distinguish between 146 * the new entry and the LAST entry on the page to its left. 147 * If the keys compare equal, retain the entire key. Note, 148 * we don't touch overflow keys, and the entire key must be 149 * retained for the next-to-left most key on the leftmost 150 * page of each level, or the search will fail. Applicable 151 * ONLY to internal pages that have leaf pages as children. 152 * Further reduction of the key between pairs of internal 153 * pages loses too much information. 154 */ 155 switch (rchild->flags & P_TYPE) { 156 case P_BINTERNAL: 157 bi = GETBINTERNAL(rchild, 0); 158 nbytes = NBINTERNAL(bi->ksize); 159 break; 160 case P_BLEAF: 161 bl = GETBLEAF(rchild, 0); 162 nbytes = NBINTERNAL(bl->ksize); 163 if (t->bt_pfx && !(bl->flags & P_BIGKEY) && 164 (h->prevpg != P_INVALID || skip > 1)) { 165 tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); 166 a.size = tbl->ksize; 167 a.data = tbl->bytes; 168 b.size = bl->ksize; 169 b.data = bl->bytes; 170 nksize = t->bt_pfx(&a, &b); 171 n = NBINTERNAL(nksize); 172 if (n < nbytes) { 173 #ifdef STATISTICS 174 bt_pfxsaved += nbytes - n; 175 #endif 176 nbytes = n; 177 } else 178 nksize = 0; 179 } else 180 nksize = 0; 181 break; 182 case P_RINTERNAL: 183 case P_RLEAF: 184 nbytes = NRINTERNAL; 185 break; 186 default: 187 abort(); 188 } 189 190 /* Split the parent page if necessary or shift the indices. */ 191 if (h->upper - h->lower < nbytes + sizeof(indx_t)) { 192 sp = h; 193 h = h->pgno == P_ROOT ? 194 bt_root(t, h, &l, &r, &skip, nbytes) : 195 bt_page(t, h, &l, &r, &skip, nbytes); 196 if (h == NULL) 197 goto err1; 198 } else { 199 if (skip < (nxtindex = NEXTINDEX(h))) 200 memmove(h->linp + skip + 1, h->linp + skip, 201 (nxtindex - skip) * sizeof(indx_t)); 202 h->lower += sizeof(indx_t); 203 nosplit = 1; 204 } 205 206 /* Insert the key into the parent page. */ 207 switch(rchild->flags & P_TYPE) { 208 case P_BINTERNAL: 209 h->linp[skip] = h->upper -= nbytes; 210 dest = (char *)h + h->linp[skip]; 211 memmove(dest, bi, nbytes); 212 ((BINTERNAL *)dest)->pgno = rchild->pgno; 213 break; 214 case P_BLEAF: 215 h->linp[skip] = h->upper -= nbytes; 216 dest = (char *)h + h->linp[skip]; 217 WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, 218 rchild->pgno, bl->flags & P_BIGKEY); 219 memmove(dest, bl->bytes, nksize ? nksize : bl->ksize); 220 if (bl->flags & P_BIGKEY && 221 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 222 goto err1; 223 break; 224 case P_RINTERNAL: 225 /* Update both left and right page counts. */ 226 h->linp[skip] = h->upper -= nbytes; 227 dest = (char *)h + h->linp[skip]; 228 ((RINTERNAL *)dest)->nrecs = rec_total(rchild); 229 ((RINTERNAL *)dest)->pgno = rchild->pgno; 230 dest = (char *)h + h->linp[skip - 1]; 231 ((RINTERNAL *)dest)->nrecs = rec_total(lchild); 232 ((RINTERNAL *)dest)->pgno = lchild->pgno; 233 break; 234 case P_RLEAF: 235 /* Update both left and right page counts. */ 236 h->linp[skip] = h->upper -= nbytes; 237 dest = (char *)h + h->linp[skip]; 238 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); 239 ((RINTERNAL *)dest)->pgno = rchild->pgno; 240 dest = (char *)h + h->linp[skip - 1]; 241 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); 242 ((RINTERNAL *)dest)->pgno = lchild->pgno; 243 break; 244 default: 245 abort(); 246 } 247 248 /* Unpin the held pages. */ 249 if (nosplit) { 250 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 251 break; 252 } 253 254 /* If the root page was split, make it look right. */ 255 if (sp->pgno == P_ROOT && 256 (ISSET(t, BTF_RECNO) ? 257 bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) 258 goto err1; 259 260 mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 261 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 262 } 263 264 /* Unpin the held pages. */ 265 mpool_put(t->bt_mp, l, MPOOL_DIRTY); 266 mpool_put(t->bt_mp, r, MPOOL_DIRTY); 267 268 /* Clear any pages left on the stack. */ 269 return (RET_SUCCESS); 270 271 /* 272 * If something fails in the above loop we were already walking back 273 * up the tree and the tree is now inconsistent. Nothing much we can 274 * do about it but release any memory we're holding. 275 */ 276 err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 277 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 278 279 err2: mpool_put(t->bt_mp, l, 0); 280 mpool_put(t->bt_mp, r, 0); 281 __dbpanic(t->bt_dbp); 282 return (RET_ERROR); 283 } 284 285 /* 286 * BT_PAGE -- Split a non-root page of a btree. 287 * 288 * Parameters: 289 * t: tree 290 * h: root page 291 * lp: pointer to left page pointer 292 * rp: pointer to right page pointer 293 * skip: pointer to index to leave open 294 * ilen: insert length 295 * 296 * Returns: 297 * Pointer to page in which to insert or NULL on error. 298 */ 299 static PAGE * 300 bt_page(t, h, lp, rp, skip, ilen) 301 BTREE *t; 302 PAGE *h, **lp, **rp; 303 u_int *skip; 304 size_t ilen; 305 { 306 PAGE *l, *r, *tp; 307 pgno_t npg; 308 309 #ifdef STATISTICS 310 ++bt_split; 311 #endif 312 /* Put the new right page for the split into place. */ 313 if ((r = __bt_new(t, &npg)) == NULL) 314 return (NULL); 315 r->pgno = npg; 316 r->lower = BTDATAOFF; 317 r->upper = t->bt_psize; 318 r->nextpg = h->nextpg; 319 r->prevpg = h->pgno; 320 r->flags = h->flags & P_TYPE; 321 322 /* 323 * If we're splitting the last page on a level because we're appending 324 * a key to it (skip is NEXTINDEX()), it's likely that the data is 325 * sorted. Adding an empty page on the side of the level is less work 326 * and can push the fill factor much higher than normal. If we're 327 * wrong it's no big deal, we'll just do the split the right way next 328 * time. It may look like it's equally easy to do a similar hack for 329 * reverse sorted data, that is, split the tree left, but it's not. 330 * Don't even try. 331 */ 332 if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { 333 #ifdef STATISTICS 334 ++bt_sortsplit; 335 #endif 336 h->nextpg = r->pgno; 337 r->lower = BTDATAOFF + sizeof(indx_t); 338 *skip = 0; 339 *lp = h; 340 *rp = r; 341 return (r); 342 } 343 344 /* Put the new left page for the split into place. */ 345 if ((l = malloc(t->bt_psize)) == NULL) { 346 mpool_put(t->bt_mp, r, 0); 347 return (NULL); 348 } 349 l->pgno = h->pgno; 350 l->nextpg = r->pgno; 351 l->prevpg = h->prevpg; 352 l->lower = BTDATAOFF; 353 l->upper = t->bt_psize; 354 l->flags = h->flags & P_TYPE; 355 356 /* Fix up the previous pointer of the page after the split page. */ 357 if (h->nextpg != P_INVALID) { 358 if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { 359 free(l); 360 /* XXX mpool_free(t->bt_mp, r->pgno); */ 361 return (NULL); 362 } 363 tp->prevpg = r->pgno; 364 mpool_put(t->bt_mp, tp, 0); 365 } 366 367 /* 368 * Split right. The key/data pairs aren't sorted in the btree page so 369 * it's simpler to copy the data from the split page onto two new pages 370 * instead of copying half the data to the right page and compacting 371 * the left page in place. Since the left page can't change, we have 372 * to swap the original and the allocated left page after the split. 373 */ 374 tp = bt_psplit(t, h, l, r, skip, ilen); 375 376 /* Move the new left page onto the old left page. */ 377 memmove(h, l, t->bt_psize); 378 if (tp == l) 379 tp = h; 380 free(l); 381 382 *lp = h; 383 *rp = r; 384 return (tp); 385 } 386 387 /* 388 * BT_ROOT -- Split the root page of a btree. 389 * 390 * Parameters: 391 * t: tree 392 * h: root page 393 * lp: pointer to left page pointer 394 * rp: pointer to right page pointer 395 * skip: pointer to index to leave open 396 * ilen: insert length 397 * 398 * Returns: 399 * Pointer to page in which to insert or NULL on error. 400 */ 401 static PAGE * 402 bt_root(t, h, lp, rp, skip, ilen) 403 BTREE *t; 404 PAGE *h, **lp, **rp; 405 u_int *skip; 406 size_t ilen; 407 { 408 PAGE *l, *r, *tp; 409 pgno_t lnpg, rnpg; 410 411 #ifdef STATISTICS 412 ++bt_split; 413 ++bt_rootsplit; 414 #endif 415 /* Put the new left and right pages for the split into place. */ 416 if ((l = __bt_new(t, &lnpg)) == NULL || 417 (r = __bt_new(t, &rnpg)) == NULL) 418 return (NULL); 419 l->pgno = lnpg; 420 r->pgno = rnpg; 421 l->nextpg = r->pgno; 422 r->prevpg = l->pgno; 423 l->prevpg = r->nextpg = P_INVALID; 424 l->lower = r->lower = BTDATAOFF; 425 l->upper = r->upper = t->bt_psize; 426 l->flags = r->flags = h->flags & P_TYPE; 427 428 /* Split the root page. */ 429 tp = bt_psplit(t, h, l, r, skip, ilen); 430 431 *lp = l; 432 *rp = r; 433 return (tp); 434 } 435 436 /* 437 * BT_RROOT -- Fix up the recno root page after it has been split. 438 * 439 * Parameters: 440 * t: tree 441 * h: root page 442 * l: left page 443 * r: right page 444 * 445 * Returns: 446 * RET_ERROR, RET_SUCCESS 447 */ 448 static int 449 bt_rroot(t, h, l, r) 450 BTREE *t; 451 PAGE *h, *l, *r; 452 { 453 char *dest; 454 455 /* Insert the left and right keys, set the header information. */ 456 h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; 457 dest = (char *)h + h->upper; 458 WR_RINTERNAL(dest, 459 l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); 460 461 h->linp[1] = h->upper -= NRINTERNAL; 462 dest = (char *)h + h->upper; 463 WR_RINTERNAL(dest, 464 r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); 465 466 h->lower = BTDATAOFF + 2 * sizeof(indx_t); 467 468 /* Unpin the root page, set to recno internal page. */ 469 h->flags &= ~P_TYPE; 470 h->flags |= P_RINTERNAL; 471 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 472 473 return (RET_SUCCESS); 474 } 475 476 /* 477 * BT_BROOT -- Fix up the btree root page after it has been split. 478 * 479 * Parameters: 480 * t: tree 481 * h: root page 482 * l: left page 483 * r: right page 484 * 485 * Returns: 486 * RET_ERROR, RET_SUCCESS 487 */ 488 static int 489 bt_broot(t, h, l, r) 490 BTREE *t; 491 PAGE *h, *l, *r; 492 { 493 BINTERNAL *bi; 494 BLEAF *bl; 495 size_t nbytes; 496 char *dest; 497 498 /* 499 * If the root page was a leaf page, change it into an internal page. 500 * We copy the key we split on (but not the key's data, in the case of 501 * a leaf page) to the new root page. 502 * 503 * The btree comparison code guarantees that the left-most key on any 504 * level of the tree is never used, so it doesn't need to be filled in. 505 */ 506 nbytes = NBINTERNAL(0); 507 h->linp[0] = h->upper = t->bt_psize - nbytes; 508 dest = (char *)h + h->upper; 509 WR_BINTERNAL(dest, 0, l->pgno, 0); 510 511 switch(h->flags & P_TYPE) { 512 case P_BLEAF: 513 bl = GETBLEAF(r, 0); 514 nbytes = NBINTERNAL(bl->ksize); 515 h->linp[1] = h->upper -= nbytes; 516 dest = (char *)h + h->upper; 517 WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); 518 memmove(dest, bl->bytes, bl->ksize); 519 520 /* 521 * If the key is on an overflow page, mark the overflow chain 522 * so it isn't deleted when the leaf copy of the key is deleted. 523 */ 524 if (bl->flags & P_BIGKEY && 525 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 526 return (RET_ERROR); 527 break; 528 case P_BINTERNAL: 529 bi = GETBINTERNAL(r, 0); 530 nbytes = NBINTERNAL(bi->ksize); 531 h->linp[1] = h->upper -= nbytes; 532 dest = (char *)h + h->upper; 533 memmove(dest, bi, nbytes); 534 ((BINTERNAL *)dest)->pgno = r->pgno; 535 break; 536 default: 537 abort(); 538 } 539 540 /* There are two keys on the page. */ 541 h->lower = BTDATAOFF + 2 * sizeof(indx_t); 542 543 /* Unpin the root page, set to btree internal page. */ 544 h->flags &= ~P_TYPE; 545 h->flags |= P_BINTERNAL; 546 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 547 548 return (RET_SUCCESS); 549 } 550 551 /* 552 * BT_PSPLIT -- Do the real work of splitting the page. 553 * 554 * Parameters: 555 * t: tree 556 * h: page to be split 557 * l: page to put lower half of data 558 * r: page to put upper half of data 559 * pskip: pointer to index to leave open 560 * ilen: insert length 561 * 562 * Returns: 563 * Pointer to page in which to insert. 564 */ 565 static PAGE * 566 bt_psplit(t, h, l, r, pskip, ilen) 567 BTREE *t; 568 PAGE *h, *l, *r; 569 u_int *pskip; 570 size_t ilen; 571 { 572 BINTERNAL *bi; 573 BLEAF *bl; 574 RLEAF *rl; 575 EPGNO *c; 576 PAGE *rval; 577 void *src; 578 indx_t full, half, nxt, off, skip, top, used; 579 size_t nbytes; 580 int bigkeycnt, isbigkey; 581 582 /* 583 * Split the data to the left and right pages. Leave the skip index 584 * open. Additionally, make some effort not to split on an overflow 585 * key. This makes internal page processing faster and can save 586 * space as overflow keys used by internal pages are never deleted. 587 */ 588 bigkeycnt = 0; 589 skip = *pskip; 590 full = t->bt_psize - BTDATAOFF; 591 half = full / 2; 592 used = 0; 593 for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { 594 if (skip == off) { 595 nbytes = ilen; 596 isbigkey = 0; /* XXX: not really known. */ 597 } else 598 switch (h->flags & P_TYPE) { 599 case P_BINTERNAL: 600 src = bi = GETBINTERNAL(h, nxt); 601 nbytes = NBINTERNAL(bi->ksize); 602 isbigkey = bi->flags & P_BIGKEY; 603 break; 604 case P_BLEAF: 605 src = bl = GETBLEAF(h, nxt); 606 nbytes = NBLEAF(bl); 607 isbigkey = bl->flags & P_BIGKEY; 608 break; 609 case P_RINTERNAL: 610 src = GETRINTERNAL(h, nxt); 611 nbytes = NRINTERNAL; 612 isbigkey = 0; 613 break; 614 case P_RLEAF: 615 src = rl = GETRLEAF(h, nxt); 616 nbytes = NRLEAF(rl); 617 isbigkey = 0; 618 break; 619 default: 620 abort(); 621 } 622 623 /* 624 * If the key/data pairs are substantial fractions of the max 625 * possible size for the page, it's possible to get situations 626 * where we decide to try and copy too much onto the left page. 627 * Make sure that doesn't happen. 628 */ 629 if (skip <= off && used + nbytes >= full) { 630 --off; 631 break; 632 } 633 634 /* Copy the key/data pair, if not the skipped index. */ 635 if (skip != off) { 636 ++nxt; 637 638 l->linp[off] = l->upper -= nbytes; 639 memmove((char *)l + l->upper, src, nbytes); 640 } 641 642 used += nbytes; 643 if (used >= half) { 644 if (!isbigkey || bigkeycnt == 3) 645 break; 646 else 647 ++bigkeycnt; 648 } 649 } 650 651 /* 652 * Off is the last offset that's valid for the left page. 653 * Nxt is the first offset to be placed on the right page. 654 */ 655 l->lower += (off + 1) * sizeof(indx_t); 656 657 /* 658 * If splitting the page that the cursor was on, the cursor has to be 659 * adjusted to point to the same record as before the split. If the 660 * cursor is at or past the skipped slot, the cursor is incremented by 661 * one. If the cursor is on the right page, it is decremented by the 662 * number of records split to the left page. 663 * 664 * Don't bother checking for the BTF_SEQINIT flag, the page number will 665 * be P_INVALID. 666 */ 667 c = &t->bt_bcursor; 668 if (c->pgno == h->pgno) { 669 if (c->index >= skip) 670 ++c->index; 671 if (c->index < nxt) /* Left page. */ 672 c->pgno = l->pgno; 673 else { /* Right page. */ 674 c->pgno = r->pgno; 675 c->index -= nxt; 676 } 677 } 678 679 /* 680 * If the skipped index was on the left page, just return that page. 681 * Otherwise, adjust the skip index to reflect the new position on 682 * the right page. 683 */ 684 if (skip <= off) { 685 skip = 0; 686 rval = l; 687 } else { 688 rval = r; 689 *pskip -= nxt; 690 } 691 692 for (off = 0; nxt < top; ++off) { 693 if (skip == nxt) { 694 ++off; 695 skip = 0; 696 } 697 switch (h->flags & P_TYPE) { 698 case P_BINTERNAL: 699 src = bi = GETBINTERNAL(h, nxt); 700 nbytes = NBINTERNAL(bi->ksize); 701 break; 702 case P_BLEAF: 703 src = bl = GETBLEAF(h, nxt); 704 nbytes = NBLEAF(bl); 705 break; 706 case P_RINTERNAL: 707 src = GETRINTERNAL(h, nxt); 708 nbytes = NRINTERNAL; 709 break; 710 case P_RLEAF: 711 src = rl = GETRLEAF(h, nxt); 712 nbytes = NRLEAF(rl); 713 break; 714 default: 715 abort(); 716 } 717 ++nxt; 718 r->linp[off] = r->upper -= nbytes; 719 memmove((char *)r + r->upper, src, nbytes); 720 } 721 r->lower += off * sizeof(indx_t); 722 723 /* If the key is being appended to the page, adjust the index. */ 724 if (skip == top) 725 r->lower += sizeof(indx_t); 726 727 return (rval); 728 } 729 730 /* 731 * BT_PRESERVE -- Mark a chain of pages as used by an internal node. 732 * 733 * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the 734 * record that references them gets deleted. Chains pointed to by internal 735 * pages never get deleted. This routine marks a chain as pointed to by an 736 * internal page. 737 * 738 * Parameters: 739 * t: tree 740 * pg: page number of first page in the chain. 741 * 742 * Returns: 743 * RET_SUCCESS, RET_ERROR. 744 */ 745 static int 746 bt_preserve(t, pg) 747 BTREE *t; 748 pgno_t pg; 749 { 750 PAGE *h; 751 752 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 753 return (RET_ERROR); 754 h->flags |= P_PRESERVE; 755 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 756 return (RET_SUCCESS); 757 } 758 759 /* 760 * REC_TOTAL -- Return the number of recno entries below a page. 761 * 762 * Parameters: 763 * h: page 764 * 765 * Returns: 766 * The number of recno entries below a page. 767 * 768 * XXX 769 * These values could be set by the bt_psplit routine. The problem is that the 770 * entry has to be popped off of the stack etc. or the values have to be passed 771 * all the way back to bt_split/bt_rroot and it's not very clean. 772 */ 773 static recno_t 774 rec_total(h) 775 PAGE *h; 776 { 777 recno_t recs; 778 indx_t nxt, top; 779 780 for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) 781 recs += GETRINTERNAL(h, nxt)->nrecs; 782 return (recs); 783 } 784