1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_split.c 5.4 (Berkeley) 09/12/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #define __DBINTERFACE_PRIVATE 17 #include <db.h> 18 #include <limits.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include "btree.h" 23 24 static int bt_preserve __P((BTREE *, pgno_t)); 25 static PAGE *bt_psplit __P((BTREE *, PAGE *, PAGE *, PAGE *, int *)); 26 static PAGE *bt_page __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 27 static PAGE *bt_root __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 28 static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 29 static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 30 static recno_t rec_total __P((PAGE *)); 31 32 #ifdef STATISTICS 33 u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; 34 #endif 35 36 /* 37 * __BT_SPLIT -- Split the tree. 38 * 39 * Parameters: 40 * t: tree 41 * h: page to split 42 * key: key to insert 43 * data: data to insert 44 * flags: BIGKEY/BIGDATA flags 45 * nbytes: length of insertion 46 * skip: index to leave open 47 * 48 * Returns: 49 * RET_ERROR, RET_SUCCESS 50 */ 51 int 52 __bt_split(t, h, key, data, flags, nbytes, skip) 53 BTREE *t; 54 PAGE *h; 55 const DBT *key, *data; 56 u_long flags; 57 size_t nbytes; 58 int skip; 59 { 60 BINTERNAL *bi; 61 BLEAF *bl; 62 DBT a, b; 63 EPGNO *parent; 64 PAGE *l, *r, *lchild, *rchild; 65 index_t nxtindex; 66 size_t nksize; 67 int nosplit; 68 char *dest; 69 70 /* 71 * Split the page into two pages, l and r. The split routines return 72 * a pointer to the page into which the key should be inserted and skip 73 * set to the offset which should be used. Additionally, l and r are 74 * pinned. 75 */ 76 h = h->pgno == P_ROOT ? 77 bt_root(t, h, &l, &r, &skip) : bt_page(t, h, &l, &r, &skip); 78 if (h == NULL) 79 return (RET_ERROR); 80 81 /* 82 * Grab the space and insert the [rb]leaf structure. Always a [rb]leaf 83 * structure since key inserts always cause a leaf page to split first. 84 */ 85 h->linp[skip] = h->upper -= nbytes; 86 dest = (char *)h + h->upper; 87 if (ISSET(t, BTF_RECNO)) 88 WR_RLEAF(dest, data, flags) 89 else 90 WR_BLEAF(dest, key, data, flags) 91 92 /* 93 * Now we walk the parent page stack -- a LIFO stack of the pages that 94 * were traversed when we searched for the page that split. Each stack 95 * entry is a page number and a page index offset. The offset is for 96 * the page traversed on the search. We've just split a page, so we 97 * have to insert a new key into the parent page. 98 * 99 * If the insert into the parent page causes it to split, may have to 100 * continue splitting all the way up the tree. We stop if the root 101 * splits or the page inserted into didn't have to split to hold the 102 * new key. Some algorithms replace the key for the old page as well 103 * as the new page. We don't, as there's no reason to believe that the 104 * first key on the old page is any better than the key we have, and, 105 * in the case of a key being placed at index 0 causing the split, the 106 * key is unavailable. 107 * 108 * There are a maximum of 5 pages pinned at any time. We keep the left 109 * and right pages pinned while working on the parent. The 5 are the 110 * two children, left parent and right parent (when the parent splits) 111 * and the root page or the overflow key page when calling bt_preserve. 112 * This code must make sure that all pins are released other than the 113 * root page or overflow page which is unlocked elsewhere. 114 */ 115 for (nosplit = 0; (parent = BT_POP(t)) != NULL;) { 116 lchild = l; 117 rchild = r; 118 119 /* Get the parent page. */ 120 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 121 goto err2; 122 123 /* The new key goes ONE AFTER the index. */ 124 skip = parent->index + 1; 125 126 /* 127 * Calculate the space needed on the parent page. 128 * 129 * Space hack when inserting into BINTERNAL pages. Only need to 130 * retain the number of bytes that will distinguish between the 131 * new entry and the LAST entry on the page to its left. If the 132 * keys compare equal, retain the entire key. Note, we don't 133 * touch overflow keys and the entire key must be retained for 134 * the next-to-leftmost key on the leftmost page of each level, 135 * or the search will fail. 136 */ 137 switch (rchild->flags & P_TYPE) { 138 case P_BINTERNAL: 139 bi = GETBINTERNAL(rchild, 0); 140 nbytes = NBINTERNAL(bi->ksize); 141 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 142 !(bi->flags & P_BIGKEY)) { 143 BINTERNAL *tbi; 144 tbi = 145 GETBINTERNAL(lchild, NEXTINDEX(lchild) - 1); 146 a.size = tbi->ksize; 147 a.data = tbi->bytes; 148 b.size = bi->ksize; 149 b.data = bi->bytes; 150 goto prefix; 151 } 152 break; 153 case P_BLEAF: 154 bl = GETBLEAF(rchild, 0); 155 nbytes = NBLEAF(bl); 156 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 157 !(bl->flags & P_BIGKEY)) { 158 BLEAF *tbl; 159 size_t n; 160 161 tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); 162 a.size = tbl->ksize; 163 a.data = tbl->bytes; 164 b.size = bl->ksize; 165 b.data = bl->bytes; 166 prefix: nksize = t->bt_pfx(&a, &b); 167 n = NBINTERNAL(nksize); 168 if (n < nbytes) { 169 #ifdef STATISTICS 170 bt_pfxsaved += nbytes - n; 171 #endif 172 nbytes = n; 173 } else 174 nksize = 0; 175 } else 176 nksize = 0; 177 break; 178 case P_RINTERNAL: 179 case P_RLEAF: 180 nbytes = NRINTERNAL; 181 break; 182 } 183 184 /* Split the parent page if necessary or shift the indices. */ 185 if (h->upper - h->lower < nbytes + sizeof(index_t)) { 186 h = h->pgno == P_ROOT ? 187 bt_root(t, h, &l, &r, &skip) : 188 bt_page(t, h, &l, &r, &skip); 189 if (h == NULL) 190 goto err1; 191 } else { 192 if (skip < (nxtindex = NEXTINDEX(h))) 193 bcopy(h->linp + skip, h->linp + skip + 1, 194 (nxtindex - skip) * sizeof(index_t)); 195 h->lower += sizeof(index_t); 196 nosplit = 1; 197 } 198 199 /* Insert the key into the parent page. */ 200 switch(rchild->flags & P_TYPE) { 201 case P_BINTERNAL: 202 h->linp[skip] = h->upper -= nbytes; 203 dest = (char *)h + h->linp[skip]; 204 bcopy(bi, dest, nbytes); 205 if (nksize) 206 ((BINTERNAL *)dest)->ksize = nksize; 207 ((BINTERNAL *)dest)->pgno = rchild->pgno; 208 break; 209 case P_BLEAF: 210 h->linp[skip] = h->upper -= nbytes; 211 dest = (char *)h + h->linp[skip]; 212 WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, 213 rchild->pgno, bl->flags & P_BIGKEY); 214 bcopy(bl->bytes, dest, nksize ? nksize : bl->ksize); 215 if (bl->flags & P_BIGKEY && 216 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 217 goto err1; 218 break; 219 case P_RINTERNAL: 220 /* Update both left and right page counts. */ 221 h->linp[skip] = h->upper -= nbytes; 222 dest = (char *)h + h->linp[skip]; 223 ((RINTERNAL *)dest)->nrecs = rec_total(rchild); 224 ((RINTERNAL *)dest)->pgno = rchild->pgno; 225 dest = (char *)h + h->linp[skip - 1]; 226 ((RINTERNAL *)dest)->nrecs = rec_total(lchild); 227 ((RINTERNAL *)dest)->pgno = lchild->pgno; 228 break; 229 case P_RLEAF: 230 /* Update both left and right page counts. */ 231 h->linp[skip] = h->upper -= nbytes; 232 dest = (char *)h + h->linp[skip]; 233 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); 234 ((RINTERNAL *)dest)->pgno = rchild->pgno; 235 dest = (char *)h + h->linp[skip - 1]; 236 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); 237 ((RINTERNAL *)dest)->pgno = lchild->pgno; 238 break; 239 } 240 241 /* Unpin the held pages. */ 242 if (nosplit) { 243 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 244 break; 245 } 246 mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 247 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 248 } 249 250 /* Unpin the held pages. */ 251 mpool_put(t->bt_mp, l, MPOOL_DIRTY); 252 mpool_put(t->bt_mp, r, MPOOL_DIRTY); 253 254 /* Clear any pages left on the stack. */ 255 BT_CLR(t); 256 return (RET_SUCCESS); 257 258 /* 259 * If something fails in the above loop we were already walking back 260 * up the tree and the tree is now inconsistent. Nothing much we can 261 * do about it but release any memory we're holding. 262 */ 263 err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 264 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 265 266 err2: mpool_put(t->bt_mp, l, 0); 267 mpool_put(t->bt_mp, r, 0); 268 __dbpanic(t->bt_dbp); 269 return (RET_ERROR); 270 } 271 272 /* 273 * BT_PAGE -- Split a non-root page of a btree. 274 * 275 * Parameters: 276 * t: tree 277 * h: root page 278 * lp: pointer to left page pointer 279 * rp: pointer to right page pointer 280 * skip: pointer to index to leave open 281 * 282 * Returns: 283 * Pointer to page in which to insert or NULL on error. 284 */ 285 static PAGE * 286 bt_page(t, h, lp, rp, skip) 287 BTREE *t; 288 PAGE *h, **lp, **rp; 289 int *skip; 290 { 291 PAGE *l, *r, *tp; 292 pgno_t npg; 293 294 #ifdef STATISTICS 295 ++bt_split; 296 #endif 297 /* Put the new right page for the split into place. */ 298 if ((r = mpool_new(t->bt_mp, &npg)) == NULL) 299 return (NULL); 300 r->pgno = npg; 301 r->lower = BTDATAOFF; 302 r->upper = t->bt_psize; 303 r->nextpg = h->nextpg; 304 r->prevpg = h->pgno; 305 r->flags = h->flags & P_TYPE; 306 307 /* 308 * If we're splitting the last page on a level because we're appending 309 * a key to it (skip is NEXTINDEX()), it's likely that the data is 310 * sorted. Adding an empty page on the side of the level is less work 311 * and can push the fill factor much higher than normal. If we're 312 * wrong it's no big deal, we'll just do the split the right way next 313 * time. It may look like it's equally easy to do a similar hack for 314 * reverse sorted data, that is, split the tree left, but it's not. 315 * Don't even try. 316 */ 317 if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { 318 #ifdef STATISTICS 319 ++bt_sortsplit; 320 #endif 321 h->nextpg = r->pgno; 322 r->lower = BTDATAOFF + sizeof(index_t); 323 *skip = 0; 324 *lp = h; 325 *rp = r; 326 return (r); 327 } 328 329 /* Put the new left page for the split into place. */ 330 if ((l = malloc(t->bt_psize)) == NULL) { 331 mpool_put(t->bt_mp, r, 0); 332 return (NULL); 333 } 334 l->pgno = h->pgno; 335 l->nextpg = r->pgno; 336 l->prevpg = h->prevpg; 337 l->lower = BTDATAOFF; 338 l->upper = t->bt_psize; 339 l->flags = h->flags & P_TYPE; 340 341 /* Fix up the previous pointer of the page after the split page. */ 342 if (h->nextpg != P_INVALID) { 343 if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { 344 free(l); 345 /* XXX mpool_free(t->bt_mp, r->pgno); */ 346 return (NULL); 347 } 348 tp->prevpg = r->pgno; 349 mpool_put(t->bt_mp, tp, 0); 350 } 351 352 /* 353 * Split right. The key/data pairs aren't sorted in the btree page so 354 * it's simpler to copy the data from the split page onto two new pages 355 * instead of copying half the data to the right page and compacting 356 * the left page in place. Since the left page can't change, we have 357 * to swap the original and the allocated left page after the split. 358 */ 359 tp = bt_psplit(t, h, l, r, skip); 360 361 /* Move the new left page onto the old left page. */ 362 bcopy(l, h, t->bt_psize); 363 if (tp == l) 364 tp = h; 365 free(l); 366 367 *lp = h; 368 *rp = r; 369 return (tp); 370 } 371 372 /* 373 * BT_ROOT -- Split the root page of a btree. 374 * 375 * Parameters: 376 * t: tree 377 * h: root page 378 * lp: pointer to left page pointer 379 * rp: pointer to right page pointer 380 * skip: pointer to index to leave open 381 * 382 * Returns: 383 * Pointer to page in which to insert or NULL on error. 384 */ 385 static PAGE * 386 bt_root(t, h, lp, rp, skip) 387 BTREE *t; 388 PAGE *h, **lp, **rp; 389 int *skip; 390 { 391 PAGE *l, *r, *tp; 392 pgno_t lnpg, rnpg; 393 394 #ifdef STATISTICS 395 ++bt_split; 396 ++bt_rootsplit; 397 #endif 398 /* Put the new left and right pages for the split into place. */ 399 if ((l = mpool_new(t->bt_mp, &lnpg)) == NULL || 400 (r = mpool_new(t->bt_mp, &rnpg)) == NULL) 401 return (NULL); 402 l->pgno = lnpg; 403 r->pgno = rnpg; 404 l->nextpg = r->pgno; 405 r->prevpg = l->pgno; 406 l->prevpg = r->nextpg = P_INVALID; 407 l->lower = r->lower = BTDATAOFF; 408 l->upper = r->upper = t->bt_psize; 409 l->flags = r->flags = h->flags & P_TYPE; 410 411 /* Split the root page. */ 412 tp = bt_psplit(t, h, l, r, skip); 413 414 /* Make the root page look right. */ 415 if ((ISSET(t, BTF_RECNO) ? 416 bt_rroot(t, h, l, r) : bt_broot(t, h, l, r)) == RET_ERROR) 417 return (NULL); 418 419 *lp = l; 420 *rp = r; 421 return (tp); 422 } 423 424 /* 425 * BT_RROOT -- Fix up the recno root page after the split. 426 * 427 * Parameters: 428 * t: tree 429 * h: root page 430 * 431 * Returns: 432 * RET_ERROR, RET_SUCCESS 433 */ 434 static int 435 bt_rroot(t, h, l, r) 436 BTREE *t; 437 PAGE *h, *l, *r; 438 { 439 char *dest; 440 441 /* Insert the left and right keys, set the header information. */ 442 h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; 443 dest = (char *)h + h->upper; 444 WR_RINTERNAL(dest, 445 l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); 446 447 h->linp[1] = h->upper -= NRINTERNAL; 448 dest = (char *)h + h->upper; 449 WR_RINTERNAL(dest, 450 r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); 451 452 h->lower = BTDATAOFF + 2 * sizeof(index_t); 453 454 /* Unpin the root page, set to recno internal page. */ 455 h->flags &= ~P_TYPE; 456 h->flags |= P_RINTERNAL; 457 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 458 459 return (RET_SUCCESS); 460 } 461 462 /* 463 * BT_BROOT -- Fix up the btree root page after the split. 464 * 465 * Parameters: 466 * t: tree 467 * h: root page 468 * 469 * Returns: 470 * RET_ERROR, RET_SUCCESS 471 */ 472 static int 473 bt_broot(t, h, l, r) 474 BTREE *t; 475 PAGE *h, *l, *r; 476 { 477 BINTERNAL *bi; 478 BLEAF *bl; 479 size_t nbytes; 480 char *dest; 481 482 /* 483 * If the root page was a leaf page, change it into an internal page. 484 * We copy the key we split on (but not the key's data, in the case of 485 * a leaf page) to the new root page. If the key is on an overflow 486 * page, mark the overflow chain so it isn't deleted when the leaf copy 487 * of the key is deleted. 488 * 489 * The btree comparison code guarantees that the left-most key on any 490 * level of the tree is never used, so it doesn't need to be filled 491 * in. (This is not just convenience -- if the insert index is 0, we 492 * don't *have* a key to fill in.) The right key is available because 493 * the split code guarantees not to split on the skipped index. 494 */ 495 nbytes = LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char)); 496 h->linp[0] = h->upper = t->bt_psize - nbytes; 497 dest = (char *)h + h->upper; 498 WR_BINTERNAL(dest, 0, l->pgno, 0); 499 500 switch(h->flags & P_TYPE) { 501 case P_BLEAF: 502 bl = GETBLEAF(r, 0); 503 nbytes = NBINTERNAL(bl->ksize); 504 h->linp[1] = h->upper -= nbytes; 505 dest = (char *)h + h->upper; 506 WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); 507 bcopy(bl->bytes, dest, bl->ksize); 508 509 if (bl->flags & P_BIGKEY && 510 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 511 return (RET_ERROR); 512 break; 513 case P_BINTERNAL: 514 bi = GETBINTERNAL(r, 0); 515 nbytes = NBINTERNAL(bi->ksize); 516 h->linp[1] = h->upper -= nbytes; 517 dest = (char *)h + h->upper; 518 bcopy(bi, dest, nbytes); 519 ((BINTERNAL *)dest)->pgno = r->pgno; 520 break; 521 } 522 h->lower = BTDATAOFF + 2 * sizeof(index_t); 523 524 /* Unpin the root page, set to btree internal page. */ 525 h->flags &= ~P_TYPE; 526 h->flags |= P_BINTERNAL; 527 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 528 529 return (RET_SUCCESS); 530 } 531 532 /* 533 * BT_PSPLIT -- Do the real work of splitting the page. 534 * 535 * Parameters: 536 * t: tree 537 * h: page to be split 538 * l: page to put lower half of data 539 * r: page to put upper half of data 540 * skip: pointer to index to leave open 541 * 542 * Returns: 543 * Pointer to page in which to insert. 544 */ 545 static PAGE * 546 bt_psplit(t, h, l, r, pskip) 547 BTREE *t; 548 PAGE *h, *l, *r; 549 int *pskip; 550 { 551 BINTERNAL *bi; 552 BLEAF *bl; 553 RLEAF *rl; 554 EPGNO *c; 555 PAGE *rval; 556 index_t half, skip; 557 size_t nbytes; 558 void *src; 559 int bigkeycnt, isbigkey, nxt, off, top; 560 561 /* 562 * Split the data to the left and right pages. Leave the skip index 563 * open and guarantee that the split doesn't happen on that index (the 564 * right key must be available for the parent page). Additionally, 565 * make some effort not to split on an overflow key. This makes it 566 * faster to process internal pages and can save space since overflow 567 * keys used by internal pages are never deleted. 568 */ 569 bigkeycnt = 0; 570 skip = *pskip; 571 half = (t->bt_psize - BTDATAOFF) / 2; 572 for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { 573 if (skip == off) 574 continue; 575 switch (h->flags & P_TYPE) { 576 case P_BINTERNAL: 577 src = bi = GETBINTERNAL(h, nxt); 578 nbytes = NBINTERNAL(bi->ksize); 579 isbigkey = bi->flags & P_BIGKEY; 580 break; 581 case P_BLEAF: 582 src = bl = GETBLEAF(h, nxt); 583 nbytes = NBLEAF(bl); 584 isbigkey = bl->flags & P_BIGKEY; 585 break; 586 case P_RINTERNAL: 587 src = GETRINTERNAL(h, nxt); 588 nbytes = NRINTERNAL; 589 isbigkey = 0; 590 break; 591 case P_RLEAF: 592 src = rl = GETRLEAF(h, nxt); 593 nbytes = NRLEAF(rl); 594 isbigkey = 0; 595 break; 596 } 597 ++nxt; 598 l->linp[off] = l->upper -= nbytes; 599 bcopy(src, (char *)l + l->upper, nbytes); 600 601 /* There's no empirical justification for the '3'. */ 602 if (half < nbytes) 603 if (!isbigkey || bigkeycnt == 3) 604 break; 605 else 606 ++bigkeycnt; 607 else 608 half -= nbytes; 609 } 610 l->lower += (off + 1) * sizeof(index_t); 611 612 /* 613 * If splitting the page that the cursor was on, the cursor has to be 614 * adjusted to point to the same record as before the split. If the 615 * skipped slot and the cursor are both on the left page and the cursor 616 * is on or past the skipped slot, the cursor is incremented by one. 617 * If the skipped slot and the cursor are both on the right page and 618 * the cursor is on or past the skipped slot, the cursor is incremented 619 * by one. If the skipped slot and the cursor aren't on the same page, 620 * the cursor isn't changed. Regardless of the relationship of the 621 * skipped slot and the cursor, if the cursor is on the right page it 622 * is decremented by the number of records split to the left page. 623 * 624 * Don't bother checking for the BTF_SEQINIT flag, the page number will 625 * be P_INVALID. 626 */ 627 c = &t->bt_bcursor; 628 if (c->pgno == h->pgno) 629 if (c->index < off) { /* left page */ 630 c->pgno = l->pgno; 631 if (c->index >= skip) 632 ++c->index; 633 } else { /* right page */ 634 c->pgno = r->pgno; 635 if (c->index >= skip && skip > off) 636 ++c->index; 637 c->index -= off; 638 } 639 640 /* 641 * Decide which page to return, and adjust the skip index if the 642 * to-be-inserted-upon page has changed. 643 */ 644 if (skip > off) { 645 rval = r; 646 *pskip -= off + 1; 647 } else 648 rval = l; 649 650 for (off = 0; nxt < top; ++off) { 651 if (skip == nxt) { 652 skip = 0; 653 continue; 654 } 655 switch (h->flags & P_TYPE) { 656 case P_BINTERNAL: 657 src = bi = GETBINTERNAL(h, nxt); 658 nbytes = NBINTERNAL(bi->ksize); 659 break; 660 case P_BLEAF: 661 src = bl = GETBLEAF(h, nxt); 662 nbytes = NBLEAF(bl); 663 break; 664 case P_RINTERNAL: 665 src = GETRINTERNAL(h, nxt); 666 nbytes = NRINTERNAL; 667 break; 668 case P_RLEAF: 669 src = rl = GETRLEAF(h, nxt); 670 nbytes = NRLEAF(rl); 671 break; 672 } 673 ++nxt; 674 r->linp[off] = r->upper -= nbytes; 675 bcopy(src, (char *)r + r->upper, nbytes); 676 } 677 r->lower += off * sizeof(index_t); 678 679 /* If the key is being appended to the page, adjust the index. */ 680 if (skip == top) 681 r->lower += sizeof(index_t); 682 683 return (rval); 684 } 685 686 /* 687 * BT_PRESERVE -- Mark a chain of pages as used by an internal node. 688 * 689 * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the 690 * record that references them gets deleted. Chains pointed to by internal 691 * pages never get deleted. This routine marks a chain as pointed to by an 692 * internal page. 693 * 694 * Parameters: 695 * t: tree 696 * pg: page number of first page in the chain. 697 * 698 * Returns: 699 * RET_SUCCESS, RET_ERROR. 700 */ 701 static int 702 bt_preserve(t, pg) 703 BTREE *t; 704 pgno_t pg; 705 { 706 PAGE *h; 707 708 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 709 return (RET_ERROR); 710 h->flags |= P_PRESERVE; 711 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 712 return (RET_SUCCESS); 713 } 714 715 /* 716 * REC_TOTAL -- Return the number of recno entries below a page. 717 * 718 * Parameters: 719 * h: page 720 * 721 * Returns: 722 * The number of recno entries below a page. 723 * 724 * XXX 725 * These values could be set by the bt_psplit routine. The problem is that the 726 * entry has to be popped off of the stack etc. or the values have to be passed 727 * all the way back to bt_split/bt_rroot and it's not very clean. 728 */ 729 static recno_t 730 rec_total(h) 731 PAGE *h; 732 { 733 recno_t recs; 734 index_t nxt, top; 735 736 for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) 737 recs += GETRINTERNAL(h, nxt)->nrecs; 738 return (recs); 739 } 740