1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_split.c 5.3 (Berkeley) 09/04/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #define __DBINTERFACE_PRIVATE 17 #include <db.h> 18 #include <limits.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include "btree.h" 23 24 static int bt_preserve __P((BTREE *, pgno_t)); 25 static PAGE *bt_psplit __P((BTREE *, PAGE *, PAGE *, PAGE *, int *)); 26 static PAGE *bt_page __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 27 static PAGE *bt_root __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 28 static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 29 static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 30 static recno_t rec_total __P((PAGE *)); 31 32 #ifdef STATISTICS 33 u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; 34 #endif 35 36 /* 37 * __BT_SPLIT -- Split the tree. 38 * 39 * Parameters: 40 * t: tree 41 * h: page to split 42 * key: key to insert 43 * data: data to insert 44 * flags: BIGKEY/BIGDATA flags 45 * nbytes: length of insertion 46 * skip: index to leave open 47 * 48 * Returns: 49 * RET_ERROR, RET_SUCCESS 50 */ 51 int 52 __bt_split(t, h, key, data, flags, nbytes, skip) 53 BTREE *t; 54 PAGE *h; 55 const DBT *key, *data; 56 u_long flags; 57 size_t nbytes; 58 int skip; 59 { 60 BINTERNAL *bi; 61 BLEAF *bl; 62 DBT a, b; 63 EPGNO *parent; 64 PAGE *l, *r, *lchild, *rchild; 65 index_t nxtindex; 66 size_t nksize; 67 int nosplit; 68 char *dest; 69 70 /* 71 * Split the page into two pages, l and r. The split routines return 72 * a pointer to the page into which the key should be inserted and skip 73 * set to the offset which should be used. Additionally, l and r are 74 * pinned. 75 */ 76 h = h->pgno == P_ROOT ? 77 bt_root(t, h, &l, &r, &skip) : bt_page(t, h, &l, &r, &skip); 78 if (h == NULL) 79 return (RET_ERROR); 80 81 /* 82 * Grab the space and insert the [rb]leaf structure. Always a [rb]leaf 83 * structure since key inserts always cause a leaf page to split first. 84 */ 85 h->linp[skip] = h->upper -= nbytes; 86 dest = (char *)h + h->upper; 87 if (ISSET(t, BTF_RECNO)) { 88 WR_RLEAF(dest, data, flags) 89 ++t->bt_nrecs; 90 SET(t, BTF_METADIRTY | BTF_MODIFIED); 91 } else { 92 WR_BLEAF(dest, key, data, flags) 93 SET(t, BTF_MODIFIED); 94 } 95 96 /* 97 * Now we walk the parent page stack -- a LIFO stack of the pages that 98 * were traversed when we searched for the page that split. Each stack 99 * entry is a page number and a page index offset. The offset is for 100 * the page traversed on the search. We've just split a page, so we 101 * have to insert a new key into the parent page. 102 * 103 * If the insert into the parent page causes it to split, may have to 104 * continue splitting all the way up the tree. We stop if the root 105 * splits or the page inserted into didn't have to split to hold the 106 * new key. Some algorithms replace the key for the old page as well 107 * as the new page. We don't, as there's no reason to believe that the 108 * first key on the old page is any better than the key we have, and, 109 * in the case of a key being placed at index 0 causing the split, the 110 * key is unavailable. 111 * 112 * There are a maximum of 5 pages pinned at any time. We keep the left 113 * and right pages pinned while working on the parent. The 5 are the 114 * two children, left parent and right parent (when the parent splits) 115 * and the root page or the overflow key page when calling bt_preserve. 116 * This code must make sure that all pins are released other than the 117 * root page or overflow page which is unlocked elsewhere. 118 */ 119 for (nosplit = 0; (parent = BT_POP(t)) != NULL;) { 120 lchild = l; 121 rchild = r; 122 123 /* Get the parent page. */ 124 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 125 goto err2; 126 127 /* The new key goes ONE AFTER the index. */ 128 skip = parent->index + 1; 129 130 /* 131 * Calculate the space needed on the parent page. 132 * 133 * Space hack when insertin into BINTERNAL pages. Only need to 134 * retain the number of bytes that will distinguish between the 135 * new entry and the LAST entry on the page to its left. If 136 * the keys compare equal, only need to retain one byte as a 137 * placeholder. Special cases are that the entire key must be 138 * retained for the next-to-leftmost key on the leftmost page 139 * of each level, or the search will fail, and can't mess with 140 * overflow keys. 141 */ 142 switch (rchild->flags & P_TYPE) { 143 case P_BINTERNAL: 144 bi = GETBINTERNAL(rchild, 0); 145 nbytes = NBINTERNAL(bi->ksize); 146 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 147 !(bi->flags & P_BIGKEY)) { 148 BINTERNAL *tbi; 149 tbi = 150 GETBINTERNAL(lchild, NEXTINDEX(lchild) - 1); 151 a.size = tbi->ksize; 152 a.data = tbi->bytes; 153 b.size = bi->ksize; 154 b.data = bi->bytes; 155 goto prefix; 156 } 157 break; 158 case P_BLEAF: 159 bl = GETBLEAF(rchild, 0); 160 nbytes = NBLEAF(bl); 161 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 162 !(bl->flags & P_BIGKEY)) { 163 BLEAF *tbl; 164 size_t n; 165 166 tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); 167 a.size = tbl->ksize; 168 a.data = tbl->bytes; 169 b.size = bl->ksize; 170 b.data = bl->bytes; 171 prefix: nksize = t->bt_pfx(&a, &b); 172 n = NBINTERNAL(nksize); 173 if (n < nbytes) { 174 #ifdef STATISTICS 175 bt_pfxsaved += nbytes - n; 176 #endif 177 nbytes = n; 178 } else 179 nksize = 0; 180 } else 181 nksize = 0; 182 break; 183 case P_RINTERNAL: 184 case P_RLEAF: 185 nbytes = NRINTERNAL; 186 break; 187 } 188 189 /* Split the parent page if necessary or shift the indices. */ 190 if (h->upper - h->lower < nbytes + sizeof(index_t)) { 191 h = h->pgno == P_ROOT ? 192 bt_root(t, h, &l, &r, &skip) : 193 bt_page(t, h, &l, &r, &skip); 194 if (h == NULL) 195 goto err1; 196 } else { 197 if (skip < (nxtindex = NEXTINDEX(h))) 198 bcopy(h->linp + skip, h->linp + skip + 1, 199 (nxtindex - skip) * sizeof(index_t)); 200 h->lower += sizeof(index_t); 201 nosplit = 1; 202 } 203 204 /* Insert the key into the parent page. */ 205 switch(rchild->flags & P_TYPE) { 206 case P_BINTERNAL: 207 h->linp[skip] = h->upper -= nbytes; 208 dest = (char *)h + h->linp[skip]; 209 bcopy(bi, dest, nbytes); 210 if (nksize) 211 ((BINTERNAL *)dest)->ksize = nksize; 212 ((BINTERNAL *)dest)->pgno = rchild->pgno; 213 break; 214 case P_BLEAF: 215 h->linp[skip] = h->upper -= nbytes; 216 dest = (char *)h + h->linp[skip]; 217 WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, 218 rchild->pgno, rchild->flags & P_OVERFLOW); 219 bcopy(bl->bytes, dest, nksize ? nksize : bl->ksize); 220 if (bl->flags & P_BIGKEY && 221 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 222 goto err1; 223 break; 224 case P_RINTERNAL: 225 /* Update both left and right page counts. */ 226 h->linp[skip] = h->upper -= nbytes; 227 dest = (char *)h + h->linp[skip]; 228 ((RINTERNAL *)dest)->nrecs = rec_total(rchild); 229 ((RINTERNAL *)dest)->pgno = rchild->pgno; 230 dest = (char *)h + h->linp[skip - 1]; 231 ((RINTERNAL *)dest)->nrecs = rec_total(lchild); 232 ((RINTERNAL *)dest)->pgno = lchild->pgno; 233 break; 234 case P_RLEAF: 235 /* Update both left and right page counts. */ 236 h->linp[skip] = h->upper -= nbytes; 237 dest = (char *)h + h->linp[skip]; 238 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); 239 ((RINTERNAL *)dest)->pgno = rchild->pgno; 240 dest = (char *)h + h->linp[skip - 1]; 241 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); 242 ((RINTERNAL *)dest)->pgno = lchild->pgno; 243 break; 244 } 245 246 /* Unpin the held pages. */ 247 if (nosplit) { 248 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 249 break; 250 } 251 mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 252 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 253 } 254 255 /* Unpin the held pages. */ 256 mpool_put(t->bt_mp, l, MPOOL_DIRTY); 257 mpool_put(t->bt_mp, r, MPOOL_DIRTY); 258 259 /* 260 * If it's a recno tree, increment the count on all remaining parent 261 * pages. Otherwise, clear the stack. 262 */ 263 if (ISSET(t, BTF_RECNO)) 264 while ((parent = BT_POP(t)) != NULL) { 265 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 266 return (RET_ERROR); 267 ++GETRINTERNAL(h, parent->index)->nrecs; 268 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 269 } 270 else 271 BT_CLR(t); 272 return (RET_SUCCESS); 273 274 /* 275 * If something fails in the above loop we were already walking back 276 * up the tree and the tree is now inconsistent. Nothing much we can 277 * do about it but release any memory we're holding. 278 */ 279 err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 280 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 281 282 err2: mpool_put(t->bt_mp, l, 0); 283 mpool_put(t->bt_mp, r, 0); 284 __dbpanic(t->bt_dbp); 285 return (RET_ERROR); 286 } 287 288 /* 289 * BT_PAGE -- Split a non-root page of a btree. 290 * 291 * Parameters: 292 * t: tree 293 * h: root page 294 * lp: pointer to left page pointer 295 * rp: pointer to right page pointer 296 * skip: pointer to index to leave open 297 * 298 * Returns: 299 * Pointer to page in which to insert or NULL on error. 300 */ 301 static PAGE * 302 bt_page(t, h, lp, rp, skip) 303 BTREE *t; 304 PAGE *h, **lp, **rp; 305 int *skip; 306 { 307 PAGE *l, *r, *tp; 308 pgno_t npg; 309 310 #ifdef STATISTICS 311 ++bt_split; 312 #endif 313 /* Put the new right page for the split into place. */ 314 if ((r = mpool_new(t->bt_mp, &npg)) == NULL) 315 return (NULL); 316 r->pgno = npg; 317 r->lower = BTDATAOFF; 318 r->upper = t->bt_psize; 319 r->nextpg = h->nextpg; 320 r->prevpg = h->pgno; 321 r->flags = h->flags & P_TYPE; 322 323 /* 324 * If we're splitting the last page on a level because we're appending 325 * a key to it (skip is NEXTINDEX()), it's likely that the data is 326 * sorted. Adding an empty page on the side of the level is less work 327 * and can push the fill factor much higher than normal. If we're 328 * wrong it's no big deal, we'll just do the split the right way next 329 * time. It may look like it's equally easy to do a similar hack for 330 * reverse sorted data, that is, split the tree left, but it's not. 331 * Don't even try. 332 */ 333 if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { 334 #ifdef STATISTICS 335 ++bt_sortsplit; 336 #endif 337 h->nextpg = r->pgno; 338 r->lower = BTDATAOFF + sizeof(index_t); 339 *skip = 0; 340 *lp = h; 341 *rp = r; 342 return (r); 343 } 344 345 /* Put the new left page for the split into place. */ 346 if ((l = malloc(t->bt_psize)) == NULL) { 347 mpool_put(t->bt_mp, r, 0); 348 return (NULL); 349 } 350 l->pgno = h->pgno; 351 l->nextpg = r->pgno; 352 l->prevpg = h->prevpg; 353 l->lower = BTDATAOFF; 354 l->upper = t->bt_psize; 355 l->flags = h->flags & P_TYPE; 356 357 /* Fix up the previous pointer of the page after the split page. */ 358 if (h->nextpg != P_INVALID) { 359 if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { 360 free(l); 361 /* XXX mpool_free(t->bt_mp, r->pgno); */ 362 return (NULL); 363 } 364 tp->prevpg = r->pgno; 365 mpool_put(t->bt_mp, tp, 0); 366 } 367 368 /* 369 * Split right. The key/data pairs aren't sorted in the btree page so 370 * it's simpler to copy the data from the split page onto two new pages 371 * instead of copying half the data to the right page and compacting 372 * the left page in place. Since the left page can't change, we have 373 * to swap the original and the allocated left page after the split. 374 */ 375 tp = bt_psplit(t, h, l, r, skip); 376 377 /* Move the new left page onto the old left page. */ 378 bcopy(l, h, t->bt_psize); 379 if (tp == l) 380 tp = h; 381 free(l); 382 383 *lp = h; 384 *rp = r; 385 return (tp); 386 } 387 388 /* 389 * BT_RSPLIT -- Split the root page of a btree. 390 * 391 * Parameters: 392 * t: tree 393 * h: root page 394 * lp: pointer to left page pointer 395 * rp: pointer to right page pointer 396 * skip: pointer to index to leave open 397 * 398 * Returns: 399 * Pointer to page in which to insert or NULL on error. 400 */ 401 static PAGE * 402 bt_root(t, h, lp, rp, skip) 403 BTREE *t; 404 PAGE *h, **lp, **rp; 405 int *skip; 406 { 407 PAGE *l, *r, *tp; 408 pgno_t lnpg, rnpg; 409 410 #ifdef STATISTICS 411 ++bt_split; 412 ++bt_rootsplit; 413 #endif 414 /* Put the new left and right pages for the split into place. */ 415 if ((l = mpool_new(t->bt_mp, &lnpg)) == NULL || 416 (r = mpool_new(t->bt_mp, &rnpg)) == NULL) 417 return (NULL); 418 l->pgno = lnpg; 419 r->pgno = rnpg; 420 l->nextpg = r->pgno; 421 r->prevpg = l->pgno; 422 l->prevpg = r->nextpg = P_INVALID; 423 l->lower = r->lower = BTDATAOFF; 424 l->upper = r->upper = t->bt_psize; 425 l->flags = r->flags = h->flags & P_TYPE; 426 427 /* Split the root page. */ 428 tp = bt_psplit(t, h, l, r, skip); 429 430 /* Make the root page look right. */ 431 if ((ISSET(t, BTF_RECNO) ? 432 bt_rroot(t, h, l, r) : bt_broot(t, h, l, r)) == RET_ERROR) 433 return (NULL); 434 435 *lp = l; 436 *rp = r; 437 return (tp); 438 } 439 440 /* 441 * BT_RROOT -- Fix up the recno root page after the split. 442 * 443 * Parameters: 444 * t: tree 445 * h: root page 446 * 447 * Returns: 448 * RET_ERROR, RET_SUCCESS 449 */ 450 static int 451 bt_rroot(t, h, l, r) 452 BTREE *t; 453 PAGE *h, *l, *r; 454 { 455 char *dest; 456 457 /* Insert the left and right keys, set the header information. */ 458 h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; 459 dest = (char *)h + h->upper; 460 WR_RINTERNAL(dest, 461 l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); 462 463 h->linp[1] = h->upper -= NRINTERNAL; 464 dest = (char *)h + h->upper; 465 WR_RINTERNAL(dest, 466 r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); 467 468 h->lower = BTDATAOFF + 2 * sizeof(index_t); 469 470 /* Unpin the root page, set to recno internal page. */ 471 h->flags &= ~P_TYPE; 472 h->flags |= P_RINTERNAL; 473 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 474 475 return (RET_SUCCESS); 476 } 477 478 /* 479 * BT_BROOT -- Fix up the btree root page after the split. 480 * 481 * Parameters: 482 * t: tree 483 * h: root page 484 * 485 * Returns: 486 * RET_ERROR, RET_SUCCESS 487 */ 488 static int 489 bt_broot(t, h, l, r) 490 BTREE *t; 491 PAGE *h, *l, *r; 492 { 493 BINTERNAL *bi; 494 BLEAF *bl; 495 size_t nbytes; 496 char *dest; 497 498 /* 499 * If the root page was a leaf page, change it into an internal page. 500 * We copy the key we split on (but not the key's data, in the case of 501 * a leaf page) to the new root page. If the key is on an overflow 502 * page, mark the overflow chain so it isn't deleted when the leaf copy 503 * of the key is deleted. 504 * 505 * The btree comparison code guarantees that the left-most key on any 506 * level of the tree is never used, so it doesn't need to be filled 507 * in. (This is not just convenience -- if the insert index is 0, we 508 * don't *have* a key to fill in.) The right key is available because 509 * the split code guarantees not to split on the skipped index. 510 */ 511 nbytes = LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char)); 512 h->linp[0] = h->upper = t->bt_psize - nbytes; 513 dest = (char *)h + h->upper; 514 WR_BINTERNAL(dest, 0, l->pgno, 0); 515 516 switch(h->flags & P_TYPE) { 517 case P_BLEAF: 518 bl = GETBLEAF(r, 0); 519 nbytes = NBINTERNAL(bl->ksize); 520 h->linp[1] = h->upper -= nbytes; 521 dest = (char *)h + h->upper; 522 WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); 523 bcopy(bl->bytes, dest, bl->ksize); 524 525 if (bl->flags & P_BIGKEY && 526 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 527 return (RET_ERROR); 528 break; 529 case P_BINTERNAL: 530 bi = GETBINTERNAL(r, 0); 531 nbytes = NBINTERNAL(bi->ksize); 532 h->linp[1] = h->upper -= nbytes; 533 dest = (char *)h + h->upper; 534 bcopy(bi, dest, nbytes); 535 ((BINTERNAL *)dest)->pgno = r->pgno; 536 break; 537 } 538 h->lower = BTDATAOFF + 2 * sizeof(index_t); 539 540 /* Unpin the root page, set to btree internal page. */ 541 h->flags &= ~P_TYPE; 542 h->flags |= P_BINTERNAL; 543 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 544 545 return (RET_SUCCESS); 546 } 547 548 /* 549 * BT_PSPLIT -- Do the real work of splitting the page. 550 * 551 * Parameters: 552 * t: tree 553 * h: page to be split 554 * l: page to put lower half of data 555 * r: page to put upper half of data 556 * skip: pointer to index to leave open 557 * 558 * Returns: 559 * Pointer to page in which to insert. 560 */ 561 static PAGE * 562 bt_psplit(t, h, l, r, skip) 563 BTREE *t; 564 PAGE *h, *l, *r; 565 int *skip; 566 { 567 BINTERNAL *bi; 568 BLEAF *bl; 569 RLEAF *rl; 570 EPGNO *c; 571 PAGE *rval; 572 index_t half, sval; 573 size_t nbytes; 574 void *src; 575 int bigkeycnt, isbigkey, nxt, off, top; 576 577 /* 578 * Split the data to the left and right pages. Leave the skip index 579 * open and guarantee that the split doesn't happen on that index (the 580 * right key must be available for the parent page). Additionally, 581 * make some effort not to split on an overflow key. This makes it 582 * faster to process internal pages and can save space since overflow 583 * keys used by internal pages are never deleted. 584 */ 585 bigkeycnt = 0; 586 sval = *skip; 587 half = (t->bt_psize - BTDATAOFF) / 2; 588 for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { 589 if (sval == off) 590 continue; 591 switch (h->flags & P_TYPE) { 592 case P_BINTERNAL: 593 src = bi = GETBINTERNAL(h, nxt); 594 nbytes = NBINTERNAL(bi->ksize); 595 isbigkey = bi->flags & P_BIGKEY; 596 break; 597 case P_BLEAF: 598 src = bl = GETBLEAF(h, nxt); 599 nbytes = NBLEAF(bl); 600 isbigkey = bl->flags & P_BIGKEY; 601 break; 602 case P_RINTERNAL: 603 src = GETRINTERNAL(h, nxt); 604 nbytes = NRINTERNAL; 605 isbigkey = 0; 606 break; 607 case P_RLEAF: 608 src = rl = GETRLEAF(h, nxt); 609 nbytes = NRLEAF(rl); 610 isbigkey = 0; 611 break; 612 } 613 ++nxt; 614 l->linp[off] = l->upper -= nbytes; 615 bcopy(src, (char *)l + l->upper, nbytes); 616 617 /* There's no empirical justification for the '3'. */ 618 if (half < nbytes) 619 if (!isbigkey || bigkeycnt == 3) 620 break; 621 else 622 ++bigkeycnt; 623 else 624 half -= nbytes; 625 } 626 l->lower += (off + 1) * sizeof(index_t); 627 628 /* 629 * If we're splitting the page that the cursor was on, have to adjust 630 * the cursor to point to the same record as before the split. 631 */ 632 c = &t->bt_bcursor; 633 if (c->pgno == h->pgno) 634 if (c->index < off) 635 c->pgno = l->pgno; 636 else { 637 c->pgno = r->pgno; 638 c->index -= off; 639 } 640 641 /* 642 * Decide which page to return, and adjust the skip index if the 643 * to-be-inserted-upon page has changed. 644 */ 645 if (sval > off) { 646 rval = r; 647 *skip -= off + 1; 648 } else 649 rval = l; 650 651 for (off = 0; nxt < top; ++off) { 652 if (sval == nxt) { 653 sval = 0; 654 continue; 655 } 656 switch (h->flags & P_TYPE) { 657 case P_BINTERNAL: 658 src = bi = GETBINTERNAL(h, nxt); 659 nbytes = NBINTERNAL(bi->ksize); 660 break; 661 case P_BLEAF: 662 src = bl = GETBLEAF(h, nxt); 663 nbytes = NBLEAF(bl); 664 break; 665 case P_RINTERNAL: 666 src = GETRINTERNAL(h, nxt); 667 nbytes = NRINTERNAL; 668 break; 669 case P_RLEAF: 670 src = rl = GETRLEAF(h, nxt); 671 nbytes = NRLEAF(rl); 672 break; 673 } 674 ++nxt; 675 r->linp[off] = r->upper -= nbytes; 676 bcopy(src, (char *)r + r->upper, nbytes); 677 } 678 r->lower += off * sizeof(index_t); 679 680 /* If the key is being appended to the page, adjust the index. */ 681 if (sval == top) 682 r->lower += sizeof(index_t); 683 684 return (rval); 685 } 686 687 /* 688 * BT_PRESERVE -- Mark a chain of pages as used by an internal node. 689 * 690 * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the 691 * record that references them gets deleted. Chains pointed to by internal 692 * pages never get deleted. This routine marks a chain as pointed to by an 693 * internal page. 694 * 695 * Parameters: 696 * t: tree 697 * pg: page number of first page in the chain. 698 * 699 * Returns: 700 * RET_SUCCESS, RET_ERROR. 701 */ 702 static int 703 bt_preserve(t, pg) 704 BTREE *t; 705 pgno_t pg; 706 { 707 PAGE *h; 708 709 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 710 return (RET_ERROR); 711 h->flags |= P_PRESERVE; 712 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 713 return (RET_SUCCESS); 714 } 715 716 /* 717 * REC_TOTAL -- Return the number of recno entries below a page. 718 * 719 * Parameters: 720 * h: page 721 * 722 * Returns: 723 * The number of recno entries below a page. 724 * 725 * XXX 726 * These values could be set by the bt_psplit routine. The problem is that the 727 * entry has to be popped off of the stack etc. or the values have to be passed 728 * all the way back to bt_split/bt_rroot and it's not very clean. 729 */ 730 static recno_t 731 rec_total(h) 732 PAGE *h; 733 { 734 recno_t recs; 735 index_t nxt, top; 736 737 for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) 738 recs += GETRINTERNAL(h, nxt)->nrecs; 739 return (recs); 740 } 741