1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_split.c 5.7 (Berkeley) 10/09/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #define __DBINTERFACE_PRIVATE 17 #include <db.h> 18 #include <limits.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include "btree.h" 23 24 static int bt_preserve __P((BTREE *, pgno_t)); 25 static PAGE *bt_psplit __P((BTREE *, PAGE *, PAGE *, PAGE *, int *)); 26 static PAGE *bt_page __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 27 static PAGE *bt_root __P((BTREE *, PAGE *, PAGE **, PAGE **, int *)); 28 static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 29 static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); 30 static recno_t rec_total __P((PAGE *)); 31 32 #ifdef STATISTICS 33 u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; 34 #endif 35 36 /* 37 * __BT_SPLIT -- Split the tree. 38 * 39 * Parameters: 40 * t: tree 41 * h: page to split 42 * key: key to insert 43 * data: data to insert 44 * flags: BIGKEY/BIGDATA flags 45 * nbytes: length of insertion 46 * skip: index to leave open 47 * 48 * Returns: 49 * RET_ERROR, RET_SUCCESS 50 */ 51 int 52 __bt_split(t, h, key, data, flags, nbytes, skip) 53 BTREE *t; 54 PAGE *h; 55 const DBT *key, *data; 56 u_long flags; 57 size_t nbytes; 58 int skip; 59 { 60 BINTERNAL *bi; 61 BLEAF *bl; 62 DBT a, b; 63 EPGNO *parent; 64 PAGE *l, *r, *lchild, *rchild; 65 index_t nxtindex; 66 size_t nksize; 67 int nosplit; 68 char *dest; 69 70 /* 71 * Split the page into two pages, l and r. The split routines return 72 * a pointer to the page into which the key should be inserted and skip 73 * set to the offset which should be used. Additionally, l and r are 74 * pinned. 75 */ 76 h = h->pgno == P_ROOT ? 77 bt_root(t, h, &l, &r, &skip) : bt_page(t, h, &l, &r, &skip); 78 if (h == NULL) 79 return (RET_ERROR); 80 81 /* 82 * Grab the space and insert the [rb]leaf structure. Always a [rb]leaf 83 * structure since key inserts always cause a leaf page to split first. 84 */ 85 h->linp[skip] = h->upper -= nbytes; 86 dest = (char *)h + h->upper; 87 if (ISSET(t, BTF_RECNO)) 88 WR_RLEAF(dest, data, flags) 89 else 90 WR_BLEAF(dest, key, data, flags) 91 92 /* 93 * Now we walk the parent page stack -- a LIFO stack of the pages that 94 * were traversed when we searched for the page that split. Each stack 95 * entry is a page number and a page index offset. The offset is for 96 * the page traversed on the search. We've just split a page, so we 97 * have to insert a new key into the parent page. 98 * 99 * If the insert into the parent page causes it to split, may have to 100 * continue splitting all the way up the tree. We stop if the root 101 * splits or the page inserted into didn't have to split to hold the 102 * new key. Some algorithms replace the key for the old page as well 103 * as the new page. We don't, as there's no reason to believe that the 104 * first key on the old page is any better than the key we have, and, 105 * in the case of a key being placed at index 0 causing the split, the 106 * key is unavailable. 107 * 108 * There are a maximum of 5 pages pinned at any time. We keep the left 109 * and right pages pinned while working on the parent. The 5 are the 110 * two children, left parent and right parent (when the parent splits) 111 * and the root page or the overflow key page when calling bt_preserve. 112 * This code must make sure that all pins are released other than the 113 * root page or overflow page which is unlocked elsewhere. 114 */ 115 for (nosplit = 0; (parent = BT_POP(t)) != NULL;) { 116 lchild = l; 117 rchild = r; 118 119 /* Get the parent page. */ 120 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 121 goto err2; 122 123 /* The new key goes ONE AFTER the index. */ 124 skip = parent->index + 1; 125 126 /* 127 * Calculate the space needed on the parent page. 128 * 129 * Space hack when inserting into BINTERNAL pages. Only need to 130 * retain the number of bytes that will distinguish between the 131 * new entry and the LAST entry on the page to its left. If the 132 * keys compare equal, retain the entire key. Note, we don't 133 * touch overflow keys and the entire key must be retained for 134 * the next-to-leftmost key on the leftmost page of each level, 135 * or the search will fail. 136 */ 137 switch (rchild->flags & P_TYPE) { 138 case P_BINTERNAL: 139 bi = GETBINTERNAL(rchild, 0); 140 nbytes = NBINTERNAL(bi->ksize); 141 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 142 !(bi->flags & P_BIGKEY)) { 143 BINTERNAL *tbi; 144 tbi = 145 GETBINTERNAL(lchild, NEXTINDEX(lchild) - 1); 146 a.size = tbi->ksize; 147 a.data = tbi->bytes; 148 b.size = bi->ksize; 149 b.data = bi->bytes; 150 goto prefix; 151 } 152 break; 153 case P_BLEAF: 154 bl = GETBLEAF(rchild, 0); 155 nbytes = NBINTERNAL(bl->ksize); 156 if (t->bt_pfx && (h->prevpg != P_INVALID || skip > 1) && 157 !(bl->flags & P_BIGKEY)) { 158 BLEAF *tbl; 159 size_t n; 160 161 tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); 162 a.size = tbl->ksize; 163 a.data = tbl->bytes; 164 b.size = bl->ksize; 165 b.data = bl->bytes; 166 prefix: nksize = t->bt_pfx(&a, &b); 167 n = NBINTERNAL(nksize); 168 if (n < nbytes) { 169 #ifdef STATISTICS 170 bt_pfxsaved += nbytes - n; 171 #endif 172 nbytes = n; 173 } else 174 nksize = 0; 175 } else 176 nksize = 0; 177 break; 178 case P_RINTERNAL: 179 case P_RLEAF: 180 nbytes = NRINTERNAL; 181 break; 182 } 183 184 /* Split the parent page if necessary or shift the indices. */ 185 if (h->upper - h->lower < nbytes + sizeof(index_t)) { 186 h = h->pgno == P_ROOT ? 187 bt_root(t, h, &l, &r, &skip) : 188 bt_page(t, h, &l, &r, &skip); 189 if (h == NULL) 190 goto err1; 191 } else { 192 if (skip < (nxtindex = NEXTINDEX(h))) 193 bcopy(h->linp + skip, h->linp + skip + 1, 194 (nxtindex - skip) * sizeof(index_t)); 195 h->lower += sizeof(index_t); 196 nosplit = 1; 197 } 198 199 /* Insert the key into the parent page. */ 200 switch(rchild->flags & P_TYPE) { 201 case P_BINTERNAL: 202 h->linp[skip] = h->upper -= nbytes; 203 dest = (char *)h + h->linp[skip]; 204 bcopy(bi, dest, nbytes); 205 if (nksize) 206 ((BINTERNAL *)dest)->ksize = nksize; 207 ((BINTERNAL *)dest)->pgno = rchild->pgno; 208 break; 209 case P_BLEAF: 210 h->linp[skip] = h->upper -= nbytes; 211 dest = (char *)h + h->linp[skip]; 212 WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, 213 rchild->pgno, bl->flags & P_BIGKEY); 214 bcopy(bl->bytes, dest, nksize ? nksize : bl->ksize); 215 if (bl->flags & P_BIGKEY && 216 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 217 goto err1; 218 break; 219 case P_RINTERNAL: 220 /* Update both left and right page counts. */ 221 h->linp[skip] = h->upper -= nbytes; 222 dest = (char *)h + h->linp[skip]; 223 ((RINTERNAL *)dest)->nrecs = rec_total(rchild); 224 ((RINTERNAL *)dest)->pgno = rchild->pgno; 225 dest = (char *)h + h->linp[skip - 1]; 226 ((RINTERNAL *)dest)->nrecs = rec_total(lchild); 227 ((RINTERNAL *)dest)->pgno = lchild->pgno; 228 break; 229 case P_RLEAF: 230 /* Update both left and right page counts. */ 231 h->linp[skip] = h->upper -= nbytes; 232 dest = (char *)h + h->linp[skip]; 233 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); 234 ((RINTERNAL *)dest)->pgno = rchild->pgno; 235 dest = (char *)h + h->linp[skip - 1]; 236 ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); 237 ((RINTERNAL *)dest)->pgno = lchild->pgno; 238 break; 239 } 240 241 /* Unpin the held pages. */ 242 if (nosplit) { 243 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 244 break; 245 } 246 mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 247 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 248 } 249 250 /* Unpin the held pages. */ 251 mpool_put(t->bt_mp, l, MPOOL_DIRTY); 252 mpool_put(t->bt_mp, r, MPOOL_DIRTY); 253 254 /* Clear any pages left on the stack. */ 255 BT_CLR(t); 256 return (RET_SUCCESS); 257 258 /* 259 * If something fails in the above loop we were already walking back 260 * up the tree and the tree is now inconsistent. Nothing much we can 261 * do about it but release any memory we're holding. 262 */ 263 err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); 264 mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); 265 266 err2: mpool_put(t->bt_mp, l, 0); 267 mpool_put(t->bt_mp, r, 0); 268 __dbpanic(t->bt_dbp); 269 return (RET_ERROR); 270 } 271 272 /* 273 * BT_PAGE -- Split a non-root page of a btree. 274 * 275 * Parameters: 276 * t: tree 277 * h: root page 278 * lp: pointer to left page pointer 279 * rp: pointer to right page pointer 280 * skip: pointer to index to leave open 281 * 282 * Returns: 283 * Pointer to page in which to insert or NULL on error. 284 */ 285 static PAGE * 286 bt_page(t, h, lp, rp, skip) 287 BTREE *t; 288 PAGE *h, **lp, **rp; 289 int *skip; 290 { 291 PAGE *l, *r, *tp; 292 pgno_t npg; 293 294 #ifdef STATISTICS 295 ++bt_split; 296 #endif 297 /* Put the new right page for the split into place. */ 298 if ((r = __bt_new(t, &npg)) == NULL) 299 return (NULL); 300 r->pgno = npg; 301 r->lower = BTDATAOFF; 302 r->upper = t->bt_psize; 303 r->nextpg = h->nextpg; 304 r->prevpg = h->pgno; 305 r->flags = h->flags & P_TYPE; 306 307 /* 308 * If we're splitting the last page on a level because we're appending 309 * a key to it (skip is NEXTINDEX()), it's likely that the data is 310 * sorted. Adding an empty page on the side of the level is less work 311 * and can push the fill factor much higher than normal. If we're 312 * wrong it's no big deal, we'll just do the split the right way next 313 * time. It may look like it's equally easy to do a similar hack for 314 * reverse sorted data, that is, split the tree left, but it's not. 315 * Don't even try. 316 */ 317 if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { 318 #ifdef STATISTICS 319 ++bt_sortsplit; 320 #endif 321 h->nextpg = r->pgno; 322 r->lower = BTDATAOFF + sizeof(index_t); 323 *skip = 0; 324 *lp = h; 325 *rp = r; 326 return (r); 327 } 328 329 /* Put the new left page for the split into place. */ 330 if ((l = malloc(t->bt_psize)) == NULL) { 331 mpool_put(t->bt_mp, r, 0); 332 return (NULL); 333 } 334 l->pgno = h->pgno; 335 l->nextpg = r->pgno; 336 l->prevpg = h->prevpg; 337 l->lower = BTDATAOFF; 338 l->upper = t->bt_psize; 339 l->flags = h->flags & P_TYPE; 340 341 /* Fix up the previous pointer of the page after the split page. */ 342 if (h->nextpg != P_INVALID) { 343 if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { 344 free(l); 345 /* XXX mpool_free(t->bt_mp, r->pgno); */ 346 return (NULL); 347 } 348 tp->prevpg = r->pgno; 349 mpool_put(t->bt_mp, tp, 0); 350 } 351 352 /* 353 * Split right. The key/data pairs aren't sorted in the btree page so 354 * it's simpler to copy the data from the split page onto two new pages 355 * instead of copying half the data to the right page and compacting 356 * the left page in place. Since the left page can't change, we have 357 * to swap the original and the allocated left page after the split. 358 */ 359 tp = bt_psplit(t, h, l, r, skip); 360 361 /* Move the new left page onto the old left page. */ 362 bcopy(l, h, t->bt_psize); 363 if (tp == l) 364 tp = h; 365 free(l); 366 367 *lp = h; 368 *rp = r; 369 return (tp); 370 } 371 372 /* 373 * BT_ROOT -- Split the root page of a btree. 374 * 375 * Parameters: 376 * t: tree 377 * h: root page 378 * lp: pointer to left page pointer 379 * rp: pointer to right page pointer 380 * skip: pointer to index to leave open 381 * 382 * Returns: 383 * Pointer to page in which to insert or NULL on error. 384 */ 385 static PAGE * 386 bt_root(t, h, lp, rp, skip) 387 BTREE *t; 388 PAGE *h, **lp, **rp; 389 int *skip; 390 { 391 PAGE *l, *r, *tp; 392 pgno_t lnpg, rnpg; 393 394 #ifdef STATISTICS 395 ++bt_split; 396 ++bt_rootsplit; 397 #endif 398 /* Put the new left and right pages for the split into place. */ 399 if ((l = __bt_new(t, &lnpg)) == NULL || 400 (r = __bt_new(t, &rnpg)) == NULL) 401 return (NULL); 402 l->pgno = lnpg; 403 r->pgno = rnpg; 404 l->nextpg = r->pgno; 405 r->prevpg = l->pgno; 406 l->prevpg = r->nextpg = P_INVALID; 407 l->lower = r->lower = BTDATAOFF; 408 l->upper = r->upper = t->bt_psize; 409 l->flags = r->flags = h->flags & P_TYPE; 410 411 /* Split the root page. */ 412 tp = bt_psplit(t, h, l, r, skip); 413 414 /* Make the root page look right. */ 415 if ((ISSET(t, BTF_RECNO) ? 416 bt_rroot(t, h, l, r) : bt_broot(t, h, l, r)) == RET_ERROR) 417 return (NULL); 418 419 *lp = l; 420 *rp = r; 421 return (tp); 422 } 423 424 /* 425 * BT_RROOT -- Fix up the recno root page after the split. 426 * 427 * Parameters: 428 * t: tree 429 * h: root page 430 * 431 * Returns: 432 * RET_ERROR, RET_SUCCESS 433 */ 434 static int 435 bt_rroot(t, h, l, r) 436 BTREE *t; 437 PAGE *h, *l, *r; 438 { 439 char *dest; 440 441 /* Insert the left and right keys, set the header information. */ 442 h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; 443 dest = (char *)h + h->upper; 444 WR_RINTERNAL(dest, 445 l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); 446 447 h->linp[1] = h->upper -= NRINTERNAL; 448 dest = (char *)h + h->upper; 449 WR_RINTERNAL(dest, 450 r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); 451 452 h->lower = BTDATAOFF + 2 * sizeof(index_t); 453 454 /* Unpin the root page, set to recno internal page. */ 455 h->flags &= ~P_TYPE; 456 h->flags |= P_RINTERNAL; 457 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 458 459 return (RET_SUCCESS); 460 } 461 462 /* 463 * BT_BROOT -- Fix up the btree root page after the split. 464 * 465 * Parameters: 466 * t: tree 467 * h: root page 468 * 469 * Returns: 470 * RET_ERROR, RET_SUCCESS 471 */ 472 static int 473 bt_broot(t, h, l, r) 474 BTREE *t; 475 PAGE *h, *l, *r; 476 { 477 BINTERNAL *bi; 478 BLEAF *bl; 479 size_t nbytes; 480 char *dest; 481 482 /* 483 * If the root page was a leaf page, change it into an internal page. 484 * We copy the key we split on (but not the key's data, in the case of 485 * a leaf page) to the new root page. 486 * 487 * The btree comparison code guarantees that the left-most key on any 488 * level of the tree is never used, so it doesn't need to be filled 489 * in. (This is not just convenience -- if the insert index is 0, we 490 * don't *have* a key to fill in.) The right key is available because 491 * the split code guarantees not to split on the skipped index. 492 */ 493 nbytes = NBINTERNAL(0); 494 h->linp[0] = h->upper = t->bt_psize - nbytes; 495 dest = (char *)h + h->upper; 496 WR_BINTERNAL(dest, 0, l->pgno, 0); 497 498 switch(h->flags & P_TYPE) { 499 case P_BLEAF: 500 bl = GETBLEAF(r, 0); 501 nbytes = NBINTERNAL(bl->ksize); 502 h->linp[1] = h->upper -= nbytes; 503 dest = (char *)h + h->upper; 504 WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); 505 bcopy(bl->bytes, dest, bl->ksize); 506 507 /* 508 * If the key is on an overflow page, mark the overflow chain 509 * so it isn't deleted when the leaf copy of the key is deleted. 510 */ 511 if (bl->flags & P_BIGKEY && 512 bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) 513 return (RET_ERROR); 514 break; 515 case P_BINTERNAL: 516 bi = GETBINTERNAL(r, 0); 517 nbytes = NBINTERNAL(bi->ksize); 518 h->linp[1] = h->upper -= nbytes; 519 dest = (char *)h + h->upper; 520 bcopy(bi, dest, nbytes); 521 ((BINTERNAL *)dest)->pgno = r->pgno; 522 break; 523 } 524 h->lower = BTDATAOFF + 2 * sizeof(index_t); 525 526 /* Unpin the root page, set to btree internal page. */ 527 h->flags &= ~P_TYPE; 528 h->flags |= P_BINTERNAL; 529 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 530 531 return (RET_SUCCESS); 532 } 533 534 /* 535 * BT_PSPLIT -- Do the real work of splitting the page. 536 * 537 * Parameters: 538 * t: tree 539 * h: page to be split 540 * l: page to put lower half of data 541 * r: page to put upper half of data 542 * skip: pointer to index to leave open 543 * 544 * Returns: 545 * Pointer to page in which to insert. 546 */ 547 static PAGE * 548 bt_psplit(t, h, l, r, pskip) 549 BTREE *t; 550 PAGE *h, *l, *r; 551 int *pskip; 552 { 553 BINTERNAL *bi; 554 BLEAF *bl; 555 RLEAF *rl; 556 EPGNO *c; 557 PAGE *rval; 558 index_t half, skip; 559 size_t nbytes; 560 void *src; 561 int bigkeycnt, isbigkey, nxt, off, top; 562 563 /* 564 * Split the data to the left and right pages. Leave the skip index 565 * open and guarantee that the split doesn't happen on that index (the 566 * right key must be available for the parent page). Additionally, 567 * make some effort not to split on an overflow key. This makes it 568 * faster to process internal pages and can save space since overflow 569 * keys used by internal pages are never deleted. 570 */ 571 bigkeycnt = 0; 572 skip = *pskip; 573 half = (t->bt_psize - BTDATAOFF) / 2; 574 for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { 575 if (skip == off) 576 continue; 577 switch (h->flags & P_TYPE) { 578 case P_BINTERNAL: 579 src = bi = GETBINTERNAL(h, nxt); 580 nbytes = NBINTERNAL(bi->ksize); 581 isbigkey = bi->flags & P_BIGKEY; 582 break; 583 case P_BLEAF: 584 src = bl = GETBLEAF(h, nxt); 585 nbytes = NBLEAF(bl); 586 isbigkey = bl->flags & P_BIGKEY; 587 break; 588 case P_RINTERNAL: 589 src = GETRINTERNAL(h, nxt); 590 nbytes = NRINTERNAL; 591 isbigkey = 0; 592 break; 593 case P_RLEAF: 594 src = rl = GETRLEAF(h, nxt); 595 nbytes = NRLEAF(rl); 596 isbigkey = 0; 597 break; 598 } 599 ++nxt; 600 l->linp[off] = l->upper -= nbytes; 601 bcopy(src, (char *)l + l->upper, nbytes); 602 603 /* There's no empirical justification for the '3'. */ 604 if (half < nbytes) { 605 if (skip != off + 1) 606 if (!isbigkey || bigkeycnt == 3) 607 break; 608 else 609 ++bigkeycnt; 610 } else 611 half -= nbytes; 612 } 613 l->lower += (off + 1) * sizeof(index_t); 614 615 /* 616 * If splitting the page that the cursor was on, the cursor has to be 617 * adjusted to point to the same record as before the split. If the 618 * skipped slot and the cursor are both on the left page and the cursor 619 * is on or past the skipped slot, the cursor is incremented by one. 620 * If the skipped slot and the cursor are both on the right page and 621 * the cursor is on or past the skipped slot, the cursor is incremented 622 * by one. If the skipped slot and the cursor aren't on the same page, 623 * the cursor isn't changed. Regardless of the relationship of the 624 * skipped slot and the cursor, if the cursor is on the right page it 625 * is decremented by the number of records split to the left page. 626 * 627 * Don't bother checking for the BTF_SEQINIT flag, the page number will 628 * be P_INVALID. 629 */ 630 c = &t->bt_bcursor; 631 if (c->pgno == h->pgno) 632 if (c->index < off) { /* left page */ 633 c->pgno = l->pgno; 634 if (c->index >= skip) 635 ++c->index; 636 } else { /* right page */ 637 c->pgno = r->pgno; 638 if (c->index >= skip && skip > off) 639 ++c->index; 640 c->index -= off; 641 } 642 643 /* 644 * Decide which page to return, and adjust the skip index if the 645 * to-be-inserted-upon page has changed. 646 */ 647 if (skip > off) { 648 rval = r; 649 *pskip -= off + 1; 650 } else 651 rval = l; 652 653 for (off = 0; nxt < top; ++off) { 654 if (skip == nxt) { 655 skip = 0; 656 continue; 657 } 658 switch (h->flags & P_TYPE) { 659 case P_BINTERNAL: 660 src = bi = GETBINTERNAL(h, nxt); 661 nbytes = NBINTERNAL(bi->ksize); 662 break; 663 case P_BLEAF: 664 src = bl = GETBLEAF(h, nxt); 665 nbytes = NBLEAF(bl); 666 break; 667 case P_RINTERNAL: 668 src = GETRINTERNAL(h, nxt); 669 nbytes = NRINTERNAL; 670 break; 671 case P_RLEAF: 672 src = rl = GETRLEAF(h, nxt); 673 nbytes = NRLEAF(rl); 674 break; 675 } 676 ++nxt; 677 r->linp[off] = r->upper -= nbytes; 678 bcopy(src, (char *)r + r->upper, nbytes); 679 } 680 r->lower += off * sizeof(index_t); 681 682 /* If the key is being appended to the page, adjust the index. */ 683 if (skip == top) 684 r->lower += sizeof(index_t); 685 686 return (rval); 687 } 688 689 /* 690 * BT_PRESERVE -- Mark a chain of pages as used by an internal node. 691 * 692 * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the 693 * record that references them gets deleted. Chains pointed to by internal 694 * pages never get deleted. This routine marks a chain as pointed to by an 695 * internal page. 696 * 697 * Parameters: 698 * t: tree 699 * pg: page number of first page in the chain. 700 * 701 * Returns: 702 * RET_SUCCESS, RET_ERROR. 703 */ 704 static int 705 bt_preserve(t, pg) 706 BTREE *t; 707 pgno_t pg; 708 { 709 PAGE *h; 710 711 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 712 return (RET_ERROR); 713 h->flags |= P_PRESERVE; 714 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 715 return (RET_SUCCESS); 716 } 717 718 /* 719 * REC_TOTAL -- Return the number of recno entries below a page. 720 * 721 * Parameters: 722 * h: page 723 * 724 * Returns: 725 * The number of recno entries below a page. 726 * 727 * XXX 728 * These values could be set by the bt_psplit routine. The problem is that the 729 * entry has to be popped off of the stack etc. or the values have to be passed 730 * all the way back to bt_split/bt_rroot and it's not very clean. 731 */ 732 static recno_t 733 rec_total(h) 734 PAGE *h; 735 { 736 recno_t recs; 737 index_t nxt, top; 738 739 for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) 740 recs += GETRINTERNAL(h, nxt)->nrecs; 741 return (recs); 742 } 743