1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.10 2007/12/14 08:05:39 dillon Exp $ 35 */ 36 37 /* 38 * HAMMER B-Tree index 39 * 40 * HAMMER implements a modified B+Tree. 
 * In documentation this will
 * simply be referred to as the HAMMER B-Tree.  Basically a B-Tree
 * looks like a B+Tree (a B-Tree which stores its records only at the leaves
 * of the tree), but adds two additional boundary elements which describe
 * the left-most and right-most element a node is able to represent.  In
 * other words, we have boundary elements at the two ends of a B-Tree node
 * instead of sub-tree pointers.
 *
 * A B-Tree internal node looks like this:
 *
 *	B N N N N N N B   <-- boundary and internal elements
 *	 S S S S S S S    <-- subtree pointers
 *
 * A B-Tree leaf node basically looks like this:
 *
 *	L L L L L L L L   <-- leaf elements
 *
 * The radix for an internal node is 1 less than a leaf but we get a
 * number of significant benefits for our troubles.
 *
 * The big benefit to using a B-Tree containing boundary information
 * is that it is possible to cache pointers into the middle of the tree
 * and not have to start searches, insertions, OR deletions at the root
 * node.  In particular, searches are able to progress in a definitive
 * direction from any point in the tree without revisiting nodes.  This
 * greatly improves the efficiency of many operations, most especially
 * record appends.
 *
 * B-Trees also make the stacking of trees fairly straightforward.
 *
 * INTER-CLUSTER ELEMENTS:  An element of an internal node may reference
 * the root of another cluster rather than a node in the current cluster.
 * This is known as an inter-cluster reference.  Only B-Tree searches
 * will cross cluster boundaries.  The rebalancing and collapse code does
 * not attempt to move children between clusters.  A major effect of this
 * is that we have to relax minimum element count requirements and allow
 * trees to become somewhat unbalanced.
 *
 * INSERTIONS AND DELETIONS:  When inserting we split full nodes on our
 * way down as an optimization.
I originally experimented with rebalancing 80 * nodes on the way down for deletions but it created a huge mess due to 81 * the way inter-cluster linkages work. Instead, now I simply allow 82 * the tree to become unbalanced and allow leaf nodes to become empty. 83 * The delete code will try to clean things up from the bottom-up but 84 * will stop if related elements are not in-core or if it cannot get a node 85 * lock. 86 */ 87 #include "hammer.h" 88 #include <sys/buf.h> 89 #include <sys/buf2.h> 90 91 static int btree_search(hammer_cursor_t cursor, int flags); 92 static int btree_split_internal(hammer_cursor_t cursor); 93 static int btree_split_leaf(hammer_cursor_t cursor); 94 static int btree_remove(hammer_cursor_t cursor); 95 static int btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm); 96 #if 0 97 static int btree_rebalance(hammer_cursor_t cursor); 98 static int btree_collapse(hammer_cursor_t cursor); 99 #endif 100 static int btree_node_is_full(hammer_node_ondisk_t node); 101 static void hammer_make_separator(hammer_base_elm_t key1, 102 hammer_base_elm_t key2, hammer_base_elm_t dest); 103 104 /* 105 * Iterate records after a search. The cursor is iterated forwards past 106 * the current record until a record matching the key-range requirements 107 * is found. ENOENT is returned if the iteration goes past the ending 108 * key. 109 * 110 * key_beg/key_end is an INCLUSVE range. i.e. if you are scanning to load 111 * a 4096 byte buffer key_beg might specify an offset of 0 and key_end an 112 * offset of 4095. 113 * 114 * cursor->key_beg may or may not be modified by this function during 115 * the iteration. 
116 */ 117 int 118 hammer_btree_iterate(hammer_cursor_t cursor) 119 { 120 hammer_node_ondisk_t node; 121 hammer_btree_elm_t elm; 122 int error; 123 int r; 124 #if 0 125 int s; 126 int64_t save_key; 127 #endif 128 129 /* 130 * Skip past the current record 131 */ 132 node = cursor->node->ondisk; 133 if (node == NULL) 134 return(ENOENT); 135 if (cursor->index < node->count && 136 (cursor->flags & HAMMER_CURSOR_ATEDISK)) { 137 ++cursor->index; 138 } 139 140 /* 141 * Loop until an element is found or we are done. 142 */ 143 for (;;) { 144 /* 145 * We iterate up the tree and then index over one element 146 * while we are at the last element in the current node. 147 * 148 * NOTE: This can pop us up to another cluster. 149 * 150 * If we are at the root of the root cluster, cursor_up 151 * returns ENOENT. 152 * 153 * NOTE: hammer_cursor_up() will adjust cursor->key_beg 154 * when told to re-search for the cluster tag. 155 * 156 * XXX this could be optimized by storing the information in 157 * the parent reference. 158 * 159 * XXX we can lose the node lock temporarily, this could mess 160 * up our scan. 161 */ 162 if (cursor->index == node->count) { 163 error = hammer_cursor_up(cursor, 0); 164 if (error) 165 break; 166 node = cursor->node->ondisk; 167 KKASSERT(cursor->index != node->count); 168 ++cursor->index; 169 continue; 170 } 171 172 /* 173 * Iterate down the tree while we are at an internal node. 174 * Nodes cannot be empty, assert the case because if one is 175 * we will wind up in an infinite loop. 176 * 177 * We can avoid iterating through large swaths of transaction 178 * id space if the left and right separators are the same 179 * except for their transaction spaces. We can then skip 180 * the node if the left and right transaction spaces are the 181 * same sign. This directly optimized accesses to files with 182 * HUGE transactional histories, such as database files, 183 * allowing us to avoid having to iterate through the entire 184 * history. 
185 */ 186 if (node->type == HAMMER_BTREE_TYPE_INTERNAL) { 187 KKASSERT(node->count != 0); 188 elm = &node->elms[cursor->index]; 189 #if 0 190 /* 191 * temporarily disable this optimization, it needs 192 * more of a theoretical review. 193 */ 194 if (elm[0].base.obj_id == elm[1].base.obj_id && 195 elm[0].base.rec_type == elm[1].base.rec_type && 196 elm[0].base.key == elm[1].base.key) { 197 /* 198 * Left side transaction space 199 */ 200 save_key = cursor->key_beg.key; 201 cursor->key_beg.key = elm[0].base.key; 202 r = hammer_btree_cmp(&cursor->key_beg, 203 &elm[0].base); 204 cursor->key_beg.key = save_key; 205 206 /* 207 * Right side transaction space 208 */ 209 save_key = cursor->key_end.key; 210 cursor->key_end.key = elm[1].base.key; 211 s = hammer_btree_cmp(&cursor->key_end, 212 &elm[1].base); 213 cursor->key_end.key = save_key; 214 215 /* 216 * If our range is entirely on one side or 217 * the other side we can skip the sub-tree. 218 */ 219 if ((r < 0 && s < 0) || (r > 0 && s > 0)) { 220 ++cursor->index; 221 continue; 222 } 223 } 224 #endif 225 error = hammer_cursor_down(cursor); 226 if (error) 227 break; 228 KKASSERT(cursor->index == 0); 229 node = cursor->node->ondisk; 230 continue; 231 } 232 233 /* 234 * We are at a leaf. 235 * 236 * Determine if the record at the cursor has gone beyond the 237 * end of our range. Remember that our key range is inclusive. 238 * 239 * When iterating we may have to 'pick out' records matching 240 * our transaction requirements. A comparison return of 241 * +1 or -1 indicates a transactional record that is too 242 * old or too new but does not terminate the search. 243 */ 244 elm = &node->elms[cursor->index]; 245 r = hammer_btree_range_cmp(cursor, &elm->base); 246 if (r == -1 || r == 1) { 247 ++cursor->index; 248 continue; 249 } 250 251 /* 252 * We either found a match or are now out of bounds. 253 */ 254 error = r ? ENOENT : 0; 255 break; 256 } 257 return(error); 258 } 259 260 /* 261 * Lookup cursor->key_beg. 
0 is returned on success, ENOENT if the entry 262 * could not be found, and a fatal error otherwise. 263 * 264 * The cursor is suitably positioned for a deletion on success, and suitably 265 * positioned for an insertion on ENOENT. 266 * 267 * The cursor may begin anywhere, the search will traverse clusters in 268 * either direction to locate the requested element. 269 */ 270 int 271 hammer_btree_lookup(hammer_cursor_t cursor) 272 { 273 int error; 274 275 error = btree_search(cursor, 0); 276 if (error == 0 && cursor->flags) 277 error = hammer_btree_extract(cursor, cursor->flags); 278 return(error); 279 } 280 281 /* 282 * Extract the record and/or data associated with the cursor's current 283 * position. Any prior record or data stored in the cursor is replaced. 284 * The cursor must be positioned at a leaf node. 285 * 286 * NOTE: Only records can be extracted from internal B-Tree nodes, and 287 * only for inter-cluster references. At the moment we only support 288 * extractions from leaf nodes. 289 */ 290 int 291 hammer_btree_extract(hammer_cursor_t cursor, int flags) 292 { 293 hammer_node_ondisk_t node; 294 hammer_btree_elm_t elm; 295 hammer_cluster_t cluster; 296 u_int64_t buf_type; 297 int32_t cloff; 298 int error; 299 300 /* 301 * A cluster record type has no data reference, the information 302 * is stored directly in the record and B-Tree element. 303 * 304 * The case where the data reference resolves to the same buffer 305 * as the record reference must be handled. 
306 */ 307 node = cursor->node->ondisk; 308 KKASSERT(node->type == HAMMER_BTREE_TYPE_LEAF); 309 elm = &node->elms[cursor->index]; 310 cluster = cursor->node->cluster; 311 error = 0; 312 313 if ((flags & HAMMER_CURSOR_GET_RECORD) && error == 0) { 314 cloff = elm->leaf.rec_offset; 315 cursor->record = hammer_bread(cluster, cloff, 316 HAMMER_FSBUF_RECORDS, &error, 317 &cursor->record_buffer); 318 } else { 319 cloff = 0; 320 } 321 if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) { 322 if ((cloff ^ elm->leaf.data_offset) & ~HAMMER_BUFMASK) { 323 /* 324 * The data is not in the same buffer as the last 325 * record we cached, but it could still be embedded 326 * in a record. Note that we may not have loaded the 327 * record's buffer above, depending on flags. 328 */ 329 if ((elm->leaf.rec_offset ^ elm->leaf.data_offset) & 330 ~HAMMER_BUFMASK) { 331 if (elm->leaf.data_len & HAMMER_BUFMASK) 332 buf_type = HAMMER_FSBUF_DATA; 333 else 334 buf_type = 0; /* pure data buffer */ 335 } else { 336 buf_type = HAMMER_FSBUF_RECORDS; 337 } 338 cursor->data = hammer_bread(cluster, 339 elm->leaf.data_offset, 340 buf_type, &error, 341 &cursor->data_buffer); 342 } else { 343 /* 344 * Data in same buffer as record. Note that we 345 * leave any existing data_buffer intact, even 346 * though we don't use it in this case, in case 347 * other records extracted during an iteration 348 * go back to it. 349 * 350 * Just assume the buffer type is correct. 351 */ 352 cursor->data = (void *) 353 ((char *)cursor->record_buffer->ondisk + 354 (elm->leaf.data_offset & HAMMER_BUFMASK)); 355 } 356 } 357 return(error); 358 } 359 360 361 /* 362 * Insert a leaf element into the B-Tree at the current cursor position. 363 * The cursor is positioned such that the element at and beyond the cursor 364 * are shifted to make room for the new record. 
365 * 366 * The caller must call hammer_btree_lookup() with the HAMMER_CURSOR_INSERT 367 * flag set and that call must return ENOENT before this function can be 368 * called. 369 * 370 * ENOSPC is returned if there is no room to insert a new record. 371 */ 372 int 373 hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) 374 { 375 hammer_node_ondisk_t parent; 376 hammer_node_ondisk_t node; 377 int i; 378 379 #if 0 380 /* HANDLED BY CALLER */ 381 /* 382 * Issue a search to get our cursor at the right place. The search 383 * will get us to a leaf node. 384 * 385 * The search also does some setup for our insert, so there is always 386 * room in the leaf. 387 */ 388 error = btree_search(cursor, HAMMER_CURSOR_INSERT); 389 if (error != ENOENT) { 390 if (error == 0) 391 error = EEXIST; 392 return (error); 393 } 394 #endif 395 396 /* 397 * Insert the element at the leaf node and update the count in the 398 * parent. It is possible for parent to be NULL, indicating that 399 * the root of the B-Tree in the cluster is a leaf. It is also 400 * possible for the leaf to be empty. 401 * 402 * Remember that the right-hand boundary is not included in the 403 * count. 404 */ 405 node = cursor->node->ondisk; 406 i = cursor->index; 407 KKASSERT(node->type == HAMMER_BTREE_TYPE_LEAF); 408 KKASSERT(node->count < HAMMER_BTREE_LEAF_ELMS); 409 if (i != node->count) { 410 bcopy(&node->elms[i], &node->elms[i+1], 411 (node->count - i) * sizeof(*elm)); 412 } 413 node->elms[i] = *elm; 414 ++node->count; 415 hammer_modify_node(cursor->node); 416 417 /* 418 * Adjust the sub-tree count in the parent. note that the parent 419 * may be in a different cluster. 
420 */ 421 if (cursor->parent) { 422 parent = cursor->parent->ondisk; 423 i = cursor->parent_index; 424 ++parent->elms[i].internal.subtree_count; 425 KKASSERT(parent->elms[i].internal.subtree_count <= node->count); 426 hammer_modify_node(cursor->parent); 427 } 428 return(0); 429 } 430 431 /* 432 * Delete a record from the B-Tree's at the current cursor position. 433 * The cursor is positioned such that the current element is the one 434 * to be deleted. 435 * 436 * On return the cursor will be positioned after the deleted element and 437 * MAY point to an internal node. It will be suitable for the continuation 438 * of an iteration but not for an insertion or deletion. 439 * 440 * Deletions will attempt to partially rebalance the B-Tree in an upward 441 * direction. It is possible to end up with empty leafs. An empty internal 442 * node is impossible (worst case: it has one element pointing to an empty 443 * leaf). 444 */ 445 int 446 hammer_btree_delete(hammer_cursor_t cursor) 447 { 448 hammer_node_ondisk_t ondisk; 449 hammer_node_t node; 450 hammer_node_t parent; 451 hammer_btree_elm_t elm; 452 int error; 453 int i; 454 455 #if 0 456 /* HANDLED BY CALLER */ 457 /* 458 * Locate the leaf element to delete. The search is also responsible 459 * for doing some of the rebalancing work on its way down. 460 */ 461 error = btree_search(cursor, HAMMER_CURSOR_DELETE); 462 if (error) 463 return (error); 464 #endif 465 466 /* 467 * Delete the element from the leaf node. 468 * 469 * Remember that leaf nodes do not have boundaries. 470 */ 471 node = cursor->node; 472 ondisk = node->ondisk; 473 i = cursor->index; 474 475 KKASSERT(ondisk->type == HAMMER_BTREE_TYPE_LEAF); 476 if (i + 1 != ondisk->count) { 477 bcopy(&ondisk->elms[i+1], &ondisk->elms[i], 478 (ondisk->count - i - 1) * sizeof(ondisk->elms[0])); 479 } 480 --ondisk->count; 481 hammer_modify_node(node); 482 if (cursor->parent != NULL) { 483 /* 484 * Adjust parent's notion of the leaf's count. 
subtree_count 485 * is only approximate, it is allowed to be too small but 486 * never allowed to be too large. Make sure we don't drop 487 * the count below 0. 488 */ 489 parent = cursor->parent; 490 elm = &parent->ondisk->elms[cursor->parent_index]; 491 if (elm->internal.subtree_count) 492 --elm->internal.subtree_count; 493 KKASSERT(elm->internal.subtree_count <= ondisk->count); 494 hammer_modify_node(parent); 495 } 496 497 /* 498 * It is possible, but not desireable, to stop here. If the element 499 * count drops to 0 (which is allowed for a leaf), try recursively 500 * remove the B-Tree node. 501 * 502 * XXX rebalancing calls would go here too. 503 * 504 * This may reposition the cursor at one of the parent's of the 505 * current node. 506 */ 507 if (ondisk->count == 0) { 508 error = btree_remove(cursor); 509 if (error == EAGAIN) 510 error = 0; 511 } else { 512 error = 0; 513 } 514 return(error); 515 } 516 517 /* 518 * PRIMAY B-TREE SEARCH SUPPORT PROCEDURE 519 * 520 * Search a cluster's B-Tree for cursor->key_beg, return the matching node. 521 * 522 * The search begins at the current node and will instantiate a NULL 523 * parent if necessary and if not at the root of the cluster. On return 524 * parent will be non-NULL unless the cursor is sitting at a root-leaf. 525 * 526 * The search code may be forced to iterate up the tree if the conditions 527 * required for an insertion or deletion are not met. This does not occur 528 * very often. 529 * 530 * INSERTIONS: The search will split full nodes and leaves on its way down 531 * and guarentee that the leaf it ends up on is not full. 532 * 533 * DELETIONS: The search will rebalance the tree on its way down. 534 * 535 * The search is only guarenteed to end up on a leaf if an error code of 0 536 * is returned, or if inserting and an error code of ENOENT is returned. 
537 */ 538 static 539 int 540 btree_search(hammer_cursor_t cursor, int flags) 541 { 542 hammer_node_ondisk_t node; 543 hammer_cluster_t cluster; 544 int error; 545 int i; 546 int r; 547 548 flags |= cursor->flags; 549 550 /* 551 * Move our cursor up the tree until we find a node whos range covers 552 * the key we are trying to locate. This may move us between 553 * clusters. 554 * 555 * The left bound is inclusive, the right bound is non-inclusive. 556 * It is ok to cursor up too far so when cursoring across a cluster 557 * boundary. 558 * 559 * First see if we can skip the whole cluster. hammer_cursor_up() 560 * handles both cases but this way we don't check the cluster 561 * bounds when going up the tree within a cluster. 562 */ 563 cluster = cursor->node->cluster; 564 while ( 565 hammer_btree_cmp(&cursor->key_beg, &cluster->clu_btree_beg) < 0 || 566 hammer_btree_cmp(&cursor->key_beg, &cluster->clu_btree_end) >= 0) { 567 error = hammer_cursor_toroot(cursor); 568 if (error) 569 goto done; 570 error = hammer_cursor_up(cursor, 0); 571 if (error) 572 goto done; 573 cluster = cursor->node->cluster; 574 } 575 576 /* 577 * Deal with normal cursoring within a cluster. The right bound 578 * is non-inclusive. That is, the bounds form a separator. 579 */ 580 while (hammer_btree_cmp(&cursor->key_beg, cursor->left_bound) < 0 || 581 hammer_btree_cmp(&cursor->key_beg, cursor->right_bound) >= 0) { 582 error = hammer_cursor_up(cursor, 0); 583 if (error) 584 goto done; 585 } 586 587 /* 588 * We better have ended up with a node somewhere, and our second 589 * while loop had better not have traversed up a cluster. 590 */ 591 KKASSERT(cursor->node != NULL && cursor->node->cluster == cluster); 592 593 /* 594 * If we are inserting we can't start at a full node if the parent 595 * is also full (because there is no way to split the node), 596 * continue running up the tree until we hit the root of the 597 * root cluster or until the requirement is satisfied. 
598 * 599 * NOTE: These cursor-up's CAN continue to cross cluster boundaries. 600 * 601 * XXX as an optimization it should be possible to unbalance the tree 602 * and stop at the root of the current cluster. 603 */ 604 while (flags & HAMMER_CURSOR_INSERT) { 605 if (btree_node_is_full(cursor->node->ondisk) == 0) 606 break; 607 if (cursor->parent == NULL) 608 break; 609 if (cursor->parent->ondisk->count != HAMMER_BTREE_INT_ELMS) 610 break; 611 error = hammer_cursor_up(cursor, 0); 612 /* cluster and node are now may become stale */ 613 if (error) 614 goto done; 615 } 616 /* cluster = cursor->node->cluster; not needed until next cluster = */ 617 618 #if 0 619 /* 620 * If we are deleting we can't start at an internal node with only 621 * one element unless it is root, because all of our code assumes 622 * that internal nodes will never be empty. Just do this generally 623 * for both leaf and internal nodes to get better balance. 624 * 625 * This handles the case where the cursor is sitting at a leaf and 626 * either the leaf or parent contain an insufficient number of 627 * elements. 628 * 629 * NOTE: These cursor-up's CAN continue to cross cluster boundaries. 630 * 631 * XXX NOTE: Iterations may not set this flag anyway. 632 */ 633 while (flags & HAMMER_CURSOR_DELETE) { 634 if (cursor->node->ondisk->count > 1) 635 break; 636 if (cursor->parent == NULL) 637 break; 638 KKASSERT(cursor->node->ondisk->count != 0); 639 error = hammer_cursor_up(cursor, 0); 640 /* cluster and node are now may become stale */ 641 if (error) 642 goto done; 643 } 644 #endif 645 646 /*new_cluster:*/ 647 /* 648 * Push down through internal nodes to locate the requested key. 649 */ 650 cluster = cursor->node->cluster; 651 node = cursor->node->ondisk; 652 while (node->type == HAMMER_BTREE_TYPE_INTERNAL) { 653 #if 0 654 /* 655 * If we are a the root node and deleting, try to collapse 656 * all of the root's children into the root. This is the 657 * only point where tree depth is reduced. 
658 * 659 * XXX NOTE: Iterations may not set this flag anyway. 660 */ 661 if ((flags & HAMMER_CURSOR_DELETE) && cursor->parent == NULL) { 662 error = btree_collapse(cursor); 663 /* node becomes stale after call */ 664 if (error) 665 goto done; 666 } 667 node = cursor->node->ondisk; 668 #endif 669 /* 670 * Scan the node to find the subtree index to push down into. 671 * We go one-past, then back-up. 672 */ 673 for (i = 0; i < node->count; ++i) { 674 r = hammer_btree_cmp(&cursor->key_beg, 675 &node->elms[i].base); 676 if (r < 0) 677 break; 678 } 679 680 /* 681 * It is possible for the search to terminate at i == 0, 682 * which is to the LEFT of the LEFT boundary but the RIGHT 683 * of the parent's boundary on the left of the sub-tree 684 * element. This case can occur due to deletions (see 685 * btree_remove()). 686 * 687 * When this case occurs an ENOENT return is guarenteed but 688 * if inserting we must still terminate at a leaf. The 689 * solution is to make the node's left boundary inherit the 690 * boundary stored in the parent. 691 * 692 * When doing this inheritance some fields in 'base' are 693 * actually related to the internal element's child 694 * specification and not to the key. These have to be 695 * retained. 696 */ 697 if (i == 0) { 698 u_int8_t save; 699 if ((flags & HAMMER_CURSOR_INSERT) == 0) { 700 cursor->index = 0; 701 return(ENOENT); 702 } 703 save = node->elms[0].subtree_type; 704 node->elms[0].base = *cursor->left_bound; 705 node->elms[0].subtree_type = save; 706 hammer_modify_node(cursor->node); 707 } else { 708 /* 709 * The push-down index is now i - 1. 710 */ 711 --i; 712 } 713 cursor->index = i; 714 715 /* 716 * Handle insertion and deletion requirements. 717 * 718 * If inserting split full nodes. The split code will 719 * adjust cursor->node and cursor->index if the current 720 * index winds up in the new node. 
721 */ 722 if (flags & HAMMER_CURSOR_INSERT) { 723 if (node->count == HAMMER_BTREE_INT_ELMS) { 724 error = btree_split_internal(cursor); 725 if (error) 726 goto done; 727 /* 728 * reload stale pointers 729 */ 730 i = cursor->index; 731 node = cursor->node->ondisk; 732 } 733 } 734 735 #if 0 736 /* 737 * If deleting rebalance - do not allow the child to have 738 * just one element or we will not be able to delete it. 739 * 740 * Neither internal or leaf nodes (except a root-leaf) are 741 * allowed to drop to 0 elements. (XXX - well, leaf nodes 742 * can at the moment). 743 * 744 * Our separators may have been reorganized after rebalancing, 745 * so we have to pop back up and rescan. 746 * 747 * XXX test for subtree_count < maxelms / 2, minus 1 or 2 748 * for hysteresis? 749 * 750 * XXX NOTE: Iterations may not set this flag anyway. 751 */ 752 if (flags & HAMMER_CURSOR_DELETE) { 753 if (node->elms[i].internal.subtree_count <= 1) { 754 error = btree_rebalance(cursor); 755 if (error) 756 goto done; 757 /* cursor->index is invalid after call */ 758 goto new_cluster; 759 } 760 } 761 #endif 762 763 /* 764 * Push down (push into new node, existing node becomes 765 * the parent). 766 */ 767 error = hammer_cursor_down(cursor); 768 /* node and cluster become stale */ 769 if (error) 770 goto done; 771 node = cursor->node->ondisk; 772 cluster = cursor->node->cluster; 773 } 774 775 /* 776 * We are at a leaf, do a linear search of the key array. 777 * (XXX do a binary search). On success the index is set to the 778 * matching element, on failure the index is set to the insertion 779 * point. 780 * 781 * Boundaries are not stored in leaf nodes, so the index can wind 782 * up to the left of element 0 (index == 0) or past the end of 783 * the array (index == node->count). 
784 */ 785 KKASSERT(node->count <= HAMMER_BTREE_LEAF_ELMS); 786 787 for (i = 0; i < node->count; ++i) { 788 r = hammer_btree_cmp(&cursor->key_beg, &node->elms[i].base); 789 790 /* 791 * Stop if we've flipped past key_beg 792 */ 793 if (r < 0) 794 break; 795 796 /* 797 * Return an exact match 798 */ 799 if (r == 0) { 800 cursor->index = i; 801 error = 0; 802 goto done; 803 } 804 } 805 806 /* 807 * No exact match was found, i is now at the insertion point. 808 * 809 * If inserting split a full leaf before returning. This 810 * may have the side effect of adjusting cursor->node and 811 * cursor->index. 812 */ 813 cursor->index = i; 814 if ((flags & HAMMER_CURSOR_INSERT) && 815 node->count == HAMMER_BTREE_LEAF_ELMS) { 816 error = btree_split_leaf(cursor); 817 /* NOT USED 818 i = cursor->index; 819 node = &cursor->node->internal; 820 */ 821 if (error) 822 goto done; 823 } 824 error = ENOENT; 825 done: 826 return(error); 827 } 828 829 830 /************************************************************************ 831 * SPLITTING AND MERGING * 832 ************************************************************************ 833 * 834 * These routines do all the dirty work required to split and merge nodes. 835 */ 836 837 /* 838 * Split an internal node into two nodes and move the separator at the split 839 * point to the parent. Note that the parent's parent's element pointing 840 * to our parent will have an incorrect subtree_count (we don't update it). 841 * It will be low, which is ok. 842 * 843 * (cursor->node, cursor->index) indicates the element the caller intends 844 * to push into. We will adjust node and index if that element winds 845 * up in the split node. 846 * 847 * If we are at the root of a cluster a new root must be created with two 848 * elements, one pointing to the original root and one pointing to the 849 * newly allocated split node. 850 * 851 * NOTE! Being at the root of a cluster is different from being at the 852 * root of the root cluster. 
cursor->parent will not be NULL and 853 * cursor->node->ondisk.parent must be tested against 0. Theoretically 854 * we could propogate the algorithm into the parent and deal with multiple 855 * 'roots' in the cluster header, but it's easier not to. 856 */ 857 static 858 int 859 btree_split_internal(hammer_cursor_t cursor) 860 { 861 hammer_node_ondisk_t ondisk; 862 hammer_node_t node; 863 hammer_node_t parent; 864 hammer_node_t new_node; 865 hammer_btree_elm_t elm; 866 hammer_btree_elm_t parent_elm; 867 int parent_index; 868 int made_root; 869 int split; 870 int error; 871 int i; 872 const int esize = sizeof(*elm); 873 874 /* 875 * We are splitting but elms[split] will be promoted to the parent, 876 * leaving the right hand node with one less element. If the 877 * insertion point will be on the left-hand side adjust the split 878 * point to give the right hand side one additional node. 879 */ 880 node = cursor->node; 881 ondisk = node->ondisk; 882 split = (ondisk->count + 1) / 2; 883 if (cursor->index <= split) 884 --split; 885 error = 0; 886 887 /* 888 * If we are at the root of the cluster, create a new root node with 889 * 1 element and split normally. Avoid making major modifications 890 * until we know the whole operation will work. 891 * 892 * The root of the cluster is different from the root of the root 893 * cluster. Use the node's on-disk structure's parent offset to 894 * detect the case. 
895 */ 896 if (ondisk->parent == 0) { 897 parent = hammer_alloc_btree(node->cluster, &error); 898 if (parent == NULL) 899 return(error); 900 hammer_lock_ex(&parent->lock); 901 ondisk = parent->ondisk; 902 ondisk->count = 1; 903 ondisk->parent = 0; 904 ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; 905 ondisk->elms[0].base = node->cluster->clu_btree_beg; 906 ondisk->elms[0].internal.subtree_type = node->ondisk->type; 907 ondisk->elms[0].internal.subtree_offset = node->node_offset; 908 ondisk->elms[1].base = node->cluster->clu_btree_end; 909 made_root = 1; 910 parent_index = 0; /* index of current node in parent */ 911 } else { 912 made_root = 0; 913 parent = cursor->parent; 914 parent_index = cursor->parent_index; 915 KKASSERT(parent->cluster == node->cluster); 916 } 917 918 /* 919 * Split node into new_node at the split point. 920 * 921 * B O O O P N N B <-- P = node->elms[split] 922 * 0 1 2 3 4 5 6 <-- subtree indices 923 * 924 * x x P x x 925 * s S S s 926 * / \ 927 * B O O O B B N N B <--- inner boundary points are 'P' 928 * 0 1 2 3 4 5 6 929 * 930 */ 931 new_node = hammer_alloc_btree(node->cluster, &error); 932 if (new_node == NULL) { 933 if (made_root) { 934 hammer_unlock(&parent->lock); 935 hammer_free_btree(parent->cluster, parent->node_offset); 936 hammer_rel_node(parent); 937 } 938 return(error); 939 } 940 hammer_lock_ex(&new_node->lock); 941 942 /* 943 * Create the new node. P becomes the left-hand boundary in the 944 * new node. Copy the right-hand boundary as well. 945 * 946 * elm is the new separator. 947 */ 948 ondisk = node->ondisk; 949 elm = &ondisk->elms[split]; 950 bcopy(elm, &new_node->ondisk->elms[0], 951 (ondisk->count - split + 1) * esize); 952 new_node->ondisk->count = ondisk->count - split; 953 new_node->ondisk->parent = parent->node_offset; 954 new_node->ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; 955 KKASSERT(ondisk->type == new_node->ondisk->type); 956 957 /* 958 * Cleanup the original node. 
P becomes the new boundary, its 959 * subtree_offset was moved to the new node. If we had created 960 * a new root its parent pointer may have changed. 961 */ 962 elm->internal.subtree_offset = 0; 963 ondisk->count = split; 964 965 /* 966 * Insert the separator into the parent, fixup the parent's 967 * reference to the original node, and reference the new node. 968 * The separator is P. 969 * 970 * Remember that base.count does not include the right-hand boundary. 971 */ 972 ondisk = parent->ondisk; 973 ondisk->elms[parent_index].internal.subtree_count = split; 974 parent_elm = &ondisk->elms[parent_index+1]; 975 bcopy(parent_elm, parent_elm + 1, 976 (ondisk->count - parent_index) * esize); 977 parent_elm->internal.base = elm->base; /* separator P */ 978 parent_elm->internal.subtree_offset = new_node->node_offset; 979 parent_elm->internal.subtree_count = new_node->ondisk->count; 980 parent_elm->internal.subtree_type = new_node->ondisk->type; 981 ++ondisk->count; 982 983 /* 984 * The children of new_node need their parent pointer set to new_node. 985 */ 986 for (i = 0; i < new_node->ondisk->count; ++i) { 987 elm = &new_node->ondisk->elms[i]; 988 error = btree_set_parent(new_node, elm); 989 if (error) { 990 panic("btree_split_internal: btree-fixup problem"); 991 } 992 } 993 994 /* 995 * The cluster's root pointer may have to be updated. 996 */ 997 if (made_root) { 998 node->cluster->ondisk->clu_btree_root = parent->node_offset; 999 hammer_modify_cluster(node->cluster); 1000 node->ondisk->parent = parent->node_offset; 1001 if (cursor->parent) { 1002 hammer_unlock(&cursor->parent->lock); 1003 hammer_rel_node(cursor->parent); 1004 } 1005 cursor->parent = parent; /* lock'd and ref'd */ 1006 } 1007 1008 hammer_modify_node(node); 1009 hammer_modify_node(new_node); 1010 hammer_modify_node(parent); 1011 1012 /* 1013 * Ok, now adjust the cursor depending on which element the original 1014 * index was pointing at. 
If we are >= the split point the push node 1015 * is now in the new node. 1016 * 1017 * NOTE: If we are at the split point itself we cannot stay with the 1018 * original node because the push index will point at the right-hand 1019 * boundary, which is illegal. 1020 * 1021 * NOTE: The cursor's parent or parent_index must be adjusted for 1022 * the case where a new parent (new root) was created, and the case 1023 * where the cursor is now pointing at the split node. 1024 */ 1025 if (cursor->index >= split) { 1026 cursor->parent_index = parent_index + 1; 1027 cursor->index -= split; 1028 hammer_unlock(&cursor->node->lock); 1029 hammer_rel_node(cursor->node); 1030 cursor->node = new_node; /* locked and ref'd */ 1031 } else { 1032 cursor->parent_index = parent_index; 1033 hammer_unlock(&new_node->lock); 1034 hammer_rel_node(new_node); 1035 } 1036 1037 /* 1038 * Fixup left and right bounds 1039 */ 1040 parent_elm = &parent->ondisk->elms[cursor->parent_index]; 1041 cursor->left_bound = &parent_elm[0].internal.base; 1042 cursor->right_bound = &parent_elm[1].internal.base; 1043 1044 return (0); 1045 } 1046 1047 /* 1048 * Same as the above, but splits a full leaf node. 1049 */ 1050 static 1051 int 1052 btree_split_leaf(hammer_cursor_t cursor) 1053 { 1054 hammer_node_ondisk_t ondisk; 1055 hammer_node_t parent; 1056 hammer_node_t leaf; 1057 hammer_node_t new_leaf; 1058 hammer_btree_elm_t elm; 1059 hammer_btree_elm_t parent_elm; 1060 int parent_index; 1061 int made_root; 1062 int split; 1063 int error; 1064 const size_t esize = sizeof(*elm); 1065 1066 /* 1067 * Calculate the split point. If the insertion point will be on 1068 * the left-hand side adjust the split point to give the right 1069 * hand side one additional node. 
1070 */ 1071 leaf = cursor->node; 1072 ondisk = leaf->ondisk; 1073 split = (ondisk->count + 1) / 2; 1074 if (cursor->index <= split) 1075 --split; 1076 error = 0; 1077 1078 /* 1079 * If we are at the root of the tree, create a new root node with 1080 * 1 element and split normally. Avoid making major modifications 1081 * until we know the whole operation will work. 1082 */ 1083 if (ondisk->parent == 0) { 1084 parent = hammer_alloc_btree(leaf->cluster, &error); 1085 if (parent == NULL) 1086 return(error); 1087 hammer_lock_ex(&parent->lock); 1088 ondisk = parent->ondisk; 1089 ondisk->count = 1; 1090 ondisk->parent = 0; 1091 ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; 1092 ondisk->elms[0].base = leaf->cluster->clu_btree_beg; 1093 ondisk->elms[0].internal.subtree_type = leaf->ondisk->type; 1094 ondisk->elms[0].internal.subtree_offset = leaf->node_offset; 1095 ondisk->elms[1].base = leaf->cluster->clu_btree_end; 1096 made_root = 1; 1097 parent_index = 0; /* insertion point in parent */ 1098 } else { 1099 made_root = 0; 1100 parent = cursor->parent; 1101 parent_index = cursor->parent_index; 1102 KKASSERT(parent->cluster == leaf->cluster); 1103 } 1104 1105 /* 1106 * Split leaf into new_leaf at the split point. Select a separator 1107 * value in-between the two leafs but with a bent towards the right 1108 * leaf since comparisons use an 'elm >= separator' inequality. 1109 * 1110 * L L L L L L L L 1111 * 1112 * x x P x x 1113 * s S S s 1114 * / \ 1115 * L L L L L L L L 1116 */ 1117 new_leaf = hammer_alloc_btree(leaf->cluster, &error); 1118 if (new_leaf == NULL) { 1119 if (made_root) { 1120 hammer_unlock(&parent->lock); 1121 hammer_free_btree(parent->cluster, parent->node_offset); 1122 hammer_rel_node(parent); 1123 } 1124 return(error); 1125 } 1126 hammer_lock_ex(&new_leaf->lock); 1127 1128 /* 1129 * Create the new node. P become the left-hand boundary in the 1130 * new node. Copy the right-hand boundary as well. 
1131 */ 1132 ondisk = leaf->ondisk; 1133 elm = &ondisk->elms[split]; 1134 bcopy(elm, &new_leaf->ondisk->elms[0], (ondisk->count - split) * esize); 1135 new_leaf->ondisk->count = ondisk->count - split; 1136 new_leaf->ondisk->parent = parent->node_offset; 1137 new_leaf->ondisk->type = HAMMER_BTREE_TYPE_LEAF; 1138 KKASSERT(ondisk->type == new_leaf->ondisk->type); 1139 1140 /* 1141 * Cleanup the original node. Because this is a leaf node and 1142 * leaf nodes do not have a right-hand boundary, there 1143 * aren't any special edge cases to clean up. We just fixup the 1144 * count. 1145 */ 1146 ondisk->count = split; 1147 1148 /* 1149 * Insert the separator into the parent, fixup the parent's 1150 * reference to the original node, and reference the new node. 1151 * The separator is P. 1152 * 1153 * Remember that base.count does not include the right-hand boundary. 1154 * We are copying parent_index+1 to parent_index+2, not +0 to +1. 1155 */ 1156 ondisk = parent->ondisk; 1157 ondisk->elms[parent_index].internal.subtree_count = split; 1158 parent_elm = &ondisk->elms[parent_index+1]; 1159 if (parent_index + 1 != ondisk->count) { 1160 bcopy(parent_elm, parent_elm + 1, 1161 (ondisk->count - parent_index - 1) * esize); 1162 } 1163 hammer_make_separator(&elm[-1].base, &elm[0].base, &parent_elm->base); 1164 parent_elm->internal.subtree_offset = new_leaf->node_offset; 1165 parent_elm->internal.subtree_count = new_leaf->ondisk->count; 1166 parent_elm->internal.subtree_type = new_leaf->ondisk->type; 1167 ++ondisk->count; 1168 1169 /* 1170 * The cluster's root pointer may have to be updated. 
1171 */ 1172 if (made_root) { 1173 leaf->cluster->ondisk->clu_btree_root = parent->node_offset; 1174 hammer_modify_cluster(leaf->cluster); 1175 leaf->ondisk->parent = parent->node_offset; 1176 if (cursor->parent) { 1177 hammer_unlock(&cursor->parent->lock); 1178 hammer_rel_node(cursor->parent); 1179 } 1180 cursor->parent = parent; /* lock'd and ref'd */ 1181 } 1182 1183 hammer_modify_node(leaf); 1184 hammer_modify_node(new_leaf); 1185 hammer_modify_node(parent); 1186 1187 /* 1188 * Ok, now adjust the cursor depending on which element the original 1189 * index was pointing at. If we are >= the split point the push node 1190 * is now in the new node. 1191 * 1192 * NOTE: If we are at the split point itself we cannot stay with the 1193 * original node because the push index will point at the right-hand 1194 * boundary, which is illegal. 1195 */ 1196 if (cursor->index >= split) { 1197 cursor->parent_index = parent_index + 1; 1198 cursor->index -= split; 1199 hammer_unlock(&cursor->node->lock); 1200 hammer_rel_node(cursor->node); 1201 cursor->node = new_leaf; 1202 } else { 1203 cursor->parent_index = parent_index; 1204 hammer_unlock(&new_leaf->lock); 1205 hammer_rel_node(new_leaf); 1206 } 1207 1208 /* 1209 * Fixup left and right bounds 1210 */ 1211 parent_elm = &parent->ondisk->elms[cursor->parent_index]; 1212 cursor->left_bound = &parent_elm[0].internal.base; 1213 cursor->right_bound = &parent_elm[1].internal.base; 1214 1215 return (0); 1216 } 1217 1218 /* 1219 * Attempt to remove the empty B-Tree node at (cursor->node). Returns 0 1220 * on success, EAGAIN if we could not acquire the necessary locks, or some 1221 * other error. 1222 * 1223 * On return the cursor may end up pointing at an internal node, suitable 1224 * for further iteration but not for insertion or deletion. 1225 * 1226 * cursor->node may be an internal node or a leaf node. 
 */
int
btree_remove(hammer_cursor_t cursor)
{
	hammer_node_ondisk_t ondisk;
	hammer_btree_elm_t elm;
	hammer_node_t save;
	hammer_node_t node;
	hammer_node_t parent;
	int error;
	int i;

	/*
	 * If we are at the root of the root cluster there is nothing to
	 * remove, but an internal node at the root of a cluster is not
	 * allowed to be empty so convert it to a leaf node.
	 */
	if (cursor->parent == NULL) {
		ondisk = cursor->node->ondisk;
		KKASSERT(ondisk->parent == 0);
		ondisk->type = HAMMER_BTREE_TYPE_LEAF;
		ondisk->count = 0;
		hammer_modify_node(cursor->node);
		kprintf("EMPTY ROOT OF ROOT CLUSTER -> LEAF\n");
		return(0);
	}

	/*
	 * Retain a reference to cursor->node, ex-lock again (2 locks now)
	 * so we do not lose the lock when we cursor around.
	 */
	save = cursor->node;
	hammer_ref_node(save);
	hammer_lock_ex(&save->lock);

	/*
	 * We need to be able to lock the parent of the parent.  Do this
	 * non-blocking and return EAGAIN if the lock cannot be acquired.
	 * non-blocking is required in order to avoid a deadlock.
	 *
	 * After we cursor up, parent is moved to node and the new parent
	 * is the parent of the parent.
	 */
	error = hammer_cursor_up(cursor, 1);
	if (error) {
		kprintf("BTREE_REMOVE: Cannot lock parent\n");
		hammer_unlock(&save->lock);
		hammer_rel_node(save);
		return(error);
	}

	/*
	 * At this point we want to remove the element at (node, index),
	 * which is now the (original) parent pointing to the saved node.
	 * Removing the element allows us to then free the node it was
	 * pointing to.
	 *
	 * However, an internal node is not allowed to have 0 elements, so
	 * if the count would drop to 0 we have to recurse.  It is possible
	 * for the recursion to fail.
	 *
	 * NOTE: The cursor is in an indeterminant position after recursing,
	 * but will still be suitable for an iteration.
	 */
	node = cursor->node;
	KKASSERT(node->ondisk->count > 0);
	if (node->ondisk->count == 1) {
		/*
		 * Removing our element would empty the parent; recurse to
		 * remove the parent itself, then free the saved node only
		 * if the recursion succeeded.
		 */
		error = btree_remove(cursor);
		if (error == 0) {
			kprintf("BTREE_REMOVE: Successful!\n");
			hammer_flush_node(save);
			hammer_free_btree(save->cluster, save->node_offset);
		} else {
			kprintf("BTREE_REMOVE: Recursion failed %d\n", error);
		}
		hammer_unlock(&save->lock);
		hammer_rel_node(save);
		return(error);
	}

	/*
	 * Remove the element at (node, index) and adjust the parent's
	 * subtree_count.
	 *
	 * NOTE! Removing the element will cause the left-hand boundary
	 * to no longer match the child node:
	 *
	 * LxMxR (left, middle, right).  If the first 'x' element is
	 * removed then you get LxR and L no longer matches the left-hand
	 * boundary of the sub-node pointed to by what used to be the
	 * second x.  This case is handled in btree_search().
	 */
#if 0
	kprintf("BTREE_REMOVE: Removing element %d\n", cursor->index);
#endif
	ondisk = node->ondisk;
	i = cursor->index;
	/* (count - i) elements: [i+1..count-1] plus the right-hand boundary */
	bcopy(&ondisk->elms[i+1], &ondisk->elms[i],
	      (ondisk->count - i) * sizeof(ondisk->elms[0]));
	--ondisk->count;
	hammer_modify_node(cursor->node);

	/*
	 * Adjust the parent-parent's (now parent) reference to the parent
	 * (now node).
	 */
	if ((parent = cursor->parent) != NULL) {
		elm = &parent->ondisk->elms[cursor->parent_index];
		if (elm->internal.subtree_count != ondisk->count) {
			elm->internal.subtree_count = ondisk->count;
			hammer_modify_node(parent);
		}
		if (elm->subtree_type != HAMMER_BTREE_TYPE_CLUSTER &&
		    elm->subtree_type != ondisk->type) {
			elm->subtree_type = ondisk->type;
			hammer_modify_node(parent);
		}
	}

	/*
	 * Free the saved node.
	 */
	hammer_flush_node(save);
	hammer_free_btree(save->cluster, save->node_offset);
	hammer_unlock(&save->lock);
	hammer_rel_node(save);
	return(0);
}

/*
 * The child represented by the element in internal node node needs
 * to have its parent pointer adjusted.
 */
static
int
btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
{
	hammer_volume_t volume;
	hammer_cluster_t cluster;
	hammer_node_t child;
	int error;

	error = 0;

	switch(elm->internal.subtree_type) {
	case HAMMER_BTREE_TYPE_LEAF:
	case HAMMER_BTREE_TYPE_INTERNAL:
		/*
		 * Intra-cluster child: update the child node's on-disk
		 * parent offset.
		 */
		child = hammer_get_node(node->cluster,
					elm->internal.subtree_offset, &error);
		if (error == 0) {
			hammer_lock_ex(&child->lock);
			child->ondisk->parent = node->node_offset;
			hammer_modify_node(child);
			hammer_unlock(&child->lock);
			hammer_rel_node(child);
		}
		break;
	case HAMMER_BTREE_TYPE_CLUSTER:
		/*
		 * Inter-cluster reference: update the target cluster's
		 * parent back-pointer instead of a node's parent field.
		 */
		volume = hammer_get_volume(node->cluster->volume->hmp,
					   elm->internal.subtree_volno,
					   &error);
		if (error)
			break;
		cluster = hammer_get_cluster(volume,
					     elm->internal.subtree_cluid,
					     &error, 0);
		hammer_rel_volume(volume, 0);
		if (error)
			break;
		hammer_lock_ex(&cluster->io.lock);
		cluster->ondisk->clu_btree_parent_offset = node->node_offset;
		hammer_unlock(&cluster->io.lock);
		KKASSERT(cluster->ondisk->clu_btree_parent_clu_no ==
			 node->cluster->clu_no);
		KKASSERT(cluster->ondisk->clu_btree_parent_vol_no ==
			 node->cluster->volume->vol_no);
		hammer_modify_cluster(cluster);
		hammer_rel_cluster(cluster, 0);
		break;
	default:
		hammer_print_btree_elm(elm, HAMMER_BTREE_TYPE_INTERNAL, -1);
		panic("btree_set_parent: bad subtree_type");
		break; /* NOT REACHED */
	}
	return(error);
}

#if 0

/*
 * This routine is called on the internal node (node) prior to recursing
 * down through (node, index) when the node referenced by (node, index)
 * MIGHT have too few elements for the caller to perform a deletion.
 *
 * cursor->index is invalid on return because the separators may have gotten
 * adjusted, the caller must rescan the node's elements.  The caller may set
 * cursor->index to -1 if it wants us to do a general rebalancing.
 *
 * This routine rebalances the children of the (node), collapsing children
 * together if possible.  On return each child will have at least L/2-1
 * elements unless the node only has one child.
 *
 * NOTE: Because we do not update the parent's parent in the split code,
 * the subtree_count used by the caller may be incorrect.  We correct it
 * here.  Also note that we cannot change the depth of the tree's leaf
 * nodes here (see btree_collapse()).
 *
 * NOTE: We make no attempt to rebalance inter-cluster elements.
 *
 * NOTE: This code is inside #if 0 -- it is unfinished (XXX placeholders)
 * and does not compile.
 */
static
int
btree_rebalance(hammer_cursor_t cursor)
{
	hammer_node_ondisk_t ondisk;
	hammer_node_t node;
	hammer_node_t children[HAMMER_BTREE_INT_ELMS];
	hammer_node_t child;
	hammer_btree_elm_t elm;
	hammer_btree_elm_t elms;
	int i, j, n, nelms, goal;
	int maxelms, halfelms;
	int error;

	/*
	 * If the elm being recursed through is an inter-cluster reference,
	 * don't worry about it.
	 */
	ondisk = cursor->node->ondisk;
	elm = &ondisk->elms[cursor->index];
	if (elm->internal.subtree_type == HAMMER_BTREE_TYPE_CLUSTER)
		return(0);

	KKASSERT(elm->internal.subtree_offset != 0);
	error = 0;

	/*
	 * Load the children of node and do any necessary corrections
	 * to subtree_count.  subtree_count may be too low due to the
	 * way insertions split nodes.  Get a count of the total number
	 * of actual elements held by our children.
	 */
	error = 0;

	for (i = n = 0; i < node->base.count; ++i) {
		struct hammer_btree_internal_elm *elm;

		elm = &node->elms[i];
		children[i] = NULL;
		child_buffer[i] = NULL;	/* must be preinitialized for bread */
		if (elm->subtree_offset == 0)
			continue;
		child = hammer_bread(cursor->cluster, elm->subtree_offset,
				     HAMMER_FSBUF_BTREE, &error,
				     &child_buffer[i], XXX);
		children[i] = child;
		if (child == NULL)
			continue;
		XXX
		KKASSERT(node->base.subtype == child->base.type);

		/*
		 * Accumulate n for a good child, update the node's count
		 * if it was wrong.
		 */
		if (node->elms[i].subtree_count != child->base.count) {
			node->elms[i].subtree_count = child->base.count;
		}
		n += node->elms[i].subtree_count;
	}
	if (error)
		goto failed;

	/*
	 * Collect all the children's elements together
	 */
	nelms = n;
	elms = kmalloc(sizeof(*elms) * (nelms + 1), M_HAMMER,
		       M_WAITOK|M_ZERO);
	for (i = n = 0; i < node->base.count; ++i) {
		child = children[i];
		for (j = 0; j < child->base.count; ++j) {
			elms[n].owner = child;
			if (node->base.subtype == HAMMER_BTREE_TYPE_LEAF)
				elms[n].u.leaf = child->leaf.elms[j];
			else
				elms[n].u.internal = child->internal.elms[j];
			++n;
		}
	}
	KKASSERT(n == nelms);

	/*
	 * Store a boundary in the elms array to ease the code below.  This
	 * is only used if the children are internal nodes.
	 */
	elms[n].u.internal = node->elms[i];

	/*
	 * Calculate the number of elements each child should have (goal) by
	 * reducing the number of elements until we achieve at least
	 * halfelms - 1 per child, unless we are a degenerate case.
	 */
	maxelms = btree_max_elements(node->base.subtype);
	halfelms = maxelms / 2;

	goal = halfelms - 1;
	while (i && n / i < goal)
		--i;

	/*
	 * Now rebalance using the specified goal
	 */
	for (i = n = 0; i < node->base.count; ++i) {
		struct hammer_buffer *subchild_buffer = NULL;
		struct hammer_btree_internal_node *subchild;

		child = children[i];
		for (j = 0; j < goal && n < nelms; ++j) {
			if (node->base.subtype == HAMMER_BTREE_TYPE_LEAF) {
				child->leaf.elms[j] = elms[n].u.leaf;
			} else {
				child->internal.elms[j] = elms[n].u.internal;
			}

			/*
			 * If the element's parent has changed we have to
			 * update the parent pointer.  This is somewhat
			 * expensive.
			 */
			if (elms[n].owner != child &&
			    node->base.subtype == HAMMER_BTREE_TYPE_INTERNAL) {
				subchild = hammer_bread(cursor->cluster,
					elms[n].u.internal.subtree_offset,
					HAMMER_FSBUF_BTREE,
					&error,
					&subchild_buffer, XXX);
				if (subchild) {
					subchild->base.parent =
					    hammer_bclu_offset(child_buffer[i],
							       child);
					hammer_modify_buffer(subchild_buffer);
				}
				/* XXX error */
			}
			++n;
		}
		/*
		 * Set right boundary if the children are internal nodes.
		 */
		if (node->base.subtype == HAMMER_BTREE_TYPE_INTERNAL)
			child->internal.elms[j] = elms[n].u.internal;
		child->base.count = j;
		hammer_modify_buffer(child_buffer[i]);
		if (subchild_buffer)
			hammer_put_buffer(subchild_buffer, 0);

		/*
		 * If we have run out of elements, break out
		 */
		if (n == nelms)
			break;
	}

	/*
	 * Physically destroy any left-over children.  These children's
	 * elements have been packed into prior children.  The node's
	 * right hand boundary and count gets shifted to index i.
	 *
	 * The subtree count in the node's parent MUST be updated because
	 * we are removing elements.  The subtree_count field is allowed to
	 * be too small, but not too large!
	 */
	if (i != node->base.count) {
		n = i;
		node->elms[n] = node->elms[node->base.count];
		while (i < node->base.count) {
			hammer_free_btree_ptr(child_buffer[i], children[i]);
			hammer_put_buffer(child_buffer[i], 0);
			++i;
		}
		node->base.count = n;
		if (cursor->parent) {
			cursor->parent->elms[cursor->parent_index].subtree_count = n;
			hammer_modify_buffer(cursor->parent_buffer);
		}
	}

	kfree(elms, M_HAMMER);
failed:
	hammer_modify_buffer(cursor->node_buffer);
	for (i = 0; i < node->base.count; ++i) {
		if (child_buffer[i])
			hammer_put_buffer(child_buffer[i], 0);
	}
	return (error);
}

/*
 * This routine is only called if the cursor is at the root node and the
 * root node is an internal node.  We attempt to collapse the root node
 * by replacing it with all of its children, reducing tree depth by one.
 *
 * This is the only way to reduce tree depth in a HAMMER filesystem.
 * Note that all leaf nodes are at the same depth.
 *
 * This is a fairly expensive operation because we not only have to load
 * the root's children, we also have to scan each child and adjust the
 * parent offset for each element in each child.  Nasty all around.
 */
static
int
btree_collapse(hammer_cursor_t cursor)
{
	hammer_btree_node_ondisk_t root, child;
	hammer_btree_node_ondisk_t children[HAMMER_BTREE_INT_ELMS];
	struct hammer_buffer *child_buffer[HAMMER_BTREE_INT_ELMS];
	int count;
	int i, j, n;
	int root_modified;
	int error;
	int32_t root_offset;
	u_int8_t subsubtype;

	root = cursor->node;
	count = root->base.count;
	root_offset = hammer_bclu_offset(cursor->node_buffer, root);

	/*
	 * Sum up the number of children each element has.  This value is
	 * only approximate due to the way the insertion node works.  It
	 * may be too small but it will never be too large.
	 *
	 * Quickly terminate the collapse if the elements have too many
	 * children.
	 */
	KKASSERT(root->base.parent == 0);	/* must be root node */
	KKASSERT(root->base.type == HAMMER_BTREE_TYPE_INTERNAL);
	KKASSERT(count <= HAMMER_BTREE_INT_ELMS);

	for (i = n = 0; i < count; ++i) {
		n += root->internal.elms[i].subtree_count;
	}
	if (n > btree_max_elements(root->base.subtype))
		return(0);

	/*
	 * Iterate through the elements again and correct the subtree_count.
	 * Terminate the collapse if we wind up with too many.
	 */
	error = 0;
	root_modified = 0;

	for (i = n = 0; i < count; ++i) {
		struct hammer_btree_internal_elm *elm;

		elm = &root->internal.elms[i];
		child_buffer[i] = NULL;
		children[i] = NULL;
		if (elm->subtree_offset == 0)
			continue;
		child = hammer_bread(cursor->cluster, elm->subtree_offset,
				     HAMMER_FSBUF_BTREE, &error,
				     &child_buffer[i], XXX);
		children[i] = child;
		if (child == NULL)
			continue;
		KKASSERT(root->base.subtype == child->base.type);

		/*
		 * Accumulate n for a good child, update the root's count
		 * if it was wrong.
		 */
		if (root->internal.elms[i].subtree_count != child->base.count) {
			root->internal.elms[i].subtree_count = child->base.count;
			root_modified = 1;
		}
		n += root->internal.elms[i].subtree_count;
	}
	if (error || n > btree_max_elements(root->base.subtype))
		goto done;

	/*
	 * Ok, we can collapse the root.  If the root's children are leafs
	 * the collapse is really simple.  If they are internal nodes the
	 * collapse is not so simple because we have to fixup the parent
	 * pointers for the root's children's children.
	 *
	 * When collapsing an internal node the far left and far right
	 * element's boundaries should match the root's left and right
	 * boundaries.
	 */
	if (root->base.subtype == HAMMER_BTREE_TYPE_LEAF) {
		for (i = n = 0; i < count; ++i) {
			child = children[i];
			for (j = 0; j < child->base.count; ++j) {
				root->leaf.elms[n] = child->leaf.elms[j];
				++n;
			}
		}
		root->base.type = root->base.subtype;
		root->base.subtype = 0;
		root->base.count = n;
		root->leaf.link_left = 0;
		root->leaf.link_right = 0;
	} else {
		struct hammer_btree_internal_elm *elm;
		struct hammer_btree_internal_node *subchild;
		struct hammer_buffer *subchild_buffer = NULL;

		if (count) {
			child = children[0];
			subsubtype = child->base.subtype;
			KKASSERT(child->base.count > 0);
			KKASSERT(root->internal.elms[0].base.key ==
				 child->internal.elms[0].base.key);
			child = children[count-1];
			KKASSERT(child->base.count > 0);
			KKASSERT(root->internal.elms[count].base.key ==
				 child->internal.elms[child->base.count].base.key);
		} else {
			subsubtype = 0;
		}
		for (i = n = 0; i < count; ++i) {
			child = children[i];
			KKASSERT(child->base.subtype == subsubtype);
			for (j = 0; j < child->base.count; ++j) {
				elm = &child->internal.elms[j];

				root->internal.elms[n] = *elm;
				subchild = hammer_bread(cursor->cluster,
							elm->subtree_offset,
							HAMMER_FSBUF_BTREE,
							&error,
							&subchild_buffer,
							XXX);
				if (subchild) {
					subchild->base.parent = root_offset;
					hammer_modify_buffer(subchild_buffer);
				}
				++n;
			}
			/* make sure the right boundary is correct */
			/* (this gets overwritten when the loop continues) */
			/* XXX generate a new separator?
			 */
			root->internal.elms[n] = child->internal.elms[j];
		}
		root->base.type = HAMMER_BTREE_TYPE_INTERNAL;
		root->base.subtype = subsubtype;
		if (subchild_buffer)
			hammer_put_buffer(subchild_buffer, 0);
	}
	root_modified = 1;

	/*
	 * Cleanup
	 */
done:
	if (root_modified)
		hammer_modify_buffer(cursor->node_buffer);
	for (i = 0; i < count; ++i) {
		if (child_buffer[i])
			hammer_put_buffer(child_buffer[i], 0);
	}
	return(error);
}

#endif

/************************************************************************
 *			MISCELLANEOUS SUPPORT 				*
 ************************************************************************/

/*
 * Compare two B-Tree elements, return -1, 0, or +1 (e.g. similar to strcmp).
 *
 * See also hammer_rec_rb_compare() and hammer_rec_cmp() in hammer_object.c.
 *
 * Note that key1 and key2 are treated differently.  key1 is allowed to
 * wildcard some of its fields by setting them to 0, while key2 is expected
 * to be in an on-disk form (no wildcards).
 *
 * The magnitude of a non-zero return indicates which field differed:
 * +/-4 obj_id, +/-3 rec_type, +/-2 key, +/-1 create/delete_tid.
 */
int
hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2)
{
#if 0
	kprintf("compare obj_id %016llx %016llx\n",
		key1->obj_id, key2->obj_id);
	kprintf("compare rec_type %04x %04x\n",
		key1->rec_type, key2->rec_type);
	kprintf("compare key %016llx %016llx\n",
		key1->key, key2->key);
#endif

	/*
	 * A key1->obj_id of 0 matches any object id
	 */
	if (key1->obj_id) {
		if (key1->obj_id < key2->obj_id)
			return(-4);
		if (key1->obj_id > key2->obj_id)
			return(4);
	}

	/*
	 * A key1->rec_type of 0 matches any record type.
	 */
	if (key1->rec_type) {
		if (key1->rec_type < key2->rec_type)
			return(-3);
		if (key1->rec_type > key2->rec_type)
			return(3);
	}

	/*
	 * There is no special case for key.  0 means 0.
	 */
	if (key1->key < key2->key)
		return(-2);
	if (key1->key > key2->key)
		return(2);

	/*
	 * This test has a number of special cases.  create_tid in key1 is
	 * the as-of transaction id, and delete_tid in key1 is NOT USED.
	 *
	 * A key1->create_tid of 0 matches any record regardless of when
	 * it was created or destroyed.  0xFFFFFFFFFFFFFFFFULL should be
	 * used to search for the most current state of the object.
	 *
	 * key2->create_tid is a HAMMER record and will never be
	 * 0.   key2->delete_tid is the deletion transaction id or 0 if
	 * the record has not yet been deleted.
	 */
	if (key1->create_tid) {
		if (key1->create_tid < key2->create_tid)
			return(-1);
		if (key2->delete_tid && key1->create_tid >= key2->delete_tid)
			return(1);
	}

	return(0);
}

/*
 * Compare the element against the cursor's beginning and ending keys.
 * A negative return means key2 is beyond the cursor's end key, a positive
 * return means key2 is before the cursor's begin key, and 0 means key2
 * falls within the cursor's range.
 */
int
hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2)
{
	/*
	 * A cursor->key_beg.obj_id of 0 matches any object id
	 */
	if (cursor->key_beg.obj_id) {
		if (cursor->key_end.obj_id < key2->obj_id)
			return(-4);
		if (cursor->key_beg.obj_id > key2->obj_id)
			return(4);
	}

	/*
	 * A cursor->key_beg.rec_type of 0 matches any record type.
	 */
	if (cursor->key_beg.rec_type) {
		if (cursor->key_end.rec_type < key2->rec_type)
			return(-3);
		if (cursor->key_beg.rec_type > key2->rec_type)
			return(3);
	}

	/*
	 * There is no special case for key.  0 means 0.
	 */
	if (cursor->key_end.key < key2->key)
		return(-2);
	if (cursor->key_beg.key > key2->key)
		return(2);

	/*
	 * This test has a number of special cases.  create_tid in key1 is
	 * the as-of transaction id, and delete_tid in key1 is NOT USED.
	 *
	 * A key1->create_tid of 0 matches any record regardless of when
	 * it was created or destroyed.  0xFFFFFFFFFFFFFFFFULL should be
	 * used to search for the most current state of the object.
	 *
	 * key2->create_tid is a HAMMER record and will never be
	 * 0.   key2->delete_tid is the deletion transaction id or 0 if
	 * the record has not yet been deleted.
	 *
	 * NOTE: only key_beg.create_tid is used for create_tid, we can only
	 * do as-of scans at the moment.
	 */
	if (cursor->key_beg.create_tid) {
		if (cursor->key_beg.create_tid < key2->create_tid)
			return(-1);
		if (key2->delete_tid &&
		    cursor->key_beg.create_tid >= key2->delete_tid)
			return(1);
	}

	return(0);
}

/*
 * Create a separator half way inbetween key1 and key2.  For fields just
 * one unit apart, the separator will match key2.
 *
 * The handling of delete_tid is a little confusing.  It is only possible
 * to have one record in the B-Tree where all fields match except delete_tid.
 * This means, worse case, two adjacent elements may have a create_tid that
 * is one-apart and cause the separator to choose the right-hand element's
 * create_tid.  e.g. (create,delete): (1,x)(2,x) -> separator is (2,x).
 *
 * So all we have to do is set delete_tid to the right-hand element to
 * guarantee that the separator is properly between the two elements.
 */

/*
 * Per-field midpoint, biased upward so that fields exactly one unit apart
 * produce key2's value (comparisons use an 'elm >= separator' inequality).
 */
#define MAKE_SEPARATOR(key1, key2, dest, field)	\
	dest->field = key1->field + ((key2->field - key1->field + 1) >> 1);

static void
hammer_make_separator(hammer_base_elm_t key1, hammer_base_elm_t key2,
		      hammer_base_elm_t dest)
{
	bzero(dest, sizeof(*dest));
	MAKE_SEPARATOR(key1, key2, dest, obj_id);
	MAKE_SEPARATOR(key1, key2, dest, rec_type);
	MAKE_SEPARATOR(key1, key2, dest, key);
	MAKE_SEPARATOR(key1, key2, dest, create_tid);
	/* see the comment above: take delete_tid from the right-hand key */
	dest->delete_tid = key2->delete_tid;
}

#undef MAKE_SEPARATOR

/*
 * Return whether a generic internal or leaf node is full
 */
static int
btree_node_is_full(hammer_node_ondisk_t node)
{
	switch(node->type) {
	case HAMMER_BTREE_TYPE_INTERNAL:
		if (node->count == HAMMER_BTREE_INT_ELMS)
			return(1);
		break;
	case HAMMER_BTREE_TYPE_LEAF:
		if (node->count == HAMMER_BTREE_LEAF_ELMS)
			return(1);
		break;
	default:
		panic("illegal btree subtype");
	}
	return(0);
}

#if 0
/*
 * Return the maximum number of elements a node of the given type can hold.
 * (Dead code: only referenced from the #if 0 rebalance/collapse routines.)
 */
static int
btree_max_elements(u_int8_t type)
{
	if (type == HAMMER_BTREE_TYPE_LEAF)
		return(HAMMER_BTREE_LEAF_ELMS);
	if (type == HAMMER_BTREE_TYPE_INTERNAL)
		return(HAMMER_BTREE_INT_ELMS);
	panic("btree_max_elements: bad type %d\n", type);
}
#endif

/*
 * Debugging: dump a B-Tree node and all of its elements.
 */
void
hammer_print_btree_node(hammer_node_ondisk_t ondisk)
{
	hammer_btree_elm_t elm;
	int i;

	kprintf("node %p count=%d parent=%d type=%c\n",
		ondisk, ondisk->count, ondisk->parent, ondisk->type);

	/*
	 * Dump both boundary elements if an internal node (internal nodes
	 * carry a right-hand boundary at elms[count], leafs do not).
	 */
	if (ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
		for (i = 0; i <= ondisk->count; ++i) {
			elm = &ondisk->elms[i];
			hammer_print_btree_elm(elm, ondisk->type, i);
		}
	} else {
		for (i = 0; i < ondisk->count; ++i) {
			elm = &ondisk->elms[i];
			hammer_print_btree_elm(elm, ondisk->type, i);
		}
	}
}

/*
 * Debugging: dump one B-Tree element.  type selects the internal
 * vs leaf interpretation of the element's union.
 */
void
hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i)
{
	kprintf("  %2d", i);
	kprintf("\tobjid        = %016llx\n", elm->base.obj_id);
	kprintf("\tkey          = %016llx\n", elm->base.key);
	kprintf("\tcreate_tid   = %016llx\n", elm->base.create_tid);
	kprintf("\tdelete_tid   = %016llx\n", elm->base.delete_tid);
	kprintf("\trec_type     = %04x\n", elm->base.rec_type);
	kprintf("\tobj_type     = %02x\n", elm->base.obj_type);
	kprintf("\tsubtree_type = %02x\n", elm->subtree_type);

	if (type == HAMMER_BTREE_TYPE_INTERNAL) {
		if (elm->internal.rec_offset) {
			kprintf("\tcluster_rec  = %08x\n",
				elm->internal.rec_offset);
			kprintf("\tcluster_id   = %08x\n",
				elm->internal.subtree_cluid);
			kprintf("\tvolno        = %08x\n",
				elm->internal.subtree_volno);
		} else {
			kprintf("\tsubtree_off  = %08x\n",
				elm->internal.subtree_offset);
		}
		kprintf("\tsubtree_count= %d\n", elm->internal.subtree_count);
	} else {
		kprintf("\trec_offset   = %08x\n", elm->leaf.rec_offset);
		kprintf("\tdata_offset  = %08x\n", elm->leaf.data_offset);
		kprintf("\tdata_len     = %08x\n", elm->leaf.data_len);
		kprintf("\tdata_crc     = %08x\n", elm->leaf.data_crc);
	}
}