1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. 5 * 6 */ 7 8 package com.sleepycat.je.tree; 9 10 import static com.sleepycat.je.dbi.BTreeStatDefinition.BTREE_RELATCHES_REQUIRED; 11 import static com.sleepycat.je.dbi.BTreeStatDefinition.BTREE_ROOT_SPLITS; 12 import static com.sleepycat.je.dbi.BTreeStatDefinition.GROUP_DESC; 13 import static com.sleepycat.je.dbi.BTreeStatDefinition.GROUP_NAME; 14 15 import java.nio.ByteBuffer; 16 import java.util.ArrayList; 17 import java.util.Comparator; 18 import java.util.List; 19 import java.util.ListIterator; 20 import java.util.logging.Level; 21 import java.util.logging.Logger; 22 23 import com.sleepycat.je.BtreeStats; 24 import com.sleepycat.je.CacheMode; 25 import com.sleepycat.je.DatabaseException; 26 import com.sleepycat.je.EnvironmentFailureException; 27 import com.sleepycat.je.StatsConfig; 28 import com.sleepycat.je.cleaner.LocalUtilizationTracker; 29 import com.sleepycat.je.dbi.DatabaseImpl; 30 import com.sleepycat.je.dbi.EnvironmentImpl; 31 import com.sleepycat.je.dbi.INList; 32 import com.sleepycat.je.latch.LatchContext; 33 import com.sleepycat.je.latch.LatchFactory; 34 import com.sleepycat.je.latch.LatchSupport; 35 import com.sleepycat.je.latch.LatchTable; 36 import com.sleepycat.je.latch.SharedLatch; 37 import com.sleepycat.je.log.LogManager; 38 import com.sleepycat.je.log.Loggable; 39 import com.sleepycat.je.recovery.RecoveryManager; 40 import com.sleepycat.je.evictor.Evictor; 41 import com.sleepycat.je.utilint.DbLsn; 42 import com.sleepycat.je.utilint.IntStat; 43 import com.sleepycat.je.utilint.LoggerUtils; 44 import com.sleepycat.je.utilint.LongStat; 45 import com.sleepycat.je.utilint.StatGroup; 46 import com.sleepycat.je.utilint.TestHook; 47 import com.sleepycat.je.utilint.TestHookExecute; 48 49 /** 50 * Tree implements the JE B+Tree. 51 * 52 * A note on tree search patterns: 53 * There's a set of Tree.search* methods. Some clients of the tree use 54 * those search methods directly, whereas other clients of the tree 55 * tend to use methods built on top of search. 56 * 57 * The semantics of search* are 58 * they leave you pointing at a BIN or IN 59 * they don't tell you where the reference of interest is. 60 * The semantics of the get* methods are: 61 * they leave you pointing at a BIN or IN 62 * they return the index of the slot of interest 63 * they traverse down to whatever level is needed 64 * they are built on top of search* methods. 65 * For the future: 66 * Over time, we need to clarify which methods are to be used by clients 67 * of the tree. Preferably clients that call the tree use get*, although 68 * their are cases where they need visibility into the tree structure. 69 * 70 * Also, search* should return the location of the slot to save us a 71 * second binary search. 72 * 73 * Search Method Call Hierarchy 74 * ---------------------------- 75 * getFirst/LastNode 76 * search 77 * CALLED BY: 78 * CursorImpl.getFirstOrLast 79 * 80 * getNext/PrevBin 81 * getParentINForChildIN 82 * searchSubTree 83 * CALLED BY: 84 * DupConvert 85 * CursorImpl.getNext 86 * 87 * getParentINForChildIN 88 * IN.findParent 89 * does not use shared latching 90 * CALLED BY: 91 * Checkpointer.flushIN (doFetch=false, targetLevel=-1) 92 * FileProcessor.processIN (doFetch=true, targetLevel=LEVEL) 93 * Evictor.evictIN (doFetch=true, targetLevel=-1) 94 * RecoveryManager.replaceOrInsertChild (doFetch=true, targetLevel=-1) 95 * getNext/PrevBin (doFetch=true, targetLevel=-1) 96 * 97 * search 98 * searchSubTree 99 * CALLED BY: 100 * CursorImpl.searchAndPosition 101 * INCompressor to find BIN 102 * 103 * searchSubTree 104 * uses shared grandparent latching 105 * 106 * getParentBINForChildLN 107 * searchSplitsAllowed 108 * CALLED BY: 109 * RecoveryManager.redo 110 * RecoveryManager.recoveryUndo 111 * search 112 * CALLED BY: 113 * RecoveryManager.abortUndo 114 * RecoveryManager.rollbackUndo 115 * FileProcessor.processLN 116 * Cleaner.processPendingLN 117 * UtilizationProfile.verifyLsnIsObsolete (utility) 118 * 119 * findBinForInsert 120 * searchSplitsAllowed 121 * CALLED BY: 122 * CursorImpl.putInternal 123 * 124 * searchSplitsAllowed 125 * uses shared non-grandparent latching 126 * CALLED BY: 127 * DupConvert (instead of findBinForInsert, which needs a cursor) 128 * 129 * Possible Shared Latching Improvements 130 * ------------------------------------- 131 * By implementing shared latching for BINs we would get better concurrency in 132 * these cases: 133 * Reads when LN is in cache, or LN is not needed (key-only op, e.g., dups) 134 */ 135 public final class Tree implements Loggable { 136 137 /* For debug tracing */ 138 private static final String TRACE_ROOT_SPLIT = "RootSplit:"; 139 140 private DatabaseImpl database; 141 142 private int maxTreeEntriesPerNode; 143 144 private ChildReference root; 145 146 /* 147 * Latch that must be held when using/accessing the root node. Protects 148 * against the root being changed out from underneath us by splitRoot. 149 * After the root IN is latched, the rootLatch can be released. 150 */ 151 private SharedLatch rootLatch; 152 153 /* 154 * We don't need the stack trace on this so always throw a static and 155 * avoid the cost of Throwable.fillInStack() every time it's thrown. 156 * [#13354]. 157 */ 158 private static SplitRequiredException splitRequiredException = 159 new SplitRequiredException(); 160 161 /* Stats */ 162 private StatGroup stats; 163 164 /* The number of tree root splited. */ 165 private IntStat rootSplits; 166 /* The number of latch upgrades from shared to exclusive required. */ 167 private LongStat relatchesRequired; 168 169 private final ThreadLocal<TreeWalkerStatsAccumulator> treeStatsAccumulatorTL = 170 new ThreadLocal<TreeWalkerStatsAccumulator>(); 171 172 /* For unit tests */ 173 private TestHook waitHook; // used for generating race conditions 174 private TestHook searchHook; // [#12736] 175 private TestHook ckptHook; // [#13897] 176 private TestHook getParentINHook; 177 private TestHook fetchINHook; 178 179 /** 180 * Embodies an enum for the type of search being performed. NORMAL means 181 * do a regular search down the tree. LEFT/RIGHT means search down the 182 * left/right side to find the first/last node in the tree. 183 */ 184 public static class SearchType { 185 /* Search types */ 186 public static final SearchType NORMAL = new SearchType(); 187 public static final SearchType LEFT = new SearchType(); 188 public static final SearchType RIGHT = new SearchType(); 189 190 /* No lock types can be defined outside this class. */ SearchType()191 private SearchType() { 192 } 193 } 194 195 /* 196 * Class that overrides ChildReference methods to enforce rules that apply 197 * to the root. 198 * 199 * Overrides fetchTarget() so that if the rootLatch is not held exclusively 200 * when the root is fetched, we upgrade it to exclusive. Also overrides 201 * setter methods to assert that an exclusive latch is held. 202 * 203 * Overrides setDirty to dirty the DatabaseImpl, so that the MapLN will be 204 * logged during the next checkpoint. This is critical when updating the 205 * root LSN. 206 */ 207 private class RootChildReference extends ChildReference { 208 RootChildReference()209 private RootChildReference() { 210 super(); 211 } 212 RootChildReference(Node target, byte[] key, long lsn)213 private RootChildReference(Node target, byte[] key, long lsn) { 214 super(target, key, lsn); 215 } 216 217 /* Caller is responsible for releasing rootLatch. */ 218 @Override fetchTarget(DatabaseImpl database, IN in)219 public Node fetchTarget(DatabaseImpl database, IN in) 220 throws DatabaseException { 221 222 if (getTarget() == null && !rootLatch.isExclusiveOwner()) { 223 224 rootLatch.release(); 225 rootLatch.acquireExclusive(); 226 227 /* 228 * If the root field changed while unlatched then we have an 229 * invalid state and cannot continue. [#21686] 230 */ 231 if (this != root) { 232 throw EnvironmentFailureException.unexpectedState( 233 database.getEnv(), 234 "Root changed while unlatched, dbId=" + 235 database.getId()); 236 } 237 } 238 239 return super.fetchTarget(database, in); 240 } 241 242 @Override setTarget(Node target)243 public void setTarget(Node target) { 244 assert rootLatch.isExclusiveOwner(); 245 super.setTarget(target); 246 } 247 248 @Override clearTarget()249 public void clearTarget() { 250 assert rootLatch.isExclusiveOwner(); 251 super.clearTarget(); 252 } 253 254 @Override setLsn(long lsn)255 public void setLsn(long lsn) { 256 assert rootLatch.isExclusiveOwner(); 257 super.setLsn(lsn); 258 } 259 260 @Override updateLsnAfterOptionalLog(DatabaseImpl dbImpl, long lsn)261 void updateLsnAfterOptionalLog(DatabaseImpl dbImpl, long lsn) { 262 assert rootLatch.isExclusiveOwner(); 263 super.updateLsnAfterOptionalLog(dbImpl, lsn); 264 } 265 266 @Override setDirty()267 void setDirty() { 268 super.setDirty(); 269 database.setDirty(); 270 } 271 } 272 273 /** 274 * Create a new tree. 275 */ Tree(DatabaseImpl database)276 public Tree(DatabaseImpl database) { 277 init(database); 278 setDatabase(database); 279 } 280 281 /** 282 * Create a tree that's being read in from the log. 283 */ Tree()284 public Tree() { 285 init(null); 286 maxTreeEntriesPerNode = 0; 287 } 288 289 /** 290 * constructor helper 291 */ init(DatabaseImpl database)292 private void init(DatabaseImpl database) { 293 this.root = null; 294 this.database = database; 295 296 /* Do the stats definitions. */ 297 stats = new StatGroup(GROUP_NAME, GROUP_DESC); 298 relatchesRequired = new LongStat(stats, BTREE_RELATCHES_REQUIRED); 299 rootSplits = new IntStat(stats, BTREE_ROOT_SPLITS); 300 } 301 302 /** 303 * Set the database for this tree. Used by recovery when recreating an 304 * existing tree. 305 */ setDatabase(DatabaseImpl database)306 public void setDatabase(DatabaseImpl database) { 307 this.database = database; 308 309 final EnvironmentImpl envImpl = database.getEnv(); 310 311 /* 312 * The LatchContext for the root is special in that it is considered a 313 * Btree latch (the Btree latch table is used), but the context is not 314 * implemented by the IN class. 315 */ 316 final LatchContext latchContext = new LatchContext() { 317 @Override 318 public int getLatchTimeoutMs() { 319 return envImpl.getLatchTimeoutMs(); 320 } 321 @Override 322 public String getLatchName() { 323 return "RootLatch"; 324 } 325 @Override 326 public LatchTable getLatchTable() { 327 return LatchSupport.btreeLatchTable; 328 } 329 @Override 330 public EnvironmentImpl getEnvImplForFatalException() { 331 return envImpl; 332 } 333 }; 334 335 rootLatch = LatchFactory.createSharedLatch( 336 latchContext, false /*exclusiveOnly*/); 337 338 339 maxTreeEntriesPerNode = database.getNodeMaxTreeEntries(); 340 } 341 342 /** 343 * @return the database for this Tree. 344 */ getDatabase()345 public DatabaseImpl getDatabase() { 346 return database; 347 } 348 latchRootLatchExclusive()349 public void latchRootLatchExclusive() 350 throws DatabaseException { 351 352 rootLatch.acquireExclusive(); 353 } 354 releaseRootLatch()355 public void releaseRootLatch() 356 throws DatabaseException { 357 358 rootLatch.release(); 359 } 360 361 /** 362 * Set the root for the tree. Should only be called within the root latch. 363 */ setRoot(ChildReference newRoot, boolean notLatched)364 public void setRoot(ChildReference newRoot, boolean notLatched) { 365 assert (notLatched || rootLatch.isExclusiveOwner()); 366 root = newRoot; 367 } 368 makeRootChildReference( Node target, byte[] key, long lsn)369 public ChildReference makeRootChildReference( 370 Node target, 371 byte[] key, 372 long lsn) { 373 374 return new RootChildReference(target, key, lsn); 375 } 376 makeRootChildReference()377 private RootChildReference makeRootChildReference() { 378 return new RootChildReference(); 379 } 380 381 /* 382 * A tree doesn't have a root if (a) the root field is null, or (b) the 383 * root is non-null, but has neither a valid target nor a valid LSN. Case 384 * (b) can happen if the database is or was previously opened in deferred 385 * write mode. 386 * 387 * @return false if there is no real root. 388 */ rootExists()389 public boolean rootExists() { 390 if (root == null) { 391 return false; 392 } 393 394 if ((root.getTarget() == null) && 395 (root.getLsn() == DbLsn.NULL_LSN)) { 396 return false; 397 } 398 399 return true; 400 } 401 402 /** 403 * Perform a fast check to see if the root IN is resident. No latching is 404 * performed. To ensure that the root IN is not loaded by another thread, 405 * this method should be called while holding a write lock on the MapLN. 406 * That will prevent opening the DB in another thread, and potentially 407 * loading the root IN. [#13415] 408 */ isRootResident()409 public boolean isRootResident() { 410 return root != null && root.getTarget() != null; 411 } 412 413 /** 414 * Helper to obtain the root IN with shared root latching. Optionally 415 * updates the generation of the root when latching it. 416 */ getRootIN(CacheMode cacheMode)417 public IN getRootIN(CacheMode cacheMode) 418 throws DatabaseException { 419 420 return getRootINInternal(cacheMode, false/*exclusive*/); 421 } 422 423 /** 424 * Helper to obtain the root IN with exclusive root latching. Optionally 425 * updates the generation of the root when latching it. 426 */ getRootINLatchedExclusive(CacheMode cacheMode)427 public IN getRootINLatchedExclusive(CacheMode cacheMode) 428 throws DatabaseException { 429 430 return getRootINInternal(cacheMode, true/*exclusive*/); 431 } 432 getRootINInternal(CacheMode cacheMode, boolean exclusive)433 private IN getRootINInternal(CacheMode cacheMode, boolean exclusive) 434 throws DatabaseException { 435 436 rootLatch.acquireShared(); 437 try { 438 return getRootINRootAlreadyLatched(cacheMode, exclusive); 439 } finally { 440 rootLatch.release(); 441 } 442 } 443 444 /** 445 * Helper to obtain the root IN, when the root latch is already held. 446 */ getRootINRootAlreadyLatched( CacheMode cacheMode, boolean exclusive)447 public IN getRootINRootAlreadyLatched( 448 CacheMode cacheMode, 449 boolean exclusive) { 450 451 if (!rootExists()) { 452 return null; 453 } 454 455 final IN rootIN = (IN) root.fetchTarget(database, null); 456 457 if (exclusive) { 458 rootIN.latch(cacheMode); 459 } else { 460 rootIN.latchShared(cacheMode); 461 } 462 return rootIN; 463 } 464 getResidentRootIN(boolean latched)465 public IN getResidentRootIN(boolean latched) 466 throws DatabaseException { 467 468 IN rootIN = null; 469 if (rootExists()) { 470 rootIN = (IN) root.getTarget(); 471 if (rootIN != null && latched) { 472 rootIN.latchShared(CacheMode.UNCHANGED); 473 } 474 } 475 return rootIN; 476 } 477 withRootLatchedExclusive(WithRootLatched wrl)478 public IN withRootLatchedExclusive(WithRootLatched wrl) 479 throws DatabaseException { 480 481 try { 482 rootLatch.acquireExclusive(); 483 return wrl.doWork(root); 484 } finally { 485 rootLatch.release(); 486 } 487 } 488 withRootLatchedShared(WithRootLatched wrl)489 public IN withRootLatchedShared(WithRootLatched wrl) 490 throws DatabaseException { 491 492 try { 493 rootLatch.acquireShared(); 494 return wrl.doWork(root); 495 } finally { 496 rootLatch.release(); 497 } 498 } 499 500 /** 501 * Get LSN of the rootIN. Obtained without latching, should only be 502 * accessed while quiescent. 503 */ getRootLsn()504 public long getRootLsn() { 505 if (root == null) { 506 return DbLsn.NULL_LSN; 507 } else { 508 return root.getLsn(); 509 } 510 } 511 512 /** 513 * Cheaply calculates and returns the maximum possible number of LNs in the 514 * btree. 515 */ getMaxLNs()516 public long getMaxLNs() { 517 518 final int levels; 519 final int topLevelSlots; 520 rootLatch.acquireShared(); 521 try { 522 IN rootIN = (IN) root.fetchTarget(database, null); 523 levels = rootIN.getLevel() & IN.LEVEL_MASK; 524 topLevelSlots = rootIN.getNEntries(); 525 } finally { 526 rootLatch.release(); 527 } 528 return (long) (topLevelSlots * 529 Math.pow(database.getNodeMaxTreeEntries(), levels - 1)); 530 } 531 532 /** 533 * Deletes a BIN specified by key from the tree. If the BIN resides in a 534 * subtree that can be pruned away, prune as much as possible, so we 535 * don't leave a branch that has no BINs. 536 * 537 * It's possible that the targeted BIN will now have entries, or will 538 * have resident cursors. Either will prevent deletion. 539 * 540 * Reverse splits cause INs to be logged in all ancestors, including the 541 * root. This is to avoid the "great aunt" problem described in 542 * LevelRecorder. 543 * 544 * INs below the root are logged provisionally; only the root is logged 545 * non-provisionally. Provisional logging is necessary during a checkpoint 546 * for levels less than maxFlushLevel. 547 * 548 * @param idKey - the identifier key of the node to delete. 549 * @param localTracker is used for tracking obsolete node info. 550 */ delete( byte[] idKey, LocalUtilizationTracker localTracker)551 public void delete( 552 byte[] idKey, 553 LocalUtilizationTracker localTracker) 554 throws NodeNotEmptyException, CursorsExistException { 555 556 final EnvironmentImpl envImpl = database.getEnv(); 557 final INList inList = envImpl.getInMemoryINs(); 558 final Logger logger = envImpl.getLogger(); 559 IN branchRoot = null; 560 IN branchParent = null; 561 562 /* 563 * A delete is a reverse split that must be propagated up to the root. 564 * [#13501] Keep all nodes from the rootIN to the parent of the 565 * deletable subtree latched as we descend so we can log the 566 * IN deletion and cascade the logging up the tree. The latched 567 * nodes are kept in order in the nodeLadder. 568 */ 569 final ArrayList<SplitInfo> nodeLadder = new ArrayList<SplitInfo>(); 570 571 IN rootIN = null; 572 573 rootLatch.acquireExclusive(); 574 575 try { 576 if (!rootExists()) { 577 /* no action, tree is deleted or was never persisted. */ 578 return; 579 } 580 581 rootIN = (IN) root.fetchTarget(database, null); 582 rootIN.latch(CacheMode.UNCHANGED); 583 584 searchDeletableSubTree(rootIN, idKey, nodeLadder); 585 586 if (nodeLadder.size() == 0) { 587 588 /* 589 * The tree is empty, so do nothing. Root compression is no 590 * longer supported. Root compression has no impact on memory 591 * usage now that we evict the root IN. It reduces log space 592 * taken by INs for empty (but not removed) databases, yet 593 * requires logging a INDelete and MapLN; this provides very 594 * little benefit, if any. Because it requires extensive 595 * testing (which has not been done), this minor benefit is not 596 * worth the cost. And by removing it we no longer log 597 * INDelete, which reduces complexity going forward. [#17546] 598 */ 599 } else { 600 /* Detach this subtree. */ 601 SplitInfo detachPoint = nodeLadder.get(nodeLadder.size() - 1); 602 603 branchParent = detachPoint.parent; 604 branchRoot = detachPoint.child; 605 606 if (logger.isLoggable(Level.FINEST)) { 607 LoggerUtils.envLogMsg( 608 Level.FINEST, envImpl, 609 "Tree.delete() " + 610 Thread.currentThread().getId() + "-" + 611 Thread.currentThread().getName() + "-" + 612 envImpl.getName() + 613 " Deleting child node: " + branchRoot.getNodeId() + 614 " from parent node: " + branchParent.getNodeId() + 615 " parent has " + branchParent.getNEntries() + 616 " children"); 617 } 618 619 boolean deleteOk = branchParent.deleteEntry( 620 detachPoint.index, true); 621 assert deleteOk; 622 623 /* 624 * For a deferred-write DB, account for deleted INs. For a 625 * deferred-write DB, they can't be actually counted obsolete 626 * until the parent is logged at a later time. [#21348] 627 * 628 * We do this before calling cascadeUpdates, because in the 629 * future it may also apply to a normal DB, in which case they 630 * will be actually counted obsolete when the parent is logged 631 * by cascadeUpdates. 632 */ 633 if (database.isDeferredWriteMode()) { 634 branchRoot.accountForDeferredWriteSubtreeRemoval( 635 inList, detachPoint.parent); 636 } 637 638 /* Cascade updates upward, including writing the root IN. */ 639 cascadeUpdates(nodeLadder); 640 } 641 } finally { 642 releaseNodeLadderLatches(nodeLadder); 643 644 if (rootIN != null) { 645 rootIN.releaseLatch(); 646 } 647 648 rootLatch.release(); 649 } 650 651 if (branchRoot != null) { 652 653 if (!database.isDeferredWriteMode()) { 654 655 /* 656 * Count obsolete nodes after logging the delete. We can do 657 * this without having the parent of the subtree latched 658 * because the subtree has been detached from the tree. 659 */ 660 branchRoot.accountForSubtreeRemoval(inList, localTracker); 661 } 662 663 if (logger.isLoggable(Level.FINE)) { 664 LoggerUtils.envLogMsg( 665 Level.FINE, envImpl, 666 "SubtreeRemoval: subtreeRoot = " + 667 branchRoot.getNodeId()); 668 } 669 } 670 } 671 672 /** 673 * Search down the tree using a key, but instead of returning the BIN that 674 * houses that key, find the point where we can detach a deletable 675 * subtree. A deletable subtree is a branch where each IN has one child, 676 * and the bottom BIN has no entries and no resident cursors. That point 677 * can be found by saving a pointer to the lowest node in the path with 678 * more than one entry. 679 * 680 * INa 681 * / \ 682 * INb INc 683 * | | 684 * INd .. 685 * / \ 686 * INe .. 687 * | 688 * BINx (suspected of being empty) 689 * 690 * In this case, we'd like to prune off the subtree headed by INe. INd 691 * is the parent of this deletable subtree. As we descend, we must keep 692 * latches for all the nodes that will be logged. In this case, we 693 * will need to keep INa, INb and INd latched when we return from this 694 * method. 695 * 696 * The method returns a list of parent/child/index structures. In this 697 * example, the list will hold: 698 * INa/INb/index 699 * INb/INd/index 700 * INd/INe/index 701 * Every node is latched, and every node except for the bottom most child 702 * (INe) must be logged. 703 */ searchDeletableSubTree( IN parent, byte[] key, ArrayList<SplitInfo> nodeLadder)704 private void searchDeletableSubTree( 705 IN parent, 706 byte[] key, 707 ArrayList<SplitInfo> nodeLadder) 708 throws NodeNotEmptyException, CursorsExistException { 709 710 assert (parent!=null); 711 assert (key!= null); 712 assert parent.isLatchExclusiveOwner(); 713 714 int index; 715 IN child = null; 716 717 /* Save the lowest IN in the path that has multiple entries. */ 718 IN lowestMultipleEntryIN = null; 719 IN pinedIN = null; 720 721 do { 722 if (parent.getNEntries() == 0) { 723 throw EnvironmentFailureException.unexpectedState( 724 "Found upper IN with 0 entries"); 725 } 726 727 /* Remember if this is the lowest multiple point. */ 728 if (parent.getNEntries() > 1) { 729 lowestMultipleEntryIN = parent; 730 pinedIN = null; 731 } else if (parent.isPinned()) { 732 pinedIN = parent; 733 } 734 735 index = parent.findEntry(key, false, false); 736 assert index >= 0; 737 738 /* Get the child node that matches. */ 739 child = parent.fetchIN(index); 740 741 child.latch(CacheMode.UNCHANGED); 742 743 nodeLadder.add(new SplitInfo(parent, child, index)); 744 745 /* Continue down a level */ 746 parent = child; 747 } while (!parent.isBIN()); 748 749 if (pinedIN != null) { 750 throw CursorsExistException.CURSORS_EXIST; 751 } 752 753 /* 754 * See if there is a reason we can't delete this BIN -- i.e. 755 * new items have been inserted, or a cursor exists on it. 756 */ 757 assert(child.isBIN()); 758 759 if (child.getNEntries() != 0) { 760 throw NodeNotEmptyException.NODE_NOT_EMPTY; 761 } 762 763 if (child.isBINDelta()) { 764 throw EnvironmentFailureException.unexpectedState( 765 "Found BIN delta with 0 entries"); 766 } 767 768 /* 769 * This case can happen if we are keeping a cursor on an empty 770 * BIN as we traverse. 771 */ 772 if (((BIN) child).nCursors() > 0 || child.isPinned()) { 773 throw CursorsExistException.CURSORS_EXIST; 774 } 775 776 if (lowestMultipleEntryIN != null) { 777 778 final INList inList = database.getEnv().getInMemoryINs(); 779 780 /* 781 * Release all nodes up to the pair that holds the detach 782 * point. We won't be needing those nodes, since they'll be 783 * pruned and won't need to be updated. 784 */ 785 ListIterator<SplitInfo> iter = 786 nodeLadder.listIterator(nodeLadder.size()); 787 788 while (iter.hasPrevious()) { 789 SplitInfo info = iter.previous(); 790 791 if (info.parent == lowestMultipleEntryIN) { 792 break; 793 } else { 794 /* Remove the child from the LRU and the INList */ 795 inList.remove(info.child); 796 info.child.releaseLatch(); 797 iter.remove(); 798 } 799 } 800 } else { 801 802 /* 803 * We actually have to prune off the entire tree. Release 804 * all latches, and clear the node ladder. 805 */ 806 releaseNodeLadderLatches(nodeLadder); 807 nodeLadder.clear(); 808 } 809 } 810 releaseNodeLadderLatches(ArrayList<SplitInfo> nodeLadder)811 private void releaseNodeLadderLatches(ArrayList<SplitInfo> nodeLadder) 812 throws DatabaseException { 813 814 /* 815 * Clear any latches left in the node ladder. Release from the 816 * bottom up. 817 */ 818 ListIterator<SplitInfo> iter = 819 nodeLadder.listIterator(nodeLadder.size()); 820 821 while (iter.hasPrevious()) { 822 SplitInfo info = iter.previous(); 823 info.child.releaseLatch(); 824 } 825 } 826 827 /** 828 * Update nodes for a delete, going upwards. For example, suppose a 829 * node ladder holds: 830 * INa, INb, index for INb in INa 831 * INb, INc, index for INc in INb 832 * INc, BINd, index for BINd in INc 833 * 834 * When we enter this method, BINd has already been removed from INc. We 835 * need to 836 * - log INc 837 * - update INb, log INb 838 * - update INa, log INa 839 * 840 * @param nodeLadder List of SplitInfos describing each node pair on the 841 * downward path 842 */ cascadeUpdates(ArrayList<SplitInfo> nodeLadder)843 private void cascadeUpdates(ArrayList<SplitInfo> nodeLadder) 844 throws DatabaseException { 845 846 final EnvironmentImpl envImpl = database.getEnv(); 847 final LogManager logManager = envImpl.getLogManager(); 848 849 for (int i = nodeLadder.size() - 1; i >= 0; i -= 1) { 850 final SplitInfo info = nodeLadder.get(i); 851 852 if (info.parent.isDbRoot()) { 853 assert i == 0; 854 final long newLsn = info.parent.optionalLog(logManager); 855 root.updateLsnAfterOptionalLog(database, newLsn); 856 } else { 857 assert i > 0; 858 final SplitInfo nextInfo = nodeLadder.get(i - 1); 859 860 final long newLsn = info.parent.optionalLogProvisional( 861 logManager, nextInfo.parent); 862 863 nextInfo.parent.updateEntry( 864 nextInfo.index, newLsn, 0 /*lastLoggedSize*/); 865 } 866 } 867 } 868 869 /** 870 * Find the leftmost node (IN or BIN) in the tree. 871 * 872 * @return the leftmost node in the tree, null if the tree is empty. The 873 * returned node is latched and the caller must release it. 874 */ getFirstNode(CacheMode cacheMode)875 public BIN getFirstNode(CacheMode cacheMode) 876 throws DatabaseException { 877 878 BIN bin = search( 879 null /*key*/, SearchType.LEFT, null /*binBoundary*/, 880 cacheMode, null /*comparator*/); 881 882 if (bin != null) { 883 bin.mutateToFullBIN(); 884 } 885 886 return bin; 887 } 888 889 /** 890 * Find the rightmost node (IN or BIN) in the tree. 891 * 892 * @return the rightmost node in the tree, null if the tree is empty. The 893 * returned node is latched and the caller must release it. 894 */ getLastNode(CacheMode cacheMode)895 public BIN getLastNode(CacheMode cacheMode) 896 throws DatabaseException { 897 898 BIN bin = search( 899 null /*key*/, SearchType.RIGHT, null /*binBoundary*/, 900 cacheMode, null /*comparator*/); 901 902 if (bin != null) { 903 bin.mutateToFullBIN(); 904 } 905 906 return bin; 907 } 908 909 /** 910 * Return a reference to the adjacent BIN. 911 * 912 * @param bin The BIN to find the next BIN for. This BIN is latched. 913 * 914 * @return The next BIN, or null if there are no more. The returned node 915 * is latched and the caller must release it. If null is returned, the 916 * argument BIN remains latched. 917 */ getNextBin(BIN bin, CacheMode cacheMode)918 public BIN getNextBin(BIN bin, CacheMode cacheMode) 919 throws DatabaseException { 920 921 return (BIN) getNextIN(bin, true, cacheMode); 922 } 923 924 /** 925 * Return a reference to the previous BIN. 926 * 927 * @param bin The BIN to find the next BIN for. This BIN is latched. 928 * 929 * @return The previous BIN, or null if there are no more. The returned 930 * node is latched and the caller must release it. If null is returned, 931 * the argument bin remains latched. 932 */ getPrevBin(BIN bin, CacheMode cacheMode)933 public BIN getPrevBin(BIN bin, CacheMode cacheMode) 934 throws DatabaseException { 935 936 return (BIN) getNextIN(bin, false, cacheMode); 937 } 938 939 /** 940 * Returns the next IN in the tree before/after the given IN, and at the 941 * same level. For example, if a BIN is passed in the prevIn parameter, 942 * the next BIN will be returned. 943 * 944 * TODO: A possible problem with this method is that we don't know for 945 * certain whether it works properly in the face of splits. There are 946 * comments below indicating it does. But the Cursor.checkForInsertion 947 * method was apparently added because getNextBin/getPrevBin didn't work 948 * properly, and may skip a BIN. So at least it didn't work properly in 949 * the distant past. Archeology and possibly testing are needed to find 950 * the truth. Hopefully it does now work, and Cursor.checkForInsertion can 951 * be removed. 952 */ getNextIN(IN prevIn, boolean forward, CacheMode cacheMode)953 public IN getNextIN(IN prevIn, boolean forward, CacheMode cacheMode) { 954 955 assert prevIn.isLatchExclusiveOwner() : "what?"; 956 if (LatchSupport.TRACK_LATCHES) { 957 LatchSupport.expectBtreeLatchesHeld(1); 958 } 959 960 prevIn.mutateToFullBIN(); 961 962 /* 963 * Use the right most key (for a forward progressing cursor) or the 964 * left most key (for a backward progressing cursor) as the search key. 965 * The reason is that the IN may get split while finding the next IN so 966 * it's not safe to take the IN's identifierKey entry. If the IN gets 967 * split, then the right (left) most key will still be on the 968 * resultant node. The exception to this is that if there are no 969 * entries, we just use the identifier key. 970 */ 971 final byte[] searchKey; 972 973 if (prevIn.getNEntries() == 0) { 974 searchKey = prevIn.getIdentifierKey(); 975 } else if (forward) { 976 searchKey = prevIn.getKey(prevIn.getNEntries() - 1); 977 } else { 978 searchKey = prevIn.getKey(0); 979 } 980 981 final int targetLevel = prevIn.getLevel(); 982 IN next = prevIn; 983 boolean nextIsLatched = false; 984 985 /* 986 * Ascend the tree until we find a level that still has nodes to the 987 * right (or left if !forward) of the path that we're on. If we reach 988 * the root level, we're done. 989 */ 990 IN parent = null; 991 IN nextIN = null; 992 boolean nextINIsLatched = false; 993 boolean normalExit = false; 994 try { 995 while (true) { 996 997 /* 998 * Move up a level from where we are now and check to see if we 999 * reached the top of the tree. 1000 */ 1001 nextIsLatched = false; 1002 1003 if (next.isRoot()) { 1004 /* We've reached the root of the tree. */ 1005 next.releaseLatch(); 1006 if (LatchSupport.TRACK_LATCHES) { 1007 LatchSupport.expectBtreeLatchesHeld(0); 1008 } 1009 normalExit = true; 1010 return null; 1011 } 1012 1013 final SearchResult result = getParentINForChildIN( 1014 next, false, /*useTargetLevel*/ 1015 true, /*doFetch*/ cacheMode); 1016 1017 if (result.exactParentFound) { 1018 if (LatchSupport.TRACK_LATCHES) { 1019 LatchSupport.expectBtreeLatchesHeld(1); 1020 } 1021 parent = result.parent; 1022 } else { 1023 throw EnvironmentFailureException.unexpectedState( 1024 "Failed to find parent for IN"); 1025 } 1026 1027 /* 1028 * Figure out which entry we are in the parent. Add (subtract) 1029 * 1 to move to the next (previous) one and check that we're 1030 * still pointing to a valid child. Don't just use the result 1031 * of the parent.findEntry call in getParentNode, because we 1032 * want to use our explicitly chosen search key. 1033 */ 1034 int index = parent.findEntry(searchKey, false, false); 1035 1036 final boolean moreEntriesThisIn; 1037 1038 if (forward) { 1039 index++; 1040 moreEntriesThisIn = (index < parent.getNEntries()); 1041 } else { 1042 moreEntriesThisIn = (index > 0); 1043 index--; 1044 } 1045 1046 if (moreEntriesThisIn) { 1047 1048 /* 1049 * There are more entries to the right of the current path 1050 * in parent. Get the entry, and then descend down the 1051 * left most path to an IN. 1052 */ 1053 nextIN = parent.fetchIN(index); 1054 1055 nextIN.latch(cacheMode); 1056 nextINIsLatched = true; 1057 1058 nextIN.mutateToFullBIN(); 1059 1060 if (LatchSupport.TRACK_LATCHES) { 1061 LatchSupport.expectBtreeLatchesHeld(2); 1062 } 1063 1064 if (nextIN.getLevel() == targetLevel) { 1065 parent.releaseLatch(); 1066 parent = null; // to avoid falsely unlatching parent 1067 1068 final TreeWalkerStatsAccumulator treeStatsAccumulator = 1069 getTreeStatsAccumulator(); 1070 if (treeStatsAccumulator != null) { 1071 nextIN.accumulateStats(treeStatsAccumulator); 1072 } 1073 1074 if (LatchSupport.TRACK_LATCHES) { 1075 LatchSupport.expectBtreeLatchesHeld(1); 1076 } 1077 1078 normalExit = true; 1079 return nextIN; 1080 1081 } else { 1082 1083 /* 1084 * We landed at a higher level than the target level. 1085 * Descend down to the appropriate level. 1086 */ 1087 parent.releaseLatch(); 1088 parent = null; // to avoid falsely unlatching parent 1089 nextINIsLatched = false; 1090 1091 final IN ret = searchSubTree( 1092 nextIN, null, /*key*/ 1093 (forward ? SearchType.LEFT : SearchType.RIGHT), 1094 targetLevel, cacheMode, null /*comparator*/); 1095 1096 if (LatchSupport.TRACK_LATCHES) { 1097 LatchSupport.expectBtreeLatchesHeld(1); 1098 } 1099 1100 if (ret.getLevel() == targetLevel) { 1101 normalExit = true; 1102 return ret; 1103 } else { 1104 throw EnvironmentFailureException.unexpectedState( 1105 "subtree did not have a IN at level " + 1106 targetLevel); 1107 } 1108 } 1109 } 1110 1111 /* Nothing at this level. Ascend to a higher level. */ 1112 next = parent; 1113 nextIsLatched = true; 1114 parent = null; // to avoid falsely unlatching parent below 1115 } 1116 } finally { 1117 if (!normalExit) { 1118 if (next != null && nextIsLatched) { 1119 next.releaseLatch(); 1120 } 1121 1122 if (parent != null) { 1123 parent.releaseLatch(); 1124 } 1125 1126 if (nextIN != null && nextINIsLatched) { 1127 nextIN.releaseLatch(); 1128 } 1129 } 1130 } 1131 } 1132 1133 /** 1134 * Search for the parent P of a given IN C (where C is viewed as a logical 1135 * node; not as a java obj). If found, P is returned latched exclusively. 1136 * The method is used when C has been accessed "directly", i.e., not via a 1137 * tree search, and we need to perform an operation on C that requires an 1138 * update to its parent. Such situations arise during eviction (C has been 1139 * accessed via the LRU list), checkpointing, and recovery (C has been read 1140 * from the log and is not attached to the in-memory tree). 1141 * 1142 * The method uses C's identifierKey to search down the tree until: 1143 * 1144 * (a) doFetch is false and we need to access a node that is not cached. 1145 * In this case, we are actually looking for the cached copies of both C 1146 * and its parent, so a cache miss on the path to C is considered a 1147 * failure. This search mode is used by the evictor: to evict C (which has 1148 * been retieved from the LRU), its parent must be found and EX-latched; 1149 * however, if the C has been evicted already by another thread, there is 1150 * nothing to to (C will GC-ed). 1151 * or 1152 * (b) We reach a node whose node id equals the C's node id. In this case, 1153 * we know for sure that C still belongs to the BTree and its parent has 1154 * been found. 1155 * or 1156 * (c) useTargetLevel is true and we reach a node P that is at one level 1157 * above C's level. We know that P contains a slot S whose corresponding 1158 * key range includes C's identifierKey. Since we haven't read the child 1159 * node under S to check its node id, we cannot know for sure that C is 1160 * still in the tree. Nevertheless, we consider this situation a success, 1161 * i.e., P is the parent node we are looking for. In this search mode, 1162 * after this method returns, the caller is expected to take further 1163 * action based on the info in slot S. For example, if C was created 1164 * by reading a log entry at LSN L, and the LSN at slot S is also L, then 1165 * we know P is the real parent (and we have thus saved a possible extra 1166 * I/O to refetch the C node from the log to check its node id). This 1167 * search mode is used by the cleaner. 1168 * or 1169 * (d) None of the above conditions occur and the bottom of the BTree is 1170 * reached. In this case, no parent exists (the child node is an old 1171 * version of a node that has been removed from the BTree). 1172 * 1173 * @param child The child node for which to find the parent. This node is 1174 * latched by the caller and is unlatched by this function before returning 1175 * to the caller. 1176 * 1177 * @param useTargetLevel If true, the search is considered successful if 1178 * a node P is reached at one level above C's level. P is the parent to 1179 * return to the caller. 1180 * 1181 * @param doFetch if false, stop the search if we run into a non-resident 1182 * child and assume that no parent exists. 1183 * 1184 * @param cacheMode The CacheMode for affecting the hotness of the nodes 1185 * visited during the search. 1186 * 1187 * @return a SearchResult object. If the parent has been found, 1188 * result.foundExactMatch is true, result.parent refers to that node, and 1189 * result.index is the slot for the child IN inside the parent IN. 1190 * Otherwise, result.foundExactMatch is false and result.parent is null. 1191 */ getParentINForChildIN( IN child, boolean useTargetLevel, boolean doFetch, CacheMode cacheMode)1192 public SearchResult getParentINForChildIN( 1193 IN child, 1194 boolean useTargetLevel, 1195 boolean doFetch, 1196 CacheMode cacheMode) 1197 throws DatabaseException { 1198 1199 return getParentINForChildIN( 1200 child, useTargetLevel, doFetch, 1201 cacheMode, null /*trackingList*/); 1202 } 1203 1204 1205 /* 1206 * This version of getParentINForChildIN does the same thing as the version 1207 * above, but also adds a "trackingList" param. If trackingList is not 1208 * null, the LSNs of the parents visited along the way are added to the 1209 * list, as a debug tracing mechanism. This is meant to stay in production, 1210 * to add information to the log. 1211 */ getParentINForChildIN( IN child, boolean useTargetLevel, boolean doFetch, CacheMode cacheMode, List<TrackingInfo> trackingList)1212 public SearchResult getParentINForChildIN( 1213 IN child, 1214 boolean useTargetLevel, 1215 boolean doFetch, 1216 CacheMode cacheMode, 1217 List<TrackingInfo> trackingList) 1218 throws DatabaseException { 1219 1220 /* Sanity checks */ 1221 if (child == null) { 1222 throw EnvironmentFailureException.unexpectedState( 1223 "getParentINForChildIN given null child node"); 1224 } 1225 1226 assert child.isLatchOwner(); 1227 1228 /* 1229 * Get information from child before releasing latch. 1230 */ 1231 long targetId = child.getNodeId(); 1232 byte[] targetKey = child.getIdentifierKey(); 1233 int targetLevel = (useTargetLevel ? child.getLevel() : -1); 1234 int exclusiveLevel = child.getLevel() + 1; 1235 boolean requireExactMatch = true; 1236 1237 child.releaseLatch(); 1238 1239 return getParentINForChildIN( 1240 targetId, targetKey, targetLevel, 1241 exclusiveLevel, requireExactMatch, doFetch, 1242 cacheMode, trackingList); 1243 1244 } 1245 1246 /* 1247 * This version of getParentINForChildIN() is the actual implementation 1248 * of the previous 2 versions (read the comments there), but it also 1249 * implements two additional use cases via the extra "requireExactMatch" 1250 * param. 1251 * 1252 * (a) requireExactMatch == false && doFetch == false 1253 * In this case we are actually looking for the lowest cached ancestor 1254 * of the C node. The method will always return a node (considred as the 1255 * "parent") unless the BTree is empty (has no nodes at all). The returned 1256 * node must be latched, but not necessarily in EX mode. This search mode 1257 * is used by the checkpointer. 1258 * 1259 * (b) requireExactMatch == false && doFetch == true 1260 * This search mode is used during recovery. It seems that 1261 * requireExactMatch can actually be true here ???? (and maybe use a 1262 * targetLevel as well ????). 1263 * 1264 * The exclusiveLevel param: 1265 * In general, if exclusiveLevel == L, nodes above L will be SH latched and 1266 * nodes at or below L will be EX-latched. In all current use cases, except 1267 * case (a) above, L is set to 1 + C.level; in case (a) it is set to 0. 1268 * This is because in all use cases except (a), the parent P, if any, 1269 * should be at level 1 + C.level and since P should be returned 1270 * EX-latched, when the search reaches L, it must EX-latch the node at that 1271 * level. In case (a), the "parent" does not need to be EX-latched (and it 1272 * can in fact be at any level). 1273 */ getParentINForChildIN( long targetNodeId, byte[] targetKey, int targetLevel, int exclusiveLevel, boolean requireExactMatch, boolean doFetch, CacheMode cacheMode, List<TrackingInfo> trackingList)1274 public SearchResult getParentINForChildIN( 1275 long targetNodeId, 1276 byte[] targetKey, 1277 int targetLevel, 1278 int exclusiveLevel, 1279 boolean requireExactMatch, 1280 boolean doFetch, 1281 CacheMode cacheMode, 1282 List<TrackingInfo> trackingList) 1283 throws DatabaseException { 1284 1285 /* Call hook before latching. No latches are held. */ 1286 TestHookExecute.doHookIfSet(getParentINHook); 1287 1288 /* 1289 * SearchResult is initialized as follows: 1290 * exactParentFound = false; 1291 * parent = null; index = -1; childNotResident = false; 1292 */ 1293 SearchResult result = new SearchResult(); 1294 1295 /* Get the tree root, SH-latched. */ 1296 IN rootIN = getRootIN(cacheMode); 1297 1298 if (rootIN == null) { 1299 return result; 1300 } 1301 1302 /* If the root is the target node, there is no parent */ 1303 assert(rootIN.getNodeId() != targetNodeId); 1304 assert(rootIN.getLevel() >= exclusiveLevel); 1305 1306 IN parent = rootIN; 1307 IN child = null; 1308 boolean success = false; 1309 1310 try { 1311 1312 if (rootIN.getLevel() <= exclusiveLevel) { 1313 rootIN.releaseLatch(); 1314 rootIN = getRootINLatchedExclusive(cacheMode); 1315 assert(rootIN != null); 1316 parent = rootIN; 1317 } 1318 1319 while (true) { 1320 1321 assert(parent.getNEntries() > 0); 1322 1323 result.index = parent.findEntry(targetKey, false, false); 1324 1325 if (trackingList != null) { 1326 trackingList.add(new TrackingInfo( 1327 parent.getLsn(result.index), parent.getNodeId(), 1328 parent.getNEntries(), result.index)); 1329 } 1330 1331 assert TestHookExecute.doHookIfSet(searchHook); 1332 1333 if (targetLevel > 0 && parent.getLevel() == targetLevel + 1) { 1334 result.exactParentFound = true; 1335 result.parent = parent; 1336 break; 1337 } 1338 1339 if (doFetch) { 1340 child = parent.fetchINWithNoLatch(result, targetKey); 1341 1342 if (child == null) { 1343 if (trackingList != null) { 1344 trackingList.clear(); 1345 } 1346 result.reset(); 1347 1348 TestHookExecute.doHookIfSet(fetchINHook, child); 1349 1350 rootIN = getRootIN(cacheMode); 1351 assert(rootIN != null); 1352 1353 if (rootIN.getLevel() <= exclusiveLevel) { 1354 rootIN.releaseLatch(); 1355 rootIN = getRootINLatchedExclusive(cacheMode); 1356 assert(rootIN != null); 1357 } 1358 1359 parent = rootIN; 1360 continue; 1361 } 1362 } else { 1363 child = (IN) parent.getTarget(result.index); 1364 } 1365 1366 assert(child != null || !doFetch); 1367 1368 if (child == null) { 1369 if (requireExactMatch) { 1370 parent.releaseLatch(); 1371 } else { 1372 result.parent = parent; 1373 } 1374 result.childNotResident = true; 1375 break; 1376 } 1377 1378 if (child.getNodeId() == targetNodeId) { 1379 result.exactParentFound = true; 1380 result.parent = parent; 1381 break; 1382 } 1383 1384 if (child.isBIN()) { 1385 if (requireExactMatch) { 1386 parent.releaseLatch(); 1387 } else { 1388 result.parent = parent; 1389 } 1390 break; 1391 } 1392 1393 /* We can search further down the tree. */ 1394 if (child.getLevel() <= exclusiveLevel) { 1395 child.latch(cacheMode); 1396 } else { 1397 child.latchShared(cacheMode); 1398 } 1399 1400 parent.releaseLatch(); 1401 parent = child; 1402 child = null; 1403 } 1404 1405 success = true; 1406 1407 } finally { 1408 1409 if (!success) { 1410 if (parent.isLatchOwner()) { 1411 parent.releaseLatch(); 1412 } 1413 1414 if (child != null && child.isLatchOwner()) { 1415 child.releaseLatch(); 1416 } 1417 } 1418 } 1419 1420 if (result.parent != null) { 1421 if (LatchSupport.TRACK_LATCHES) { 1422 LatchSupport.expectBtreeLatchesHeld(1); 1423 } 1424 assert((!doFetch && !requireExactMatch) || 1425 result.parent.isLatchExclusiveOwner()); 1426 } 1427 1428 return result; 1429 } 1430 1431 /** 1432 * Return a reference to the parent of this LN. This searches through the 1433 * tree and allows splits, if the splitsAllowed param is true. Set the 1434 * tree location to the proper BIN parent whether or not the LN child is 1435 * found. That's because if the LN is not found, recovery or abort will 1436 * need to place it within the tree, and so we must point at the 1437 * appropriate position. 1438 * 1439 * <p>When this method returns with location.bin non-null, the BIN is 1440 * latched and must be unlatched by the caller. Note that location.bin may 1441 * be non-null even if this method returns false.</p> 1442 * 1443 * @param location a holder class to hold state about the location 1444 * of our search. Sort of an internal cursor. 1445 * 1446 * @param key key to navigate through main key 1447 * 1448 * @param splitsAllowed true if this method is allowed to cause tree splits 1449 * as a side effect. In practice, recovery can cause splits, but abort 1450 * can't. 1451 * 1452 * @param blindDeltaOps Normally, if this method lands on a BIN-delta and 1453 * the search key is not in that delta, it will mutate the delta to a full 1454 * BIN to make sure whether the search key exists in the tree or not. 1455 * However, by passing true for blindDeltaOps, the caller indicates that 1456 * it doesn't really care whether the key is in the tree or not: it is 1457 * going to insert the key in the BIN-delta, if not already there, 1458 * essentially overwritting the slot that may exist in the full BIN. So, 1459 * if blindDeltaOps is true, the method will not mutate a BIN-delta parent 1460 * (unless the BIN-delta has no space for a slot insertion). 1461 * 1462 * @param cacheMode The CacheMode for affecting the hotness of the tree. 1463 * 1464 * @return true if node found in tree. 1465 * If false is returned and there is the possibility that we can insert 1466 * the record into a plausible parent we must also set 1467 * - location.bin (may be null if no possible parent found) 1468 * - location.lnKey (don't need to set if no possible parent). 1469 */ getParentBINForChildLN( TreeLocation location, byte[] key, boolean splitsAllowed, boolean blindDeltaOps, CacheMode cacheMode)1470 public boolean getParentBINForChildLN( 1471 TreeLocation location, 1472 byte[] key, 1473 boolean splitsAllowed, 1474 boolean blindDeltaOps, 1475 CacheMode cacheMode) 1476 throws DatabaseException { 1477 1478 /* 1479 * Find the BIN that either points to this LN or could be its 1480 * ancestor. 1481 */ 1482 location.reset(); 1483 BIN bin; 1484 int index; 1485 1486 if (splitsAllowed) { 1487 bin = searchSplitsAllowed(key, cacheMode, null /*comparator*/); 1488 } else { 1489 bin = search(key, cacheMode); 1490 } 1491 1492 if (bin == null) { 1493 return false; 1494 } 1495 1496 try { 1497 while (true) { 1498 1499 location.bin = bin; 1500 1501 index = bin.findEntry( 1502 key, true /*indicateIfExact*/, false /*exactSearch*/); 1503 1504 boolean match = (index >= 0 && 1505 (index & IN.EXACT_MATCH) != 0); 1506 1507 index &= ~IN.EXACT_MATCH; 1508 location.index = index; 1509 location.lnKey = key; 1510 1511 /* 1512 if (!match && bin.isBINDelta() && blindDeltaOps) { 1513 System.out.println( 1514 "Blind op on BIN-delta : " + bin.getNodeId() + 1515 " nEntries = " + 1516 bin.getNEntries() + 1517 " max entries = " + 1518 bin.getMaxEntries() + 1519 " full BIN entries = " + 1520 bin.getFullBinNEntries() + 1521 " full BIN max entries = " + 1522 bin.getFullBinMaxEntries()); 1523 } 1524 */ 1525 1526 if (match) { 1527 location.childLsn = bin.getLsn(index); 1528 location.childLoggedSize = bin.getLastLoggedSize(index); 1529 1530 return true; 1531 1532 } else { 1533 1534 if (bin.isBINDelta() && 1535 (!blindDeltaOps || 1536 bin.getNEntries() >= bin.getMaxEntries())) { 1537 1538 bin.mutateToFullBIN(); 1539 location.reset(); 1540 continue; 1541 } 1542 1543 return false; 1544 } 1545 } 1546 1547 } catch (RuntimeException e) { 1548 bin.releaseLatch(); 1549 location.bin = null; 1550 throw e; 1551 } 1552 } 1553 1554 /** 1555 * Find the BIN that is relevant to the insert. If the tree doesn't exist 1556 * yet, then create the first IN and BIN. On return, the cursor is set to 1557 * the BIN that is found or created, and the BIN is latched. 1558 */ findBinForInsert(final byte[] key, final CacheMode cacheMode)1559 public BIN findBinForInsert(final byte[] key, final CacheMode cacheMode) { 1560 1561 boolean rootLatchIsHeld = false; 1562 BIN bin = null; 1563 1564 try { 1565 long logLsn; 1566 1567 /* 1568 * We may have to try several times because of a small 1569 * timing window, explained below. 1570 */ 1571 while (true) { 1572 1573 rootLatchIsHeld = true; 1574 rootLatch.acquireShared(); 1575 1576 if (!rootExists()) { 1577 1578 rootLatch.release(); 1579 rootLatch.acquireExclusive(); 1580 if (rootExists()) { 1581 rootLatch.release(); 1582 rootLatchIsHeld = false; 1583 continue; 1584 } 1585 1586 final EnvironmentImpl env = database.getEnv(); 1587 final LogManager logManager = env.getLogManager(); 1588 final INList inMemoryINs = env.getInMemoryINs(); 1589 final Evictor evictor = env.getEvictor(); 1590 1591 /* 1592 * This is an empty tree, either because it's brand new 1593 * tree or because everything in it was deleted. Create an 1594 * IN and a BIN. We could latch the rootIN here, but 1595 * there's no reason to since we're just creating the 1596 * initial tree and we have the rootLatch held. Log the 1597 * nodes as soon as they're created, but remember that 1598 * referred-to children must come before any references to 1599 * their LSNs. 1600 */ 1601 1602 /* First BIN in the tree, log provisionally right away. */ 1603 bin = new BIN(database, key, maxTreeEntriesPerNode, 1); 1604 bin.latch(cacheMode); 1605 logLsn = bin.optionalLogProvisional(logManager, null); 1606 1607 /* 1608 * Log the root right away. Leave the root dirty, because 1609 * the MapLN is not being updated, and we want to avoid 1610 * this scenario from [#13897], where the LN has no 1611 * possible parent. 1612 * provisional BIN 1613 * root IN 1614 * checkpoint start 1615 * LN is logged 1616 * checkpoint end 1617 * BIN is dirtied, but is not part of checkpoint 1618 */ 1619 1620 IN rootIN = 1621 new IN(database, key, maxTreeEntriesPerNode, 2); 1622 1623 /* 1624 * OK to latch the root after a child BIN because it's 1625 * during creation. 1626 */ 1627 rootIN.latch(cacheMode); 1628 rootIN.setIsRoot(true); 1629 1630 boolean insertOk = rootIN.insertEntry(bin, key, logLsn); 1631 assert insertOk; 1632 1633 logLsn = rootIN.optionalLog(logManager); 1634 rootIN.setDirty(true); /*force re-logging, see [#13897]*/ 1635 1636 root = makeRootChildReference(rootIN, new byte[0], logLsn); 1637 1638 rootIN.releaseLatch(); 1639 1640 /* Add the new nodes to the in memory list. */ 1641 inMemoryINs.add(bin); 1642 inMemoryINs.add(rootIN); 1643 evictor.addBack(bin); 1644 1645 rootLatch.release(); 1646 rootLatchIsHeld = false; 1647 1648 break; 1649 } else { 1650 rootLatch.release(); 1651 rootLatchIsHeld = false; 1652 1653 /* 1654 * There's a tree here, so search for where we should 1655 * insert. However, note that a window exists after we 1656 * release the root latch. We release the latch because the 1657 * search method expects to take the latch. After the 1658 * release and before search, the INCompressor may come in 1659 * and delete the entire tree, so search may return with a 1660 * null. 1661 */ 1662 bin = searchSplitsAllowed(key, cacheMode); 1663 1664 if (bin == null) { 1665 /* The tree was deleted by the INCompressor. */ 1666 continue; 1667 } else { 1668 /* search() found a BIN where this key belongs. */ 1669 break; 1670 } 1671 } 1672 } 1673 } finally { 1674 if (rootLatchIsHeld) { 1675 rootLatch.release(); 1676 } 1677 } 1678 1679 /* testing hook to insert item into log. */ 1680 assert TestHookExecute.doHookIfSet(ckptHook); 1681 1682 return bin; 1683 } 1684 1685 /** 1686 * Do a key based search, permitting pre-emptive splits. Returns the 1687 * target node's parent. 1688 */ searchSplitsAllowed(byte[] key, CacheMode cacheMode)1689 public BIN searchSplitsAllowed(byte[] key, CacheMode cacheMode) { 1690 1691 return searchSplitsAllowed(key, cacheMode, null); 1692 } 1693 1694 searchSplitsAllowed( byte[] key, CacheMode cacheMode, Comparator<byte[]> comparator)1695 private BIN searchSplitsAllowed( 1696 byte[] key, 1697 CacheMode cacheMode, 1698 Comparator<byte[]> comparator) { 1699 1700 BIN insertTarget = null; 1701 1702 while (insertTarget == null) { 1703 1704 rootLatch.acquireShared(); 1705 1706 boolean rootLatched = true; 1707 boolean rootINLatched = false; 1708 boolean success = false; 1709 IN rootIN = null; 1710 1711 /* 1712 * Latch the rootIN, check if it needs splitting. If so split it 1713 * and update the associated MapLN. To update the MapLN, we must 1714 * lock it, which implies that all latches must be released prior 1715 * to the lock, and as a result, the root may require splitting 1716 * again or may be split by another thread. So we must restart 1717 * the loop to get the latest root. 1718 */ 1719 try { 1720 if (!rootExists()) { 1721 return null; 1722 } 1723 1724 rootIN = (IN) root.fetchTarget(database, null); 1725 1726 if (rootIN.needsSplitting()) { 1727 1728 rootLatch.release(); 1729 rootLatch.acquireExclusive(); 1730 1731 if (!rootExists()) { 1732 return null; 1733 } 1734 1735 rootIN = (IN) root.fetchTarget(database, null); 1736 1737 if (rootIN.needsSplitting()) { 1738 1739 splitRoot(cacheMode); 1740 1741 rootLatch.release(); 1742 rootLatched = false; 1743 1744 EnvironmentImpl env = database.getEnv(); 1745 env.getDbTree().optionalModifyDbRoot(database); 1746 1747 continue; 1748 } 1749 } 1750 1751 rootIN.latchShared(cacheMode); 1752 rootINLatched = true; 1753 success = true; 1754 1755 } finally { 1756 if (!success && rootINLatched) { 1757 rootIN.releaseLatch(); 1758 } 1759 if (rootLatched) { 1760 rootLatch.release(); 1761 } 1762 } 1763 1764 /* 1765 * Now, search the tree, doing splits if required. The rootIN 1766 * is latched in SH mode, but this.root is not latched. If any 1767 * splits are needed, this.root will first be latched exclusivelly 1768 * and will stay latched until all splits are done. 1769 */ 1770 try { 1771 assert(rootINLatched); 1772 1773 insertTarget = searchSplitsAllowed( 1774 rootIN, key, cacheMode, comparator); 1775 1776 if (insertTarget == null) { 1777 if (LatchSupport.TRACK_LATCHES) { 1778 LatchSupport.expectBtreeLatchesHeld(0); 1779 } 1780 relatchesRequired.increment(); 1781 database.getEnv().incRelatchesRequired(); 1782 } 1783 } catch (SplitRequiredException e) { 1784 1785 /* 1786 * The last slot in the root was used at the point when this 1787 * thread released the rootIN latch in order to force splits. 1788 * Retry. SR [#11147]. 1789 */ 1790 continue; 1791 } 1792 } 1793 1794 return insertTarget; 1795 } 1796 1797 /** 1798 * Search the tree, permitting preemptive splits. 1799 * 1800 * When this returns, parent will be unlatched unless parent is the 1801 * returned IN. 1802 */ searchSplitsAllowed( IN rootIN, byte[] key, CacheMode cacheMode, Comparator<byte[]> comparator)1803 private BIN searchSplitsAllowed( 1804 IN rootIN, 1805 byte[] key, 1806 CacheMode cacheMode, 1807 Comparator<byte[]> comparator) 1808 throws SplitRequiredException { 1809 1810 assert(rootIN.isLatchOwner()); 1811 if (!rootIN.isRoot()) { 1812 throw EnvironmentFailureException.unexpectedState( 1813 "A null or non-root IN was given as the parent"); 1814 } 1815 1816 int index; 1817 IN parent = rootIN; 1818 IN child = null; 1819 boolean success = false; 1820 1821 /* 1822 * Search downward until we hit a node that needs a split. In that 1823 * case, retreat to the top of the tree and force splits downward. 1824 */ 1825 try { 1826 do { 1827 if (parent.getNEntries() == 0) { 1828 throw EnvironmentFailureException.unexpectedState( 1829 "Found upper IN with 0 entries"); 1830 } 1831 1832 index = parent.findEntry(key, false, false, comparator); 1833 assert index >= 0; 1834 1835 child = parent.fetchINWithNoLatch(index, key); 1836 1837 if (child == null) { 1838 return null; // restart the search 1839 } 1840 1841 /* if child is a BIN, it is actually EX-latched */ 1842 child.latchShared(cacheMode); 1843 1844 /* 1845 * If we need to split, try compressing first and 1846 * check again. If we still need to split, throw 1847 * exception. 1848 */ 1849 if (child.needsSplitting()) { 1850 1851 database.getEnv().lazyCompress(child); 1852 1853 if (child.needsSplitting()) { 1854 1855 child.releaseLatch(); 1856 parent.releaseLatch(); 1857 1858 /* SR [#11144]*/ 1859 assert TestHookExecute.doHookIfSet(waitHook); 1860 1861 /* 1862 * forceSplit may throw SplitRequiredException if it 1863 * finds that the root needs splitting. Allow the 1864 * exception to propagate up to the caller, who will 1865 * do the root split. Otherwise, restart the search 1866 * from the root IN again. 1867 */ 1868 rootIN = forceSplit(key, cacheMode); 1869 parent = rootIN; 1870 1871 assert(rootIN.isLatchOwner()); 1872 if (!rootIN.isRoot()) { 1873 throw EnvironmentFailureException.unexpectedState( 1874 "A null or non-root IN was given as the parent"); 1875 } 1876 continue; 1877 } 1878 } 1879 1880 /* Continue down a level */ 1881 parent.releaseLatch(); 1882 parent = child; 1883 child = null; 1884 1885 } while (!parent.isBIN()); 1886 1887 success = true; 1888 return (BIN)parent; 1889 1890 } finally { 1891 if (!success) { 1892 if (child != null && child.isLatchOwner()) { 1893 child.releaseLatch(); 1894 } 1895 if (parent != child && parent.isLatchOwner()) { 1896 parent.releaseLatch(); 1897 } 1898 } 1899 } 1900 } 1901 1902 /** 1903 * Do pre-emptive splitting: search down the tree until we get to the BIN 1904 * level, and split any nodes that fit the splittable requirement except 1905 * for the root. If the root needs splitting, a splitRequiredException 1906 * is thrown and the root split is handled at a higher level. 1907 * 1908 * Note that more than one node in the path may be splittable. For example, 1909 * a tree might have a level2 IN and a BIN that are both splittable, and 1910 * would be encountered by the same insert operation. 1911 * 1912 * Splits cause INs to be logged in all ancestors, including the root. This 1913 * is to avoid the "great aunt" problem described in LevelRecorder. 1914 * 1915 * INs below the root are logged provisionally; only the root is logged 1916 * non-provisionally. Provisional logging is necessary during a checkpoint 1917 * for levels less than maxFlushLevel. 1918 * 1919 * This method acquires and holds this.rootLatch in EX mode during its 1920 * whole duration (so splits are serialized). The rootLatch is released 1921 * on return. 1922 * 1923 * @return the tree root node, latched in EX mode. This may be different 1924 * than the tree root when this method was called, because no latches are 1925 * held on entering this method. 1926 * 1927 * All latches are released in case of exception. 1928 */ forceSplit(byte[] key, CacheMode cacheMode)1929 private IN forceSplit(byte[] key, CacheMode cacheMode) 1930 throws DatabaseException, SplitRequiredException { 1931 1932 final ArrayList<SplitInfo> nodeLadder = new ArrayList<SplitInfo>(); 1933 1934 boolean allLeftSideDescent = true; 1935 boolean allRightSideDescent = true; 1936 int index; 1937 IN parent; 1938 IN child = null; 1939 IN rootIN = null; 1940 1941 /* 1942 * Latch the root in order to update the root LSN when we're done. 1943 * Latch order must be: root, root IN. We'll leave this method with the 1944 * original parent latched. 1945 */ 1946 rootLatch.acquireExclusive(); 1947 1948 boolean success = false; 1949 try { 1950 /* The root IN may have been evicted. [#16173] */ 1951 rootIN = (IN) root.fetchTarget(database, null); 1952 parent = rootIN; 1953 parent.latch(cacheMode); 1954 1955 /* 1956 * Another thread may have crept in and 1957 * - used the last free slot in the parent, making it impossible 1958 * to correctly propagate the split. 1959 * - actually split the root, in which case we may be looking at 1960 * the wrong subtree for this search. 1961 * If so, throw and retry from above. SR [#11144] 1962 */ 1963 if (rootIN.needsSplitting()) { 1964 throw splitRequiredException; 1965 } 1966 1967 /* 1968 * Search downward to the BIN level, saving the information 1969 * needed to do a split if necessary. 1970 */ 1971 do { 1972 if (parent.getNEntries() == 0) { 1973 throw EnvironmentFailureException.unexpectedState( 1974 "Found upper IN with 0 entries"); 1975 } 1976 1977 /* Look for the entry matching key in the current node. */ 1978 index = parent.findEntry(key, false, false); 1979 assert index >= 0; 1980 if (index != 0) { 1981 allLeftSideDescent = false; 1982 } 1983 if (index != (parent.getNEntries() - 1)) { 1984 allRightSideDescent = false; 1985 } 1986 1987 /* 1988 * Get the child node that matches. We only need to work on 1989 * nodes in residence. 1990 */ 1991 if (!parent.isResident(index)) { 1992 child = null; 1993 break; 1994 } 1995 1996 child = (IN)parent.getTarget(index); 1997 1998 child.latch(cacheMode); 1999 2000 nodeLadder.add(new SplitInfo(parent, child, index)); 2001 2002 /* Continue down a level */ 2003 parent = child; 2004 } while (!parent.isBIN()); 2005 2006 boolean startedSplits = false; 2007 LogManager logManager = database.getEnv().getLogManager(); 2008 2009 /* 2010 * Process the accumulated nodes from the bottom up. Split each 2011 * node if required. If the node should not split, we check if 2012 * there have been any splits on the ladder yet. If there are none, 2013 * we merely release the node, since there is no update. If splits 2014 * have started, we need to propagate new LSNs upward, so we log 2015 * the node and update its parent. 2016 */ 2017 long lastParentForSplit = Node.NULL_NODE_ID; 2018 2019 for (int i = nodeLadder.size() - 1; i >= 0; i -= 1) { 2020 final SplitInfo info = nodeLadder.get(i); 2021 2022 child = info.child; 2023 parent = info.parent; 2024 index = info.index; 2025 2026 /* Opportunistically split the node if it is full. */ 2027 if (child.needsSplitting()) { 2028 2029 child.mutateToFullBIN(); 2030 2031 final IN grandParent = 2032 (i > 0) ? nodeLadder.get(i - 1).parent : null; 2033 2034 if (allLeftSideDescent || allRightSideDescent) { 2035 child.splitSpecial( 2036 parent, index, grandParent, maxTreeEntriesPerNode, 2037 key, allLeftSideDescent, cacheMode); 2038 } else { 2039 child.split( 2040 parent, index, grandParent, maxTreeEntriesPerNode, 2041 cacheMode); 2042 } 2043 2044 lastParentForSplit = parent.getNodeId(); 2045 startedSplits = true; 2046 2047 /* 2048 * If the DB root IN was logged, update the DB tree's child 2049 * reference. Now the MapLN is logically dirty. Be sure to 2050 * flush the MapLN if we ever evict the root. 2051 */ 2052 if (parent.isDbRoot()) { 2053 root.updateLsnAfterOptionalLog( 2054 database, parent.getLastLoggedVersion()); 2055 } 2056 } else { 2057 if (startedSplits) { 2058 final long newChildLsn; 2059 2060 /* 2061 * If this child was the parent of a split, it's 2062 * already logged by the split call. We just need to 2063 * propagate the logging upwards. If this child is just 2064 * a link in the chain upwards, log it. 2065 */ 2066 if (lastParentForSplit == child.getNodeId()) { 2067 newChildLsn = child.getLastLoggedVersion(); 2068 } else { 2069 newChildLsn = child.optionalLogProvisional( 2070 logManager, parent); 2071 } 2072 2073 parent.updateEntry( 2074 index, newChildLsn, 0 /*lastLoggedSize*/); 2075 2076 /* 2077 * The root is never a 'child' in nodeLadder so it must 2078 * be logged separately. 2079 */ 2080 if (parent.isDbRoot()) { 2081 2082 final long newRootLsn = 2083 parent.optionalLog(logManager); 2084 2085 root.updateLsnAfterOptionalLog( 2086 database, newRootLsn); 2087 } 2088 } 2089 } 2090 child.releaseLatch(); 2091 child = null; 2092 } 2093 success = true; 2094 } finally { 2095 if (!success) { 2096 if (child != null) { 2097 child.releaseLatchIfOwner(); 2098 } 2099 2100 for (SplitInfo info : nodeLadder) { 2101 info.child.releaseLatchIfOwner(); 2102 } 2103 2104 if (rootIN != null) { 2105 rootIN.releaseLatchIfOwner(); 2106 } 2107 } 2108 2109 rootLatch.release(); 2110 } 2111 2112 return rootIN; 2113 } 2114 2115 /** 2116 * Split the root of the tree. 2117 */ splitRoot(CacheMode cacheMode)2118 private void splitRoot(CacheMode cacheMode) 2119 throws DatabaseException { 2120 2121 /* 2122 * Create a new root IN, insert the current root IN into it, and then 2123 * call split. 2124 */ 2125 EnvironmentImpl env = database.getEnv(); 2126 LogManager logManager = env.getLogManager(); 2127 INList inMemoryINs = env.getInMemoryINs(); 2128 2129 IN curRoot = null; 2130 curRoot = (IN) root.fetchTarget(database, null); 2131 curRoot.latch(cacheMode); 2132 long curRootLsn = 0; 2133 long logLsn = 0; 2134 IN newRoot = null; 2135 try { 2136 2137 /* 2138 * Make a new root IN, giving it an id key from the previous root. 2139 */ 2140 byte[] rootIdKey = curRoot.getKey(0); 2141 newRoot = new IN(database, rootIdKey, maxTreeEntriesPerNode, 2142 curRoot.getLevel() + 1); 2143 newRoot.latch(cacheMode); 2144 newRoot.setIsRoot(true); 2145 curRoot.setIsRoot(false); 2146 2147 /* 2148 * Make the new root IN point to the old root IN. Log the old root 2149 * provisionally, because we modified it so it's not the root 2150 * anymore, then log the new root. We are guaranteed to be able to 2151 * insert entries, since we just made this root. 2152 */ 2153 boolean logSuccess = false; 2154 try { 2155 curRootLsn = curRoot.optionalLogProvisional( 2156 logManager, newRoot); 2157 2158 boolean inserted = newRoot.insertEntry( 2159 curRoot, rootIdKey, curRootLsn); 2160 assert inserted; 2161 2162 logLsn = newRoot.optionalLog(logManager); 2163 logSuccess = true; 2164 } finally { 2165 if (!logSuccess) { 2166 /* Something went wrong when we tried to log. */ 2167 curRoot.setIsRoot(true); 2168 } 2169 } 2170 2171 inMemoryINs.add(newRoot); 2172 2173 /* 2174 * Don't add the new root into the LRU because it has a cached 2175 * child. 2176 */ 2177 2178 /* 2179 * Make the tree's root reference point to this new node. Now the 2180 * MapLN is logically dirty, but the change hasn't been logged. Be 2181 * sure to flush the MapLN if we ever evict the root. 2182 */ 2183 root.setTarget(newRoot); 2184 root.updateLsnAfterOptionalLog(database, logLsn); 2185 curRoot.split(newRoot, 0, null, maxTreeEntriesPerNode, cacheMode); 2186 root.setLsn(newRoot.getLastLoggedVersion()); 2187 2188 } finally { 2189 /* FindBugs ignore possible null pointer dereference of newRoot. */ 2190 newRoot.releaseLatch(); 2191 curRoot.releaseLatch(); 2192 } 2193 rootSplits.increment(); 2194 traceSplitRoot(Level.FINE, TRACE_ROOT_SPLIT, newRoot, logLsn, 2195 curRoot, curRootLsn); 2196 } 2197 search(byte[] key, CacheMode cacheMode)2198 public BIN search(byte[] key, CacheMode cacheMode) { 2199 2200 return search(key, SearchType.NORMAL, null, cacheMode, null); 2201 } 2202 2203 /** 2204 * Search the tree, starting at the root. Depending on search type either 2205 * (a) search for the BIN that *should* contain a given key, or (b) return 2206 * the right-most or left-most BIN in the tree. 2207 * 2208 * Preemptive splitting is not done during the search. 2209 * 2210 * @param key - the key to search for, or null if searchType is LEFT or 2211 * RIGHT. 2212 * 2213 * @param searchType - The type of tree search to perform. NORMAL means 2214 * we're searching for key in the tree. LEFT/RIGHT means we're descending 2215 * down the left or right side, resp. 2216 * 2217 * @param binBoundary - If non-null, information is returned about whether 2218 * the BIN found is the first or last BIN in the database. 2219 * 2220 * @return - the BIN that matches the criteria, if any. Returns null if 2221 * the root is null. BIN is latched (unless it's null) and must be 2222 * unlatched by the caller. In a NORMAL search, it is the caller's 2223 * responsibility to do the findEntry() call on the key and BIN to locate 2224 * the entry (if any) that matches key. 2225 */ search( byte[] key, SearchType searchType, BINBoundary binBoundary, CacheMode cacheMode, Comparator<byte[]> comparator)2226 public BIN search( 2227 byte[] key, 2228 SearchType searchType, 2229 BINBoundary binBoundary, 2230 CacheMode cacheMode, 2231 Comparator<byte[]> comparator) { 2232 2233 IN rootIN = getRootIN(cacheMode); 2234 2235 if (rootIN == null) { 2236 return null; 2237 } 2238 2239 if ((searchType == SearchType.LEFT || 2240 searchType == SearchType.RIGHT) && 2241 key != null) { 2242 2243 /* 2244 * If caller is asking for a right or left search, they shouldn't 2245 * be passing us a key. 2246 */ 2247 throw EnvironmentFailureException.unexpectedState( 2248 "searchSubTree passed key and left/right search"); 2249 } 2250 2251 if (binBoundary != null) { 2252 binBoundary.isLastBin = true; 2253 binBoundary.isFirstBin = true; 2254 } 2255 2256 boolean success = false; 2257 int index; 2258 IN parent = rootIN; 2259 IN child = null; 2260 2261 TreeWalkerStatsAccumulator treeStatsAccumulator = 2262 getTreeStatsAccumulator(); 2263 2264 try { 2265 if (treeStatsAccumulator != null) { 2266 parent.accumulateStats(treeStatsAccumulator); 2267 } 2268 2269 do { 2270 if (parent.getNEntries() == 0) { 2271 throw EnvironmentFailureException.unexpectedState( 2272 "Upper IN with 0 entries"); 2273 } 2274 2275 if (searchType == SearchType.NORMAL) { 2276 index = parent.findEntry(key, false, false, comparator); 2277 2278 } else if (searchType == SearchType.LEFT) { 2279 index = 0; 2280 2281 } else if (searchType == SearchType.RIGHT) { 2282 index = parent.getNEntries() - 1; 2283 2284 } else { 2285 throw EnvironmentFailureException.unexpectedState( 2286 "Invalid value of searchType: " + searchType); 2287 } 2288 2289 assert(index >= 0); 2290 2291 if (binBoundary != null) { 2292 if (index != parent.getNEntries() - 1) { 2293 binBoundary.isLastBin = false; 2294 } 2295 if (index != 0) { 2296 binBoundary.isFirstBin = false; 2297 } 2298 } 2299 2300 child = parent.fetchINWithNoLatch(index, key); 2301 2302 if (child == null) { 2303 parent = getRootIN(cacheMode); 2304 assert(parent != null); 2305 if (treeStatsAccumulator != null) { 2306 parent.accumulateStats(treeStatsAccumulator); 2307 } 2308 continue; 2309 } 2310 2311 /* Latch the child. Note: BINs are always latched exclusive. */ 2312 child.latchShared(cacheMode); 2313 2314 if (treeStatsAccumulator != null) { 2315 child.accumulateStats(treeStatsAccumulator); 2316 } 2317 2318 parent.releaseLatch(); 2319 parent = child; 2320 child = null; 2321 2322 } while (!parent.isBIN()); 2323 2324 success = true; 2325 return (BIN)parent; 2326 2327 } finally { 2328 if (!success) { 2329 2330 /* 2331 * In [#14903] we encountered a latch exception below and the 2332 * original exception was lost. Print the stack trace and 2333 * allow the original exception to be thrown if this happens 2334 * again, to get more information about the problem. 2335 */ 2336 try { 2337 if (child != null && child.isLatchOwner()) { 2338 child.releaseLatch(); 2339 } 2340 2341 if (parent != child && parent.isLatchOwner()) { 2342 parent.releaseLatch(); 2343 } 2344 } catch (Exception e) { 2345 LoggerUtils.traceAndLogException( 2346 database.getEnv(), "Tree", "searchSubTreeInternal", "", 2347 e); 2348 } 2349 } 2350 } 2351 } 2352 searchSubTree( IN parent, byte[] key, SearchType searchType, int targetLevel, CacheMode cacheMode, Comparator<byte[]> comparator)2353 private IN searchSubTree( 2354 IN parent, 2355 byte[] key, 2356 SearchType searchType, 2357 int targetLevel, 2358 CacheMode cacheMode, 2359 Comparator<byte[]> comparator) { 2360 2361 /* 2362 * If a an intermediate IN (e.g., from getNextIN) was 2363 * originally passed, it was latched exclusively. 2364 */ 2365 assert(parent != null && 2366 (parent.isRoot() || 2367 parent.isLatchExclusiveOwner())); 2368 2369 if ((searchType == SearchType.LEFT || 2370 searchType == SearchType.RIGHT) && 2371 key != null) { 2372 2373 /* 2374 * If caller is asking for a right or left search, they shouldn't 2375 * be passing us a key. 2376 */ 2377 throw EnvironmentFailureException.unexpectedState( 2378 "searchSubTree passed key and left/right search"); 2379 } 2380 2381 assert(parent.isUpperIN()); 2382 assert(parent.isLatchOwner()); 2383 2384 boolean success = false; 2385 int index; 2386 IN subtreeRoot = parent; 2387 IN child = null; 2388 IN grandParent = null; 2389 boolean childIsLatched = false; 2390 boolean grandParentIsLatched = false; 2391 boolean doGrandparentLatching = !parent.isLatchExclusiveOwner(); 2392 2393 TreeWalkerStatsAccumulator treeStatsAccumulator = 2394 getTreeStatsAccumulator(); 2395 2396 try { 2397 do { 2398 if (treeStatsAccumulator != null) { 2399 parent.accumulateStats(treeStatsAccumulator); 2400 } 2401 2402 assert(parent.getNEntries() > 0); 2403 2404 if (searchType == SearchType.NORMAL) { 2405 /* Look for the entry matching key in the current node. */ 2406 index = parent.findEntry(key, false, false, comparator); 2407 } else if (searchType == SearchType.LEFT) { 2408 /* Left search, always take the 0th entry. */ 2409 index = 0; 2410 } else if (searchType == SearchType.RIGHT) { 2411 /* Right search, always take the highest entry. */ 2412 index = parent.getNEntries() - 1; 2413 } else { 2414 throw EnvironmentFailureException.unexpectedState( 2415 "Invalid value of searchType: " + searchType); 2416 } 2417 2418 assert(index >= 0); 2419 2420 /* 2421 * Get the child IN. 2422 * 2423 * If the child is not cached and we are usimg grandparent 2424 * latching, then: 2425 * 2426 * (a) If "parent" is not the subtree root, is is always 2427 * SH-latched at this point. So, to fetch the child, we need to 2428 * unlatch the parent and relatch it exclusively. Because we 2429 * have the grandparent latch (in either SH or EX mode), the 2430 * parent will not be evicted or detached from the tree and the 2431 * index of the child within the parent won't change. After 2432 * the parent is EX-latched, we can release the grandparent so 2433 *. it won't be held while reading the child from the log. 2434 * 2435 * (b) If "parent" is the BTree root, it may be SH-latched. In 2436 * this case, since there is no grandparent, we must unlatch 2437 * the parent and relatch it in EX mode under the protection 2438 * of the rootLatch; then we restart the do-loop. 2439 * 2440 * (c) If "parent" is the subtree root, but not the root of 2441 * the full Btree, then it must be EX-latched already, and 2442 * we can just fetch the child. 2443 */ 2444 child = (IN) parent.getTarget(index); 2445 2446 if (child == null && doGrandparentLatching) { 2447 2448 if (parent != subtreeRoot) { 2449 2450 assert(!parent.isLatchExclusiveOwner()); 2451 parent.releaseLatch(); 2452 parent.latch(cacheMode); 2453 grandParent.releaseLatch(); 2454 grandParentIsLatched = false; 2455 grandParent = null; 2456 doGrandparentLatching = false; 2457 2458 } else if (parent.isRoot() && 2459 !parent.isLatchExclusiveOwner()) { 2460 2461 parent.releaseLatch(); 2462 subtreeRoot = getRootINLatchedExclusive(cacheMode); 2463 parent = subtreeRoot; 2464 assert(parent != null); 2465 assert(grandParent == null); 2466 doGrandparentLatching = false; 2467 2468 continue; 2469 } 2470 2471 child = parent.fetchIN(index); 2472 2473 } else if (child == null) { 2474 2475 child = parent.fetchIN(index); 2476 } 2477 2478 /* After fetching the child we can release the grandparent. */ 2479 if (grandParent != null) { 2480 grandParent.releaseLatch(); 2481 grandParentIsLatched = false; 2482 } 2483 2484 /* Latch the child. Note: BINs are always latched exclusive. */ 2485 if (doGrandparentLatching && child.getLevel() != targetLevel) { 2486 child.latchShared(cacheMode); 2487 } else { 2488 child.latch(cacheMode); 2489 } 2490 childIsLatched = true; 2491 2492 child.mutateToFullBIN(); 2493 2494 if (treeStatsAccumulator != null) { 2495 child.accumulateStats(treeStatsAccumulator); 2496 } 2497 2498 /* Continue down a level */ 2499 if (doGrandparentLatching) { 2500 grandParent = parent; 2501 grandParentIsLatched = true; 2502 } else { 2503 parent.releaseLatch(); 2504 } 2505 parent = child; 2506 2507 } while (!parent.isBIN() && parent.getLevel() != targetLevel); 2508 2509 success = true; 2510 return child; 2511 2512 } finally { 2513 if (!success) { 2514 2515 /* 2516 * In [#14903] we encountered a latch exception below and the 2517 * original exception was lost. Print the stack trace and 2518 * allow the original exception to be thrown if this happens 2519 * again, to get more information about the problem. 2520 */ 2521 try { 2522 if (child != null && childIsLatched) { 2523 child.releaseLatch(); 2524 } 2525 2526 if (parent != child) { 2527 parent.releaseLatch(); 2528 } 2529 } catch (Exception e) { 2530 LoggerUtils.traceAndLogException( 2531 database.getEnv(), "Tree", "searchSubTreeInternal", "", 2532 e); 2533 } 2534 } 2535 2536 if (grandParent != null && grandParentIsLatched) { 2537 grandParent.releaseLatch(); 2538 } 2539 } 2540 } 2541 2542 /** 2543 * rebuildINList is used by recovery to add all the resident nodes to the 2544 * IN list. 2545 */ rebuildINList()2546 public void rebuildINList() 2547 throws DatabaseException { 2548 2549 INList inMemoryList = database.getEnv().getInMemoryINs(); 2550 2551 if (root != null) { 2552 rootLatch.acquireShared(); 2553 try { 2554 Node rootIN = root.getTarget(); 2555 if (rootIN != null) { 2556 rootIN.rebuildINList(inMemoryList); 2557 } 2558 } finally { 2559 rootLatch.release(); 2560 } 2561 } 2562 } 2563 2564 /** 2565 * Debugging check that all resident nodes are on the INList and no stray 2566 * nodes are present in the unused portion of the IN arrays. 2567 */ validateINList(IN parent)2568 public void validateINList(IN parent) 2569 throws DatabaseException { 2570 2571 if (parent == null) { 2572 parent = (IN) root.getTarget(); 2573 } 2574 2575 if (parent != null) { 2576 INList inList = database.getEnv().getInMemoryINs(); 2577 2578 if (!inList.contains(parent)) { 2579 throw EnvironmentFailureException.unexpectedState( 2580 "IN " + parent.getNodeId() + " missing from INList"); 2581 } 2582 2583 for (int i = 0;; i += 1) { 2584 try { 2585 Node node = parent.getTarget(i); 2586 2587 if (i >= parent.getNEntries()) { 2588 if (node != null) { 2589 throw EnvironmentFailureException.unexpectedState( 2590 "IN " + parent.getNodeId() + 2591 " has stray node " + node + 2592 " at index " + i); 2593 } 2594 byte[] key = parent.getKey(i); 2595 if (key != null) { 2596 throw EnvironmentFailureException.unexpectedState( 2597 "IN " + parent.getNodeId() + 2598 " has stray key " + key + 2599 " at index " + i); 2600 } 2601 } 2602 2603 if (node instanceof IN) { 2604 validateINList((IN) node); 2605 } 2606 } catch (ArrayIndexOutOfBoundsException e) { 2607 break; 2608 } 2609 } 2610 } 2611 } 2612 2613 /* 2614 * Logging support 2615 */ 2616 2617 /** 2618 * @see Loggable#getLogSize 2619 */ getLogSize()2620 public int getLogSize() { 2621 int size = 1; // rootExists 2622 if (root != null) { 2623 size += root.getLogSize(); 2624 } 2625 return size; 2626 } 2627 2628 /** 2629 * @see Loggable#writeToLog 2630 */ writeToLog(ByteBuffer logBuffer)2631 public void writeToLog(ByteBuffer logBuffer) { 2632 byte booleans = (byte) ((root != null) ? 1 : 0); 2633 logBuffer.put(booleans); 2634 if (root != null) { 2635 root.writeToLog(logBuffer); 2636 } 2637 } 2638 2639 /** 2640 * @see Loggable#readFromLog 2641 */ readFromLog(ByteBuffer itemBuffer, int entryVersion)2642 public void readFromLog(ByteBuffer itemBuffer, int entryVersion) { 2643 boolean rootExists = false; 2644 byte booleans = itemBuffer.get(); 2645 rootExists = (booleans & 1) != 0; 2646 if (rootExists) { 2647 root = makeRootChildReference(); 2648 root.readFromLog(itemBuffer, entryVersion); 2649 } 2650 } 2651 2652 /** 2653 * @see Loggable#dumpLog 2654 */ dumpLog(StringBuilder sb, boolean verbose)2655 public void dumpLog(StringBuilder sb, boolean verbose) { 2656 sb.append("<root>"); 2657 if (root != null) { 2658 root.dumpLog(sb, verbose); 2659 } 2660 sb.append("</root>"); 2661 } 2662 2663 /** 2664 * @see Loggable#getTransactionId 2665 */ getTransactionId()2666 public long getTransactionId() { 2667 return 0; 2668 } 2669 2670 /** 2671 * @see Loggable#logicalEquals 2672 * Always return false, this item should never be compared. 2673 */ logicalEquals(Loggable other)2674 public boolean logicalEquals(Loggable other) { 2675 return false; 2676 } 2677 2678 /** 2679 * @return the TreeStats for this tree. 2680 */ getTreeStats()2681 int getTreeStats() { 2682 return rootSplits.get(); 2683 } 2684 getTreeStatsAccumulator()2685 private TreeWalkerStatsAccumulator getTreeStatsAccumulator() { 2686 if (EnvironmentImpl.getThreadLocalReferenceCount() > 0) { 2687 return treeStatsAccumulatorTL.get(); 2688 } else { 2689 return null; 2690 } 2691 } 2692 setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA)2693 public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) { 2694 treeStatsAccumulatorTL.set(tSA); 2695 } 2696 loadStats(StatsConfig config, BtreeStats btreeStats)2697 public void loadStats(StatsConfig config, BtreeStats btreeStats) { 2698 /* Add the tree statistics to BtreeStats. */ 2699 btreeStats.setTreeStats(stats.cloneGroup(false)); 2700 2701 if (config.getClear()) { 2702 relatchesRequired.clear(); 2703 rootSplits.clear(); 2704 } 2705 } 2706 2707 /* 2708 * Debugging stuff. 2709 */ dump()2710 public void dump() { 2711 System.out.println(dumpString(0)); 2712 } 2713 dumpString(int nSpaces)2714 public String dumpString(int nSpaces) { 2715 StringBuilder sb = new StringBuilder(); 2716 sb.append(TreeUtils.indent(nSpaces)); 2717 sb.append("<tree>"); 2718 sb.append('\n'); 2719 if (root != null) { 2720 sb.append(DbLsn.dumpString(root.getLsn(), nSpaces)); 2721 sb.append('\n'); 2722 IN rootIN = (IN) root.getTarget(); 2723 if (rootIN == null) { 2724 sb.append("<in/>"); 2725 } else { 2726 sb.append(rootIN.toString()); 2727 } 2728 sb.append('\n'); 2729 } 2730 sb.append(TreeUtils.indent(nSpaces)); 2731 sb.append("</tree>"); 2732 return sb.toString(); 2733 } 2734 2735 /** 2736 * Unit test support to validate subtree pruning. Didn't want to make root 2737 * access public. 2738 */ validateDelete(int index)2739 boolean validateDelete(int index) 2740 throws DatabaseException { 2741 2742 rootLatch.acquireShared(); 2743 try { 2744 IN rootIN = (IN) root.fetchTarget(database, null); 2745 rootIN.latch(); 2746 try { 2747 return rootIN.validateSubtreeBeforeDelete(index); 2748 } finally { 2749 rootIN.releaseLatch(); 2750 } 2751 } finally { 2752 rootLatch.release(); 2753 } 2754 } 2755 2756 /* For unit testing only. */ setWaitHook(TestHook hook)2757 public void setWaitHook(TestHook hook) { 2758 waitHook = hook; 2759 } 2760 2761 /* For unit testing only. */ setSearchHook(TestHook hook)2762 public void setSearchHook(TestHook hook) { 2763 searchHook = hook; 2764 } 2765 2766 /* For unit testing only. */ setCkptHook(TestHook hook)2767 public void setCkptHook(TestHook hook) { 2768 ckptHook = hook; 2769 } 2770 2771 /* For unit testing only. */ setGetParentINHook(TestHook hook)2772 public void setGetParentINHook(TestHook hook) { 2773 getParentINHook = hook; 2774 } 2775 setFetchINHook(TestHook hook)2776 public void setFetchINHook(TestHook hook) { 2777 fetchINHook = hook; 2778 } 2779 /** 2780 * Send trace messages to the java.util.logger. Don't rely on the logger 2781 * alone to conditionalize whether we send this message, we don't even want 2782 * to construct the message if the level is not enabled. 2783 */ traceSplitRoot(Level level, String splitType, IN newRoot, long newRootLsn, IN oldRoot, long oldRootLsn)2784 private void traceSplitRoot(Level level, 2785 String splitType, 2786 IN newRoot, 2787 long newRootLsn, 2788 IN oldRoot, 2789 long oldRootLsn) { 2790 Logger logger = database.getEnv().getLogger(); 2791 if (logger.isLoggable(level)) { 2792 StringBuilder sb = new StringBuilder(); 2793 sb.append(splitType); 2794 sb.append(" newRoot=").append(newRoot.getNodeId()); 2795 sb.append(" newRootLsn="). 2796 append(DbLsn.getNoFormatString(newRootLsn)); 2797 sb.append(" oldRoot=").append(oldRoot.getNodeId()); 2798 sb.append(" oldRootLsn="). 2799 append(DbLsn.getNoFormatString(oldRootLsn)); 2800 LoggerUtils.logMsg( 2801 logger, database.getEnv(), level, sb.toString()); 2802 } 2803 } 2804 2805 private static class SplitInfo { 2806 IN parent; 2807 IN child; 2808 int index; 2809 SplitInfo(IN parent, IN child, int index)2810 SplitInfo(IN parent, IN child, int index) { 2811 this.parent = parent; 2812 this.child = child; 2813 this.index = index; 2814 } 2815 } 2816 } 2817