1 /* 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. 5 * 6 */ 7 8 package com.sleepycat.je.dbi; 9 10 import java.util.Arrays; 11 import java.util.ArrayList; 12 import java.util.List; 13 import java.util.Comparator; 14 import java.util.Set; 15 import java.util.logging.Level; 16 17 import com.sleepycat.je.CacheMode; 18 import com.sleepycat.je.DatabaseEntry; 19 import com.sleepycat.je.DatabaseException; 20 import com.sleepycat.je.DuplicateDataException; 21 import com.sleepycat.je.EnvironmentFailureException; 22 import com.sleepycat.je.LockConflictException; 23 import com.sleepycat.je.OperationStatus; 24 import com.sleepycat.je.evictor.Evictor.EvictionSource; 25 import com.sleepycat.je.latch.LatchSupport; 26 import com.sleepycat.je.log.LogUtils; 27 import com.sleepycat.je.log.ReplicationContext; 28 import com.sleepycat.je.tree.BIN; 29 import com.sleepycat.je.tree.BINBoundary; 30 import com.sleepycat.je.tree.IN; 31 import com.sleepycat.je.tree.Key; 32 import com.sleepycat.je.tree.LN; 33 import com.sleepycat.je.tree.SearchResult; 34 import com.sleepycat.je.tree.TrackingInfo; 35 import com.sleepycat.je.tree.Tree; 36 import com.sleepycat.je.tree.TreeWalkerStatsAccumulator; 37 import com.sleepycat.je.txn.LockGrantType; 38 import com.sleepycat.je.txn.LockInfo; 39 import com.sleepycat.je.txn.LockManager; 40 import com.sleepycat.je.txn.LockResult; 41 import com.sleepycat.je.txn.LockType; 42 import com.sleepycat.je.txn.Locker; 43 import com.sleepycat.je.txn.LockerFactory; 44 import com.sleepycat.je.txn.WriteLockInfo; 45 import com.sleepycat.je.utilint.DbLsn; 46 import com.sleepycat.je.utilint.LoggerUtils; 47 import com.sleepycat.je.utilint.Pair; 48 import com.sleepycat.je.utilint.StatGroup; 49 import com.sleepycat.je.utilint.TestHook; 50 import com.sleepycat.je.utilint.TestHookExecute; 51 import com.sleepycat.je.utilint.ThroughputStatGroup; 52 import com.sleepycat.je.utilint.VLSN; 53 54 /** 55 * A CursorImpl is the internal implementation of the cursor. 56 */ 57 public class CursorImpl implements Cloneable { 58 59 private static final boolean DEBUG = false; 60 61 private static final byte CURSOR_NOT_INITIALIZED = 1; 62 private static final byte CURSOR_INITIALIZED = 2; 63 private static final byte CURSOR_CLOSED = 3; 64 private static final String TRACE_DELETE = "Delete"; 65 private static final String TRACE_MOD = "Mod:"; 66 private static final String TRACE_INSERT = "Ins:"; 67 68 public static final int FOUND = 0x1; 69 /* Exact match on the key portion. */ 70 public static final int EXACT_KEY = 0x2; 71 /* Record found is the last one in the databaseImpl. */ 72 public static final int FOUND_LAST = 0x4; 73 74 /* 75 * Allocate hashCode ids from this. [#13896] 76 */ 77 private static long lastAllocatedId = 0; 78 79 /* 80 * Unique id that we can return as a hashCode to prevent calls to 81 * Object.hashCode(). [#13896] 82 */ 83 private final int thisId; 84 85 /* The databaseImpl behind the handle. */ 86 private final DatabaseImpl databaseImpl; 87 88 /* Owning transaction. */ 89 private Locker locker; 90 91 private final boolean retainNonTxnLocks; 92 93 private final boolean isSecondaryCursor; 94 95 /* 96 * Cursor location in the databaseImpl, represented by a BIN and an index 97 * in the BIN. The bin is null if not established, and the index is 98 * negative if not established. 99 */ 100 private volatile BIN bin; 101 private volatile int index; 102 103 /* State of the cursor. See CURSOR_XXX above. */ 104 private byte status; 105 106 private CacheMode cacheMode; 107 private boolean allowEviction; 108 109 /* 110 * A cache of the record version for the operation at the current position. 111 * Is null if the cursor is uninitialized. For a secondary cursor, is the 112 * version of the primary record. 113 */ 114 private RecordVersion currentRecordVersion; 115 116 private ThreadLocal<TreeWalkerStatsAccumulator> treeStatsAccumulatorTL; 117 118 private TestHook testHook; 119 120 public enum SearchMode { 121 SET(true, false, "SET"), 122 BOTH(true, true, "BOTH"), 123 SET_RANGE(false, false, "SET_RANGE"), 124 BOTH_RANGE(false, true, "BOTH_RANGE"); 125 126 private final boolean exactSearch; 127 private final boolean dataSearch; 128 private final String name; 129 SearchMode(boolean exactSearch, boolean dataSearch, String name)130 private SearchMode(boolean exactSearch, 131 boolean dataSearch, 132 String name) { 133 this.exactSearch = exactSearch; 134 this.dataSearch = dataSearch; 135 this.name = "SearchMode." + name; 136 } 137 138 /** 139 * Returns true when the key or key/data search is exact, i.e., for SET 140 * and BOTH. 141 */ isExactSearch()142 public final boolean isExactSearch() { 143 return exactSearch; 144 } 145 146 /** 147 * Returns true when the data value is included in the search, i.e., 148 * for BOTH and BOTH_RANGE. 149 */ isDataSearch()150 public final boolean isDataSearch() { 151 return dataSearch; 152 } 153 154 @Override toString()155 public String toString() { 156 return name; 157 } 158 } 159 160 /** 161 * Creates a cursor with retainNonTxnLocks=true, isSecondaryCursor=false. 162 * These are the standard settings for an internal cursor. 163 */ CursorImpl(DatabaseImpl database, Locker locker)164 public CursorImpl(DatabaseImpl database, Locker locker) { 165 this(database, locker, 166 true /*retainNonTxnLocks*/, 167 false /*isSecondaryCursor*/); 168 } 169 170 /** 171 * Creates a cursor. 172 * 173 * A cursor always retains transactional locks when it is reset or closed. 174 * Non-transaction locks may be retained or not, depending on the 175 * retainNonTxnLocks parameter value. 176 * 177 * Normally a user-created non-transactional Cursor releases locks on reset 178 * and close, and a ThreadLocker is normally used. However, by passing 179 * true for retainNonTxnLocks a ThreadLocker can be made to retain locks; 180 * this capability is used by SecondaryCursor.readPrimaryAfterGet. 181 * 182 * For internal (non-user) cursors, a BasicLocker is often used and locks 183 * are retained. In these internal use cases the caller explicitly calls 184 * BasicLocker.operationEnd() after the cursor is closed, and 185 * retainNonTxnLocks is set to true to prevent the locks acquired by the 186 * BasicLocker from being released when the cursor is closed. 187 * 188 * BasicLocker is also used for NameLN operations while opening a Database 189 * handle. Database handle locks must be retained, even if the Database is 190 * opened non-transactionally. 191 * 192 * @param retainNonTxnLocks is true if non-transactional locks should be 193 * retained (not released automatically) when the cursor is reset or 194 * closed. 195 * 196 * @param isSecondaryCursor whether to treat this cursor as a secondary 197 * cursor, e.g., secondary records don't have record versions. 198 */ CursorImpl( DatabaseImpl databaseImpl, Locker locker, boolean retainNonTxnLocks, boolean isSecondaryCursor)199 public CursorImpl( 200 DatabaseImpl databaseImpl, 201 Locker locker, 202 boolean retainNonTxnLocks, 203 boolean isSecondaryCursor) { 204 205 thisId = (int) getNextCursorId(); 206 bin = null; 207 index = -1; 208 209 this.retainNonTxnLocks = retainNonTxnLocks; 210 this.isSecondaryCursor = isSecondaryCursor; 211 212 /* Associate this cursor with the databaseImpl. */ 213 this.databaseImpl = databaseImpl; 214 this.locker = locker; 215 this.locker.registerCursor(this); 216 217 /* 218 * This default value is used only when the CursorImpl is used directly 219 * (mainly for internal databases). When the CursorImpl is created by 220 * a Cursor, CursorImpl.setCacheMode will be called. 221 */ 222 this.cacheMode = CacheMode.DEFAULT; 223 224 status = CURSOR_NOT_INITIALIZED; 225 226 /* 227 * Do not perform eviction here because we may be synchronized on the 228 * Database instance. For example, this happens when we call 229 * Database.openCursor(). Also eviction may be disabled after the 230 * cursor is constructed. 231 */ 232 } 233 234 /** 235 * Performs a shallow copy and returns the new cursor. 236 * 237 * @param samePosition If true, this cursor's position is used for the new 238 * cursor, and addCursor is called on the new cursor to register it with 239 * the current BIN. If false, the new cursor will be uninitialized. 240 */ cloneCursor(final boolean samePosition)241 public CursorImpl cloneCursor(final boolean samePosition) { 242 243 assert assertCursorState( 244 false /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 245 246 CursorImpl ret = null; 247 try { 248 latchBIN(); 249 250 ret = (CursorImpl) super.clone(); 251 252 if (!retainNonTxnLocks) { 253 ret.locker = locker.newNonTxnLocker(); 254 } 255 256 ret.locker.registerCursor(ret); 257 258 if (samePosition) { 259 ret.addCursor(); 260 } else { 261 ret.clear(); 262 } 263 } catch (CloneNotSupportedException cannotOccur) { 264 return null; 265 } finally { 266 releaseBIN(); 267 } 268 269 /* Perform eviction before and after each cursor operation. */ 270 criticalEviction(); 271 272 return ret; 273 } 274 275 /* 276 * Allocate a new hashCode id. Doesn't need to be synchronized since it's 277 * ok for two objects to have the same hashcode. 278 */ getNextCursorId()279 private static long getNextCursorId() { 280 return ++lastAllocatedId; 281 } 282 283 @Override hashCode()284 public int hashCode() { 285 return thisId; 286 } 287 getLocker()288 public Locker getLocker() { 289 return locker; 290 } 291 292 /** 293 * Called when a cursor has been duplicated prior to being moved. The new 294 * locker is informed of the old locker, so that a preempted lock taken by 295 * the old locker can be ignored. [#16513] 296 * 297 * @param closingCursor the old cursor that will be closed if the new 298 * cursor is moved successfully. 299 */ setClosingLocker(CursorImpl closingCursor)300 public void setClosingLocker(CursorImpl closingCursor) { 301 302 /* 303 * If the two lockers are different, then the old locker will be closed 304 * when the operation is complete. This is currently the case only for 305 * ReadCommitted cursors and non-transactional cursors that do not 306 * retain locks. 307 */ 308 if (!retainNonTxnLocks && locker != closingCursor.locker) { 309 locker.setClosingLocker(closingCursor.locker); 310 } 311 } 312 313 /** 314 * Called when a cursor move operation is complete. Clears the 315 * closingLocker so that a reference to the old closed locker is not held. 316 */ clearClosingLocker()317 public void clearClosingLocker() { 318 locker.setClosingLocker(null); 319 } 320 getCacheMode()321 public CacheMode getCacheMode() { 322 return cacheMode; 323 } 324 325 /** 326 * Sets the effective cache mode to use for the next operation. The 327 * cacheMode field will never be set to null or DYNAMIC, and can be passed 328 * directly to latching methods. 329 * 330 * @see #performCacheEviction 331 */ setCacheMode(final CacheMode mode)332 public void setCacheMode(final CacheMode mode) { 333 cacheMode = databaseImpl.getEffectiveCacheMode(mode); 334 } 335 setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA)336 public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) { 337 maybeInitTreeStatsAccumulator(); 338 treeStatsAccumulatorTL.set(tSA); 339 } 340 maybeInitTreeStatsAccumulator()341 private void maybeInitTreeStatsAccumulator() { 342 if (treeStatsAccumulatorTL == null) { 343 treeStatsAccumulatorTL = 344 new ThreadLocal<TreeWalkerStatsAccumulator>(); 345 } 346 } 347 getTreeStatsAccumulator()348 private TreeWalkerStatsAccumulator getTreeStatsAccumulator() { 349 if (EnvironmentImpl.getThreadLocalReferenceCount() > 0) { 350 maybeInitTreeStatsAccumulator(); 351 return treeStatsAccumulatorTL.get(); 352 } else { 353 return null; 354 } 355 } 356 incrementLNCount()357 public void incrementLNCount() { 358 TreeWalkerStatsAccumulator treeStatsAccumulator = 359 getTreeStatsAccumulator(); 360 if (treeStatsAccumulator != null) { 361 treeStatsAccumulator.incrementLNCount(); 362 } 363 } 364 getIndex()365 public int getIndex() { 366 return index; 367 } 368 getBIN()369 public BIN getBIN() { 370 return bin; 371 } 372 setIndex(int idx)373 public void setIndex(int idx) { 374 index = idx; 375 } 376 setOnFirstSlot()377 public void setOnFirstSlot() { 378 assert(bin.isLatchOwner()); 379 index = 0; 380 } 381 setOnLastSlot()382 public void setOnLastSlot() { 383 assert(bin.isLatchOwner()); 384 index = bin.getNEntries() - 1; 385 } 386 isOnBIN(BIN bin)387 public boolean isOnBIN(BIN bin) { 388 return this.bin == bin; 389 } 390 assertBIN(BIN bin)391 public void assertBIN(BIN bin) { 392 assert this.bin == bin : 393 "nodeId=" + bin.getNodeId() + 394 " cursor=" + dumpToString(true); 395 } 396 isOnSamePosition(CursorImpl other)397 public boolean isOnSamePosition(CursorImpl other) { 398 return bin == other.bin && index == other.index; 399 } 400 setBIN(BIN newBin)401 public void setBIN(BIN newBin) { 402 403 /* 404 * Historical note. In the past we checked here that the cursor was 405 * removed for the prior BIN by calling BIN.containsCursor [#16280]. 406 * Because the containsCursor method takes a latch on the prior BIN, 407 * this causes a rare latch deadlock when newBin is latched (during an 408 * insert, for example), since this thread will latch two BINs in 409 * arbitrary order; so the assertion was removed [#21395]. 410 */ 411 bin = newBin; 412 } 413 latchBIN()414 public void latchBIN() 415 throws DatabaseException { 416 417 while (bin != null) { 418 BIN waitingOn = bin; 419 waitingOn.latch(cacheMode); 420 if (bin == waitingOn) { 421 return; 422 } 423 waitingOn.releaseLatch(); 424 } 425 } 426 releaseBIN()427 public void releaseBIN() { 428 if (bin != null) { 429 bin.releaseLatchIfOwner(); 430 } 431 } 432 addCursor(BIN bin)433 void addCursor(BIN bin) { 434 if (bin != null) { 435 assert bin.isLatchExclusiveOwner(); 436 bin.addCursor(this); 437 } 438 } 439 440 /** 441 * Add to the current cursor. 442 */ addCursor()443 void addCursor() { 444 if (bin != null) { 445 addCursor(bin); 446 } 447 } 448 449 /** 450 * Change cursor to point to the given BIN/index. If the new BIN is 451 * different, then old BIN must be unlatched and the new BIN must be 452 * latched. 453 */ setPosition(BIN newBin, int newIndex)454 private void setPosition(BIN newBin, int newIndex) { 455 if (bin != newBin) { 456 if (bin != null) { 457 latchBIN(); 458 bin.removeCursor(this); 459 bin.releaseLatch(); 460 } 461 setBIN(newBin); 462 addCursor(); 463 } 464 setIndex(newIndex); 465 } 466 467 /** 468 * Called for creating trace messages without any latching. 469 */ getCurrentNodeId()470 public long getCurrentNodeId() { 471 final BIN b = bin; 472 return (b == null ? -1 : b.getNodeId()); 473 } 474 getCurrentLsn()475 public long getCurrentLsn() { 476 477 assert(bin != null && bin.isLatchOwner()); 478 assert(index >= 0 && index < bin.getNEntries()); 479 480 return bin.getLsn(index); 481 } 482 getCurrentKey()483 public byte[] getCurrentKey() { 484 return getCurrentKey(false); 485 } 486 487 /** 488 * Returns the key at the current position, regardless of whether the 489 * record is deleted. Does not lock. The key returned is not a copy and 490 * may not be returned directly to the user without copying it first. 491 * 492 * The cursor must be initialized. 493 */ getCurrentKey(boolean isLatched)494 public byte[] getCurrentKey(boolean isLatched) { 495 496 if (!isLatched) { 497 latchBIN(); 498 } 499 500 try { 501 assert(bin != null); 502 assert(index >= 0 && index < bin.getNEntries()); 503 504 return bin.getKey(index); 505 } finally { 506 if (!isLatched) { 507 releaseBIN(); 508 } 509 } 510 } 511 setInitialized()512 private void setInitialized() { 513 status = CURSOR_INITIALIZED; 514 } 515 516 /** 517 * @return true if this cursor is closed 518 */ isClosed()519 public boolean isClosed() { 520 return (status == CURSOR_CLOSED); 521 } 522 523 /** 524 * @return true if this cursor is not initialized 525 */ isNotInitialized()526 public boolean isNotInitialized() { 527 return (status == CURSOR_NOT_INITIALIZED); 528 } 529 isInternalDbCursor()530 public boolean isInternalDbCursor() { 531 return databaseImpl.isInternalDb(); 532 } 533 hasDuplicates()534 public boolean hasDuplicates() { 535 return databaseImpl.getSortedDuplicates(); 536 } 537 dumpTree()538 public void dumpTree() { 539 databaseImpl.getTree().dump(); 540 } 541 542 /** 543 * For a non-sticky cursor, this is an alternative to closing or resetting 544 * the cursor when it is initialized and an advancing operation 545 * (next/prev/skip) is about to be performed. The cursor position cannot be 546 * reset as it would be if the operation were a search, for example. 547 */ beforeAdvance()548 public void beforeAdvance() { 549 550 /* 551 * When EVICT_LN is configured we should evict the LN at the current 552 * position before changing the position. [#21973] 553 * 554 * Note that EVICT_BIN cannot be implemented for a non-sticky cursor. 555 * We would have to call removeCursor before the BIN eviction, and then 556 * our position would be unstable. Even if this were possible, since 557 * we can't predict the new position we would evict the BIN at every 558 * operation, even when the BIN remained the same. 559 * 560 * It might be possible to support MAKE_COLD here, but more thought and 561 * testing would be required. 562 * 563 * Therefore, when EVICT_BIN or MAKE_COLD are configured, cursor 564 * cloning is always used. 565 */ 566 if (cacheMode == CacheMode.EVICT_LN && bin != null) { 567 evict(); 568 } 569 570 releaseNonTxnLocks(); 571 572 criticalEviction(); 573 } 574 575 /** 576 * Reset a cursor to an uninitialized state, but unlike close(), allow it 577 * to be used further. 578 */ reset()579 public void reset() 580 throws DatabaseException { 581 582 /* Must remove cursor before evicting BIN and releasing locks. */ 583 removeCursorAndPerformCacheEviction(null /*newCursor*/); 584 585 releaseNonTxnLocks(); 586 587 /* Perform eviction before and after each cursor operation. */ 588 criticalEviction(); 589 } 590 clear()591 private void clear() { 592 bin = null; 593 index = -1; 594 status = CURSOR_NOT_INITIALIZED; 595 currentRecordVersion = null; 596 } 597 releaseNonTxnLocks()598 private void releaseNonTxnLocks() { 599 if (!retainNonTxnLocks) { 600 locker.releaseNonTxnLocks(); 601 } 602 } 603 close()604 public void close() 605 throws DatabaseException { 606 607 close(null); 608 } 609 610 /** 611 * Close a cursor. 612 * 613 * @param newCursor is another cursor that is kept open by the parent 614 * Cursor object, or null if no other cursor is kept open. 615 * 616 * @throws DatabaseException if the cursor was previously closed. 617 */ close(final CursorImpl newCursor)618 public void close(final CursorImpl newCursor) 619 throws DatabaseException { 620 621 assert assertCursorState( 622 false /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 623 624 /* Must remove cursor before evicting BIN and releasing locks. */ 625 removeCursorAndPerformCacheEviction(newCursor); 626 627 locker.unRegisterCursor(this); 628 629 if (!retainNonTxnLocks) { 630 locker.nonTxnOperationEnd(); 631 } 632 633 status = CURSOR_CLOSED; 634 635 /* Perform eviction before and after each cursor operation. */ 636 criticalEviction(); 637 } 638 removeCursorAndPerformCacheEviction(CursorImpl newCursor)639 private void removeCursorAndPerformCacheEviction(CursorImpl newCursor) { 640 641 latchBIN(); 642 643 if (bin == null) { 644 clear(); // ensure that state is uninitialized 645 return; 646 } 647 648 try { 649 /* Must remove cursor before evicting BIN. */ 650 bin.removeCursor(this); 651 performCacheEviction(newCursor); // may release latch 652 } finally { 653 releaseBIN(); 654 clear(); 655 } 656 } 657 658 /** 659 * Disables or enables eviction during cursor operations. For example, a 660 * cursor used to implement eviction (e.g., in some UtilizationProfile and 661 * most DbTree and VLSNIndex methods) should not itself perform eviction, 662 * but eviction should be enabled for user cursors. Eviction is disabled 663 * by default. 664 */ setAllowEviction(boolean allowed)665 public void setAllowEviction(boolean allowed) { 666 allowEviction = allowed; 667 } 668 criticalEviction()669 public void criticalEviction() { 670 671 /* 672 * In addition to disabling critical eviction for internal cursors (see 673 * setAllowEviction above), we do not perform critical eviction when 674 * EVICT_BIN or MAKE_COLD is used. Operations using MAKE_COLD and 675 * EVICT_BIN generally do not add any net memory to the cache, so they 676 * shouldn't have to perform critical eviction or block while another 677 * thread performs eviction. 678 */ 679 if (allowEviction && 680 cacheMode != CacheMode.MAKE_COLD && 681 cacheMode != CacheMode.EVICT_BIN) { 682 databaseImpl.getEnv().criticalEviction(false /*backgroundIO*/); 683 } 684 } 685 686 /** 687 * When multiple operations are performed, CacheMode-based eviction is 688 * performed for a given operation at the end of the next operation, which 689 * calls close() or reset() on the CursorImpl of the previous operation. 690 * Eviction for the last operation (including when only one operation is 691 * performed) also occurs during Cursor.close(), which calls 692 * CursorImpl.close(). 693 * 694 * By default, the CacheMode returned by DatabaseImpl.getCacheMode is used, 695 * and the defaults specified by the user for the Database or Environment 696 * are applied. However, the default mode can be overridden by the user by 697 * calling Cursor.setCacheMode, and the mode may be changed prior to each 698 * operation, if desired. 699 * 700 * To implement a per-operation CacheMode, two CacheMode fields are 701 * maintained. Cursor.cacheMode is the mode to use for the next operation. 702 * CursorImpl.cacheMode is the mode that was used for the previous 703 * operation, and that is used for eviction when that CursorImpl is closed 704 * or reset. 705 * 706 * This method must be called with the BIN latched but may release it, 707 * namely when the BIN is evicted. 708 */ performCacheEviction(final CursorImpl newCursor)709 private void performCacheEviction(final CursorImpl newCursor) { 710 711 EnvironmentImpl envImpl = databaseImpl.getEnv(); 712 713 if (cacheMode != CacheMode.EVICT_LN && 714 cacheMode != CacheMode.EVICT_BIN && 715 (cacheMode != CacheMode.MAKE_COLD || 716 !(envImpl.isCacheFull() || envImpl.wasCacheEverFull()))) { 717 /* Eviction not configured, or for MAKE_COLD, cache never full. */ 718 return; 719 } 720 721 final BIN nextBin; 722 final int nextIndex; 723 724 if (newCursor != null) { 725 nextBin = newCursor.bin; 726 nextIndex = newCursor.index; 727 } else { 728 nextBin = null; 729 nextIndex = -1; 730 } 731 732 switch (cacheMode) { 733 case EVICT_LN: 734 /* Evict the LN if we've moved to a new record. */ 735 if (bin != nextBin || index != nextIndex) { 736 evict(true); 737 } 738 break; 739 case EVICT_BIN: 740 if (bin == nextBin) { 741 /* BIN has not changed, do not evict it. May evict the LN. */ 742 if (index != nextIndex) { 743 evict(true); 744 } 745 } else { 746 /* BIN has changed, evict it. */ 747 evictBIN(); 748 } 749 break; 750 case MAKE_COLD: 751 if (bin == nextBin) { 752 /* BIN has not changed, do not evict it. May evict the LN. */ 753 if (index != nextIndex) { 754 evict(true); 755 } 756 } else if (!envImpl.isCacheFull()) { 757 /* 758 * The BIN has changed, but the cache is not full. Just evict 759 * the LN. 760 */ 761 evict(true); 762 } else { 763 /* BIN has changed, evict it. */ 764 evictBIN(); 765 } 766 break; 767 default: 768 assert false; 769 } 770 } 771 772 /** 773 * Evict current BIN. Must already be latched. The latch will be released 774 * inside the doEvictOneIN() call. 775 */ evictBIN()776 private void evictBIN() { 777 databaseImpl.getEnv().getEvictor().doEvictOneIN( 778 bin, EvictionSource.CACHEMODE); 779 } 780 781 /** 782 * Evict the LN node at the cursor position. 783 */ evict()784 public void evict() 785 throws DatabaseException { 786 787 evict(false); // isLatched 788 } 789 790 /** 791 * Evict the LN node at the cursor position. 792 */ evict(boolean isLatched)793 public void evict(boolean isLatched) 794 throws DatabaseException { 795 796 try { 797 if (!isLatched) { 798 latchBIN(); 799 } 800 if (index >= 0) { 801 bin.evictLN(index); 802 } 803 } finally { 804 if (!isLatched) { 805 releaseBIN(); 806 } 807 } 808 } 809 deleteCurrentRecord( ReplicationContext repContext)810 public OperationStatus deleteCurrentRecord( 811 ReplicationContext repContext) 812 throws DatabaseException { 813 return deleteCurrentRecord(repContext, null); 814 } 815 816 /** 817 * Delete the item pointed to by the cursor. If the item is already 818 * deleted, return KEYEMPTY. Returns with nothing latched. 819 * 820 * @param opStats Used to collect stats about user ops performed on BIN 821 * deltas. Internal ops should pass null. 822 * 823 * @return 0 on success, appropriate error code otherwise. 824 */ deleteCurrentRecord( ReplicationContext repContext, ThroughputStatGroup opStats)825 public OperationStatus deleteCurrentRecord( 826 ReplicationContext repContext, 827 ThroughputStatGroup opStats) 828 throws DatabaseException { 829 830 assert assertCursorState( 831 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 832 final EnvironmentImpl envImpl = databaseImpl.getEnv(); 833 final DbType dbType = databaseImpl.getDbType(); 834 835 boolean success = false; 836 final long oldLsn; 837 final LN.LogResult logResult; 838 839 latchBIN(); 840 841 try { 842 /* 843 * Get a write lock. An uncontended lock is permitted because we 844 * will log a new LN before releasing the BIN latch. 845 */ 846 final LockStanding lockStanding = lockLN( 847 LockType.WRITE, true /*allowUncontended*/, false /*noWait*/); 848 849 if (!lockStanding.recordExists()) { 850 /* LN was deleted or cleaned. */ 851 revertLock(lockStanding); 852 success = true; 853 return OperationStatus.KEYEMPTY; 854 } 855 856 oldLsn = lockStanding.lsn; 857 assert oldLsn != DbLsn.NULL_LSN; 858 859 /* 860 * Must fetch LN if any of the following are true: 861 * - CLEANER_FETCH_OBSOLETE_SIZE is configured and lastLoggedSize 862 * is unknown 863 * - this database does not use the standard LN class and we 864 * cannot call DbType.createdDeletedLN further below 865 * For other cases, we are careful not to fetch, in order to avoid 866 * a random read during a delete operation. 867 * 868 * Note that we checked for a deleted/cleaned LN above, so we know 869 * that fetchLN will not return null. 870 */ 871 final int lastLoggedSize = bin.getLastLoggedSize(index); 872 LN ln; 873 if ((lastLoggedSize == 0 && 874 envImpl.getCleaner().getFetchObsoleteSize(databaseImpl)) || 875 !dbType.mayCreateDeletedLN()) { 876 /* Must fetch. */ 877 ln = bin.fetchLN(index, cacheMode); 878 } else { 879 /* If resident, use LN for obsolete size tracking. */ 880 ln = (LN) bin.getTarget(index); 881 } 882 883 /* 884 * Make the existing LN deleted, if cached, else create a new 885 * deleted LN. 886 */ 887 final long oldLNMemSize; 888 if (ln != null) { 889 /* Leave LN cached. Set ln.data to null and mark it dirty. */ 890 oldLNMemSize = ln.getMemorySizeIncludedByParent(); 891 ln.delete(); 892 } else { 893 /* 894 * LN is not cached. Create an LN with ln.data == null, and 895 * mark it dirty. Then attach it to the tree. 896 */ 897 ln = dbType.createDeletedLN(envImpl); 898 oldLNMemSize = ln.getMemorySizeIncludedByParent(); 899 bin.attachNode(index, ln, null /*lnSlotKey*/); 900 } 901 902 /* Get a wli to log. */ 903 WriteLockInfo wli = lockStanding.prepareForUpdate(); 904 905 /* Log the deleted record version and lock its new LSN. */ 906 logResult = ln.optionalLog( 907 envImpl, databaseImpl, bin.getKey(index), oldLsn, 908 lastLoggedSize, locker, wli, repContext); 909 910 /* 911 * Now update the parent BIN of the LN to correctly reference the 912 * LN and adjust the memory sizing. 913 */ 914 bin.updateEntry( 915 index, oldLNMemSize, logResult.newLsn, logResult.newSize, 916 null /*lnSlotKey*/); 917 bin.setPendingDeleted(index); 918 919 /* Cache record version for delete operation. */ 920 setCurrentVersion(ln.getVLSNSequence(), logResult.newLsn); 921 922 /* 923 * It is desirable to evict the LN after deletion because it will 924 * never be fetched again. This is especially true because slot 925 * compression is deferred until a full BIN is logged. But for 926 * deferred-write DBs we should not evict a dirty LN since it may 927 * be logged unnecessarily. 928 */ 929 if (!databaseImpl.isDeferredWriteMode() && 930 bin.getTarget(index) != null) { 931 bin.evictLN(index); 932 } 933 934 locker.addDeleteInfo(bin); 935 success = true; 936 937 } finally { 938 939 if (success && 940 opStats != null && 941 bin != null && 942 bin.isBINDelta()) { 943 opStats.increment( 944 ThroughputStatGroup.BIN_DELTA_DELETES_OFFSET); 945 } 946 947 releaseBIN(); 948 } 949 950 trace(Level.FINER, TRACE_DELETE, bin, index, oldLsn, logResult.newLsn); 951 952 return OperationStatus.SUCCESS; 953 } 954 955 /** 956 * Modify the current record with the given data, and optionally replace 957 * the key. 958 * 959 * @param key The new key value for the BIN slot S to be updated. Cannot 960 * be partial. For a no-dups DB, it is null. For dups DBs it is a 2-part 961 * key combining the current primary key of slot S with the original, 962 * user-provided data. "key" (if not null) must compare equal to S.key 963 * (otherwise DuplicateDataException is thrown), but the 2 keys may not 964 * be identical if custom comparators are used. So, S.key will actually 965 * be replaced by "key". 966 * 967 * @param data The new data to (perhaps partially) replace the data of the 968 * LN associated with the BIN slot. For dups DBs it is EMPTY_DUPS_DATA. 969 * Note: for dups DBs the original, user-provided "data" must not be 970 * partial. 971 * 972 * @param returnOldData To receive the old LN data (before the update). 973 * It is needed only by DBs with indexes/triggers; will be null otherwise. 974 * 975 * @param returnNewData To receive the full data of the updated LN. 976 * It is needed only by DBs with indexes/triggers and only if "data" is 977 * partial; will be null otherwise. Note: "returnNewData" may be different 978 * than "data" only if "data" is partial. 979 * 980 * @param opStats Used to collect stats about user ops performed on BIN 981 * deltas. Internal ops should pass null. 982 */ updateCurrentRecord( DatabaseEntry key, DatabaseEntry data, DatabaseEntry returnOldData, DatabaseEntry returnNewData, ReplicationContext repContext, ThroughputStatGroup opStats)983 public OperationStatus updateCurrentRecord( 984 DatabaseEntry key, 985 DatabaseEntry data, 986 DatabaseEntry returnOldData, 987 DatabaseEntry returnNewData, 988 ReplicationContext repContext, 989 ThroughputStatGroup opStats) { 990 991 assert assertCursorState( 992 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 993 994 if (returnOldData != null) { 995 returnOldData.setData(null); 996 } 997 if (returnNewData != null) { 998 returnNewData.setData(null); 999 } 1000 1001 final LockStanding lockStanding; 1002 OperationStatus status = OperationStatus.NOTFOUND; 1003 boolean success = false; 1004 1005 latchBIN(); 1006 1007 try { 1008 /* Get a write lock. */ 1009 lockStanding = lockLN( 1010 LockType.WRITE, true /*allowUncontended*/, false /*noWait*/); 1011 1012 if (!lockStanding.recordExists()) { 1013 /* LN was deleted or cleaned. */ 1014 revertLock(lockStanding); 1015 } else { 1016 status = updateRecordInternal( 1017 (key != null ? Key.makeKey(key) : null), data, 1018 returnOldData, returnNewData, lockStanding, repContext); 1019 } 1020 1021 success = true; 1022 return status; 1023 1024 } finally { 1025 1026 if (success && 1027 opStats != null && 1028 bin != null && 1029 bin.isBINDelta()) { 1030 opStats.increment( 1031 ThroughputStatGroup.BIN_DELTA_UPDATES_OFFSET); 1032 } 1033 1034 releaseBIN(); 1035 } 1036 } 1037 1038 /** 1039 * Insert the given record (key + LN) in the tree or return KEYEXIST if 1040 * the key is already present. 1041 * 1042 * The cursor must initially be uninitialized. 1043 * 1044 * This method is called directly internally for putting tree map LNs 1045 * and file summary LNs. 1046 */ insertRecord( byte[] key, LN ln, boolean blindInsertion, ReplicationContext repContext)1047 public OperationStatus insertRecord( 1048 byte[] key, 1049 LN ln, 1050 boolean blindInsertion, 1051 ReplicationContext repContext) 1052 throws DatabaseException { 1053 1054 assert assertCursorState( 1055 false /*mustBeInitialized*/, true /*mustNotBeInitialized*/); 1056 if (LatchSupport.TRACK_LATCHES) { 1057 LatchSupport.expectBtreeLatchesHeld(0); 1058 } 1059 1060 try { 1061 final Pair<LockStanding, Boolean> result = insertRecordInternal( 1062 key, ln, blindInsertion, null, repContext); 1063 1064 if (!result.second()) { 1065 return OperationStatus.KEYEXIST; 1066 } 1067 return OperationStatus.SUCCESS; 1068 } finally { 1069 releaseBIN(); 1070 } 1071 } 1072 1073 /** 1074 * Insert or update a given record. The method searches for the record 1075 * using its key. It will perform an update if the record is found, 1076 * otherwise an insertion. 1077 * 1078 * The cursor must initially be uninitialized. 1079 * 1080 * Called by all the Cursor.putXXX() ops, except putCurrent(). 1081 * 1082 * @param key The new key value for the BIN slot S to be inserted/updated. 1083 * Cannot be partial. For dups DBs it is a 2-part key combining the 1084 * original, user-provided key and data. In case of update, "key" must 1085 * compare equal to S.key (otherwise DuplicateDataException is thrown), 1086 * but the 2 keys may not be identical if custom comparators are used. 1087 * So, S.key will actually be replaced by "key". 1088 * 1089 * @param data In case of update, the new data to (perhaps partially) 1090 * replace the data of the LN associated with the BIN slot. For dups DBs 1091 * it is EMPTY_DUPS_DATA. Note: for dups DBs the original, user-provided 1092 * "data" must not be partial. 1093 * 1094 * @param ln is normally a new LN node that is created for insertion, and 1095 * will be discarded if an update occurs. However, HA will pass an 1096 * existing node. 1097 * 1098 * @param putMode OVERWRITE or NO_OVERWRITE 1099 * 1100 * @param returnOldData To receive, in case of update, the old LN data 1101 * (before the update). It is needed only by DBs with indexes/triggers; 1102 * will be null otherwise. 1103 * 1104 * @param returnNewData To receive the full data of the new or updated LN. 1105 * It is needed only by DBs with indexes/triggers and only if "data" is 1106 * partial; will be null otherwise. Note: "returnNewData" may be different 1107 * than "data" only if "data" is partial. 1108 * 1109 * @param opStats Used to collect stats about user ops performed on BIN 1110 * deltas. Internal ops should pass null. 1111 * 1112 * @return pair of status and 'inserted' boolean. 1113 */ insertOrUpdateRecord( final DatabaseEntry key, final DatabaseEntry data, final LN ln, final PutMode putMode, final DatabaseEntry returnOldData, final DatabaseEntry returnNewData, final ReplicationContext repContext, ThroughputStatGroup opStats)1114 public Pair<OperationStatus, Boolean> insertOrUpdateRecord( 1115 final DatabaseEntry key, 1116 final DatabaseEntry data, 1117 final LN ln, 1118 final PutMode putMode, 1119 final DatabaseEntry returnOldData, 1120 final DatabaseEntry returnNewData, 1121 final ReplicationContext repContext, 1122 ThroughputStatGroup opStats) { 1123 1124 assert key != null; 1125 assert data != null; 1126 assert ln != null; 1127 assert putMode != null; 1128 assert assertCursorState( 1129 false /*mustBeInitialized*/, true /*mustNotBeInitialized*/); 1130 if (LatchSupport.TRACK_LATCHES) { 1131 LatchSupport.expectBtreeLatchesHeld(0); 1132 } 1133 1134 if (putMode != PutMode.OVERWRITE && 1135 putMode != PutMode.NO_OVERWRITE && 1136 putMode != PutMode.BLIND_INSERTION) { 1137 throw EnvironmentFailureException.unexpectedState( 1138 putMode.toString()); 1139 } 1140 1141 boolean success = false; 1142 boolean inserted = false; 1143 1144 byte[] keyCopy = Key.makeKey(key); 1145 1146 try { 1147 1148 /* 1149 * Try to insert the key/data pair as a new record. Will succeed if 1150 * the record does not exist in the DB already. Otherwise, the 1151 * insertRecord() returns with the cursor registered on the slot 1152 * whose key is equal to "key", with the LSN of that slot ocked 1153 * in WRITE mode, and with the containing BIN latched. 1154 */ 1155 Pair<LockStanding, Boolean> insertResult = insertRecordInternal( 1156 keyCopy, ln, 1157 (putMode == PutMode.BLIND_INSERTION) /*blindInsertion*/, 1158 returnNewData, repContext); 1159 1160 if (insertResult.second()) { 1161 inserted = true; 1162 success = true; 1163 return new Pair<>(OperationStatus.SUCCESS, true); 1164 } 1165 1166 /* 1167 * There is a non-deleted slot whose key is == "key". So, this is 1168 * going to be an update. Note: Cursor has been registered on the 1169 * existing slot by insertRecord() 1170 */ 1171 if (putMode == PutMode.NO_OVERWRITE) { 1172 success = true; 1173 return new Pair<>(OperationStatus.KEYEXIST, false); 1174 } 1175 1176 /* 1177 * Update the non-deleted record at the cursor position. We have 1178 * optimized by preferring to take an uncontended lock. The 1179 * lockStanding var is guaranteed to be non-null in this case. 1180 * The BIN must remain latched when calling this method. 1181 */ 1182 final OperationStatus status = updateRecordInternal( 1183 keyCopy, data, returnOldData, returnNewData, 1184 insertResult.first(), repContext); 1185 1186 success = true; 1187 return new Pair<>(status, false); 1188 1189 } finally { 1190 1191 if (success && 1192 opStats != null && 1193 bin != null && 1194 bin.isBINDelta()) { 1195 if (inserted) { 1196 opStats.increment( 1197 ThroughputStatGroup.BIN_DELTA_INSERTS_OFFSET); 1198 } else { 1199 opStats.increment( 1200 ThroughputStatGroup.BIN_DELTA_UPDATES_OFFSET); 1201 } 1202 } 1203 1204 releaseBIN(); 1205 } 1206 } 1207 1208 /* 1209 * Try to insert the key/data pair as a new record. Will succeed if the 1210 * record does not exist already. 1211 * 1212 * The cursor must initially be uninitialized. 1213 * 1214 * On return, this.bin is latched. 1215 */ insertRecordInternal( final byte[] key, final LN ln, boolean blindInsertion, DatabaseEntry returnNewData, final ReplicationContext repContext)1216 private Pair<LockStanding, Boolean> insertRecordInternal( 1217 final byte[] key, 1218 final LN ln, 1219 boolean blindInsertion, 1220 DatabaseEntry returnNewData, 1221 final ReplicationContext repContext) { 1222 1223 final EnvironmentImpl envImpl = databaseImpl.getEnv(); 1224 WriteLockInfo wli; 1225 LockStanding lockStanding = null; 1226 final boolean isSlotReuse; 1227 final Tree tree = databaseImpl.getTree(); 1228 1229 /* 1230 * At this point, this cursor does not have a position so it cannot be 1231 * registered with the BIN that will be used. This is good because it 1232 * allows slot compression to occur before BIN splits (thus avoiding 1233 * splits if compression finds and removes any deleted slots). However, 1234 * if another cursor, including the one from which this was cloned, is 1235 * registered with the BIN, then splits won't be allowed. This is a 1236 * good reason to use non-sticky cursors for insertions, especially 1237 * sequential insertions since they will often end up in the same BIN. 1238 * 1239 * Find and latch the BIN that should contain the "key". On return from 1240 * the tree search, this.bin is latched, but "this" is still not 1241 * registered. 1242 */ 1243 bin = tree.findBinForInsert(key, getCacheMode()); 1244 1245 /* 1246 * In the case where logging occurs before locking, allow lockers to 1247 * reject the operation (e.g., if writing on a replica) and also 1248 * prepare to undo in the (very unlikely) event that logging succeeds 1249 * but locking fails. Call this method BEFORE slot insertion, in case 1250 * it throws an exception which would leave the slot with a null LSN. 1251 * 1252 * For Txn, creates the writeInfo map (if not done already), and 1253 * inserts databaseImpl in the undoDatabases map. Noop for other 1254 * non-HA lockers. 1255 */ 1256 locker.preLogWithoutLock(databaseImpl); 1257 1258 /* 1259 * If the key exists already, insertEntry1() does not insert, but 1260 * returns the index of the existing key. 1261 * 1262 * If bin is a delta and it does not contain the key, then: 1263 * (a) if blindInsertion is false, insertEntry1() will mutate it to a 1264 * full BIN and check again if the key exists or not. 1265 * (b) if blindInsertion is true, insertEntry1() will not mutate the 1266 * delta; it will just insert the key into the delta. This is OK, 1267 * because blindInsertion will be true only if we know already that the 1268 * key does not exist in the tree. 1269 */ 1270 int insertIndex = bin.insertEntry1( 1271 ln, key, DbLsn.NULL_LSN, blindInsertion); 1272 1273 if ((insertIndex & IN.INSERT_SUCCESS) == 0) { 1274 /* 1275 * Key exists. Insertion was not successful. Register the cursor on 1276 * the existing slot. If the slot is marked deleted, the key does 1277 * not really exist and the slot can be reused to do an insertion. 1278 */ 1279 isSlotReuse = true; 1280 setIndex(insertIndex); 1281 addCursor(); 1282 setInitialized(); 1283 1284 /* 1285 * Lock the LSN for the existing LN slot, and check deleted-ness. 1286 * An uncontended lock request is permitted because we are holding 1287 * the bin latch. If no locker holds a lock on the slot, then no 1288 * lock is taken by this cursor either. 1289 */ 1290 lockStanding = lockLN( 1291 LockType.WRITE, true /*allowUncontended*/, false /*noWait*/); 1292 1293 boolean isDeleted = !lockStanding.recordExists(); 1294 1295 if (!isDeleted) 1296 return new Pair<>(lockStanding, false); 1297 1298 /* 1299 * The current slot has its KD or PD flag set. Note: the record of 1300 * the slot may have been deleted by this.locker itself. 1301 */ 1302 assert(lockStanding != null); 1303 1304 /* 1305 * Create a new WriteLockInfo or get an existing one for the LSN 1306 * of the current slot, and set its abortLSN and abortKD fields, 1307 * if needed, i.e, if it is not the current txn the one who created 1308 * this LSN. The abortLSN and abortKD fields of the wli will be 1309 * included in the new logrec. 1310 */ 1311 wli = lockStanding.prepareForUpdate(); 1312 1313 } else { 1314 /* 1315 * Register the cursor at the slot that has been successfully 1316 * inserted. 1317 */ 1318 isSlotReuse = false; 1319 setIndex(insertIndex &= ~IN.INSERT_SUCCESS); 1320 addCursor(); 1321 setInitialized(); 1322 1323 /* Create a new WriteLockInfo */ 1324 wli = LockStanding.prepareForInsert(); 1325 } 1326 1327 /* 1328 * Log the new LN and lock the LSN of the new logrec in WRITE mode. 1329 * Note: in case of slot reuse, we pass NULL_LSN for the oldLsn param 1330 * because the old LN was counted obsolete when it was deleted. 1331 */ 1332 LN.LogResult logResult = null; 1333 try { 1334 logResult = ln.optionalLog( 1335 envImpl, databaseImpl, key, 1336 DbLsn.NULL_LSN /*oldLSN*/, 0 /*oldSize*/, 1337 locker, wli, repContext); 1338 } finally { 1339 if (logResult == null && !isSlotReuse) { 1340 /* 1341 * Possible buffer overflow, out-of-memory, or I/O exception 1342 * during logging. The BIN entry will contain a NULL_LSN. To 1343 * prevent an exception during a future fetchLN() call, we 1344 * set the KD flag. We do not call BIN.deleteEntry because it 1345 * does not adjust cursors. We do not add this entry to the 1346 * compressor queue to avoid complexity (this situation is 1347 * rare). 1348 */ 1349 bin.setKnownDeleted(index); 1350 } 1351 } 1352 1353 if (lockStanding == null) { 1354 /* 1355 * No slot reuse; straight insertion. Update LSN in BIN slot. 1356 * The LN is already in the slot. 1357 */ 1358 bin.updateEntry(index, logResult.newLsn, logResult.newSize); 1359 1360 /* 1361 * The following call accounts for extra marshaled memory, i.e., 1362 * memory that was added to the LN as a side-effect of logging it. 1363 * This can happen for FileSummaryLN's only (it is a noop for 1364 * other kinds of LNs). 1365 * 1366 * To avoid violating assertions (e.g., in IN.changeMemorySize), we 1367 * must must finish the memory adjustment while the BIN is still 1368 * latched. [#20069] 1369 * 1370 * This special handling does not apply to slot reuse, because the 1371 * updateEntry() version used in the slot reuse case will recalc 1372 * the BIN memory from scratch, and as a result, will take into 1373 * account the extra marshaled memory. [#20845] 1374 */ 1375 ln.addExtraMarshaledMemorySize(bin); 1376 } else { 1377 1378 /* For DW DBs, set the slot LSN to NULL. Otherwise, noop. */ 1379 bin.prepareForSlotReuse(index); 1380 1381 /* 1382 * Slot reuse. When reusing a slot, the key is replaced in the BIN 1383 * slot. This ensures that the correct key value is used when the 1384 * new key is non-identical to the key in the slot but is 1385 * considered equal by the btree comparator. 1386 */ 1387 bin.updateEntry(index, ln, logResult.newLsn, logResult.newSize, key); 1388 bin.clearKnownDeleted(index); 1389 bin.clearPendingDeleted(index); 1390 } 1391 1392 if (returnNewData != null) { 1393 returnNewData.setData(null); 1394 ln.setEntry(returnNewData); 1395 } 1396 1397 /* Cursor is positioned on new record. */ 1398 setInitialized(); 1399 1400 /* Cache record version for insertion operation. */ 1401 setCurrentVersion(ln.getVLSNSequence(), bin.getLsn(index)); 1402 1403 /* 1404 * It is desirable to evict the LN in a duplicates DB because it will 1405 * never be fetched again. But for deferred-write DBs we should not 1406 * evict a dirty LN since it may be logged unnecessarily. 1407 */ 1408 if (databaseImpl.getSortedDuplicates() && 1409 !databaseImpl.isDeferredWriteMode() && 1410 bin.getTarget(index) != null) { 1411 bin.evictLN(index); 1412 } 1413 1414 traceInsert(Level.FINER, bin, logResult.newLsn, index); 1415 1416 return new Pair<>(lockStanding, true); 1417 } 1418 1419 /** 1420 * Update the record where the cursor is currently positioned at. The 1421 * cursor is registered with this position, the associated bin is latched, 1422 * the BIN slot is not marked deleted, and it has been locked in WRITE 1423 * mode. 1424 */ updateRecordInternal( byte[] key, DatabaseEntry data, DatabaseEntry returnOldData, DatabaseEntry returnNewData, LockStanding lockStanding, ReplicationContext repContext)1425 private OperationStatus updateRecordInternal( 1426 byte[] key, 1427 DatabaseEntry data, 1428 DatabaseEntry returnOldData, 1429 DatabaseEntry returnNewData, 1430 LockStanding lockStanding, 1431 ReplicationContext repContext) { 1432 1433 final EnvironmentImpl envImpl = databaseImpl.getEnv(); 1434 final DbType dbType = databaseImpl.getDbType(); 1435 1436 assert lockStanding.recordExists(); 1437 final long oldLsn = lockStanding.lsn; 1438 assert oldLsn != DbLsn.NULL_LSN; 1439 1440 final LN.LogResult logResult; 1441 1442 /* 1443 * Must fetch LN if any of the following are true: 1444 * - returnOldData is non-null: data needs to be returned 1445 * - data is a partial entry: needs to be resolved 1446 * - CLEANER_FETCH_OBSOLETE_SIZE is configured and lastLoggedSize 1447 * is unknown 1448 * - this database does not use the standard LN class and we 1449 * cannot call DbType.createdUpdatedLN further below (this is 1450 * the case for NameLNs, MapLNs, and FileSummaryLNs). 1451 * For other cases, we are careful not to fetch, in order to avoid 1452 * a random read during an update operation. 1453 * 1454 * Note that we checked for a deleted/cleaned LN above, so we know 1455 * that fetchLN will not return null. 1456 */ 1457 final int lastLoggedSize = bin.getLastLoggedSize(index); 1458 LN ln; 1459 1460 if (returnOldData != null || 1461 data.getPartial() || 1462 (lastLoggedSize == 0 && 1463 envImpl.getCleaner().getFetchObsoleteSize(databaseImpl)) || 1464 !dbType.mayCreateUpdatedLN()) { 1465 /* Must fetch. */ 1466 ln = bin.fetchLN(index, cacheMode); 1467 } else { 1468 /* If resident, use LN for obsolete size tracking. */ 1469 ln = (LN) bin.getTarget(index); 1470 } 1471 1472 final byte[] oldKey = bin.getKey(index); 1473 final byte[] foundDataBytes = (ln != null) ? ln.getData() : null; 1474 final byte[] newData = (data.getPartial() ? 1475 LN.resolvePartialEntry(data, foundDataBytes) : 1476 LN.copyEntryData(data)); 1477 1478 /* 1479 * If the key is changed (according to the comparator), we assume 1480 * it is actually the data that has changed for a duplicate's DB. 1481 */ 1482 if (key != null && 1483 Key.compareKeys( 1484 oldKey, key, databaseImpl.getKeyComparator()) != 0) { 1485 throw new DuplicateDataException( 1486 "Can't replace a duplicate with new data that is not " + 1487 "equal to the existing data according to the duplicate " + 1488 " comparator."); 1489 } 1490 1491 if (returnOldData != null) { 1492 assert foundDataBytes != null; 1493 returnOldData.setData(null); 1494 LN.setEntry(returnOldData, foundDataBytes); 1495 } 1496 1497 /* Update the existing LN, if cached, else create new LN. */ 1498 final long oldLNMemSize; 1499 if (ln != null) { 1500 /* LN is cached: set ln.data to newData and mark ln dirty. */ 1501 oldLNMemSize = ln.getMemorySizeIncludedByParent(); 1502 ln.modify(newData); 1503 } else { 1504 /* LN is not cached, create new LN for logging. */ 1505 ln = dbType.createUpdatedLN(envImpl, newData); 1506 /* Make new LN resident. */ 1507 bin.attachNode(index, ln, null /*lnSlotKey*/); 1508 oldLNMemSize = ln.getMemorySizeIncludedByParent(); 1509 } 1510 1511 /* 1512 * Create a new WriteLockInfo or get an existing one for the LSN 1513 * of the current slot, and set its abortLSN and abortKD fields, 1514 * if needed, i.e, if it is not the current txn the one who created 1515 * this LSN. The abortLSN and abortKD fields of the wli will be 1516 * included in the new logrec. 1517 */ 1518 WriteLockInfo wli = lockStanding.prepareForUpdate(); 1519 1520 /* Log the new record version and lock its new LSN . */ 1521 logResult = ln.optionalLog( 1522 envImpl, databaseImpl, (key != null ? key : oldKey), 1523 oldLsn, lastLoggedSize, locker, wli, repContext); 1524 1525 /* Return a copy of resulting data, if requested. [#16932] */ 1526 if (returnNewData != null) { 1527 returnNewData.setData(null); 1528 ln.setEntry(returnNewData); 1529 } 1530 1531 /* 1532 * Update the parent BIN. Update the key, if changed. [#15704] 1533 */ 1534 bin.updateEntry( 1535 index, oldLNMemSize, logResult.newLsn, logResult.newSize, key); 1536 1537 /* Cache record version for update operation. */ 1538 setCurrentVersion(ln.getVLSNSequence(), logResult.newLsn); 1539 1540 /* 1541 * It is desirable to evict the LN in a duplicates DB because it 1542 * will never be fetched again. But for deferred-write DBs we 1543 * should not evict a dirty LN since it may be logged unnecessarily. 1544 */ 1545 if (databaseImpl.getSortedDuplicates() && 1546 !databaseImpl.isDeferredWriteMode() && 1547 bin.getTarget(index) != null) { 1548 bin.evictLN(index); 1549 } 1550 1551 trace(Level.FINER, TRACE_MOD, bin, index, oldLsn, logResult.newLsn); 1552 1553 return OperationStatus.SUCCESS; 1554 } 1555 1556 /** 1557 * Position the cursor at the first or last record of the databaseImpl. 1558 * It's okay if this record is deleted. 1559 * 1560 * The cursor must initially be uninitialized. 1561 * 1562 * Returns with the target BIN latched! 1563 * 1564 * @return true if a first or last position is found, false if the 1565 * tree being searched is empty. 1566 */ positionFirstOrLast(boolean first)1567 public boolean positionFirstOrLast(boolean first) 1568 throws DatabaseException { 1569 1570 assert assertCursorState( 1571 false /*mustBeInitialized*/, true /*mustNotBeInitialized*/); 1572 1573 boolean found = false; 1574 1575 try { 1576 if (first) { 1577 bin = databaseImpl.getTree().getFirstNode(cacheMode); 1578 } else { 1579 bin = databaseImpl.getTree().getLastNode(cacheMode); 1580 } 1581 1582 if (bin != null) { 1583 1584 TreeWalkerStatsAccumulator treeStatsAccumulator = 1585 getTreeStatsAccumulator(); 1586 1587 if (bin.getNEntries() == 0) { 1588 1589 /* 1590 * An IN was found. Even if it's empty, let Cursor 1591 * handle moving to the first non-deleted entry. 1592 */ 1593 found = true; 1594 index = -1; 1595 } else { 1596 index = (first ? 0 : (bin.getNEntries() - 1)); 1597 1598 if (treeStatsAccumulator != null && 1599 !bin.isEntryKnownDeleted(index) && 1600 !bin.isEntryPendingDeleted(index)) { 1601 treeStatsAccumulator.incrementLNCount(); 1602 } 1603 1604 /* 1605 * Even if the entry is deleted, just leave our 1606 * position here and return. 1607 */ 1608 found = true; 1609 } 1610 } 1611 1612 addCursor(bin); 1613 setInitialized(); 1614 1615 return found; 1616 } catch (final Throwable e) { 1617 /* Release latch on error. */ 1618 releaseBIN(); 1619 throw e; 1620 } 1621 } 1622 1623 /** 1624 * Position this cursor on the slot whose key is the max key less or equal 1625 * to the given search key. 1626 * 1627 * To be more precise, let K1 be search key. The method positions the 1628 * cursor on the BIN that should contain K1. If the BIN does contain K1, 1629 * this.index is set to the containing slot. Otherwise, this.index is 1630 * set to the right-most slot whose key is < K1, or to -1 if K1 is < than 1631 * all keys in the BIN. 1632 * 1633 * The cursor must initially be uninitialized. 1634 * 1635 * The method returns with the BIN latched, unless an exception is raised. 1636 * 1637 * The method returns an integer that encodes the search outcome: If the 1638 * FOUND bit is not set, the tree is completely empty (has no BINs). If 1639 * the FOUND bit is set, the EXACT_KEY bit says whether K1 was found or 1640 * not and the FOUND_LAST bit says whether the cursor is positioned to the 1641 * very last slot of the BTree (note that this state can only be counted 1642 * on as long as the BIN is latched). 1643 * 1644 * Even if the search returns an exact result, the record may be deleted. 1645 * The caller must therefore check whether the cursor is positioned on a 1646 * deleted record. 1647 * 1648 * This method does not lock the record. The caller is expected to call 1649 * lockAndGetCurrent to perform locking. 1650 */ searchRange( DatabaseEntry searchKey, Comparator<byte[]> comparator)1651 public int searchRange( 1652 DatabaseEntry searchKey, 1653 Comparator<byte[]> comparator) { 1654 1655 assert assertCursorState( 1656 false /*mustBeInitialized*/, true /*mustNotBeInitialized*/); 1657 1658 boolean foundSomething = false; 1659 boolean foundExactKey = false; 1660 boolean foundLast = false; 1661 BINBoundary binBoundary = new BINBoundary(); 1662 1663 try { 1664 byte[] key = Key.makeKey(searchKey); 1665 1666 bin = databaseImpl.getTree().search( 1667 key, Tree.SearchType.NORMAL, binBoundary, cacheMode, 1668 comparator); 1669 1670 if (bin != null) { 1671 1672 foundSomething = true; 1673 if (bin.isBINDelta() && comparator != null) { 1674 1675 /* 1676 * We must mutate a BIN delta if a non-null comparator is 1677 * used. Otherwise, if we positioned the cursor on the 1678 * delta using the non-null comparator, we would not be 1679 * able to adjust its position correctly later when the 1680 * delta gets mutated for some reason (because at that 1681 * later time, the comparator used here would not be 1682 * known). 1683 */ 1684 bin.mutateToFullBIN(); 1685 } 1686 1687 index = bin.findEntry( 1688 key, true /*indicateIfExact*/, false/*exact*/, comparator); 1689 1690 if (bin.isBINDelta() && 1691 (index < 0 || 1692 (index & IN.EXACT_MATCH) == 0 || 1693 binBoundary.isLastBin)) { 1694 1695 /* 1696 * Note: if binBoundary.isLastBin, we must mutate the BIN 1697 * in order to compute the foundLast flag below. 1698 */ 1699 bin.mutateToFullBIN(); 1700 index = bin.findEntry(key, true, false, comparator); 1701 } 1702 1703 if (index >= 0) { 1704 if ((index & IN.EXACT_MATCH) != 0) { 1705 foundExactKey = true; 1706 index &= ~IN.EXACT_MATCH; 1707 } 1708 1709 foundLast = (binBoundary.isLastBin && 1710 index == bin.getNEntries() - 1); 1711 } 1712 1713 /* 1714 * Must call addCursor after mutateToFullBIN() to avoid having 1715 * to reposition "this" inside mutateToFullBIN(), which would 1716 * be both unnecessary and wrong given that this.index could 1717 * have the IN.EXACT_MATCH still on. 1718 */ 1719 addCursor(bin); 1720 } 1721 1722 setInitialized(); 1723 1724 /* Return a multi-part status value */ 1725 return ((foundSomething ? FOUND : 0) | 1726 (foundExactKey ? EXACT_KEY : 0) | 1727 (foundLast ? FOUND_LAST : 0)); 1728 1729 } catch (final Throwable e) { 1730 releaseBIN(); 1731 throw e; 1732 } 1733 } 1734 searchExact(DatabaseEntry searchKey, LockType lockType)1735 public boolean searchExact(DatabaseEntry searchKey, LockType lockType) { 1736 return searchExact(searchKey, lockType, false, false) != null; 1737 } 1738 1739 /** 1740 * Position this cursor on the slot (if any) whose key matches the given 1741 * search key. If no such slot is found or the slot does not hold a "valid" 1742 * record, return null. Otherwise, lock the found record with the specified 1743 * lock type (which may be NONE) and return the LockStanding obj that was 1744 * created by the locking op. Whether the slot contains a "valid" record or 1745 * not depends on the slot's KD/PD flags and the lockType and dirtyReadAll 1746 * parameters. Four cases are considered; they are described in the 1747 * lockLNAndCheckDeleted() method. 1748 * 1749 * The cursor must initially be uninitialized. 1750 * 1751 * The method returns with the BIN latched, unless an exception is raised. 1752 * 1753 * In all cases, the method registers the cursor with the BIN that contains 1754 * or should contain the search key. 1755 * 1756 * @return the LockStanding for the found record, or null if no record was 1757 * found. 1758 */ searchExact( final DatabaseEntry searchKey, final LockType lockType, final boolean dirtyReadAll, final boolean dataRequested)1759 public LockStanding searchExact( 1760 final DatabaseEntry searchKey, 1761 final LockType lockType, 1762 final boolean dirtyReadAll, 1763 final boolean dataRequested) { 1764 1765 assert assertCursorState( 1766 false /*mustBeInitialized*/, true /*mustNotBeInitialized*/); 1767 1768 LockStanding lockStanding = null; 1769 1770 try { 1771 byte[] key = Key.makeKey(searchKey); 1772 1773 bin = databaseImpl.getTree().search(key, cacheMode); 1774 1775 if (bin != null) { 1776 1777 index = bin.findEntry(key, false, true /*exact*/); 1778 1779 if (index < 0 && bin.isBINDelta()) { 1780 1781 if (bin.mayHaveKeyInFullBin(key)) { 1782 bin.mutateToFullBIN(); 1783 index = bin.findEntry(key, false, true /*exact*/); 1784 } 1785 } 1786 1787 addCursor(bin); 1788 1789 if (index >= 0) { 1790 lockStanding = lockLNAndCheckDeleted( 1791 lockType, dirtyReadAll, dataRequested); 1792 } 1793 } 1794 1795 setInitialized(); 1796 return lockStanding; 1797 1798 } catch (final Throwable e) { 1799 /* Release latch on error. */ 1800 releaseBIN(); 1801 throw e; 1802 } 1803 } 1804 1805 /** 1806 * Lock and copy current record into the key and data DatabaseEntry. 1807 * When calling this method, this.bin should not be latched already. 1808 * On return, this.bin is unlatched. 1809 */ lockAndGetCurrent( DatabaseEntry foundKey, DatabaseEntry foundData, final LockType lockType)1810 public OperationStatus lockAndGetCurrent( 1811 DatabaseEntry foundKey, 1812 DatabaseEntry foundData, 1813 final LockType lockType) 1814 throws DatabaseException { 1815 1816 return lockAndGetCurrent( 1817 foundKey, foundData, lockType, false, false, true); 1818 } 1819 1820 /** 1821 * Let S be the slot where this cursor is currently positioned on. If S 1822 * does not hold a "valid" record, return KEYEMPTY. Otherwise, lock the 1823 * record in S with the specified lock type(which may be NONE), copy its 1824 * key and data into the key and data DatabaseEntries, and return SUCCESS. 1825 * Whether the slot contains a "valid" record or not depends on the slot's 1826 * KD/PD flags and the lockType and dirtyReadAll parameters. Four cases are 1827 * considered; they are described in the lockLNAndCheckDeleted() method. 1828 * 1829 * On entry, the isLatched param says whether this.bin is latched or not. 1830 * On return, this.bin is unlatched if the unlatch param is true or an 1831 * exception is thrown. 1832 */ lockAndGetCurrent( DatabaseEntry foundKey, DatabaseEntry foundData, final LockType lockType, final boolean dirtyReadAll, final boolean isLatched, final boolean unlatch)1833 public OperationStatus lockAndGetCurrent( 1834 DatabaseEntry foundKey, 1835 DatabaseEntry foundData, 1836 final LockType lockType, 1837 final boolean dirtyReadAll, 1838 final boolean isLatched, 1839 final boolean unlatch) 1840 throws DatabaseException { 1841 1842 /* Used in the finally to indicate whether exception was raised. */ 1843 boolean success = false; 1844 1845 try { 1846 assert assertCursorState( 1847 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 1848 1849 assert checkAlreadyLatched(isLatched) : dumpToString(true); 1850 1851 if (!isLatched) { 1852 latchBIN(); 1853 } 1854 1855 assert(bin.getCursorSet().contains(this)); 1856 1857 TreeWalkerStatsAccumulator treeStatsAccumulator = 1858 getTreeStatsAccumulator(); 1859 1860 /* 1861 * If we encounter a deleted slot, opportunistically add the BIN 1862 * to the compressor queue. 1863 */ 1864 if (index >= 0 && index < bin.getNEntries() && 1865 (bin.isEntryKnownDeleted(index) || 1866 bin.isEntryPendingDeleted(index))) { 1867 bin.queueSlotDeletion(); 1868 } 1869 1870 /* 1871 * Check the KD flag in the BIN slot and make sure this isn't an 1872 * empty BIN. The BIN could be empty by virtue of the compressor 1873 * running the size of this BIN to 0 but not having yet deleted 1874 * it from the tree. 1875 * 1876 * The index may be negative if we're at an intermediate stage in 1877 * an higher level operation (e.g., the starting search for a range 1878 * scan op), and we expect a higher level method to do a next or 1879 * prev operation after this returns KEYEMPTY. [#11700] 1880 */ 1881 if (index < 0 || 1882 index >= bin.getNEntries() || 1883 bin.isEntryKnownDeleted(index)) { 1884 /* Node is no longer present. */ 1885 if (treeStatsAccumulator != null) { 1886 treeStatsAccumulator.incrementDeletedLNCount(); 1887 } 1888 1889 success = true; 1890 return OperationStatus.KEYEMPTY; 1891 } 1892 1893 assert TestHookExecute.doHookIfSet(testHook); 1894 1895 final boolean dataRequested = 1896 (foundData != null && 1897 (!foundData.getPartial() || 1898 foundData.getPartialLength() != 0)); 1899 1900 if (lockLNAndCheckDeleted( 1901 lockType, dirtyReadAll, dataRequested) == null) { 1902 if (treeStatsAccumulator != null) { 1903 treeStatsAccumulator.incrementDeletedLNCount(); 1904 } 1905 success = true; 1906 return OperationStatus.KEYEMPTY; 1907 } 1908 1909 getCurrent(foundKey, foundData); 1910 1911 success = true; 1912 return OperationStatus.SUCCESS; 1913 1914 } finally { 1915 if (unlatch || !success) { 1916 releaseBIN(); 1917 } 1918 } 1919 } 1920 1921 /** 1922 * Let S be the slot where this cursor is currently positioned on. The 1923 * method locks S (i.e. its LSN), and depending on S's KD/PD flags, it 1924 * returns either null or the LockStanding obj that was created by the 1925 * locking op. The following 4 cases are considered: 1926 * 1927 * 1. If S is not KD/PD, return the LockStanding obj. In this case, we 1928 * know that S holds a valid (non-deleted) record. 1929 * 1930 * 2. If S is KD/PD and the lock type is not NONE, return null. In this 1931 * case, we know that the record that used to be in S has been definitely 1932 * deleted. 1933 * 1934 * 3. If S is KD/PD, the lock kind is NONE, and dirtyReadAll is false, 1935 * return null. This case corresponds to the READ_UNCOMMITTED LockMode. 1936 * The record in S has been deleted, but the deleting txn may be active 1937 * still, and if it aborts later, the record will be restored. To avoid a 1938 * potentially blocking lock, in READ_UNCOMMITTED mode we consider the 1939 * record to be non-existing and return null. 1940 * 1941 * 4. If S is KD/PD, the lock kind is NONE, and dirtyReadAll is true, lock 1942 * the record in READ mode. This case corresponds to the 1943 * READ_UNCOMMITTED_ALL LockMode, which requires that we do not skip 1944 * "provisionally deleted" records. There are two sub-cases: 1945 * 1946 * 4a. If dataRequested is false, we wait until the deleting txn finishes. 1947 * In this case the READ lock is blocking. If after the lock is 1948 * granted S is still KD/PD, release the lock and return null. 1949 * Otherwise, release the lock and return the LockStanding obj. 1950 * 1951 * 4b. If dataRequested is true, then we check whether the deleting txn is 1952 * still open by requested a non-blocking READ lock. If the lock is 1953 * granted then the deleting txn is closed or this cursor's locker is 1954 * the deleter, and we proceed as if the READ lock was granted in 4a. 1955 * If the lock is denied then the deleting txn is still open, and we 1956 * return the LockStanding obj so that the record is not skipped. 1957 * 1958 * The BIN must be latched on entry and is latched on exit. 1959 * 1960 * @param dirtyReadAll is true if using LockMode.READ_UNCOMMITTED_ALL. 1961 * 1962 * @param dataRequested is true if the read operation should return the 1963 * record data, meaning that a blocking lock must be used for dirtyReadAll. 1964 * Is ignored if dirtyReadAll is false. Is always false for a dup DB, 1965 * since data is never requested for dup DB ops at the CursorImpl level. 1966 */ lockLNAndCheckDeleted( final LockType lockType, final boolean dirtyReadAll, final boolean dataRequested)1967 private LockStanding lockLNAndCheckDeleted( 1968 final LockType lockType, 1969 final boolean dirtyReadAll, 1970 final boolean dataRequested) { 1971 1972 assert !(dirtyReadAll && lockType != LockType.NONE); 1973 assert !(dataRequested && databaseImpl.getSortedDuplicates()); 1974 1975 LockStanding standing = lockLN(lockType); 1976 1977 if (standing.recordExists()) { 1978 return standing; 1979 } 1980 1981 /* The slot is KD/PD. */ 1982 1983 if (lockType != LockType.NONE) { 1984 revertLock(standing); 1985 /* 1986 * The deletion was committed by another locker, or has been 1987 * performed by this locker. 1988 */ 1989 return null; 1990 } 1991 1992 /* We're using dirty-read. The lockLN above did not actually lock. */ 1993 1994 if (!dirtyReadAll) { 1995 /* READ_UNCOMMITTED -- skip deleted records without locking. */ 1996 return null; 1997 } 1998 1999 /* 2000 * READ_UNCOMMITTED_ALL -- get a read lock. Whether we can request a 2001 * no-wait or a blocking lock depends on the dataRequested parameter. 2002 * 2003 * Although there is some redundant processing in the sense that lockLN 2004 * is called more than once (above and below), this is not considered a 2005 * performance issue because accessing deleted records is normally 2006 * infrequent. Deleted slots are normally compressed away quickly. 2007 */ 2008 standing = lockLN( 2009 LockType.READ, false /*allowUncontended*/, 2010 !dataRequested /*noWait*/); 2011 2012 if (standing.lockResult.getLockGrant() == LockGrantType.DENIED) { 2013 2014 /* 2015 * The no-wait lock request was denied, which means the data is not 2016 * needed and the deleting transaction is still open. The deleted 2017 * record should not be skipped in this case, according to the 2018 * definition of READ_UNCOMMITTED_ALL. 2019 */ 2020 assert !standing.recordExists(); 2021 return standing; 2022 } 2023 2024 /* We have acquired a temporary read lock. */ 2025 revertLock(standing); 2026 2027 if (standing.recordExists()) { 2028 /* Another txn aborted the deletion while we waited. */ 2029 return standing; 2030 } 2031 2032 /* 2033 * The deletion was committed by another locker, or has been performed 2034 * by this locker. 2035 */ 2036 return null; 2037 } 2038 2039 /** 2040 * Copy current record into the key and data DatabaseEntry. 2041 */ getCurrent( DatabaseEntry foundKey, DatabaseEntry foundData)2042 public void getCurrent( 2043 DatabaseEntry foundKey, 2044 DatabaseEntry foundData) { 2045 2046 assert(bin.isLatchExclusiveOwner()); 2047 assert(index >= 0 && index < bin.getNEntries()); 2048 assert(!bin.isEntryKnownDeleted(index)); 2049 2050 /* 2051 * We don't need to fetch the LN if the user has not requested that we 2052 * return the data, or if we know for sure that the LN is empty. 2053 */ 2054 final boolean isEmptyLN = databaseImpl.isLNImmediatelyObsolete(); 2055 2056 final boolean dataRequested = 2057 (foundData != null && 2058 (!foundData.getPartial() || foundData.getPartialLength() != 0)); 2059 2060 final LN ln = (!isEmptyLN && dataRequested ? 2061 bin.fetchLN(index, cacheMode) : 2062 null); 2063 2064 /* Return the data. */ 2065 if (dataRequested) { 2066 assert(ln != null || isEmptyLN); 2067 2068 byte[] lnData = 2069 (ln != null ? ln.getData() : LogUtils.ZERO_LENGTH_BYTE_ARRAY); 2070 2071 LN.setEntry(foundData, lnData); 2072 } 2073 2074 /* Return the key */ 2075 if (foundKey != null) { 2076 LN.setEntry(foundKey, bin.getKey(index)); 2077 } 2078 2079 /* Cache record version for fetch operation. */ 2080 final long vlsn = (ln != null ? 2081 ln.getVLSNSequence() : 2082 bin.getVLSN(index, false /*allowFetch*/, cacheMode)); 2083 2084 setCurrentVersion(vlsn, bin.getLsn(index)); 2085 } 2086 getCurrentLN(final boolean isLatched, final boolean unlatch)2087 public LN getCurrentLN(final boolean isLatched, final boolean unlatch) 2088 throws DatabaseException { 2089 2090 /* Used in the finally to indicate whether exception was raised. */ 2091 boolean success = false; 2092 2093 try { 2094 assert assertCursorState( 2095 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 2096 assert checkAlreadyLatched(isLatched) : dumpToString(true); 2097 2098 if (!isLatched) { 2099 latchBIN(); 2100 } 2101 2102 assert(bin.getCursorSet().contains(this)); 2103 2104 LN ln = bin.fetchLN(index, cacheMode); 2105 2106 success = true; 2107 return ln; 2108 } finally { 2109 if (unlatch || !success) { 2110 releaseBIN(); 2111 } 2112 } 2113 } 2114 2115 /** 2116 * Retrieve the current LN, return with the target bin unlatched. 2117 */ lockAndGetCurrentLN(final LockType lockType)2118 public LN lockAndGetCurrentLN(final LockType lockType) 2119 throws DatabaseException { 2120 2121 return lockAndGetCurrentLN(lockType, false, true); 2122 } 2123 2124 /** 2125 * Retrieve the current LN. On entry, the isLatched param says whether 2126 * this.bin is latched or not. On return, this.bin is unlatched if the 2127 * unlatch param is true or an exception is thrown. 2128 */ lockAndGetCurrentLN(final LockType lockType, final boolean isLatched, final boolean unlatch)2129 public LN lockAndGetCurrentLN(final LockType lockType, 2130 final boolean isLatched, 2131 final boolean unlatch) 2132 throws DatabaseException { 2133 2134 /* Used in the finally to indicate whether exception was raised. */ 2135 boolean success = false; 2136 2137 try { 2138 assert assertCursorState( 2139 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 2140 assert checkAlreadyLatched(isLatched) : dumpToString(true); 2141 2142 if (!isLatched) { 2143 latchBIN(); 2144 } 2145 2146 assert(bin.getCursorSet().contains(this)); 2147 2148 LockStanding lockStanding = lockLN(lockType); 2149 if (!lockStanding.recordExists()) { 2150 revertLock(lockStanding); 2151 success = true; 2152 return null; 2153 } 2154 2155 LN ln = bin.fetchLN(index, cacheMode); 2156 2157 success = true; 2158 return ln; 2159 } finally { 2160 if (unlatch || !success) { 2161 releaseBIN(); 2162 } 2163 } 2164 } 2165 2166 /** 2167 * Returns the VLSN and LSN for the record at the current position. Must 2168 * be called when the cursor is positioned on a record. 2169 * 2170 * If this method is called on a secondary cursor, the version of the 2171 * associated primary record is returned. In that case, the allowFetch 2172 * parameter is ignored, and the version is available only if the primary 2173 * record was retrieved (see setSecondaryCurrentVersion). 2174 * 2175 * @param allowFetch is true to fetch the LN to get the VLSN, or false to 2176 * return -1 for the VLSN if both the LN and VLSN are not cached. 2177 * 2178 * @throws IllegalStateException if the cursor is closed or uninitialized, 2179 * or this is a secondary cursor and the version is not cached. 2180 */ getCurrentVersion(boolean allowFetch)2181 public RecordVersion getCurrentVersion(boolean allowFetch) { 2182 2183 /* Ensure cursor is open and initialized. */ 2184 checkCursorState( 2185 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 2186 2187 /* 2188 * For a secondary cursor, the cached version is all we have. 2189 * See setSecondaryCurrentVersion. 2190 */ 2191 if (isSecondaryCursor) { 2192 if (currentRecordVersion == null) { 2193 throw new IllegalStateException( 2194 "Record version is available via a SecondaryCursor only " + 2195 "if the associated primary record was retrieved."); 2196 } 2197 return currentRecordVersion; 2198 } 2199 2200 /* 2201 * Use cached version if available. Do not use cached version if it 2202 * does not contain a VLSN, and VLSNs are preserved, and fetching is 2203 * allowed; instead, try to fetch it below. 2204 */ 2205 if (currentRecordVersion != null) { 2206 if ((currentRecordVersion.getVLSN() != 2207 VLSN.NULL_VLSN.getSequence()) || 2208 !allowFetch || 2209 !databaseImpl.getEnv().getPreserveVLSN()) { 2210 return currentRecordVersion; 2211 } 2212 } 2213 2214 /* Get the VLSN from the BIN, create the version and cache it. */ 2215 latchBIN(); 2216 try { 2217 setCurrentVersion( 2218 bin.getVLSN(index, allowFetch, cacheMode), bin.getLsn(index)); 2219 } finally { 2220 releaseBIN(); 2221 } 2222 return currentRecordVersion; 2223 } 2224 setCurrentVersion(long vlsn, long lsn)2225 private void setCurrentVersion(long vlsn, long lsn) { 2226 if (isSecondaryCursor) { 2227 /* NOP. Version is set by setSecondaryCurrentVersion. */ 2228 return; 2229 } 2230 currentRecordVersion = new RecordVersion(vlsn, lsn); 2231 } 2232 2233 /** 2234 * Returns the cached version without attempting to construct it. 2235 */ getCachedRecordVersion()2236 public RecordVersion getCachedRecordVersion() { 2237 return currentRecordVersion; 2238 } 2239 2240 /** 2241 * When the primary record is read during a secondary operation, this 2242 * method is called to copy the primary version here. Since secondary 2243 * records do not have a version of their own, the secondary cursor 2244 * returns the version of the primary record. 2245 * 2246 * @param primaryVersion the version retrieved from the primary cursor 2247 * using getCachedRecordVersion. 2248 */ setSecondaryCurrentVersion(RecordVersion primaryVersion)2249 public void setSecondaryCurrentVersion(RecordVersion primaryVersion) { 2250 assert isSecondaryCursor; 2251 currentRecordVersion = primaryVersion; 2252 } 2253 2254 /** 2255 * Advance a cursor. Used so that verify can advance a cursor even in the 2256 * face of an exception [12932]. 2257 * @param key on return contains the key if available, or null. 2258 * @param data on return contains the data if available, or null. 2259 */ advanceCursor(DatabaseEntry key, DatabaseEntry data)2260 public boolean advanceCursor(DatabaseEntry key, DatabaseEntry data) { 2261 2262 BIN oldBin = bin; 2263 int oldIndex = index; 2264 2265 key.setData(null); 2266 data.setData(null); 2267 2268 try { 2269 getNext( 2270 key, data, LockType.NONE, false /*dirtyReadAll*/, 2271 true /*forward*/, false /*isLatched*/, 2272 null /*rangeConstraint*/); 2273 } catch (DatabaseException ignored) { 2274 /* Klockwork - ok */ 2275 } 2276 2277 /* 2278 * If the position changed, regardless of an exception, then we believe 2279 * that we have advanced the cursor. 2280 */ 2281 if (bin != oldBin || index != oldIndex) { 2282 2283 /* 2284 * Return the key and data from the BIN entries, if we were not 2285 * able to read it above. 2286 */ 2287 if (key.getData() == null && bin != null && index > 0) { 2288 LN.setEntry(key, bin.getKey(index)); 2289 } 2290 return true; 2291 } else { 2292 return false; 2293 } 2294 } 2295 2296 /** 2297 * Move the cursor forward and return the next "valid" record. Whether a 2298 * slot contains a "valid" record or not depends on the slot's KD/PD flags 2299 * and the lockType and dirtyReadAll parameters. Four cases are considered; 2300 * they are described in the lockLNAndCheckDeleted() method. 2301 * 2302 * This will cross BIN boundaries. On return, no latches are held. If no 2303 * exceptions, the cursor is registered with its new location. 2304 * 2305 * @param foundKey DatabaseEntry to use for returning key 2306 * 2307 * @param foundData DatabaseEntry to use for returning data 2308 * 2309 * @param forward if true, move forward, else move backwards 2310 * 2311 * @param isLatched if true, the bin that we're on is already 2312 * latched. 2313 * 2314 * @param rangeConstraint if non-null, is called to determine whether a key 2315 * is out of range. 2316 * 2317 * @return the status. 2318 */ getNext( DatabaseEntry foundKey, DatabaseEntry foundData, LockType lockType, boolean dirtyReadAll, boolean forward, boolean isLatched, RangeConstraint rangeConstraint)2319 public OperationStatus getNext( 2320 DatabaseEntry foundKey, 2321 DatabaseEntry foundData, 2322 LockType lockType, 2323 boolean dirtyReadAll, 2324 boolean forward, 2325 boolean isLatched, 2326 RangeConstraint rangeConstraint) 2327 throws DatabaseException { 2328 2329 assert assertCursorState( 2330 true /*mustBeInitialized*/, false /*mustNotBeInitialized*/); 2331 2332 assert checkAlreadyLatched(isLatched) : dumpToString(true); 2333 2334 OperationStatus result = OperationStatus.NOTFOUND; 2335 BIN anchorBIN = null; 2336 2337 try { 2338 while (bin != null) { 2339 2340 assert checkAlreadyLatched(isLatched) : dumpToString(true); 2341 2342 if (!isLatched) { 2343 latchBIN(); 2344 isLatched = true; 2345 } 2346 2347 if (DEBUG) { 2348 verifyCursor(bin); 2349 } 2350 2351 bin.mutateToFullBIN(); 2352 2353 /* Is there anything left on this BIN? */ 2354 if ((forward && ++index < bin.getNEntries()) || 2355 (!forward && --index > -1)) { 2356 2357 if (rangeConstraint != null && 2358 !rangeConstraint.inBounds(bin.getKey(index))) { 2359 2360 result = OperationStatus.NOTFOUND; 2361 releaseBIN(); 2362 break; 2363 } 2364 2365 OperationStatus ret = lockAndGetCurrent( 2366 foundKey, foundData, lockType, dirtyReadAll, 2367 true /*isLatched*/, false /*unlatch*/); 2368 2369 if (LatchSupport.TRACK_LATCHES) { 2370 LatchSupport.expectBtreeLatchesHeld(1); 2371 } 2372 2373 if (ret == OperationStatus.SUCCESS) { 2374 incrementLNCount(); 2375 releaseBIN(); 2376 result = OperationStatus.SUCCESS; 2377 break; 2378 } else { 2379 continue; 2380 } 2381 } else { 2382 /* 2383 * Make sure that the current BIN will not be pruned away 2384 * if it is or becomes empty after it gets unlatched by 2385 * Tree.getNextBin() or Tree.getPrevBin(). The operation 2386 * of these Tree methods relies on the current BIN not 2387 * getting pruned. 2388 */ 2389 anchorBIN = bin; 2390 anchorBIN.pin(); 2391 bin.removeCursor(this); 2392 bin = null; 2393 2394 final Tree tree = databaseImpl.getTree(); 2395 2396 /* SR #12736 Try to prune away oldBin */ 2397 assert TestHookExecute.doHookIfSet(testHook); 2398 2399 if (forward) { 2400 bin = tree.getNextBin(anchorBIN, cacheMode); 2401 index = -1; 2402 } else { 2403 bin = tree.getPrevBin(anchorBIN, cacheMode); 2404 if (bin != null) { 2405 index = bin.getNEntries(); 2406 } 2407 } 2408 isLatched = true; 2409 2410 if (bin == null) { 2411 if (LatchSupport.TRACK_LATCHES) { 2412 LatchSupport.expectBtreeLatchesHeld(0); 2413 } 2414 result = OperationStatus.NOTFOUND; 2415 break; 2416 } else { 2417 if (LatchSupport.TRACK_LATCHES) { 2418 LatchSupport.expectBtreeLatchesHeld(1); 2419 } 2420 2421 addCursor(); 2422 anchorBIN.unpin(); 2423 anchorBIN = null; 2424 } 2425 } 2426 } 2427 } finally { 2428 if (anchorBIN != null) { 2429 anchorBIN.unpin(); 2430 } 2431 } 2432 2433 if (LatchSupport.TRACK_LATCHES) { 2434 LatchSupport.expectBtreeLatchesHeld(0); 2435 } 2436 2437 return result; 2438 } 2439 2440 /** 2441 * Used to detect phantoms during "get next" operations with serializable 2442 * isolation. If this method returns true, the caller should restart the 2443 * operation from the prior position. 2444 * 2445 * Something may have been added to the original cursor (cursorImpl) while 2446 * we were getting the next BIN. cursorImpl would have been adjusted 2447 * properly but we would have skipped a BIN in the process. This can 2448 * happen when all INs are unlatched in Tree.getNextBin. It can also 2449 * happen without a split, simply due to inserted entries in the previous 2450 * BIN. 2451 * 2452 * @return true if an unaccounted for insertion happened. 2453 * 2454 * TODO: 2455 * Unfortunately, this method doesn't cover all cases where a phantom may 2456 * have been inserted. Another case is described below. 2457 * 2458 * IN-0 2459 * ---------------------- 2460 * | | 50 | 100 | | 2461 * ---------------------- 2462 * / / \ \ 2463 * / \ 2464 * /---- ----\ 2465 * IN-1 / \ IN-2 2466 * ---------------- ---------------- 2467 * | 60 | 70 | 80 | | | | | 2468 * ---------------- ---------------- 2469 * / | \ / 2470 * / | \ / 2471 * ---------------- ----------------- 2472 * | 81 | 83 | 85 | | 110 | | | 2473 * ---------------- ----------------- 2474 * BIN-3 BIN-4 2475 * 2476 * Initially, the tree looks as above and a cursor (C) is located on the 2477 * last slot of BIN-3. For simplicity, assume no duplicates and no 2478 * serializable isolation. Also assume that C is a sticky cursor. 2479 * 2480 * 1. Thread 1 calls C.getNext(), which calls retrieveNextAllowPhantoms(), 2481 * which duplicates C's cursorImpl, and calls dup.getNext(). 2482 * 2483 * dup.getNext() latches BIN-3, sets dup.binToBeRemoved to BIN-3 and 2484 * then calls Tree.getNextBin(BIN-3). 2485 * 2486 * Tree.getNextBin(BIN-3) does the following: 2487 * - sets searchKey to 85 2488 * - calls Tree.getParentINForChildIN(BIN-3). 2489 * - Tree.getParentINForChildIN(BIN-3) unlatches BIN-3 and searches for 2490 * BIN-3 parent, thus reaching IN-1. 2491 * - IN-1.findEntry(85) sets "index" to 2, 2492 * - "index" is incremented, 2493 * - "moreEntriesThisIn" is set to false, 2494 * - "next" is set to IN-1, . 2495 * - Tree.getParentINForChildIN(IN-1) is called and unlatches IN-1. 2496 * 2497 * Assume at this point thread 1 looses the cpu. 2498 * 2499 * 2. Thread 2 inserts keys 90 and 95, causing a split of both BIN-3 and 2500 * IN-1. So the tree now looks like this: 2501 * 2502 * IN-0 2503 * --------------------------- 2504 * | | 50 | 80 | 100 | | 2505 * --------------------------- 2506 * / / | \ \ 2507 * / | \ 2508 * /--------- | ----------\ 2509 * IN-1 / | \ IN-2 2510 * / IN-5 | \ 2511 * ----------- ----------- ---------------- 2512 * | 60 | 70 | | 80 | 90 | | | | | 2513 * ----------- ----------- ---------------- 2514 * / | / \ / 2515 * / | / \ / 2516 * ---------------- ----------- ----------------- 2517 * | 81 | 83 | 85 | | 90 | 95 | | 110 | | | 2518 * ---------------- ----------- ----------------- 2519 * BIN-3 BIN-6 BIN-4 2520 * 2521 * 2522 * Notice that C.cursorImpl still points to the last slot of BIN-3. 2523 * 2524 * 3. Thread 1 resumes: 2525 * 2526 * - Tree.getParentINForChildIN(IN-1) reaches IN-0. 2527 * - IN-0.findEntry(85) sets "index" to 2, 2528 * - "index" is incremented, 2529 * - "nextIN" is set to IN-2, which is latched. 2530 * - Tree.searchSubTree(IN-2, LEFT) is called, and returns BIN-4. 2531 * - BIN-4 is the result of Tree.getNextBin(BIN-3), i.e., BIN-6 was 2532 * skipped 2533 * 2534 * Now we are back in dup.getNext(): 2535 * - dup.bin is set to BIN-4, dup.index to -1, and dup is added to BIN-4 2536 * - the while loop repeats, dup.index is set to 0, the 1st slot of 2537 * BIN-4 is locked, and dup.getNext() returns SUCCESS. 2538 * 2539 * Now we are back in C.retrieveNextAllowPhantoms(): 2540 * - C.checkForInsertion() is called 2541 * - C.cursorImpl and dup are on different BINs, but the condition: 2542 * origBIN.getNEntries() - 1 > origCursor.getIndex() 2543 * is false, so C.checkForInsertion() returns false. 2544 * 2545 * The end result is that BIN-6 has been missed. This is not be a "bug" for 2546 * non-serializable isolation, but the above scenario applies to 2547 * serializable isolation as well, and in that case, BIN-6 should really 2548 * not be missed. This could be solved by re-implementing 2549 * Tree.getNext/PrevBIN() do a more "logical" kind of search. 2550 */ checkForInsertion( final GetMode getMode, final CursorImpl dupCursor)2551 public boolean checkForInsertion( 2552 final GetMode getMode, 2553 final CursorImpl dupCursor) { 2554 2555 final CursorImpl origCursor = this; 2556 boolean forward = getMode.isForward(); 2557 boolean ret = false; 2558 2559 if (origCursor.bin != dupCursor.bin) { 2560 2561 /* 2562 * We jumped to the next BIN during getNext(). 2563 * 2564 * Be sure to operate on the BIN returned by latchBIN, not a cached 2565 * var [#21121]. 2566 * 2567 * Note that a cursor BIN can change after the check above, but 2568 * that's not relevant; what we're trying to detect are BIN changes 2569 * during the operation that has already completed. 2570 * 2571 * Note that we can call isPendingDeleted without locking. If we 2572 * see a non-committed deleted entry, we'll just iterate around in 2573 * the caller. So a false positive is ok. 2574 */ 2575 origCursor.latchBIN(); 2576 final BIN origBIN = origCursor.bin; 2577 2578 origBIN.mutateToFullBIN(); 2579 2580 try { 2581 if (forward) { 2582 if (origBIN.getNEntries() - 1 > origCursor.getIndex()) { 2583 2584 /* 2585 * We were adjusted to something other than the 2586 * last entry so some insertion happened. 2587 */ 2588 for (int i = origCursor.getIndex() + 1; 2589 i < origBIN.getNEntries(); 2590 i++) { 2591 if (!origBIN.isEntryKnownDeleted(i) && 2592 !origBIN.isEntryPendingDeleted(i)) { 2593 /* See comment above about locking. */ 2594 ret = true; 2595 break; 2596 } 2597 } 2598 } 2599 } else { 2600 if (origCursor.getIndex() > 0) { 2601 2602 /* 2603 * We were adjusted to something other than the 2604 * first entry so some insertion happened. 2605 */ 2606 for (int i = 0; i < origCursor.getIndex(); i++) { 2607 if (!origBIN.isEntryKnownDeleted(i) && 2608 !origBIN.isEntryPendingDeleted(i)) { 2609 /* See comment above about locking. */ 2610 ret = true; 2611 break; 2612 } 2613 } 2614 } 2615 } 2616 } finally { 2617 origCursor.releaseBIN(); 2618 } 2619 return ret; 2620 } 2621 return false; 2622 } 2623 2624 /** 2625 * Skips over entries until a boundary condition is satisfied, either 2626 * because maxCount is reached or RangeConstraint.inBounds returns false. 2627 * 2628 * If a maxCount is passed, this allows advancing the cursor quickly by N 2629 * entries. If a rangeConstraint is passed, this allows returning the 2630 * entry count after advancing until the predicate returns false, e.g., the 2631 * number of entries in a key range. In either case, the number of entries 2632 * advanced is returned. 2633 * 2634 * Optimized to scan using level two of the tree when possible, to avoid 2635 * calling getNextBin/getPrevBin for every BIN of the database. All BINs 2636 * beneath a level two IN can be skipped quickly, with the level two parent 2637 * IN latched, when all of its children BINs are resident and can be 2638 * latched without waiting. When a child BIN is not resident or latching 2639 * waits, we revert to the getNextBin/getPrevBin approach, to avoid keeping 2640 * the parent IN latched for long time periods. 2641 * 2642 * Although this method positions the cursor on the last non-deleted entry 2643 * seen (before the boundary condition is satisfied), because it does not 2644 * lock the LN it is possible that it is deleted by another thread after 2645 * the BIN is unlatched. 2646 * 2647 * @param forward is true to skip forward, false to skip backward. 2648 * 2649 * @param maxCount is the maximum number of non-deleted entries to skip, 2650 * and may be LTE zero if no maximum is enforced. 2651 * 2652 * @param rangeConstraint is a predicate that returns false at a position 2653 * where advancement should stop, or null if no predicate is enforced. 2654 * 2655 * @return the number of non-deleted entries that were skipped. 2656 */ skip( boolean forward, long maxCount, RangeConstraint rangeConstraint)2657 public long skip( 2658 boolean forward, 2659 long maxCount, 2660 RangeConstraint rangeConstraint) { 2661 2662 final CursorImpl c = cloneCursor(true /*samePosition*/); 2663 2664 try { 2665 return c.skipInternal(forward, maxCount, rangeConstraint, this); 2666 } catch (final Throwable e) { 2667 /* 2668 * Get more info on dbsim duplicate.conf failure when c.close below 2669 * throws because the BIN latch is already held. It should have 2670 * been released by skipInternal and therefore an unexpected 2671 * exception must have been throw and the error handling must be 2672 * incorrect. 2673 */ 2674 e.printStackTrace(System.out); 2675 throw e; 2676 } finally { 2677 c.close(); 2678 } 2679 } 2680 2681 2682 /** 2683 * Use this cursor to reference the current BIN in the traversal, to 2684 * prevent the current BIN from being compressed away. But set the given 2685 * finalPositionCursor (the 'user' cursor) position only at non-deleted 2686 * entries, since it should be positioned on a valid entry when this method 2687 * returns. 2688 */ skipInternal( boolean forward, long maxCount, RangeConstraint rangeConstraint, CursorImpl finalPositionCursor)2689 private long skipInternal( 2690 boolean forward, 2691 long maxCount, 2692 RangeConstraint rangeConstraint, 2693 CursorImpl finalPositionCursor) { 2694 2695 /* Start with the entry at the cursor position. */ 2696 final Tree tree = databaseImpl.getTree(); 2697 2698 latchBIN(); 2699 2700 IN parent = null; 2701 BIN prevBin = null; 2702 BIN curBin = bin; 2703 int curIndex = getIndex(); 2704 long count = 0; 2705 boolean success = false; 2706 2707 try { 2708 while (true) { 2709 curBin.mutateToFullBIN(); 2710 2711 /* Skip entries in the current BIN. */ 2712 count = skipEntries( 2713 forward, maxCount, rangeConstraint, finalPositionCursor, 2714 curBin, curIndex, count); 2715 2716 if (count < 0) { 2717 curBin.releaseLatch(); 2718 success = true; 2719 return (- count); 2720 } 2721 2722 /* 2723 * Get the parent IN at level two. The BIN is unlatched by 2724 * getParentINForChildIN. Before releasing the BIN latch, get 2725 * the search key for the last entry. 2726 */ 2727 final byte[] idKey = 2728 (curBin.getNEntries() == 0 ? 2729 curBin.getIdentifierKey() : 2730 (forward ? 2731 curBin.getKey(curBin.getNEntries() - 1) : 2732 curBin.getKey(0))); 2733 2734 final SearchResult result = tree.getParentINForChildIN( 2735 curBin, false, /*useTargetLevel*/ 2736 true, /*doFetch*/ CacheMode.DEFAULT); 2737 2738 parent = result.parent; 2739 2740 if (!result.exactParentFound) { 2741 throw EnvironmentFailureException.unexpectedState( 2742 "Cannot get parent of BIN id=" + 2743 curBin.getNodeId() + " key=" + 2744 Arrays.toString(idKey)); 2745 } 2746 2747 /* 2748 * Find and latch previous child BIN by matching idKey rather 2749 * than using result.index, as in Tree.getNextIN (see comments 2750 * there). 2751 */ 2752 int parentIndex = parent.findEntry(idKey, false, false); 2753 2754 curBin = (BIN) parent.fetchIN(parentIndex); 2755 curBin.latch(); 2756 2757 if (forward ? 2758 (parentIndex < parent.getNEntries() - 1) : 2759 (parentIndex > 0)) { 2760 2761 /* 2762 * There are more entries in the parent. Skip entries for 2763 * child BINs that are resident and can be latched no-wait. 2764 */ 2765 final int incr = forward ? 1 : (-1); 2766 2767 for (parentIndex += incr;; parentIndex += incr) { 2768 2769 prevBin = curBin; 2770 curBin = null; 2771 2772 /* Break is no more entries in parent. */ 2773 if ((forward ? 2774 parentIndex >= parent.getNEntries() : 2775 parentIndex < 0)) { 2776 parent.releaseLatch(); 2777 break; 2778 } 2779 2780 /* 2781 * Latch next child BIN, if cached and unlatched. 2782 * 2783 * Note that although 2 BINs are latched here, this 2784 * can't cause deadlocks because the 2nd latch is 2785 * no-wait. 2786 */ 2787 curBin = (BIN) parent.getTarget(parentIndex); 2788 2789 if (curBin == null || 2790 !curBin.latchNoWait(CacheMode.DEFAULT)) { 2791 parent.releaseLatch(); 2792 break; 2793 } 2794 2795 /* Unlatch the prev BIN */ 2796 prevBin.releaseLatch(); 2797 prevBin = null; 2798 2799 /* Position at new BIN to prevent compression. */ 2800 setPosition(curBin, -1); 2801 2802 curBin.mutateToFullBIN(); 2803 2804 /* Skip entries in new child BIN. */ 2805 count = skipEntries( 2806 forward, maxCount, rangeConstraint, 2807 finalPositionCursor, curBin, 2808 forward ? (-1) : curBin.getNEntries(), count); 2809 2810 if (count < 0) { 2811 parent.releaseLatch(); 2812 curBin.releaseLatch(); 2813 success = true; 2814 return (- count); 2815 } 2816 } 2817 } else { 2818 /* No more entries in the parent. */ 2819 parent.releaseLatch(); 2820 prevBin = curBin; 2821 } 2822 2823 /* 2824 * Only the prevBin is still latched here. Move to the next 2825 * BIN the "hard" way (i.e., via full tree searches). 2826 */ 2827 curBin = forward ? 2828 tree.getNextBin(prevBin, CacheMode.DEFAULT) : 2829 tree.getPrevBin(prevBin, CacheMode.DEFAULT); 2830 2831 assert(!prevBin.isLatchOwner()); 2832 2833 if (curBin == null) { 2834 success = true; 2835 return count; 2836 } 2837 2838 prevBin = null; 2839 curIndex = forward ? (-1) : curBin.getNEntries(); 2840 2841 /* Position at new BIN to prevent compression. */ 2842 setPosition(curBin, -1); 2843 } 2844 } finally { 2845 if (curBin != null && !success) { 2846 curBin.releaseLatchIfOwner(); 2847 } 2848 if (prevBin != null && !success) { 2849 prevBin.releaseLatchIfOwner(); 2850 } 2851 if (parent != null && !success) { 2852 parent.releaseLatchIfOwner(); 2853 } 2854 2855 if (LatchSupport.TRACK_LATCHES) { 2856 LatchSupport.expectBtreeLatchesHeld(0); 2857 } 2858 } 2859 } 2860 2861 /** 2862 * Skip entries in curBin from one past curIndex and onward. Returns 2863 * non-negative count if skipping should continue, or negative count if 2864 * bounds is exceeded. 2865 */ skipEntries( boolean forward, long maxCount, RangeConstraint rangeConstraint, CursorImpl finalPositionCursor, BIN curBin, int curIndex, long count)2866 private long skipEntries( 2867 boolean forward, 2868 long maxCount, 2869 RangeConstraint rangeConstraint, 2870 CursorImpl finalPositionCursor, 2871 BIN curBin, 2872 int curIndex, 2873 long count) { 2874 2875 assert(!curBin.isBINDelta()); 2876 2877 final int incr = forward ? 1 : (-1); 2878 2879 for (int i = curIndex + incr;; i += incr) { 2880 if (forward ? (i >= curBin.getNEntries()) : (i < 0)) { 2881 break; 2882 } 2883 if (rangeConstraint != null && 2884 !rangeConstraint.inBounds(curBin.getKey(i))) { 2885 return (- count); 2886 } 2887 if (!curBin.isEntryKnownDeleted(i) && 2888 !curBin.isEntryPendingDeleted(i)) { 2889 count += 1; 2890 finalPositionCursor.setPosition(curBin, i); 2891 if (maxCount > 0 && count >= maxCount) { 2892 return (- count); 2893 } 2894 } 2895 } 2896 return count; 2897 } 2898 2899 /** 2900 * Returns the stack of ancestor TrackingInfo for the BIN at the cursor, or 2901 * null if a split occurs and the information returned would be 2902 * inconsistent. 2903 * 2904 * Used by CountEstimator. 2905 */ getAncestorPath()2906 public List<TrackingInfo> getAncestorPath() { 2907 2908 /* 2909 * Search for parent of BIN, get TrackingInfo for ancestors. If the 2910 * exact parent is not found, a split occurred and null is returned. 2911 */ 2912 final List<TrackingInfo> trackingList = new ArrayList<TrackingInfo>(); 2913 2914 latchBIN(); 2915 2916 final BIN origBin = bin; 2917 final Tree tree = databaseImpl.getTree(); 2918 2919 final SearchResult result = tree.getParentINForChildIN( 2920 origBin, false, /*useTargetLevel*/ 2921 true /*doFetch*/, CacheMode.UNCHANGED, trackingList); 2922 2923 if (!result.exactParentFound) { 2924 /* Must have been a split. */ 2925 return null; 2926 } 2927 2928 /* 2929 * The parent was found and is now latched. If the child BIN does not 2930 * match the cursor's BIN, then a split occurred and null is returned. 2931 */ 2932 final long binLsn; 2933 try { 2934 if (origBin != result.parent.getTarget(result.index) || 2935 origBin != bin) { 2936 /* Must have been a split. */ 2937 return null; 2938 } 2939 2940 binLsn = result.parent.getLsn(result.index); 2941 bin.latch(); 2942 2943 } finally { 2944 result.parent.releaseLatch(); 2945 } 2946 2947 /* 2948 * The child BIN is now latched. Subtract deleted entries from BIN's 2949 * total entries and adjust the index accordingly. Add TrackingInfo 2950 * for child BIN. 2951 */ 2952 try { 2953 int binEntries = bin.getNEntries(); 2954 int binIndex = getIndex(); 2955 2956 for (int i = bin.getNEntries() - 1; i >= 0; i -= 1) { 2957 2958 if (bin.isEntryKnownDeleted(i) || 2959 bin.isEntryPendingDeleted(i)) { 2960 2961 binEntries -= 1; 2962 if (i < binIndex) { 2963 binIndex -= 1; 2964 } 2965 } 2966 } 2967 2968 final TrackingInfo info = new TrackingInfo( 2969 binLsn, bin.getNodeId(), binEntries, binIndex); 2970 2971 trackingList.add(info); 2972 2973 return trackingList; 2974 2975 } finally { 2976 bin.releaseLatch(); 2977 } 2978 } 2979 2980 /** 2981 * Search for the next key following the given key, and acquire a range 2982 * insert lock on it. If there are no more records following the given 2983 * key, lock the special EOF node for the databaseImpl. 2984 */ lockNextKeyForInsert(DatabaseEntry key)2985 public void lockNextKeyForInsert(DatabaseEntry key) 2986 throws DatabaseException { 2987 2988 DatabaseEntry tempKey = new DatabaseEntry( 2989 key.getData(), key.getOffset(), key.getSize()); 2990 2991 boolean lockedNextKey = false; 2992 boolean latched = true; 2993 2994 try { 2995 while (true) { 2996 2997 int searchResult = searchRange(tempKey, null /*comparator*/); 2998 2999 if ((searchResult & FOUND) != 0 && 3000 (searchResult & FOUND_LAST) == 0) { 3001 3002 /* 3003 * The search positioned "this" on the BIN that should 3004 * contain K1 and this BIN is now latched. If the BIN does 3005 * contain K1, this.index points to K1's slot. Otherwise, 3006 * this.index points to the right-most slot whose key is 3007 * < K1 (or this.index is -1 if K1 is < than all keys in 3008 * the BIN). Furthermore, "this" is NOT positioned on the 3009 * very last slot of the BTree. 3010 * 3011 * Call getNext() to advance "this" to the next *valid* 3012 * (i.e., not marked deleted) slot and lock that slot in 3013 * RANGE_INSERT mode. Normally, getNext() will move the 3014 * cursor to the 1st slot with a key K2 > K1. However, it 3015 * is possible that K2 <= K1 (see the comments in 3016 * Cursor.searchRangeAdvanceAndCheckKey() about how this 3017 * can happen. We handle this race condition by restarting 3018 * the search. 3019 */ 3020 DatabaseEntry tempData = new DatabaseEntry(); 3021 tempData.setPartial(0, 0, true); 3022 3023 OperationStatus status = getNext( 3024 tempKey, tempData, LockType.RANGE_INSERT, 3025 false, true, true, 3026 null /*rangeConstraint*/); 3027 3028 latched = false; 3029 3030 if (status == OperationStatus.SUCCESS) { 3031 3032 Comparator<byte[]> comparator = 3033 databaseImpl.getKeyComparator(); 3034 3035 int c = Key.compareKeys(tempKey, key, comparator); 3036 if (c <= 0) { 3037 tempKey.setData( 3038 key.getData(), key.getOffset(), key.getSize()); 3039 continue; 3040 } 3041 3042 lockedNextKey = true; 3043 } 3044 } 3045 3046 break; 3047 } 3048 } finally { 3049 if (latched) { 3050 releaseBIN(); 3051 } 3052 } 3053 3054 /* Lock the EOF node if no next key was found. */ 3055 if (!lockedNextKey) { 3056 lockEof(LockType.RANGE_INSERT); 3057 } 3058 } 3059 3060 /* 3061 * Locking 3062 */ 3063 3064 /** 3065 * Holds the result of a lockLN operation. A lock may not actually be 3066 * held (getLockResult may return null) if an uncontended lock is allowed. 3067 */ 3068 public static class LockStanding { 3069 private long lsn; 3070 private boolean nullLsn; 3071 private boolean deleted; 3072 private LockResult lockResult; 3073 recordExists()3074 public boolean recordExists() { 3075 return !nullLsn && !deleted; 3076 } 3077 getLockResult()3078 public LockResult getLockResult() { 3079 return lockResult; 3080 } 3081 3082 /** 3083 * Called by update and delete ops, after lockLN() and before logging 3084 * the LN. It returns a WriteLockInfo that is meant to be passed to 3085 * the LN logging method, where its info will be included in the LN 3086 * log entry and also copied into the new WriteLockInfo that will be 3087 * created for the new LSN. 3088 * 3089 * If the locker is not transactional, or the current LSN has not been 3090 * write-locked before by this locker, a new WriteLockInfo is created 3091 * here and its abortLsn and abortKD fields are set. (note: even though 3092 * lockLN() is called before prepareForUpdate(), it may not actually 3093 * acquire a lock because of the uncontended optimization). 3094 * 3095 * Otherwise, a WriteLockInfo exists already. It may have been created 3096 * by the lockLN() call during the current updating op, or a lockLN() 3097 * call during an earlier updating op by the same txn. In the later 3098 * case, the abortLsn and abortKD have been set already and should not 3099 * be overwriten here. 3100 */ prepareForUpdate()3101 public WriteLockInfo prepareForUpdate() { 3102 final boolean abortKnownDeleted = !recordExists(); 3103 WriteLockInfo wri = (lockResult == null ? 3104 null : 3105 lockResult.getWriteLockInfo()); 3106 if (wri == null) { 3107 wri = new WriteLockInfo(); 3108 wri.setAbortLsn(lsn); 3109 wri.setAbortKnownDeleted(abortKnownDeleted); 3110 } else { 3111 lockResult.setAbortLsn(lsn, abortKnownDeleted); 3112 } 3113 return wri; 3114 } 3115 3116 /** 3117 * Creates WriteLockInfo that is appropriate for a newly inserted slot. 3118 * The return value is meant to be passed to an LN logging method and 3119 * copied into the WriteLockInfo for the new LSN. This method is 3120 * static because lockLN is never called prior to logging an LN for a 3121 * newly inserted slot. 3122 */ prepareForInsert()3123 public static WriteLockInfo prepareForInsert() { 3124 final WriteLockInfo wri = new WriteLockInfo(); 3125 return wri; 3126 } 3127 } 3128 3129 /** Does not allow uncontended locks. See lockLN(LockType, boolean). */ lockLN(LockType lockType)3130 public LockStanding lockLN(LockType lockType) 3131 throws LockConflictException { 3132 3133 return lockLN(lockType, false /*allowUncontended*/, false /*noWait*/); 3134 } 3135 3136 /** 3137 * Locks the LN at the cursor position. Attempts to use a non-blocking 3138 * lock to avoid unlatching/relatching. 3139 * 3140 * Retries if necessary, to handle the case where the LSN is changed while 3141 * the BIN is unlatched. Because it re-latches the BIN to check the LSN, 3142 * this serializes access to the LSN for locking, guaranteeing that two 3143 * lockers cannot obtain conflicting locks on the old and new LSNs. 3144 * 3145 * Preconditions: The BIN must be latched. 3146 * 3147 * Postconditions: The BIN is latched. 3148 * 3149 * LN Locking Rules 3150 * ---------------- 3151 * The lock ID for an LN is its LSN in the parent BIN slot. Because the 3152 * LSN changes when logging the LN, only two methods of locking an LN may 3153 * be used to support concurrent access: 3154 * 3155 * 1. This method may be called to lock the old LSN. For read operations, 3156 * that is all that is necessary. For write operations, the new LSN must 3157 * be locked after logging it, which is done by all the LN logging methods. 3158 * Be sure to pass a non-null locker to the LN logging method to lock the 3159 * LN, unless locking is not desired. 3160 * 3161 * 2. A non-blocking lock may be obtained on the old LSN (using 3162 * Locker.nonBlockingLock rather than this method), as long as the lock is 3163 * released before the BIN latch is released. In this case a non-null 3164 * locker is not passed to the LN logging method; locking the new LSN is 3165 * unnecessary because no other thread can access the new LSN until the BIN 3166 * latch is released. 3167 * 3168 * The first method is used for all user operations. The second method is 3169 * used by the cleaner, when flushing dirty deferred-write LNs, and by 3170 * certain btree operations. 3171 * 3172 * Uncontended Lock Optimization 3173 * ----------------------------- 3174 * The allowUncontended param is passed as true for update and delete 3175 * operations as an optimization for the case where no lock on the old LSN 3176 * is held by any locker. In this case we don't need to lock the old LSN 3177 * at all, as long as we log the new LSN before releasing the BIN latch. 3178 * 3179 * 1. Latch BIN 3180 * 2. Determine that no lock/waiter exists for oldLsn 3181 * 3. Log LN and get newLsn 3182 * 4. Lock newLsn 3183 * 5. Update BIN 3184 * 6. Release BIN latch 3185 * 3186 * The oldLsn is never locked, saving operations on the lock table. The 3187 * assumption is that another locker will first have to latch the BIN to 3188 * get oldLsn, before requesting a lock. 3189 * 3190 * A potential problem is that the other locker may release the BIN latch 3191 * before requesting the lock. 3192 * 3193 * This Operation Another Operation 3194 * -------------- ----------------- 3195 * Latch BIN, get oldLsn, release BIN latch 3196 * Step 1 and 2 3197 * Request lock for oldLsn, granted 3198 * Step 3 and 4 3199 * 3200 * Both operations now believe they have an exclusive lock, but they have 3201 * locks on different LSNs. 3202 * 3203 * However, this problem is handled as long as the other lock is performed 3204 * using a lockLN method in this class, which will release the lock and 3205 * retry if the LSN changes while acquiring the lock. Because it 3206 * re-latches the BIN to check the LSN, this will serialize access to the 3207 * LSN for locking, guaranteeing that two conflicting locks cannot be 3208 * granted on the old and new LSNs. 3209 * 3210 * Deferred-Write Locking 3211 * ---------------------- 3212 * When one of the LN optionalLog methods is called, a deferred-write LN is 3213 * dirtied but not actually logged. In order to lock an LN that has been 3214 * inserted but not yet assigned a true LSN, a transient LSNs is assigned. 3215 * These LSNs serve to lock the LN but never appear in the log. See 3216 * LN.assignTransientLsn. 3217 * 3218 * A deferred-write LN is logged when its parent BIN is logged, or when the 3219 * LN is evicted. This will replace transient LSNs with durable LSNs. If 3220 * a lock is held by a cursor on a deferred-write LN when it is logged, the 3221 * same lock is acquired on the new LSN by the cursor. See 3222 * lockAfterLsnChange. 3223 * 3224 * Cleaner Migration Locking 3225 * ------------------------- 3226 * The cleaner takes a non-blocking read lock on the old LSN before 3227 * migrating/logging the LN, while holding the BIN latch. It does not take 3228 * a lock on the new LSN, since it does not need to retain a lock after 3229 * releasing the BIN latch. 3230 * 3231 * Because a read, not write, lock is taken, other read locks may be held 3232 * during migration. After logging, the cleaner calls lockAfterLsnChange 3233 * to lock the new LSN on behalf of other lockers. 3234 * 3235 * For more info on migration locking, see HandleLocker. 3236 * 3237 * Historical Notes 3238 * ---------------- 3239 * In JE 4.1 and earlier, each LN had a node ID that was used for locking, 3240 * rather than using the LSN. The node ID changed only if a deleted slot 3241 * was reused. The node ID was stored in the LN, requiring that the LN be 3242 * fetched when locking the LN. With LSN locking a fetch is not needed. 3243 * 3244 * When LN node IDs were used, deferred-write LNs were not assigned an LSN 3245 * until they were actually logged. Deferred-write LNs were initially 3246 * assigned a null LSN and transient LSNs were not needed. 3247 * 3248 * @param lockType the type of lock requested. 3249 * 3250 * @param allowUncontended is true to return immediately (no lock is taken) 3251 * when no locker holds or waits for the lock. 3252 * 3253 * @param noWait is true to perform a no-wait lock request while keeping 3254 * the BIN latched. The caller must check the lock result to see whether 3255 * the lock was granted. 3256 * 3257 * @return all information about the lock; see LockStanding. 3258 * 3259 * @throws LockConflictException if the lsn is non-null, the lock is 3260 * contended, and a lock could not be obtained by blocking. 3261 */ lockLN(final LockType lockType, final boolean allowUncontended, final boolean noWait)3262 public LockStanding lockLN(final LockType lockType, 3263 final boolean allowUncontended, 3264 final boolean noWait) 3265 throws LockConflictException { 3266 3267 final LockStanding standing = new LockStanding(); 3268 standing.lsn = bin.getLsn(index); 3269 standing.deleted = bin.isEntryKnownDeleted(index) || 3270 bin.isEntryPendingDeleted(index); 3271 3272 /* Ensure that a known-deleted null LSN is not present. */ 3273 if (standing.lsn == DbLsn.NULL_LSN) { 3274 assert bin.isEntryKnownDeleted(index); 3275 standing.nullLsn = true; 3276 return standing; 3277 } 3278 3279 /* 3280 * We can avoid taking a lock if uncontended. However, we must 3281 * call preLogWithoutLock to prevent logging on a replica, and as 3282 * good measure to prepare for undo. 3283 */ 3284 if (allowUncontended && 3285 databaseImpl.getEnv().getTxnManager().getLockManager(). 3286 isLockUncontended(standing.lsn)) { 3287 locker.preLogWithoutLock(databaseImpl); 3288 assert verifyPendingDeleted(lockType); 3289 return standing; 3290 } 3291 3292 /* 3293 * Try a non-blocking lock first, to avoid unlatching. If the default 3294 * is no-wait, use the standard lock method so 3295 * LockNotAvailableException is thrown; there is no need to try a 3296 * non-blocking lock twice. 3297 * 3298 * Even for dirty-read (LockType.NONE) we must call Locker.lock() since 3299 * it checks the locker state and may throw LockPreemptedException. 3300 */ 3301 if (locker.getDefaultNoWait()) { 3302 try { 3303 standing.lockResult = locker.lock( 3304 standing.lsn, lockType, true /*noWait*/, databaseImpl); 3305 } catch (LockConflictException e) { 3306 3307 /* 3308 * Release all latches. Note that we catch 3309 * LockConflictException for simplicity but we expect either 3310 * LockNotAvailableException or LockNotGrantedException. 3311 */ 3312 releaseBIN(); 3313 throw e; 3314 } 3315 } else { 3316 standing.lockResult = locker.nonBlockingLock( 3317 standing.lsn, lockType, false /*jumpAheadOfWaiters*/, 3318 databaseImpl); 3319 } 3320 3321 if (noWait || 3322 standing.lockResult.getLockGrant() != LockGrantType.DENIED) { 3323 /* Lock was granted whiled latched, no need to check LSN. */ 3324 assert verifyPendingDeleted(lockType); 3325 return standing; 3326 } 3327 3328 /* 3329 * Unlatch, get a blocking lock, latch, and get the current LSN from 3330 * the slot. If the LSN changes while unlatched, revert the lock and 3331 * repeat. 3332 */ 3333 while (true) { 3334 3335 /* Request a blocking lock. */ 3336 releaseBIN(); 3337 standing.lockResult = locker.lock( 3338 standing.lsn, lockType, false /*noWait*/, databaseImpl); 3339 3340 /* Check current LSN after locking. */ 3341 latchBIN(); 3342 standing.deleted = bin.isEntryKnownDeleted(index) || 3343 bin.isEntryPendingDeleted(index); 3344 final long newLsn = bin.getLsn(index); 3345 if (standing.lsn != newLsn) { 3346 /* The LSN changed, revert the lock and try again. */ 3347 revertLock(standing); 3348 standing.lsn = newLsn; 3349 /* Ensure that a known-deleted null LSN is not present. */ 3350 if (newLsn == DbLsn.NULL_LSN) { 3351 assert bin.isEntryKnownDeleted(index); 3352 standing.nullLsn = true; 3353 return standing; 3354 } 3355 continue; 3356 } else { 3357 /* If locked correctly, return the result. */ 3358 assert verifyPendingDeleted(lockType); 3359 return standing; 3360 } 3361 } 3362 } 3363 3364 /** 3365 * After logging a deferred-write LN during eviction/checkpoint or a 3366 * migrated LN during cleaning, for every existing lock on the old LSN held 3367 * by another locker, we must lock the new LSN on behalf of that locker. 3368 * 3369 * This is done while holding the BIN latch so that the new LSN does not 3370 * change during the locking process. The BIN must be latched on entry and 3371 * is left latched by this method. 3372 * 3373 * We release the lock on the oldLsn to prevent locks from accumulating 3374 * over time on a HandleLocker, as the cleaner migrates LNs, because 3375 * Database handle locks are legitimately very long-lived. It is important 3376 * to first acquire all newLsn locks and then release the oldLsn locks. 3377 * Releasing an oldLsn lock might allow another locker to acquire it, and 3378 * then acquiring another newLsn lock may encounter a conflict. [#20617] 3379 * 3380 * @see com.sleepycat.je.txn.HandleLocker 3381 * @see #lockLN 3382 */ lockAfterLsnChange(DatabaseImpl dbImpl, long oldLsn, long newLsn, Locker excludeLocker)3383 public static void lockAfterLsnChange(DatabaseImpl dbImpl, 3384 long oldLsn, 3385 long newLsn, 3386 Locker excludeLocker) { 3387 final LockManager lockManager = 3388 dbImpl.getEnv().getTxnManager().getLockManager(); 3389 final Set<LockInfo> owners = lockManager.getOwners(oldLsn); 3390 if (owners == null) { 3391 return; 3392 } 3393 /* Acquire newLsn locks. */ 3394 for (LockInfo lockInfo : owners) { 3395 final Locker locker = lockInfo.getLocker(); 3396 if (locker != excludeLocker) { 3397 locker.lockAfterLsnChange(oldLsn, newLsn, dbImpl); 3398 } 3399 } 3400 /* Release oldLsn locks. */ 3401 for (LockInfo lockInfo : owners) { 3402 final Locker locker = lockInfo.getLocker(); 3403 if (locker != excludeLocker && 3404 locker.allowReleaseLockAfterLsnChange()) { 3405 locker.releaseLock(oldLsn); 3406 } 3407 } 3408 } 3409 3410 /** 3411 * For debugging. Verify that a BINs cursor set refers to the BIN. 3412 */ verifyCursor(BIN bin)3413 private void verifyCursor(BIN bin) 3414 throws DatabaseException { 3415 3416 if (!bin.getCursorSet().contains(this)) { 3417 throw new EnvironmentFailureException 3418 (databaseImpl.getEnv(), 3419 EnvironmentFailureReason.UNEXPECTED_STATE, 3420 "BIN cursorSet is inconsistent"); 3421 } 3422 } 3423 3424 /** 3425 * Calls checkCursorState and asserts false if an exception is thrown. 3426 * Otherwise returns true, so it can be called under an assertion. 3427 */ assertCursorState(boolean mustBeInitialized, boolean mustNotBeInitialized)3428 private boolean assertCursorState(boolean mustBeInitialized, 3429 boolean mustNotBeInitialized) { 3430 try { 3431 checkCursorState(mustBeInitialized, mustNotBeInitialized); 3432 return true; 3433 } catch (RuntimeException e) { 3434 assert false : e.toString() + " " + dumpToString(true); 3435 return false; // for compiler 3436 } 3437 } 3438 3439 /** 3440 * Check that the cursor is open and optionally if it is initialized or 3441 * uninitialized. 3442 * 3443 * @throws IllegalStateException via all Cursor methods that call 3444 * Cursor.checkState (all get and put methods, plus more). 3445 */ checkCursorState(boolean mustBeInitialized, boolean mustNotBeInitialized)3446 public void checkCursorState(boolean mustBeInitialized, 3447 boolean mustNotBeInitialized) { 3448 switch (status) { 3449 case CURSOR_NOT_INITIALIZED: 3450 if (mustBeInitialized) { 3451 throw new IllegalStateException("Cursor not initialized."); 3452 } 3453 break; 3454 case CURSOR_INITIALIZED: 3455 if (mustNotBeInitialized) { 3456 throw EnvironmentFailureException.unexpectedState( 3457 "Cursor is initialized."); 3458 } 3459 if (DEBUG) { 3460 if (bin != null) { 3461 verifyCursor(bin); 3462 } 3463 } 3464 break; 3465 case CURSOR_CLOSED: 3466 throw new IllegalStateException("Cursor has been closed."); 3467 default: 3468 throw EnvironmentFailureException.unexpectedState( 3469 "Unknown cursor status: " + status); 3470 } 3471 } 3472 3473 /** 3474 * Checks that LN deletedness matches KD/PD flag state, at least when the 3475 * LN is resident. Should only be called under an assertion. 3476 */ verifyPendingDeleted(LockType lockType)3477 private boolean verifyPendingDeleted(LockType lockType) { 3478 3479 /* Cannot verify deletedness if LN is not locked. */ 3480 if (lockType == LockType.NONE) { 3481 return true; 3482 } 3483 3484 /* Cannot verify deletedness if cursor is not intialized. */ 3485 if (bin == null || index < 0) { 3486 return true; 3487 } 3488 3489 /* Cannot verify deletedness if LN is not resident. */ 3490 final LN ln = (LN) bin.getTarget(index); 3491 if (ln == null) { 3492 return true; 3493 } 3494 3495 /* 3496 * If the LN is deleted then KD or PD must be set. If the LN is not 3497 * deleted then PD must not be set, but KD may or may not be set since 3498 * it used for various purposes (see IN.java). 3499 */ 3500 final boolean kd = bin.isEntryKnownDeleted(index); 3501 final boolean pd = bin.isEntryPendingDeleted(index); 3502 final boolean lnDeleted = ln.isDeleted(); 3503 assert ((lnDeleted && (kd || pd)) || (!lnDeleted && !pd)) : 3504 "Deleted state mismatch LNDeleted = " + lnDeleted + 3505 " PD = " + pd + " KD = " + kd; 3506 return true; 3507 } 3508 revertLock(LockStanding standing)3509 public void revertLock(LockStanding standing) 3510 throws DatabaseException { 3511 3512 if (standing.lockResult != null) { 3513 revertLock(standing.lsn, standing.lockResult); 3514 standing.lockResult = null; 3515 } 3516 } 3517 3518 /** 3519 * Return this lock to its prior status. If the lock was just obtained, 3520 * release it. If it was promoted, demote it. 3521 */ revertLock(long lsn, LockResult lockResult)3522 private void revertLock(long lsn, LockResult lockResult) 3523 throws DatabaseException { 3524 3525 LockGrantType lockStatus = lockResult.getLockGrant(); 3526 if ((lockStatus == LockGrantType.NEW) || 3527 (lockStatus == LockGrantType.WAIT_NEW)) { 3528 locker.releaseLock(lsn); 3529 } else if ((lockStatus == LockGrantType.PROMOTION) || 3530 (lockStatus == LockGrantType.WAIT_PROMOTION)){ 3531 locker.demoteLock(lsn); 3532 } 3533 } 3534 3535 /** 3536 * Locks the logical EOF node for the databaseImpl. 3537 */ lockEof(LockType lockType)3538 public void lockEof(LockType lockType) 3539 throws DatabaseException { 3540 3541 locker.lock(databaseImpl.getEofLsn(), lockType, 3542 false /*noWait*/, databaseImpl); 3543 } 3544 3545 /** 3546 * @throws EnvironmentFailureException if the underlying environment is 3547 * invalid. 3548 */ checkEnv()3549 public void checkEnv() { 3550 databaseImpl.getEnv().checkIfInvalid(); 3551 } 3552 3553 /** 3554 * Callback object for traverseDbWithCursor. 3555 */ 3556 public interface WithCursor { 3557 3558 /** 3559 * Called for each record in the databaseImpl. 3560 * @return true to continue or false to stop the enumeration. 3561 */ withCursor(CursorImpl cursor, DatabaseEntry key, DatabaseEntry data)3562 boolean withCursor(CursorImpl cursor, 3563 DatabaseEntry key, 3564 DatabaseEntry data) 3565 throws DatabaseException; 3566 } 3567 3568 /** 3569 * Enumerates all records in a databaseImpl non-transactionally and calls 3570 * the withCursor method for each record. Stops the enumeration if the 3571 * callback returns false. 3572 * 3573 * @param db DatabaseImpl to traverse. 3574 * 3575 * @param lockType non-null LockType for reading records. 3576 * 3577 * @param allowEviction should normally be true to evict when performing 3578 * multiple operations, but may be false if eviction is disallowed in a 3579 * particular context. 3580 * 3581 * @param withCursor callback object. 3582 */ traverseDbWithCursor( DatabaseImpl db, LockType lockType, boolean allowEviction, WithCursor withCursor)3583 public static void traverseDbWithCursor( 3584 DatabaseImpl db, 3585 LockType lockType, 3586 boolean allowEviction, 3587 WithCursor withCursor) 3588 throws DatabaseException { 3589 3590 DatabaseEntry key = new DatabaseEntry(); 3591 DatabaseEntry data = new DatabaseEntry(); 3592 Locker locker = null; 3593 CursorImpl cursor = null; 3594 3595 try { 3596 EnvironmentImpl envImpl = db.getEnv(); 3597 3598 locker = LockerFactory.getInternalReadOperationLocker(envImpl); 3599 3600 cursor = new CursorImpl(db, locker); 3601 cursor.setAllowEviction(allowEviction); 3602 3603 if (cursor.positionFirstOrLast(true /*first*/)) { 3604 3605 OperationStatus status = cursor.lockAndGetCurrent( 3606 key, data, lockType, false /*dirtyReadAll*/, 3607 true /*isLatched*/, true /*unlatch*/); 3608 3609 boolean done = false; 3610 while (!done) { 3611 3612 /* 3613 * lockAndGetCurrent may have returned non-SUCCESS 3614 * if the first record is deleted, but we can call getNext 3615 * below to move forward. 3616 */ 3617 if (status == OperationStatus.SUCCESS) { 3618 if (!withCursor.withCursor(cursor, key, data)) { 3619 done = true; 3620 } 3621 } 3622 3623 if (!done) { 3624 status = cursor.getNext( 3625 key, data, lockType, false /*dirtyReadAll*/, 3626 true /*forward*/, false /*isLatched*/, 3627 null /*rangeConstraint*/); 3628 3629 if (status != OperationStatus.SUCCESS) { 3630 done = true; 3631 } 3632 } 3633 } 3634 } 3635 } finally { 3636 if (cursor != null) { 3637 cursor.releaseBIN(); 3638 cursor.close(); 3639 } 3640 if (locker != null) { 3641 locker.operationEnd(); 3642 } 3643 } 3644 } 3645 3646 /** 3647 * Dump the cursor for debugging purposes. Dump the bin that the cursor 3648 * refers to if verbose is true. 3649 */ dump(boolean verbose)3650 public void dump(boolean verbose) { 3651 System.out.println(dumpToString(verbose)); 3652 } 3653 3654 /** 3655 * dump the cursor for debugging purposes. 3656 */ dump()3657 public void dump() { 3658 System.out.println(dumpToString(true)); 3659 } 3660 3661 /* 3662 * dumper 3663 */ statusToString(byte status)3664 private String statusToString(byte status) { 3665 switch(status) { 3666 case CURSOR_NOT_INITIALIZED: 3667 return "CURSOR_NOT_INITIALIZED"; 3668 case CURSOR_INITIALIZED: 3669 return "CURSOR_INITIALIZED"; 3670 case CURSOR_CLOSED: 3671 return "CURSOR_CLOSED"; 3672 default: 3673 return "UNKNOWN (" + Byte.toString(status) + ")"; 3674 } 3675 } 3676 3677 /* 3678 * dumper 3679 */ dumpToString(boolean verbose)3680 public String dumpToString(boolean verbose) { 3681 StringBuilder sb = new StringBuilder(); 3682 3683 sb.append("<Cursor idx=\"").append(index).append("\""); 3684 sb.append(" status=\"").append(statusToString(status)).append("\""); 3685 sb.append(">\n"); 3686 if (verbose) { 3687 sb.append((bin == null) ? "" : bin.dumpString(2, true)); 3688 } 3689 sb.append("\n</Cursor>"); 3690 3691 return sb.toString(); 3692 } 3693 3694 /* 3695 * For unit tests 3696 */ getLockStats()3697 public StatGroup getLockStats() 3698 throws DatabaseException { 3699 3700 return locker.collectStats(); 3701 } 3702 3703 /** 3704 * Send trace messages to the java.util.logger. Don't rely on the logger 3705 * alone to conditionalize whether we send this message, we don't even want 3706 * to construct the message if the level is not enabled. 3707 */ trace(Level level, String changeType, BIN theBin, int lnIndex, long oldLsn, long newLsn)3708 private void trace(Level level, 3709 String changeType, 3710 BIN theBin, 3711 int lnIndex, 3712 long oldLsn, 3713 long newLsn) { 3714 EnvironmentImpl envImpl = databaseImpl.getEnv(); 3715 if (envImpl.getLogger().isLoggable(level)) { 3716 StringBuilder sb = new StringBuilder(); 3717 sb.append(changeType); 3718 sb.append(" bin="); 3719 sb.append(theBin.getNodeId()); 3720 sb.append(" lnIdx="); 3721 sb.append(lnIndex); 3722 sb.append(" oldLnLsn="); 3723 sb.append(DbLsn.getNoFormatString(oldLsn)); 3724 sb.append(" newLnLsn="); 3725 sb.append(DbLsn.getNoFormatString(newLsn)); 3726 3727 LoggerUtils.logMsg 3728 (envImpl.getLogger(), envImpl, level, sb.toString()); 3729 } 3730 } 3731 3732 /** 3733 * Send trace messages to the java.util.logger. Don't rely on the logger 3734 * alone to conditionalize whether we send this message, we don't even want 3735 * to construct the message if the level is not enabled. 3736 */ traceInsert(Level level, BIN insertingBin, long lnLsn, int index)3737 private void traceInsert(Level level, 3738 BIN insertingBin, 3739 long lnLsn, 3740 int index) { 3741 EnvironmentImpl envImpl = databaseImpl.getEnv(); 3742 if (envImpl.getLogger().isLoggable(level)) { 3743 StringBuilder sb = new StringBuilder(); 3744 sb.append(TRACE_INSERT); 3745 sb.append(" bin="); 3746 sb.append(insertingBin.getNodeId()); 3747 sb.append(" lnLsn="); 3748 sb.append(DbLsn.getNoFormatString(lnLsn)); 3749 sb.append(" index="); 3750 sb.append(index); 3751 3752 LoggerUtils.logMsg(envImpl.getLogger(), envImpl, level, 3753 sb.toString()); 3754 } 3755 } 3756 3757 /* For unit testing only. */ setTestHook(TestHook hook)3758 public void setTestHook(TestHook hook) { 3759 testHook = hook; 3760 } 3761 3762 /* Check that the target bin is latched. For use in assertions. */ checkAlreadyLatched(boolean isLatched)3763 private boolean checkAlreadyLatched(boolean isLatched) { 3764 if (isLatched) { 3765 if (bin != null) { 3766 return bin.isLatchExclusiveOwner(); 3767 } 3768 } 3769 return true; 3770 } 3771 } 3772