1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. 5 * 6 */ 7 8 package com.sleepycat.je.tree; 9 10 import java.util.Collections; 11 import java.util.Iterator; 12 import java.util.Set; 13 14 import com.sleepycat.je.CacheMode; 15 import com.sleepycat.je.DatabaseException; 16 import com.sleepycat.je.EnvironmentFailureException; 17 import com.sleepycat.je.cleaner.LocalUtilizationTracker; 18 import com.sleepycat.je.config.EnvironmentParams; 19 import com.sleepycat.je.dbi.CursorImpl; 20 import com.sleepycat.je.dbi.DatabaseImpl; 21 import com.sleepycat.je.dbi.DbConfigManager; 22 import com.sleepycat.je.dbi.DbTree; 23 import com.sleepycat.je.dbi.EnvironmentFailureReason; 24 import com.sleepycat.je.dbi.EnvironmentImpl; 25 import com.sleepycat.je.dbi.MemoryBudget; 26 import com.sleepycat.je.evictor.Evictor; 27 import com.sleepycat.je.log.LogEntryType; 28 import com.sleepycat.je.log.LogManager; 29 import com.sleepycat.je.log.ReplicationContext; 30 import com.sleepycat.je.log.entry.BINDeltaLogEntry; 31 import com.sleepycat.je.log.entry.INLogEntry; 32 import com.sleepycat.je.tree.BINDeltaBloomFilter.HashContext; 33 import com.sleepycat.je.txn.BasicLocker; 34 import com.sleepycat.je.txn.LockGrantType; 35 import com.sleepycat.je.txn.LockResult; 36 import com.sleepycat.je.txn.LockType; 37 import com.sleepycat.je.utilint.DatabaseUtil; 38 import com.sleepycat.je.utilint.DbLsn; 39 import com.sleepycat.je.utilint.SizeofMarker; 40 import com.sleepycat.je.utilint.TinyHashSet; 41 import com.sleepycat.je.utilint.VLSN; 42 43 /** 44 * A BIN represents a Bottom Internal Node in the JE tree. 45 * 46 * BIN-deltas 47 * ========== 48 * A BIN-delta is a BIN with the non-dirty slots omitted. A "full BIN", OTOH 49 * contains all slots. On disk and in memory, the format of a BIN-delta is the 50 * same as that of a BIN. In memory, a BIN object is actually a BIN-delta when 51 * the BIN-delta flag is set (IN.isBINDelta). On disk, the NewBINDelta log 52 * entry type (class BINDeltaLogEntry) is the only thing that distinguishes it 53 * from a full BIN, which has the BIN log entry type. 54 * 55 * BIN-deltas provides two benefits: Reduced writing and reduced memory usage. 56 * 57 * Reduced Writing 58 * --------------- 59 * Logging a BIN-delta rather a full BIN reduces writing significantly. The 60 * cost, however, is that two reads are necessary to reconstruct a full BIN 61 * from scratch. The reduced writing is worth this cost, particularly because 62 * less writing means less log cleaning. 63 * 64 * A BIN-delta is logged when 25% or less (configured with EnvironmentConfig 65 * TREE_BIN_DELTA) of the slots in a BIN are dirty. When a BIN-delta is logged, 66 * the dirty flag is cleared on the the BIN in cache. If more slots are 67 * dirtied and another BIN-delta is logged, it will contain all entries dirtied 68 * since the last full BIN was logged. In other words, BIN-deltas are 69 * cumulative and not chained, to avoid reading many (more than two) log 70 * entries to reconstruct a full BIN. The dirty flag on each slot is cleared 71 * only when a full BIN is logged. 72 * 73 * In addition to the cost of fetching two entries on a BIN cache miss, another 74 * drawback of the current approach is that dirtiness propagates upward in the 75 * Btree due to BIN-delta logging, causing repeated logging of upper INs. The 76 * slot of the parent IN contains the LSN of the most recent BIN-delta or full 77 * BIN that was logged. A BINDeltaLogEntry in turn contains the LSN of the 78 * last full BIN logged. 79 * 80 * Historical note: The pre-JE 5 implementation of OldBINDeltas worked 81 * differently and had a different cost/benefit trade-off. When an 82 * OldBINDelta was logged, its dirty flag was not cleared, causing it to be 83 * logged repeatedly at every checkpoint. A full BIN was logged after 10 84 * deltas, to prevent endless logging of the same BIN. One benefit of this 85 * approach is that the BIN's parent IN was not dirtied when logging the 86 * OldBINDelta, preventing dirtiness from propagating upward. Another 87 * benefit is that the OldBINDelta was only processed by recovery, and did 88 * not have to be fetched to reconstruct a full BIN from scratch on a cache 89 * miss. But the cost (the logging of an OldBINDelta every checkpoint, even 90 * when it hadn't changed since the last time logged) outweighed the 91 * benefits. When the current approach was implemented in JE 5, performance 92 * improved due to less logging. 93 * 94 * In JE 6, deltas were also maintained in the Btree cache. This was done to 95 * provide the reduced memory benefits described in the next section. The 96 * log format for a delta was also changed. The OldBINDelta log format is 97 * different (not the same as the BIN format) and is supported for backward 98 * compatibility as the OldBINDeltaLogEntry. Its log entry type name is 99 * still BINDelta, which is why the new type is named NewBINDelta (for 100 * backward compatibility, log entry type names cannot be changed.) This is 101 * also why the spelling "BIN-delta" is used to refer to deltas in the new 102 * approach. The old BINDelta class was renamed to OldBINDelta and there is 103 * no longer a class named BINDelta. 104 * 105 * Reduced Memory Usage 106 * -------------------- 107 * In the Btree cache, a BIN may be represented as a full BIN or a BIN-delta. 108 * Eviction will mutate a full BIN to a BIN-delta in preference to discarding 109 * the entire BIN. A BIN-delta in cache occupies less memory than a full BIN, 110 * and can be exploited as follows: 111 * 112 * - When a full BIN is needed, it can be constructed with only one fetch 113 * rather than two, reducing IO overall. IN.fetchIN implements this 114 * optimization. 115 * 116 * - Certain operations can sometimes be performed using the BIN-delta alone, 117 * allowing such operations on a given data set to take place using less 118 * less IO (for a given cache size). 119 * 120 * The latter benefit is not yet implemented. No user CRUD operations are 121 * currently implemented using BIN-deltas. In the future we plan to implement 122 * the following operations using the BIN-delta alone. 123 * 124 * - Consider recording deletions in a BIN-delta. Currently, slot deletion 125 * prohibits a BIN-delta from being logged. To record deletion in 126 * BIN-deltas, slot deletion will have to be deferred until a full BIN is 127 * logged. 128 * 129 * - User reads by key, updates and deletions can be implemented if the key 130 * happens to appear in the BIN-delta. 131 * 132 * - The Cleaner can migrate an LN if its key happens to appear in the 133 * BIN-delta. This is similar to a user update operation, but in a 134 * different code path. 135 * 136 * - Insertions, deletions and updates can always be performed in a BIN-delta 137 * during replica replay, since the Master operation has already determined 138 * whether the key exists. 139 * 140 * - Recovery LN redo could also apply insertions, updates and inserts in the 141 * manner described. 142 * 143 * - Add idempotent put/delete operations, which can always be applied in a 144 * BIN-delta. 145 * 146 * - Store a hash of the keys in the full BIN in the BIN-delta and use it to 147 * perform the following in the delta: 148 * - putIfAbsent (true insertion) 149 * - get/delete/putIfPresent operations that return NOTFOUND 150 * - to avoid accumulating unnecessary deletions 151 * 152 * However, some internal operations do currently exploit BIN-deltas to avoid 153 * unnecessary IO. The following are currently implemented. 154 * 155 * - The Evictor and Checkpointer log a BIN-delta that is present in the 156 * cache, without having to fetch the full BIN. 157 * 158 * - The Cleaner can use the BIN-delta to avoid fetching when processing a BIN 159 * log entry (delta or full) and the BIN is not present in cache, 160 * 161 * To support BIB-delta-aware operations, the IN.fetchIN() and IN.getTarget() 162 * methods may return a BIN delta. IN.getTarget() will return whatever object 163 * is cached under the parent IN, and IN.fetchIN() will do a single I/O to 164 * fetch the most recently log record for the requested BIN, which may be a 165 * full BIN or a delta. Callers of these methods must be prepared to handle 166 * a BIN delta; either doing their operation directly on the delta, if 167 * possible, or mutating the delta to a full BIN by calling 168 * BIN.mutateToFullBIN(). 169 */ 170 public class BIN extends IN { 171 172 private static final String BEGIN_TAG = "<bin>"; 173 private static final String END_TAG = "</bin>"; 174 175 /* 176 * The set of cursors that are currently referring to this BIN. 177 * This field is set to null when there are no cursors on this BIN. 178 */ 179 private TinyHashSet<CursorImpl> cursorSet; 180 181 /* 182 * Support for logging BIN deltas. (Partial BIN logging) 183 */ 184 185 /* 186 * If this is a delta, fullBinNEntries stores the number of entries 187 * in the full version of the BIN. This is a persistent field for 188 * BIN-delta logrecs only, and for log versions >= 10. 189 */ 190 private int fullBinNEntries = -1; 191 192 /* 193 * If this is a delta, fullBinMaxEntries stores the max number of 194 * entries (capacity) in the full version of the BIN. This is a 195 * persistent field for BIN-delta logrecs only, and for log versions >= 10. 196 */ 197 private int fullBinMaxEntries = -1; 198 199 /* 200 * If "this" is a BIN-delta, bloomFilter is a bloom-filter representation 201 * of the set of keys in the clean slots of the full version of the same 202 * BIN. It is used to allow blind put operations in deltas, by answering 203 * the question whether the put key is in the full BIN or not. See the 204 * javadoc of the TREE_BIN_DELTA_BLIND_PUTS config param for more info. 205 * This is a persistent field for BIN-delta logrecs only, and for log 206 * versions >= 10. 207 */ 208 byte[] bloomFilter; 209 210 /* 211 * Let L be the most recently written logrec for this BIN instance. If 212 * L is a full-version logrec, lastDeltaVersion is NULL; otherwise it 213 * is the lsn of L. 214 * 215 * It is used for obsolete tracking. 216 * 217 * It is set in 2 cases: 218 * 219 * (a) after "this" is created via reading a logrec L, lastDeltaVersion 220 * is set to L's lsn, if L is a BIN-delta logrec, or to NULL if L is a 221 * full-BIN logrec (this is done in IN.commonPostFetchInit(), via 222 * BIN.setLastLoggedLsn()). 223 * 224 * (b) After we write a logrec L for this BIN instance, lastDeltaVersion 225 * is set to NULL if L is a full-BIN logrec, or to L's lsn, if L is a 226 * BIN-delta logrec. 227 * 228 * Notice that this is a persistent field, but when reading a logrec L, 229 * it is set not to the value found in L, but to the lsn of L. This is why 230 * its read/write is managed by the INLogEntry class rather than the IN 231 * readFromLog/writeFromLog methods. 232 */ 233 private long lastDeltaVersion = DbLsn.NULL_LSN; 234 235 /* 236 * Disallow delta on next log. Set to true (a) when we we delete a slot 237 * from a BIN, (b) when the cleaner marks a BIN as dirty so that it will 238 * be migrated during the next checkpoint. 239 */ 240 private boolean prohibitNextDelta; 241 242 /* 243 * Caches the VLSN sequence for the LN entries in a BIN, when VLSN 244 * preservation and caching are configured. 245 * 246 * A VLSN is added to the cache when an LN is evicted from a BIN. When the 247 * LN is resident, there is no need for caching because the LN contains the 248 * VLSN. See BIN.setTarget. This strategy works because an LN is always 249 * cached during a read or write operation, and only evicted after that, 250 * based on eviction policies. 251 * 252 * An EMPTY_REP is used initially until the need arises to add a non-zero 253 * value. The cache will remain empty if LNs are never evicted or version 254 * caching is not configured, which is always the case for standalone JE. 255 */ 256 private INLongRep vlsnCache = INLongRep.EMPTY_REP; 257 258 /* 259 * Stores the size of the most recently written logrec of each LN, or zero 260 * if the size is unknown. 261 * 262 * We use INLongRep in spite of the fact that sizes are int not long; 263 * INLongRep will store the minimum number of bytes. An EMPTY_REP is 264 * used initially until the need arises to add a non-zero value. 265 */ 266 private INLongRep lastLoggedSizes = INLongRep.EMPTY_REP; 267 268 /** 269 * Can be set to true by tests to prevent last logged sizes from being 270 * stored. 271 */ 272 public static boolean TEST_NO_LAST_LOGGED_SIZES = false; 273 BIN()274 public BIN() { 275 } 276 BIN( DatabaseImpl db, byte[] identifierKey, int capacity, int level)277 public BIN( 278 DatabaseImpl db, 279 byte[] identifierKey, 280 int capacity, 281 int level) { 282 283 super(db, identifierKey, capacity, level); 284 } 285 286 /** 287 * For Sizeof. 288 */ BIN(@uppressWarningsR) SizeofMarker marker)289 public BIN(@SuppressWarnings("unused") SizeofMarker marker) { 290 super(marker); 291 } 292 293 /** 294 * Create a new BIN. Need this because we can't call newInstance() 295 * without getting a 0 for nodeId. 296 */ 297 @Override createNewInstance( byte[] identifierKey, int maxEntries, int level)298 protected IN createNewInstance( 299 byte[] identifierKey, 300 int maxEntries, 301 int level) { 302 303 return new BIN(getDatabase(), identifierKey, maxEntries, level); 304 } 305 306 /** 307 * Create a holder object that encapsulates information about this BIN for 308 * the INCompressor. 309 */ createReference()310 public BINReference createReference() { 311 return new BINReference(getNodeId(), getDatabase().getId(), 312 getIdentifierKey()); 313 } 314 315 @Override isBIN()316 public boolean isBIN() { 317 return true; 318 } 319 320 /* 321 * Return whether the shared latch for this kind of node should be of the 322 * "always exclusive" variety. Presently, only IN's are actually latched 323 * shared. BINs are latched exclusive only. 324 */ 325 @Override isAlwaysLatchedExclusively()326 boolean isAlwaysLatchedExclusively() { 327 return true; 328 } 329 330 @Override shortClassName()331 public String shortClassName() { 332 return "BIN"; 333 } 334 335 @Override beginTag()336 public String beginTag() { 337 return BEGIN_TAG; 338 } 339 340 @Override endTag()341 public String endTag() { 342 return END_TAG; 343 } 344 setCachedVLSN(int idx, long vlsn)345 private void setCachedVLSN(int idx, long vlsn) { 346 347 /* 348 * We do not cache the VLSN for dup DBs, because dup DBs are typically 349 * used only for indexes, and the overhead of VLSN maintenance would be 350 * wasted. Plus, although technically VLSN preservation might apply to 351 * dup DBs, the VLSNs are not reliably available since the LNs are 352 * immediately obsolete. 353 */ 354 if (databaseImpl.getSortedDuplicates() || !getEnv().getCacheVLSN()) { 355 return; 356 } 357 setCachedVLSNUnconditional(idx, vlsn); 358 } 359 setCachedVLSNUnconditional(int idx, long vlsn)360 private void setCachedVLSNUnconditional(int idx, long vlsn) { 361 vlsnCache = vlsnCache.set( 362 idx, 363 (vlsn == VLSN.NULL_VLSN.getSequence()) ? 0 : vlsn, 364 this, getEnv().getCachedVLSNMinLength()); 365 } 366 getCachedVLSN(int idx)367 private long getCachedVLSN(int idx) { 368 final long vlsn = vlsnCache.get(idx); 369 return (vlsn == 0) ? VLSN.NULL_VLSN.getSequence() : vlsn; 370 } 371 372 /** 373 * Returns the VLSN. VLSN.NULL_VLSN.getSequence() (-1) is returned in two 374 * cases: 375 * 1) This is a standalone environment. 376 * 2) The VLSN is not cached (perhaps VLSN caching is not configured), and 377 * the allowFetch param is false. 378 * 379 * WARNING: Because the vlsnCache is only updated when an LN is evicted, it 380 * is critical that getVLSN returns the VLSN for a resident LN before 381 * getting the VLSN from the cache. 382 */ getVLSN(int idx, boolean allowFetch, CacheMode cacheMode)383 public long getVLSN(int idx, boolean allowFetch, CacheMode cacheMode) { 384 385 /* Must return the VLSN from the LN, if it is resident. */ 386 LN ln = (LN) getTarget(idx); 387 if (ln != null) { 388 return ln.getVLSNSequence(); 389 } 390 391 /* Next try the vlsnCache. */ 392 final long vlsn = getCachedVLSN(idx); 393 if (!VLSN.isNull(vlsn)) { 394 return vlsn; 395 } 396 397 /* As the last resort, fetch the LN if fetching is allowed. */ 398 if (!allowFetch) { 399 return vlsn; 400 } 401 ln = fetchLN(idx, cacheMode); 402 return ln.getVLSNSequence(); 403 } 404 405 /** For unit testing. */ getVLSNCache()406 public INLongRep getVLSNCache() { 407 return vlsnCache; 408 } 409 410 /** 411 * The last logged size is never needed when the LN is counted obsolete 412 * immediately, since it is only needed for counting an LN obsolete 413 * during an update or deletion. 414 * 415 * This method may not be called until after the database is initialized, 416 * i,e., it may not be called during readFromLog. 417 */ 418 @Override isLastLoggedSizeStored()419 boolean isLastLoggedSizeStored() { 420 421 /* Check final static first so all test code is optimized away. */ 422 if (DatabaseUtil.TEST) { 423 /* Don't skew test measurements with internal DBs. */ 424 if (TEST_NO_LAST_LOGGED_SIZES && 425 !databaseImpl.getDbType().isInternal()) { 426 return false; 427 } 428 } 429 430 return !databaseImpl.isLNImmediatelyObsolete(); 431 } 432 433 /** 434 * Sets last logged size if necessary. 435 * 436 * This method does not dirty the IN because the caller methods dirty it, 437 * for example, when setting the LSN, key, or node. 438 * 439 * This method is sometimes called to add the logged size for a pre log 440 * version 9 BIN, for example, during fetchTarget and preload. This makes 441 * the logged size available for obsolete counting but does not dirty the 442 * IN, since that could cause an unexpected write of the IN being read. 443 * 444 * @param lastLoggedSize is positive if the size is known, zero if the size 445 * is unknown, or -1 if the size should not be changed because logging of 446 * the LN was deferred. 447 */ 448 @Override setLastLoggedSize(int idx, int lastLoggedSize)449 public void setLastLoggedSize(int idx, int lastLoggedSize) { 450 if ((lastLoggedSize < 0) || !isLastLoggedSizeStored()) { 451 return; 452 } 453 setLastLoggedSizeUnconditional(idx, lastLoggedSize); 454 } 455 456 /** 457 * Sets the size without checking whether it is necessary. 458 * 459 * This method is used when reading from the log because the databaseImpl 460 * is not yet initialized and isLastLoggedSizeStored cannot be called. 461 * It is also called for efficiency reasons when it is known that storing 462 * the logged size is necessary, for example, when copying values between 463 * slots. 464 */ 465 @Override setLastLoggedSizeUnconditional(int idx, int lastLoggedSize)466 void setLastLoggedSizeUnconditional(int idx, int lastLoggedSize) { 467 /* minLength (last param) is 1 since log sizes are unpredictable. */ 468 lastLoggedSizes = lastLoggedSizes.set(idx, lastLoggedSize, this, 1); 469 } 470 471 /** 472 * @return a positive value if the size is known, or zero if unknown. 473 */ 474 @Override getLastLoggedSize(int idx)475 public int getLastLoggedSize(int idx) { 476 return (int) lastLoggedSizes.get(idx); 477 } 478 479 /** 480 * Updates the vlsnCache when an LN target is evicted. See vlsnCache. 481 */ 482 @Override setTarget(int idx, Node target)483 void setTarget(int idx, Node target) { 484 if (target == null) { 485 final Node oldTarget = getTarget(idx); 486 if (oldTarget instanceof LN) { 487 setCachedVLSN(idx, ((LN) oldTarget).getVLSNSequence()); 488 } 489 } 490 super.setTarget(idx, target); 491 } 492 493 /** 494 * Overridden to account for vlsnCache and lastLoggedSizes. 495 */ 496 @Override copyEntry(int idx, IN from, int fromIdx)497 void copyEntry(int idx, IN from, int fromIdx) { 498 super.copyEntry(idx, from, fromIdx); 499 setCachedVLSNUnconditional(idx, ((BIN) from).getCachedVLSN(fromIdx)); 500 setLastLoggedSizeUnconditional(idx, from.getLastLoggedSize(fromIdx)); 501 } 502 503 /** 504 * Overridden to account for vlsnCache and lastLoggedSizes. 505 */ 506 @Override copyEntries(int from, int to, int n)507 void copyEntries(int from, int to, int n) { 508 super.copyEntries(from, to, n); 509 vlsnCache = vlsnCache.copy(from, to, n); 510 lastLoggedSizes = lastLoggedSizes.copy(from, to, n); 511 } 512 513 /** 514 * Overridden to account for vlsnCache and lastLoggedSizes. 515 */ 516 @Override clearEntry(int idx)517 void clearEntry(int idx) { 518 super.clearEntry(idx); 519 setCachedVLSNUnconditional(idx, VLSN.NULL_VLSN.getSequence()); 520 setLastLoggedSizeUnconditional(idx, 0); 521 } 522 523 /** 524 * Mark this entry as deleted, using the delete flag. Only BINS may do 525 * this. 526 * 527 * @param index indicates target entry 528 */ 529 @Override setKnownDeleted(int index)530 public void setKnownDeleted(int index) { 531 532 /* 533 * The target is cleared to save memory, since a known deleted entry 534 * will never be fetched. 535 */ 536 super.setKnownDeleted(index); 537 538 /* 539 * We know it's an LN because we never call setKnownDeleted for 540 * an IN. 541 */ 542 LN oldLN = (LN) getTarget(index); 543 updateMemorySize(oldLN, null /* newNode */); 544 if (oldLN != null) { 545 oldLN.releaseMemoryBudget(); 546 } 547 setTarget(index, null); 548 } 549 setKnownDeletedClearAll(int index)550 public void setKnownDeletedClearAll(int index) { 551 setKnownDeleted(index); 552 setLsnInternal(index, DbLsn.NULL_LSN); 553 } 554 555 /* 556 * Cursors 557 */ 558 559 /* public for the test suite. */ getCursorSet()560 public Set<CursorImpl> getCursorSet() { 561 if (cursorSet == null) { 562 return Collections.emptySet(); 563 } 564 return cursorSet.copy(); 565 } 566 567 /** 568 * Register a cursor with this BIN. Caller has this BIN already latched. 569 * @param cursor Cursor to register. 570 */ addCursor(CursorImpl cursor)571 public void addCursor(CursorImpl cursor) { 572 assert isLatchExclusiveOwner(); 573 if (cursorSet == null) { 574 cursorSet = new TinyHashSet<CursorImpl>(); 575 } 576 cursorSet.add(cursor); 577 } 578 579 /** 580 * Unregister a cursor with this bin. Caller has this BIN already 581 * latched. 582 * 583 * @param cursor Cursor to unregister. 584 */ removeCursor(CursorImpl cursor)585 public void removeCursor(CursorImpl cursor) { 586 assert isLatchExclusiveOwner(); 587 if (cursorSet == null) { 588 return; 589 } 590 cursorSet.remove(cursor); 591 if (cursorSet.size() == 0) { 592 cursorSet = null; 593 } 594 } 595 596 /** 597 * @return the number of cursors currently referring to this BIN. 598 */ nCursors()599 public int nCursors() { 600 601 /* 602 * Use a local var to concurrent assignment to the cursorSet field by 603 * another thread. This method is called via eviction without latching. 604 * LRU-TODO: with the new evictor this method is called with the node 605 * EX-latched. So, cleanup after the old evictor is scrapped. 606 */ 607 final TinyHashSet<CursorImpl> cursors = cursorSet; 608 if (cursors == null) { 609 return 0; 610 } 611 return cursors.size(); 612 } 613 614 /** 615 * Adjust any cursors that are referring to this BIN. This method is 616 * called during a split operation. "this" is the BIN being split. 617 * newSibling is the new BIN into which the entries from "this" between 618 * newSiblingLow and newSiblingHigh have been copied. 619 * 620 * @param newSibling - the newSibling into which "this" has been split. 621 * @param newSiblingLow 622 * @param newSiblingHigh - the low and high entry of 623 * "this" that were moved into newSibling. 624 */ 625 @Override adjustCursors( IN newSibling, int newSiblingLow, int newSiblingHigh)626 void adjustCursors( 627 IN newSibling, 628 int newSiblingLow, 629 int newSiblingHigh) 630 { 631 assert newSibling.isLatchExclusiveOwner(); 632 assert this.isLatchExclusiveOwner(); 633 if (cursorSet == null) { 634 return; 635 } 636 int adjustmentDelta = (newSiblingHigh - newSiblingLow); 637 Iterator<CursorImpl> iter = cursorSet.iterator(); 638 639 while (iter.hasNext()) { 640 CursorImpl cursor = iter.next(); 641 int cIdx = cursor.getIndex(); 642 cursor.assertBIN(this); 643 assert newSibling instanceof BIN; 644 645 /* 646 * There are four cases to consider for cursor adjustments, 647 * depending on (1) how the existing node gets split, and (2) where 648 * the cursor points to currently. In cases 1 and 2, the id key of 649 * the node being split is to the right of the splitindex so the 650 * new sibling gets the node entries to the left of that index. 651 * This is indicated by "new sibling" to the left of the vertical 652 * split line below. The right side of the node contains entries 653 * that will remain in the existing node (although they've been 654 * shifted to the left). The vertical bar (^) indicates where the 655 * cursor currently points. 656 * 657 * case 1: 658 * 659 * We need to set the cursor's "bin" reference to point at the 660 * new sibling, but we don't need to adjust its index since that 661 * continues to be correct post-split. 662 * 663 * +=======================================+ 664 * | new sibling | existing node | 665 * +=======================================+ 666 * cursor ^ 667 * 668 * case 2: 669 * 670 * We only need to adjust the cursor's index since it continues 671 * to point to the current BIN post-split. 672 * 673 * +=======================================+ 674 * | new sibling | existing node | 675 * +=======================================+ 676 * cursor ^ 677 * 678 * case 3: 679 * 680 * Do nothing. The cursor continues to point at the correct BIN 681 * and index. 682 * 683 * +=======================================+ 684 * | existing Node | new sibling | 685 * +=======================================+ 686 * cursor ^ 687 * 688 * case 4: 689 * 690 * Adjust the "bin" pointer to point at the new sibling BIN and 691 * also adjust the index. 692 * 693 * +=======================================+ 694 * | existing Node | new sibling | 695 * +=======================================+ 696 * cursor ^ 697 */ 698 BIN ns = (BIN) newSibling; 699 if (newSiblingLow == 0) { 700 if (cIdx < newSiblingHigh) { 701 /* case 1 */ 702 iter.remove(); 703 cursor.setBIN(ns); 704 ns.addCursor(cursor); 705 } else { 706 /* case 2 */ 707 cursor.setIndex(cIdx - adjustmentDelta); 708 } 709 } else { 710 if (cIdx >= newSiblingLow) { 711 /* case 4 */ 712 cursor.setIndex(cIdx - newSiblingLow); 713 iter.remove(); 714 cursor.setBIN(ns); 715 ns.addCursor(cursor); 716 } 717 } 718 } 719 } 720 721 /** 722 * For each cursor in this BIN's cursor set, ensure that the cursor is 723 * actually referring to this BIN. 724 */ verifyCursors()725 public void verifyCursors() { 726 if (cursorSet == null) { 727 return; 728 } 729 for (CursorImpl cursor : cursorSet) { 730 cursor.assertBIN(this); 731 } 732 } 733 734 /** 735 * Adjust cursors referring to this BIN following an insert. 736 * 737 * @param insertIndex - The index of the new entry. 738 */ 739 @Override adjustCursorsForInsert(int insertIndex)740 void adjustCursorsForInsert(int insertIndex) { 741 742 assert this.isLatchExclusiveOwner(); 743 if (cursorSet == null) { 744 return; 745 } 746 747 for (CursorImpl cursor : cursorSet) { 748 int cIdx = cursor.getIndex(); 749 if (insertIndex <= cIdx) { 750 cursor.setIndex(cIdx + 1); 751 } 752 } 753 } 754 755 /** 756 * Called when we know we are about to split on behalf of a key that is the 757 * minimum (leftSide) or maximum (!leftSide) of this node. This is 758 * achieved by just forcing the split to occur either one element in from 759 * the left or the right (i.e. splitIndex is 1 or nEntries - 1). 760 */ 761 @Override splitSpecial( IN parent, int parentIndex, IN grandParent, int maxEntriesPerNode, byte[] key, boolean leftSide, CacheMode cacheMode)762 void splitSpecial( 763 IN parent, 764 int parentIndex, 765 IN grandParent, 766 int maxEntriesPerNode, 767 byte[] key, 768 boolean leftSide, 769 CacheMode cacheMode) 770 throws DatabaseException { 771 772 int nEntries = getNEntries(); 773 774 int index = findEntry(key, true, false); 775 776 boolean exact = (index & IN.EXACT_MATCH) != 0; 777 index &= ~IN.EXACT_MATCH; 778 779 if (leftSide && index < 0) { 780 splitInternal( 781 parent, parentIndex, grandParent, maxEntriesPerNode, 1, 782 cacheMode); 783 784 } else if (!leftSide && !exact && index == (nEntries - 1)) { 785 splitInternal( 786 parent, parentIndex, grandParent, maxEntriesPerNode, 787 nEntries - 1, cacheMode); 788 789 } else { 790 split( 791 parent, parentIndex, grandParent, maxEntriesPerNode, 792 cacheMode); 793 } 794 } 795 796 /** 797 * Compress this BIN by removing any entries that are deleted. No cursors 798 * may be present on the BIN. Caller is responsible for latching and 799 * unlatching this node. 800 * 801 * @param localTracker is used only for temporary DBs, and may be specified 802 * to consolidate multiple tracking operations. If null, the tracking is 803 * performed immediately in this method. 804 * 805 * @return true if all deleted slots were compressed, or false if one or 806 * more slots could not be compressed because we were unable to obtain a 807 * lock. 808 */ compress(LocalUtilizationTracker localTracker)809 public boolean compress(LocalUtilizationTracker localTracker) 810 throws DatabaseException { 811 812 /* 813 * If the environment is not yet recovered we can't rely on locks 814 * being set up to safeguard active data and so we can't compress 815 * safely. 816 */ 817 if (!databaseImpl.getEnv().isValid()) { 818 return false; 819 } 820 821 if (nCursors() > 0) { 822 throw EnvironmentFailureException.unexpectedState(); 823 } 824 825 if (isBINDelta()) { 826 throw EnvironmentFailureException.unexpectedState(); 827 } 828 829 boolean setNewIdKey = false; 830 boolean anyLocksDenied = false; 831 final DatabaseImpl db = getDatabase(); 832 final EnvironmentImpl envImpl = db.getEnv(); 833 834 for (int i = 0; i < getNEntries(); i++) { 835 836 /* KD and PD determine deletedness. */ 837 if (!isEntryPendingDeleted(i) && !isEntryKnownDeleted(i)) { 838 continue; 839 } 840 841 /* 842 * We have to be able to lock the LN before we can compress the 843 * entry. If we can't, then skip over it. 844 * 845 * We must lock the LN even if isKnownDeleted is true, because 846 * locks protect the aborts. (Aborts may execute multiple 847 * operations, where each operation latches and unlatches. It's the 848 * LN lock that protects the integrity of the whole multi-step 849 * process.) 850 * 851 * For example, during abort, there may be cases where we have 852 * deleted and then added an LN during the same txn. This means 853 * that to undo/abort it, we first delete the LN (leaving 854 * knownDeleted set), and then add it back into the tree. We want 855 * to make sure the entry is in the BIN when we do the insert back 856 * in. 857 */ 858 final BasicLocker lockingTxn = 859 BasicLocker.createBasicLocker(envImpl); 860 /* Don't allow this short-lived lock to be preempted/stolen. */ 861 lockingTxn.setPreemptable(false); 862 try { 863 /* Lock LSN. Can discard a NULL_LSN entry without locking. */ 864 final long lsn = getLsn(i); 865 866 if (lsn != DbLsn.NULL_LSN) { 867 final LockResult lockRet = lockingTxn.nonBlockingLock( 868 lsn, LockType.READ, false /*jumpAheadOfWaiters*/, db); 869 870 if (lockRet.getLockGrant() == LockGrantType.DENIED) { 871 anyLocksDenied = true; 872 continue; 873 } 874 } 875 876 /* At this point, we know we can delete. */ 877 if (Key.compareKeys(getKey(i), getIdentifierKey(), 878 getKeyComparator()) == 0) { 879 880 /* 881 * We're about to remove the entry with the idKey so the 882 * node will need a new idkey. 883 */ 884 setNewIdKey = true; 885 } 886 887 if (db.isDeferredWriteMode()) { 888 889 final LN ln = (LN) getTarget(i); 890 891 if (ln != null && 892 ln.isDirty() && 893 !DbLsn.isTransient(lsn)) { 894 895 if (db.isTemporary()) { 896 897 /* 898 * When a previously logged LN in a temporary DB is 899 * dirty, we can count the LSN of the last logged 900 * LN as obsolete without logging. There is no 901 * requirement for the dirty deleted LN to be 902 * durable past recovery. There is no danger of 903 * the last logged LN being accessed again (after 904 * log cleaning, for example), since temporary DBs 905 * do not survive recovery. 906 */ 907 if (localTracker != null) { 908 localTracker.countObsoleteNode( 909 lsn, ln.getGenericLogType(), 910 getLastLoggedSize(i), db); 911 } else { 912 envImpl.getLogManager().countObsoleteNode( 913 lsn, ln.getGenericLogType(), 914 getLastLoggedSize(i), db, 915 true /*countExact*/); 916 } 917 } else { 918 919 /* 920 * When a previously logged deferred-write LN is 921 * dirty, we log the dirty deleted LN to make the 922 * deletion durable. The act of logging will also 923 * count the last logged LSN as obsolete. 924 */ 925 logDirtyLN(i, ln, false /*ensureDurableLsn*/, 926 true /*allowEviction*/); 927 } 928 } 929 } 930 931 boolean deleteSuccess = deleteEntry(i, true); 932 assert deleteSuccess; 933 934 /* 935 * Since we're deleting the current entry, bump the current 936 * index back down one. 937 */ 938 i--; 939 } finally { 940 lockingTxn.operationEnd(); 941 } 942 } 943 944 if (getNEntries() != 0 && setNewIdKey) { 945 setIdentifierKey(getKey(0)); 946 } 947 948 /* This BIN is empty and expendable. */ 949 if (getNEntries() == 0) { 950 setGeneration(CacheMode.MAKE_COLD); 951 } 952 953 return !anyLocksDenied; 954 } 955 956 /** 957 * This method is called opporunistically at certain places where a deleted 958 * slot is observed (when the slot's PendingDeleted or KnownDeleted flag is 959 * set), to ensure that the slot is compressed away. This is an attempt to 960 * process slots that were not compressed during the mainstream record 961 * deletion process because of cursors on the BIN during compress, or a 962 * crash prior to compression. 963 * 964 * Called from BIN.afterLog(), CursorImpl.lockAndGetCurrent(), 965 * RecoverManager.redo() and RecoverManager.updo() 966 */ queueSlotDeletion()967 public void queueSlotDeletion() { 968 969 /* 970 * If the next logrec for this BIN is going to be a BIN-delta, don't 971 * queue the BIN, because no BIN slots should be removed before logging 972 * a BIN-delta. 973 */ 974 if (shouldLogDelta()) { 975 return; 976 } 977 978 getEnv().addToCompressorQueue(this, false/*doWakeup*/); 979 } 980 981 @Override isCompressible()982 public boolean isCompressible() { 983 return !isBINDelta(); 984 } 985 986 /* For debugging. Overrides method in IN. */ 987 @Override validateSubtreeBeforeDelete(int index)988 boolean validateSubtreeBeforeDelete(int index) { 989 990 assert(!isBINDelta()); 991 992 return true; 993 } 994 995 /** 996 * Check if this node fits the qualifications for being part of a deletable 997 * subtree. It may not have any LN children. 998 * 999 * We assume that this is only called under an assert. 1000 */ 1001 @Override isValidForDelete()1002 boolean isValidForDelete() 1003 throws DatabaseException { 1004 1005 assert(isLatchExclusiveOwner()); 1006 1007 if (isBINDelta()) { 1008 return false; 1009 } 1010 1011 int numValidEntries = 0; 1012 1013 for (int i = 0; i < getNEntries(); i++) { 1014 if (!isEntryKnownDeleted(i)) { 1015 numValidEntries++; 1016 } 1017 } 1018 1019 if (numValidEntries > 0) { // any valid entries, not eligible 1020 return false; 1021 } 1022 if (nCursors() > 0) { // cursors on BIN, not eligible 1023 return false; 1024 } 1025 return true; // 0 entries, no cursors 1026 } 1027 1028 /** 1029 * Adds vlsnCache size to computed memory size. 1030 */ 1031 @Override computeMemorySize()1032 public long computeMemorySize() { 1033 1034 /* 1035 * These fields are null only when this method is called by the 1036 * superclass constructor, i.e., before this class constructor has 1037 * run. Luckily the initial representations have a memory size of 1038 * zero, so we can ignore them in this case. 1039 */ 1040 long size = super.computeMemorySize(); 1041 if (vlsnCache != null) { 1042 size += vlsnCache.getMemorySize(); 1043 } 1044 1045 if (lastLoggedSizes != null) { 1046 size += lastLoggedSizes.getMemorySize(); 1047 } 1048 1049 if (bloomFilter != null) { 1050 size += BINDeltaBloomFilter.getMemorySize(bloomFilter); 1051 } 1052 1053 return size; 1054 } 1055 1056 /* Utility method used during unit testing. */ 1057 @Override printMemorySize()1058 protected long printMemorySize() { 1059 final long inTotal = super.printMemorySize(); 1060 final long vlsnCacheOverhead = vlsnCache.getMemorySize(); 1061 final long logSizesOverhead = lastLoggedSizes.getMemorySize(); 1062 final long binTotal = inTotal + vlsnCacheOverhead + logSizesOverhead; 1063 System.out.format( 1064 "BIN: %d vlsns: %d logSizes: %d %n", 1065 binTotal, vlsnCacheOverhead, logSizesOverhead); 1066 return binTotal; 1067 } 1068 1069 @Override getFixedMemoryOverhead()1070 protected long getFixedMemoryOverhead() { 1071 return MemoryBudget.BIN_FIXED_OVERHEAD; 1072 } 1073 1074 /** 1075 * Returns the treeAdmin memory in objects referenced by this BIN. 1076 * Specifically, this refers to the DbFileSummaryMap held by 1077 * MapLNs 1078 */ 1079 @Override getTreeAdminMemorySize()1080 public long getTreeAdminMemorySize() { 1081 1082 if (getDatabase().getId().equals(DbTree.ID_DB_ID)) { 1083 long treeAdminMem = 0; 1084 for (int i = 0; i < getMaxEntries(); i++) { 1085 Node n = getTarget(i); 1086 if (n != null) { 1087 MapLN mapLN = (MapLN) n; 1088 treeAdminMem += mapLN.getDatabase().getTreeAdminMemory(); 1089 } 1090 } 1091 return treeAdminMem; 1092 } else { 1093 return 0; 1094 } 1095 } 1096 1097 /** 1098 * Reduce memory consumption by evicting all LN targets. If no LNs are 1099 * resident, discard the VLSN cache. Note that evicting LNs may require 1100 * logging them, which will mark this BIN dirty. 1101 * 1102 * The BIN should be latched by the caller. 1103 * 1104 * @return a long number encoding (a) the number of evicted bytes, and 1105 * (b) whether this BIN is evictable. (b) will be false if the BIN has 1106 * any cursors on it, or has any non-evictable children. 1107 */ 1108 @Override partialEviction()1109 public long partialEviction() { 1110 1111 /* First try LN eviction. */ 1112 final long lnEvictionBytes = evictLNs(); 1113 1114 /* Return if any were evicted or are non-evictable. */ 1115 if (lnEvictionBytes != 0) { 1116 return lnEvictionBytes; 1117 } 1118 1119 /* If no LNs were resident, try discarding the VLSNCache. */ 1120 return discardVLSNCache(); 1121 } 1122 discardVLSNCache()1123 public long discardVLSNCache() { 1124 1125 final long vlsnBytes = vlsnCache.getMemorySize(); 1126 1127 if (vlsnBytes > 0) { 1128 vlsnCache = INLongRep.EMPTY_REP; 1129 updateMemorySize(0 - vlsnBytes); 1130 } 1131 1132 return vlsnBytes; 1133 } 1134 1135 /** 1136 * Reduce memory consumption by evicting all LN targets. Note that this may 1137 * cause LNs to be logged, which will mark this BIN dirty. 1138 * 1139 * The BIN should be latched by the caller. 1140 * 1141 * @return a long number encoding (a) the number of evicted bytes, and 1142 * (b) whether this BIN is evictable. (b) will be false if the BIN has 1143 * any cursors on it, or has any non-evictable children. 1144 */ evictLNs()1145 public long evictLNs() 1146 throws DatabaseException { 1147 1148 assert isLatchExclusiveOwner() : 1149 "BIN must be latched before evicting LNs"; 1150 1151 /* 1152 * We can't evict an LN which is pointed to by a cursor, in case that 1153 * cursor has a reference to the LN object. We'll take the cheap choice 1154 * and avoid evicting any LNs if there are cursors on this BIN. We 1155 * could do a more expensive, precise check to see entries have which 1156 * cursors. This is something we might move to later. 1157 */ 1158 if (nCursors() > 0) { 1159 return IN.NON_EVICTABLE_IN; 1160 } 1161 1162 /* Try to evict each child LN. */ 1163 long totalRemoved = 0; 1164 long numLNsEvicted = 0; 1165 boolean haveNonEvictableLN = false; 1166 1167 for (int i = 0; i < getNEntries(); i++) { 1168 1169 if (getTarget(i) == null) { 1170 continue; 1171 } 1172 1173 long lnRemoved = evictInternal(i); 1174 if (lnRemoved < 0) { 1175 haveNonEvictableLN = true; 1176 } else { 1177 totalRemoved += lnRemoved; 1178 ++numLNsEvicted; 1179 } 1180 } 1181 1182 /* 1183 * compactMemory() may decrease the memory footprint by mutating the 1184 * representations of the target and key sets. 1185 */ 1186 if (totalRemoved > 0) { 1187 updateMemorySize(totalRemoved, 0); 1188 totalRemoved += compactMemory(); 1189 } 1190 1191 getEvictor().incNumLNsEvicted(numLNsEvicted); 1192 1193 if (haveNonEvictableLN) { 1194 return (totalRemoved | IN.NON_EVICTABLE_IN); 1195 } else { 1196 return totalRemoved; 1197 } 1198 } 1199 1200 /** 1201 * Evict a single LN if allowed and adjust the memory budget. 1202 */ evictLN(int index)1203 public void evictLN(int index) 1204 throws DatabaseException { 1205 1206 final long removed = evictInternal(index); 1207 1208 /* May decrease the memory footprint by changing the INTargetRep. */ 1209 if (removed > 0) { 1210 updateMemorySize(removed, 0); 1211 compactMemory(); 1212 } 1213 } 1214 1215 /** 1216 * Evict a single LN if allowed. The amount of memory freed is returned 1217 * and must be subtracted from the memory budget by the caller. 1218 * 1219 * @return number of evicted bytes or -1 if the LN is not evictable. 1220 */ evictInternal(int index)1221 private long evictInternal(int index) 1222 throws DatabaseException { 1223 1224 final Node n = getTarget(index); 1225 1226 assert(n == null || n instanceof LN); 1227 1228 if (n == null) { 1229 return 0; 1230 } 1231 1232 final LN ln = (LN) n; 1233 final long lsn = getLsn(index); 1234 1235 /* 1236 * Don't evict MapLNs for open databases (LN.isEvictable) [#13415]. 1237 */ 1238 if (ln.isEvictable(lsn)) { 1239 1240 /* 1241 * Log target if necessary. Do not allow eviction since we evict 1242 * here and that would cause double-counting of the memory freed. 1243 */ 1244 logDirtyLN(index, ln, true /*ensureDurableLsn*/, 1245 false /*allowEviction*/); 1246 1247 /* Clear target. */ 1248 setTarget(index, null); 1249 ln.releaseMemoryBudget(); 1250 1251 return n.getMemorySizeIncludedByParent(); 1252 } 1253 1254 return -1; 1255 } 1256 1257 /** 1258 * @see IN#logDirtyChildren 1259 */ 1260 @Override logDirtyChildren()1261 public void logDirtyChildren() 1262 throws DatabaseException { 1263 1264 /* Look for targets that are dirty. */ 1265 for (int i = 0; i < getNEntries(); i++) { 1266 Node node = getTarget(i); 1267 if (node != null) { 1268 logDirtyLN(i, (LN) node, true /*ensureDurableLsn*/, 1269 true /*allowEviction*/); 1270 } 1271 } 1272 } 1273 logDirtyLNs()1274 private void logDirtyLNs() 1275 throws DatabaseException { 1276 1277 for (int i = 0; i < getNEntries(); i++) { 1278 final Node node = getTarget(i); 1279 if ((node != null) && (node instanceof LN)) { 1280 logDirtyLN(i, (LN) node, true /*ensureDurableLsn*/, 1281 true /*allowEviction*/); 1282 } 1283 } 1284 } 1285 1286 /** 1287 * Logs the LN at the given index if it is dirty. 1288 */ logDirtyLN( int index, LN ln, boolean ensureDurableLsn, boolean allowEviction)1289 private void logDirtyLN( 1290 int index, 1291 LN ln, 1292 boolean ensureDurableLsn, 1293 boolean allowEviction) 1294 throws DatabaseException { 1295 1296 final long oldLsn = getLsn(index); 1297 final boolean force = ensureDurableLsn && 1298 getDatabase().isDeferredWriteMode() && 1299 DbLsn.isTransientOrNull(oldLsn); 1300 if (force || ln.isDirty()) { 1301 final DatabaseImpl dbImpl = getDatabase(); 1302 final EnvironmentImpl envImpl = dbImpl.getEnv(); 1303 1304 /* Only deferred write databases should have dirty LNs. */ 1305 assert dbImpl.isDeferredWriteMode(); 1306 1307 /* 1308 * Do not lock while logging. Locking of new LSN is performed by 1309 * lockAfterLsnChange. This should never be part of the replication 1310 * stream, because this is a deferred-write DB. 1311 */ 1312 final LN.LogResult logResult = ln.log( 1313 envImpl, dbImpl, getKey(index), oldLsn, 1314 getLastLoggedSize(index), null /*locker*/, 1315 null /*writeLockInfo*/, true /*backgroundIO*/, 1316 ReplicationContext.NO_REPLICATE); 1317 1318 updateEntry(index, logResult.newLsn, logResult.newSize); 1319 /* Lock new LSN on behalf of existing lockers. */ 1320 CursorImpl.lockAfterLsnChange( 1321 dbImpl, oldLsn, logResult.newLsn, null /*excludeLocker*/); 1322 1323 /* 1324 * It is desirable to evict a non-dirty LN in a duplicates DB 1325 * because it will never be fetched again. 1326 */ 1327 if (allowEviction && databaseImpl.getSortedDuplicates()) { 1328 evictLN(index); 1329 } 1330 } 1331 } 1332 1333 /* 1334 * Logging support 1335 */ 1336 1337 /** 1338 * @see IN#getLogType 1339 */ 1340 @Override getLogType()1341 public LogEntryType getLogType() { 1342 return LogEntryType.LOG_BIN; 1343 } 1344 1345 /** 1346 * Overrides the IN method to account for deltas. 1347 * 1348 * It is called from IN.commonPostFetchInit(). If the logrec we have just 1349 * read was a BINDelta, this.lastFullVersion has already been set (in 1350 * BINDeltaLogEntry.readMainItem() or in OldBinDelta.reconstituteBIN()). 1351 * So, this method will set this.lastDeltaVarsion. Otherwise, if the 1352 * logrec was a full BIN, this.lastFullVersion has not been set yet, 1353 * and it will be set here. In this case, this.lastDeltaVarsion will 1354 * remain NULL. 1355 */ 1356 @Override setLastLoggedLsn(long lsn)1357 public void setLastLoggedLsn(long lsn) { 1358 if (getLastFullVersion() == DbLsn.NULL_LSN) { 1359 setLastFullLsn(lsn); 1360 } else { 1361 lastDeltaVersion = lsn; 1362 } 1363 } 1364 1365 /** 1366 * Overrides the IN method to account for deltas. 1367 */ 1368 @Override getLastLoggedVersion()1369 public long getLastLoggedVersion() { 1370 return (lastDeltaVersion != DbLsn.NULL_LSN) ? 1371 lastDeltaVersion : 1372 getLastFullVersion(); 1373 } 1374 1375 /** 1376 * Overrides the IN method to account for deltas. 1377 * Public for unit testing. 1378 */ 1379 @Override getLastDeltaVersion()1380 public long getLastDeltaVersion() { 1381 return lastDeltaVersion; 1382 } 1383 1384 /** 1385 * Overrides the IN method to account for deltas. 1386 */ 1387 @Override beforeLog( LogManager logManager, INLogItem item, INLogContext context)1388 public void beforeLog( 1389 LogManager logManager, 1390 INLogItem item, 1391 INLogContext context) { 1392 1393 final DatabaseImpl dbImpl = getDatabase(); 1394 final EnvironmentImpl envImpl = dbImpl.getEnv(); 1395 1396 /* Determine whether we log a delta rather than full version. */ 1397 item.isDelta = 1398 isBINDelta() || (context.allowDeltas && shouldLogDelta()); 1399 1400 /* Be sure that we didn't illegally mutate to a delta. */ 1401 assert (!(item.isDelta && isDeltaProhibited())); 1402 1403 /* Perform lazy compression when logging a full BIN. */ 1404 if (context.allowCompress && !item.isDelta) { 1405 envImpl.lazyCompress(this); 1406 } 1407 1408 /* 1409 * Write dirty LNs in deferred-write databases. This is done after 1410 * compression to reduce total logging, at least for temp DBs. 1411 */ 1412 if (dbImpl.isDeferredWriteMode()) { 1413 logDirtyLNs(); 1414 } 1415 1416 /* 1417 * In the Btree, the parent IN slot contains the latest full version 1418 * LSN or, if a delta was last logged, the delta LSN. Somewhat 1419 * redundantly, the transient IN.lastFullVersion and 1420 * BIN.lastDeltaVersion fields contain the last logged full version and 1421 * delta version LSNs. 1422 * 1423 * For delta logging: 1424 * + Count lastDeltaVersion obsolete, if non-null. 1425 * + Set lastDeltaVersion to newly logged LSN. 1426 * + Leave lastFullVersion unchanged. 1427 * 1428 * For full version logging: 1429 * + Count lastFullVersion and lastDeltaVersion obsolete, if non-null. 1430 * + Set lastFullVersion to newly logged LSN. 1431 * + Set lastDeltaVersion to null. 1432 */ 1433 beforeLogCommon( 1434 item, context, 1435 item.isDelta ? DbLsn.NULL_LSN : getLastFullVersion(), 1436 lastDeltaVersion); 1437 1438 item.entry = item.isDelta ? 1439 (new BINDeltaLogEntry(this)) : 1440 (new INLogEntry<BIN>(this)); 1441 } 1442 1443 /** 1444 * Overrides the IN method to account for deltas. See beforeLog. 1445 */ 1446 @Override afterLog( LogManager logManager, INLogItem item, INLogContext context)1447 public void afterLog( 1448 LogManager logManager, 1449 INLogItem item, 1450 INLogContext context) { 1451 1452 afterLogCommon(logManager, item, context, 1453 item.isDelta ? DbLsn.NULL_LSN : getLastFullVersion(), 1454 lastDeltaVersion); 1455 1456 if (item.isDelta) { 1457 lastDeltaVersion = item.newLsn; 1458 } else { 1459 setLastFullLsn(item.newLsn); 1460 lastDeltaVersion = DbLsn.NULL_LSN; 1461 1462 /* 1463 * Before logging a full version BIN we attempted to compress it. 1464 * If we could not compress a slot because of the presence of 1465 * cursors, we must re-queue (or at least re-dirty) the BIN so 1466 * that we will compress it later. The BIN is set non-dirty by 1467 * afterLogCommon above. 1468 */ 1469 for (int i = 0; i < getNEntries(); i += 1) { 1470 if (isEntryKnownDeleted(i) || isEntryPendingDeleted(i)) { 1471 queueSlotDeletion(); 1472 break; 1473 } 1474 } 1475 } 1476 1477 prohibitNextDelta = false; 1478 } 1479 1480 /* 1481 * BIN delta support 1482 */ 1483 getFullBinNEntries()1484 public int getFullBinNEntries() { 1485 if (isBINDelta()) { 1486 return fullBinNEntries; 1487 } else { 1488 return nEntries; 1489 } 1490 } 1491 setFullBinNEntries(int n)1492 public void setFullBinNEntries(int n) { 1493 assert(isBINDelta(false)); 1494 fullBinNEntries = n; 1495 } 1496 incFullBinNEntries()1497 void incFullBinNEntries() { 1498 assert(isBINDelta()); 1499 ++fullBinNEntries; 1500 } 1501 getFullBinMaxEntries()1502 public int getFullBinMaxEntries() { 1503 if (isBINDelta()) { 1504 return fullBinMaxEntries; 1505 } else { 1506 return getMaxEntries(); 1507 } 1508 } 1509 setFullBinMaxEntries(int n)1510 public void setFullBinMaxEntries(int n) { 1511 assert(isBINDelta(false)); 1512 fullBinMaxEntries = n; 1513 } 1514 getDeltaCapacity(int numDirtyEntries)1515 int getDeltaCapacity(int numDirtyEntries) { 1516 1517 boolean blindOps = 1518 (getEnv().allowBlindOps() || getEnv().allowBlindPuts()); 1519 1520 if (isBINDelta()) { 1521 return getMaxEntries(); 1522 } 1523 1524 if (blindOps) { 1525 return (getNEntries() * databaseImpl.getBinDeltaPercent()) / 100; 1526 } 1527 1528 return numDirtyEntries; 1529 } 1530 allowBlindPuts()1531 boolean allowBlindPuts() { 1532 boolean res = getEnv().allowBlindPuts(); 1533 1534 if (res) { 1535 res = res && databaseImpl.hasBtreeBinaryEqualityComparator(); 1536 res = res && databaseImpl.hasDuplicateBinaryEqualityComparator(); 1537 } 1538 1539 return res; 1540 } 1541 1542 /* 1543 * It is called in 3 cases listed below. In all cases, if blind puts are 1544 * not allowed, the method returns null. 1545 * 1546 * 1. A full BIN is being mutated to an in-memory delta. A new filter will 1547 * be created here and will be stored in the delta by the caller. 1548 * 2. A full BIN is being logged as a delta. A new filter will be created 1549 * here and will be written in the delta logrec by the caller. 1550 * 3. An in-memory BIN-delta is being logged. If the delta has a bloom 1551 * filter already, that filter will be returned and written into the 1552 * logrec. The delta may not have a filter already because it was read 1553 * from an older-version logfile; in this case we return null. 1554 */ createBloomFilter()1555 byte[] createBloomFilter() { 1556 1557 assert(bloomFilter == null || isBINDelta()); 1558 1559 boolean blindPuts = allowBlindPuts(); 1560 1561 if (!blindPuts) { 1562 assert(bloomFilter == null); 1563 return null; 1564 } 1565 1566 if (bloomFilter != null) { 1567 /* 1568 * We are here because we are logging a delta that has a filter 1569 * already. We just need to log the existing filter. 1570 */ 1571 return bloomFilter; 1572 } 1573 1574 if (isBINDelta()) { 1575 return null; 1576 } 1577 1578 int numKeys = getNEntries() - getNDeltas(); 1579 int nbytes = BINDeltaBloomFilter.getByteSize(numKeys); 1580 1581 byte[] bf = new byte[nbytes]; 1582 1583 BINDeltaBloomFilter.HashContext hc = 1584 new BINDeltaBloomFilter.HashContext(); 1585 1586 if (keyPrefix != null) { 1587 hc.hashKeyPrefix(keyPrefix); 1588 } 1589 1590 for (int i = 0; i < getNEntries(); ++i) { 1591 1592 if (isDirty(i)) { 1593 continue; 1594 } 1595 1596 byte[] suffix = entryKeyVals.get(i); 1597 if (suffix == null) { 1598 suffix = Key.EMPTY_KEY; 1599 } 1600 1601 BINDeltaBloomFilter.add(bf, suffix, hc); 1602 } 1603 1604 return bf; 1605 } 1606 mayHaveKeyInFullBin(byte[] key)1607 public boolean mayHaveKeyInFullBin(byte[] key) { 1608 1609 assert(isBINDelta()); 1610 1611 if (bloomFilter == null) { 1612 return true; 1613 } 1614 1615 return BINDeltaBloomFilter.contains(bloomFilter, key); 1616 } 1617 1618 /* 1619 * Used in IN.getLogSize() only 1620 */ getBloomFilterLogSize()1621 int getBloomFilterLogSize() { 1622 1623 if (!allowBlindPuts()) { 1624 return 0; 1625 } 1626 1627 if (isBINDelta()) { 1628 if (bloomFilter != null) { 1629 return BINDeltaBloomFilter.getLogSize(bloomFilter); 1630 } 1631 1632 return 0; 1633 1634 } else { 1635 assert(bloomFilter == null); 1636 int numKeys = getNEntries() - getNDeltas(); 1637 return BINDeltaBloomFilter.getLogSize(numKeys); 1638 } 1639 } 1640 1641 /** 1642 * If cleaned or compressed, must log full version. 1643 */ 1644 @Override setProhibitNextDelta()1645 public void setProhibitNextDelta() { 1646 prohibitNextDelta = true; 1647 } 1648 isDeltaProhibited()1649 private boolean isDeltaProhibited() { 1650 final DatabaseImpl dbImpl = getDatabase(); 1651 return prohibitNextDelta || 1652 dbImpl.isDeferredWriteMode() || 1653 (getLastFullVersion() == DbLsn.NULL_LSN); 1654 } 1655 1656 /** 1657 * Decide whether to log a full or partial BIN, depending on the ratio of 1658 * the delta size to full BIN size, and the number of deltas that have been 1659 * logged since the last full. 1660 * 1661 * Other factors are taken into account: 1662 * + a delta cannot be logged if the BIN has never been logged before 1663 * + deltas are not currently supported for DeferredWrite databases 1664 * + this particular delta may have been prohibited because the cleaner is 1665 * migrating the BIN or a slot has been deleted 1666 * + if there are no dirty slots, we might as well log a full BIN 1667 * 1668 * @return true if we should log the deltas of this BIN 1669 */ shouldLogDelta()1670 public boolean shouldLogDelta() { 1671 1672 if (isBINDelta()) { 1673 assert(!isDeltaProhibited()); 1674 return true; 1675 } 1676 1677 /* Cheapest checks first. */ 1678 if (isDeltaProhibited()) { 1679 return false; 1680 } 1681 1682 /* Must count deltas to check further. */ 1683 final int numDeltas = getNDeltas(); 1684 1685 /* A delta with zero items is not valid. */ 1686 if (numDeltas <= 0) { 1687 return false; 1688 } 1689 1690 /* Check the configured BinDeltaPercent. */ 1691 final int deltaLimit = 1692 (getNEntries() * databaseImpl.getBinDeltaPercent()) / 100; 1693 if (numDeltas > deltaLimit) { 1694 return false; 1695 } 1696 1697 return true; 1698 } 1699 1700 /** 1701 * Returns whether mutateToBINDelta can be called. 1702 */ canMutateToBINDelta()1703 public boolean canMutateToBINDelta() { 1704 return (!isBINDelta() && 1705 shouldLogDelta() && 1706 (nCursors() == 0)); 1707 } 1708 1709 /** 1710 * Mutate to a delta (discard non-dirty entries and resize arrays). 1711 * 1712 * This method must be called with this node latched exclusively, and 1713 * canMutateToBINDelta must return true. 1714 * 1715 * @return the number of bytes freed. 1716 */ mutateToBINDelta()1717 public long mutateToBINDelta() { 1718 1719 assert isLatchExclusiveOwner(); 1720 assert canMutateToBINDelta(); 1721 1722 if (getInListResident()) { 1723 getEnv().getInMemoryINs().updateBINDeltaStat(1); 1724 } 1725 1726 final long oldSize = getInMemorySize(); 1727 final int nDeltas = getNDeltas(); 1728 final int capacity = getDeltaCapacity(nDeltas); 1729 1730 bloomFilter = createBloomFilter(); 1731 1732 initBINDelta(this, nDeltas, capacity, true); 1733 1734 return oldSize - getInMemorySize(); 1735 } 1736 1737 /** 1738 * This method assumes that "this" BIN is a delta and creates a clone of 1739 * it. It is currently used by the DiskOrderedScanner only. The method 1740 * does not clone the targets array. 1741 */ cloneBINDelta()1742 public BIN cloneBINDelta() { 1743 1744 assert(isBINDelta()); 1745 1746 final BIN bin = new BIN( 1747 databaseImpl, getIdentifierKey(), 0/*capacity*/, getLevel()); 1748 1749 bin.nodeId = nodeId; 1750 bin.flags = flags; 1751 bin.lastFullVersion = lastFullVersion; 1752 1753 final int nDeltas = getNDeltas(); 1754 initBINDelta(bin, nDeltas, nDeltas, false); 1755 return bin; 1756 } 1757 1758 /** 1759 * Replaces the contents of destBIN with the deltas in this BIN. 1760 */ initBINDelta( final BIN destBIN, final int nDeltas, final int capacity, final boolean copyTargets)1761 private void initBINDelta( 1762 final BIN destBIN, 1763 final int nDeltas, 1764 final int capacity, 1765 final boolean copyTargets) { 1766 1767 long[] longLSNs = null; 1768 byte[] compactLSNs = null; 1769 1770 if (entryLsnLongArray == null) { 1771 compactLSNs = new byte[nDeltas * 4]; 1772 } else { 1773 longLSNs = new long[nDeltas]; 1774 } 1775 1776 final long[] vlsns = new long[nDeltas]; 1777 final int[] sizes= new int[nDeltas]; 1778 final byte[][] keys = new byte[nDeltas][]; 1779 final byte[] states = new byte[nDeltas]; 1780 Node[] targets = null; 1781 1782 if (copyTargets) { 1783 targets = new Node[nDeltas]; 1784 } 1785 1786 int j = 0; 1787 for (int i = 0; i < getNEntries(); i += 1) { 1788 1789 if (!isDirty(i)) { 1790 continue; 1791 } 1792 1793 if (entryLsnLongArray == null) { 1794 int doff = j << 2; 1795 int soff = i << 2; 1796 compactLSNs[doff] = entryLsnByteArray[soff]; 1797 compactLSNs[doff+1] = entryLsnByteArray[soff+1]; 1798 compactLSNs[doff+2] = entryLsnByteArray[soff+2]; 1799 compactLSNs[doff+3] = entryLsnByteArray[soff+3]; 1800 } else { 1801 longLSNs[j] = getLsn(i); 1802 } 1803 1804 keys[j] = entryKeyVals.get(i); 1805 states[j] = getState(i); 1806 1807 if (targets != null) { 1808 targets[j] = getTarget(i); 1809 } 1810 1811 vlsns[j] = getCachedVLSN(i); 1812 sizes[j] = getLastLoggedSize(i); 1813 1814 j += 1; 1815 } 1816 1817 /* 1818 * Do this before resetContent() because destBIN and "this" may be the 1819 * same java obj 1820 */ 1821 destBIN.fullBinNEntries = getFullBinNEntries(); 1822 destBIN.fullBinMaxEntries = getFullBinMaxEntries(); 1823 1824 destBIN.resetContent( 1825 capacity, nDeltas, 1826 baseFileNumber, compactLSNs, longLSNs, 1827 states, keyPrefix, keys, targets, 1828 sizes, vlsns); 1829 1830 destBIN.setBINDelta(true); 1831 1832 destBIN.compactMemory(); 1833 } 1834 1835 /** 1836 * Replaces the contents of this BIN with the given contents. 1837 * Used in mutating a full BIN to a BIN-delta or for creating 1838 * a new BIN delta with the given content. 1839 */ resetContent( final int capacity, final int newNEntries, final long baseFileNumber, final byte[] compactLSNs, final long[] longLSNs, final byte[] states, final byte[] keyPrefix, final byte[][] keys, final Node[] targets, final int[] loggedSizes, final long[] vlsns)1840 private void resetContent( 1841 final int capacity, 1842 final int newNEntries, 1843 final long baseFileNumber, 1844 final byte[] compactLSNs, 1845 final long[] longLSNs, 1846 final byte[] states, 1847 final byte[] keyPrefix, 1848 final byte[][] keys, 1849 final Node[] targets, 1850 final int[] loggedSizes, 1851 final long[] vlsns) { 1852 1853 updateRepCacheStats(false); 1854 1855 nEntries = newNEntries; 1856 1857 this.baseFileNumber = baseFileNumber; 1858 if (longLSNs == null) { 1859 entryLsnByteArray = new byte[capacity << 2]; 1860 entryLsnLongArray = null; 1861 } else { 1862 entryLsnByteArray = null; 1863 entryLsnLongArray = new long[capacity]; 1864 } 1865 1866 this.keyPrefix = keyPrefix; 1867 entryKeyVals = new INKeyRep.Default(capacity); 1868 1869 entryTargets = INTargetRep.NONE; 1870 1871 updateRepCacheStats(true); 1872 1873 entryStates = new byte[capacity]; 1874 1875 for (int i = 0; i < newNEntries; i += 1) { 1876 1877 if (longLSNs == null) { 1878 int off = i << 2; 1879 entryLsnByteArray[off] = compactLSNs[off]; 1880 entryLsnByteArray[off+1] = compactLSNs[off+1]; 1881 entryLsnByteArray[off+2] = compactLSNs[off+2]; 1882 entryLsnByteArray[off+3] = compactLSNs[off+3]; 1883 } else { 1884 entryLsnLongArray[i] = longLSNs[i]; 1885 } 1886 1887 entryKeyVals = entryKeyVals.set(i, keys[i], this); 1888 entryStates[i] = states[i]; 1889 1890 if (targets != null) { 1891 entryTargets = entryTargets.set(i, targets[i], this); 1892 } 1893 1894 setLastLoggedSizeUnconditional(i, loggedSizes[i]); 1895 setCachedVLSNUnconditional(i, vlsns[i]); 1896 } 1897 1898 updateMemorySize(inMemorySize, computeMemorySize()); 1899 } 1900 1901 /** 1902 * Fetch the full BIN and apply the deltas in this BIN to it, then use the 1903 * merged result to replace the contents of this BIN. 1904 * 1905 * This method must be called with this node latched exclusively. If 'this' 1906 * is not a delta, this method does nothing. 1907 */ 1908 @Override mutateToFullBIN()1909 public void mutateToFullBIN() { 1910 1911 if (!isBINDelta()) { 1912 return; 1913 } 1914 1915 final BIN fullBIN = fetchFullBIN(databaseImpl); 1916 1917 mutateToFullBIN(fullBIN); 1918 1919 Evictor e = getEvictor(); 1920 if (e == null) { 1921 return; 1922 } 1923 e.incFullBINMissStats(); 1924 } 1925 1926 /** 1927 * Mutates this delta to a full BIN by applying this delta to the fullBIN 1928 * param and then replacing this BIN's contents with it. 1929 * 1930 * This method must be called with this node latched exclusively. 'this' 1931 * must be a delta. 1932 * 1933 * The method is public because it is called directly from FileProcessor 1934 * when it finds a BIN that must be migrated. In that case, fullBIN is a 1935 * full BIN that has just been read from the log, and it is not part of 1936 * the memory-resident tree. 1937 */ mutateToFullBIN(BIN fullBIN)1938 public void mutateToFullBIN(BIN fullBIN) { 1939 1940 assert isLatchExclusiveOwner(); 1941 assert isBINDelta() : this; 1942 1943 byte[][] keys = null; 1944 int i = 0; 1945 1946 if (cursorSet != null) { 1947 keys = new byte[cursorSet.size()][]; 1948 1949 for (CursorImpl cursor : cursorSet) { 1950 final int index = cursor.getIndex(); 1951 if (index >= 0 && index < getNEntries()) { 1952 keys[i] = cursor.getCurrentKey(true/*isLatched*/); 1953 } 1954 ++i; 1955 } 1956 } 1957 1958 reconstituteBIN(databaseImpl, fullBIN); 1959 1960 resetContent(fullBIN); 1961 1962 setBINDelta(false); 1963 1964 compactMemory(); 1965 1966 if (cursorSet != null) { 1967 1968 i = 0; 1969 for (CursorImpl cursor : cursorSet) { 1970 1971 if (keys[i] != null) { 1972 /* 1973 * Do not ask for an exact match from findEntry because if 1974 * the cursor was on a KD slot, findEntry would return -1. 1975 */ 1976 int index = findEntry(keys[i], true, false); 1977 1978 if ((index & IN.EXACT_MATCH) == 0) { 1979 throw EnvironmentFailureException.unexpectedState( 1980 getEnv(), "Failed to reposition cursor during " + 1981 "mutation of a BIN delta to a full BIN"); 1982 } 1983 1984 index &= ~IN.EXACT_MATCH; 1985 1986 assert(index >= 0 && index < getNEntries()); 1987 cursor.setIndex(index); 1988 } 1989 ++i; 1990 } 1991 } 1992 1993 if (getInListResident()) { 1994 getEnv().getInMemoryINs().updateBINDeltaStat(-1); 1995 } 1996 } 1997 fetchFullBIN(DatabaseImpl dbImpl)1998 private BIN fetchFullBIN(DatabaseImpl dbImpl) { 1999 final EnvironmentImpl envImpl = dbImpl.getEnv(); 2000 final long lsn = getLastFullVersion(); 2001 2002 try { 2003 return (BIN) 2004 envImpl.getLogManager().getEntryHandleFileNotFound(lsn); 2005 2006 } catch (EnvironmentFailureException e) { 2007 e.addErrorMessage(makeFetchErrorMsg(null, this, lsn, (byte) 0)); 2008 throw e; 2009 2010 } catch (RuntimeException e) { 2011 throw new EnvironmentFailureException( 2012 envImpl, EnvironmentFailureReason.LOG_INTEGRITY, 2013 makeFetchErrorMsg(e.toString(), this, lsn, (byte) 0), e); 2014 } 2015 } 2016 2017 /** 2018 * Replaces the contents of this BIN with the contents of the given BIN, 2019 * including lsns, states, keys and targets. Key prefixing and key/target 2020 * representations will also be those of the given BIN. 2021 */ resetContent(final BIN other)2022 private void resetContent(final BIN other) { 2023 2024 updateRepCacheStats(false); 2025 2026 nEntries = other.nEntries; 2027 2028 baseFileNumber = other.baseFileNumber; 2029 entryLsnByteArray = other.entryLsnByteArray; 2030 entryLsnLongArray = other.entryLsnLongArray; 2031 2032 keyPrefix = other.keyPrefix; 2033 entryKeyVals = other.entryKeyVals; 2034 2035 entryTargets = other.entryTargets; 2036 2037 entryStates = other.entryStates; 2038 2039 lastLoggedSizes = other.lastLoggedSizes; 2040 2041 vlsnCache = other.vlsnCache; 2042 2043 bloomFilter = null; 2044 2045 updateMemorySize(inMemorySize, computeMemorySize()); 2046 2047 updateRepCacheStats(true); 2048 } 2049 2050 /** 2051 * Create a BIN by fetching the full version and applying the deltas in 2052 * this BIN to it. The BIN is not added to the INList. 2053 * 2054 * @return the full BIN with deltas applied. 2055 */ reconstituteBIN(DatabaseImpl dbImpl)2056 public BIN reconstituteBIN(DatabaseImpl dbImpl) { 2057 final BIN fullBIN = fetchFullBIN(dbImpl); 2058 reconstituteBIN(dbImpl, fullBIN); 2059 return fullBIN; 2060 } 2061 2062 /** 2063 * Given a full version BIN, apply the deltas in this BIN. The fullBIN will 2064 * then be complete, but its memory will not be compacted. 2065 */ reconstituteBIN(DatabaseImpl dbImpl, BIN fullBIN)2066 public void reconstituteBIN(DatabaseImpl dbImpl, BIN fullBIN) { 2067 2068 fullBIN.setDatabase(dbImpl); 2069 fullBIN.latch(CacheMode.UNCHANGED); 2070 2071 try { 2072 2073 /* 2074 * The BIN's lastFullLsn is set here, while its lastLoggedLsn is 2075 * set by postFetchInit or postRecoveryInit. 2076 */ 2077 fullBIN.setLastFullLsn(getLastFullVersion()); 2078 2079 /* Process each delta. */ 2080 for (int i = 0; i < getNEntries(); i++) { 2081 assert isDirty(i) : this; 2082 fullBIN.applyDelta( 2083 getKey(i), getLsn(i), getState(i), getLastLoggedSize(i), 2084 getCachedVLSN(i), getTarget(i)); 2085 } 2086 2087 /* 2088 * The applied deltas will leave some slots dirty, which is 2089 * necessary as a record of changes that will be included in the 2090 * next delta. However, the BIN itself should not be dirty, 2091 * because this delta is a persistent record of those changes. 2092 */ 2093 fullBIN.setDirty(false); 2094 } finally { 2095 fullBIN.releaseLatch(); 2096 } 2097 } 2098 2099 /** 2100 * Apply (insert, update) a given delta slot in this full BIN. 2101 */ applyDelta( final byte[] key, final long lsn, final byte state, final int lastLoggedSize, final long vlsn, final Node child)2102 void applyDelta( 2103 final byte[] key, 2104 final long lsn, 2105 final byte state, 2106 final int lastLoggedSize, 2107 final long vlsn, 2108 final Node child) { 2109 2110 /* 2111 * The delta is the authoritative version of the entry. In all cases, 2112 * it should supersede the entry in the full BIN. This is true even if 2113 * the BIN Delta's entry is knownDeleted or if the full BIN's version 2114 * is knownDeleted. Therefore we use the flavor of findEntry that will 2115 * return a knownDeleted entry if the entry key matches (i.e. true, 2116 * false) but still indicates exact matches with the return index. 2117 * findEntry only returns deleted entries if third arg is false, but we 2118 * still need to know if it's an exact match or not so indicateExact is 2119 * true. 2120 */ 2121 int foundIndex = findEntry(key, true, false); 2122 2123 if (foundIndex >= 0 && (foundIndex & IN.EXACT_MATCH) != 0) { 2124 2125 foundIndex &= ~IN.EXACT_MATCH; 2126 2127 /* 2128 * The entry exists in the full version, update it with the delta 2129 * info. Note that all state flags should be restored [#22848]. 2130 */ 2131 updateEntry(foundIndex, child, lsn, lastLoggedSize, state); 2132 2133 } else { 2134 2135 /* 2136 * The entry doesn't exist, insert the delta entry. We insert the 2137 * entry even when it is known or pending deleted, since the 2138 * deleted (and dirty) entry will be needed to log the next delta. 2139 * [#20737] 2140 */ 2141 final int result = insertEntry1( 2142 child, key, lsn, state, false/*blindInsertion*/); 2143 2144 assert (result & INSERT_SUCCESS) != 0; 2145 foundIndex = result & ~IN.INSERT_SUCCESS; 2146 2147 setLastLoggedSizeUnconditional(foundIndex, lastLoggedSize); 2148 } 2149 2150 setCachedVLSNUnconditional(foundIndex, vlsn); 2151 } 2152 2153 /* 2154 * DbStat support. 2155 */ 2156 @Override accumulateStats(TreeWalkerStatsAccumulator acc)2157 void accumulateStats(TreeWalkerStatsAccumulator acc) { 2158 acc.processBIN(this, Long.valueOf(getNodeId()), getLevel()); 2159 } 2160 } 2161