1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2002, 2014 Oracle and/or its affiliates.  All rights reserved.
5  *
6  */
7 
8 package com.sleepycat.je.tree;
9 
10 import java.util.Collections;
11 import java.util.Iterator;
12 import java.util.Set;
13 
14 import com.sleepycat.je.CacheMode;
15 import com.sleepycat.je.DatabaseException;
16 import com.sleepycat.je.EnvironmentFailureException;
17 import com.sleepycat.je.cleaner.LocalUtilizationTracker;
18 import com.sleepycat.je.config.EnvironmentParams;
19 import com.sleepycat.je.dbi.CursorImpl;
20 import com.sleepycat.je.dbi.DatabaseImpl;
21 import com.sleepycat.je.dbi.DbConfigManager;
22 import com.sleepycat.je.dbi.DbTree;
23 import com.sleepycat.je.dbi.EnvironmentFailureReason;
24 import com.sleepycat.je.dbi.EnvironmentImpl;
25 import com.sleepycat.je.dbi.MemoryBudget;
26 import com.sleepycat.je.evictor.Evictor;
27 import com.sleepycat.je.log.LogEntryType;
28 import com.sleepycat.je.log.LogManager;
29 import com.sleepycat.je.log.ReplicationContext;
30 import com.sleepycat.je.log.entry.BINDeltaLogEntry;
31 import com.sleepycat.je.log.entry.INLogEntry;
32 import com.sleepycat.je.tree.BINDeltaBloomFilter.HashContext;
33 import com.sleepycat.je.txn.BasicLocker;
34 import com.sleepycat.je.txn.LockGrantType;
35 import com.sleepycat.je.txn.LockResult;
36 import com.sleepycat.je.txn.LockType;
37 import com.sleepycat.je.utilint.DatabaseUtil;
38 import com.sleepycat.je.utilint.DbLsn;
39 import com.sleepycat.je.utilint.SizeofMarker;
40 import com.sleepycat.je.utilint.TinyHashSet;
41 import com.sleepycat.je.utilint.VLSN;
42 
43 /**
44  * A BIN represents a Bottom Internal Node in the JE tree.
45  *
46  * BIN-deltas
47  * ==========
48  * A BIN-delta is a BIN with the non-dirty slots omitted. A "full BIN", OTOH
49  * contains all slots.  On disk and in memory, the format of a BIN-delta is the
50  * same as that of a BIN.  In memory, a BIN object is actually a BIN-delta when
51  * the BIN-delta flag is set (IN.isBINDelta).  On disk, the NewBINDelta log
52  * entry type (class BINDeltaLogEntry) is the only thing that distinguishes it
53  * from a full BIN, which has the BIN log entry type.
54  *
55  * BIN-deltas provides two benefits: Reduced writing and reduced memory usage.
56  *
57  * Reduced Writing
58  * ---------------
59  * Logging a BIN-delta rather a full BIN reduces writing significantly.  The
60  * cost, however, is that two reads are necessary to reconstruct a full BIN
61  * from scratch.  The reduced writing is worth this cost, particularly because
62  * less writing means less log cleaning.
63  *
64  * A BIN-delta is logged when 25% or less (configured with EnvironmentConfig
65  * TREE_BIN_DELTA) of the slots in a BIN are dirty. When a BIN-delta is logged,
66  * the dirty flag is cleared on the the BIN in cache.  If more slots are
67  * dirtied and another BIN-delta is logged, it will contain all entries dirtied
68  * since the last full BIN was logged.  In other words, BIN-deltas are
69  * cumulative and not chained, to avoid reading many (more than two) log
70  * entries to reconstruct a full BIN.  The dirty flag on each slot is cleared
71  * only when a full BIN is logged.
72  *
73  * In addition to the cost of fetching two entries on a BIN cache miss, another
74  * drawback of the current approach is that dirtiness propagates upward in the
75  * Btree due to BIN-delta logging, causing repeated logging of upper INs.  The
76  * slot of the parent IN contains the LSN of the most recent BIN-delta or full
77  * BIN that was logged.  A BINDeltaLogEntry in turn contains the LSN of the
78  * last full BIN logged.
79  *
80  *   Historical note:  The pre-JE 5 implementation of OldBINDeltas worked
81  *   differently and had a different cost/benefit trade-off.  When an
82  *   OldBINDelta was logged, its dirty flag was not cleared, causing it to be
83  *   logged repeatedly at every checkpoint.  A full BIN was logged after 10
84  *   deltas, to prevent endless logging of the same BIN.  One benefit of this
85  *   approach is that the BIN's parent IN was not dirtied when logging the
86  *   OldBINDelta, preventing dirtiness from propagating upward.  Another
87  *   benefit is that the OldBINDelta was only processed by recovery, and did
88  *   not have to be fetched to reconstruct a full BIN from scratch on a cache
89  *   miss.  But the cost (the logging of an OldBINDelta every checkpoint, even
90  *   when it hadn't changed since the last time logged) outweighed the
91  *   benefits.  When the current approach was implemented in JE 5, performance
92  *   improved due to less logging.
93  *
94  *   In JE 6, deltas were also maintained in the Btree cache.  This was done to
95  *   provide the reduced memory benefits described in the next section.  The
96  *   log format for a delta was also changed.  The OldBINDelta log format is
97  *   different (not the same as the BIN format) and is supported for backward
98  *   compatibility as the OldBINDeltaLogEntry.  Its log entry type name is
99  *   still BINDelta, which is why the new type is named NewBINDelta (for
100  *   backward compatibility, log entry type names cannot be changed.)  This is
101  *   also why the spelling "BIN-delta" is used to refer to deltas in the new
102  *   approach.  The old BINDelta class was renamed to OldBINDelta and there is
103  *   no longer a class named BINDelta.
104  *
105  * Reduced Memory Usage
106  * --------------------
107  * In the Btree cache, a BIN may be represented as a full BIN or a BIN-delta.
108  * Eviction will mutate a full BIN to a BIN-delta in preference to discarding
109  * the entire BIN. A BIN-delta in cache occupies less memory than a full BIN,
110  * and can be exploited as follows:
111  *
112  *  - When a full BIN is needed, it can be constructed with only one fetch
113  *    rather than two, reducing IO overall.  IN.fetchIN implements this
114  *    optimization.
115  *
116  *  - Certain operations can sometimes be performed using the BIN-delta alone,
117  *    allowing such operations on a given data set to take place using less
118  *    less IO (for a given cache size).
119  *
120  * The latter benefit is not yet implemented.   No user CRUD operations are
121  * currently implemented using BIN-deltas. In the future we plan to implement
122  * the following operations using the BIN-delta alone.
123  *
124  *  - Consider recording deletions in a BIN-delta.  Currently, slot deletion
125  *    prohibits a BIN-delta from being logged.  To record deletion in
126  *    BIN-deltas, slot deletion will have to be deferred until a full BIN is
127  *    logged.
128  *
129  *  - User reads by key, updates and deletions can be implemented if the key
130  *    happens to appear in the BIN-delta.
131  *
132  *  - The Cleaner can migrate an LN if its key happens to appear in the
133  *    BIN-delta.  This is similar to a user update operation, but in a
134  *    different code path.
135  *
136  *  - Insertions, deletions and updates can always be performed in a BIN-delta
137  *    during replica replay, since the Master operation has already determined
138  *    whether the key exists.
139  *
140  *  - Recovery LN redo could also apply insertions, updates and inserts in the
141  *    manner described.
142  *
143  *  - Add idempotent put/delete operations, which can always be applied in a
144  *    BIN-delta.
145  *
146  *  - Store a hash of the keys in the full BIN in the BIN-delta and use it to
147  *    perform the following in the delta:
148  *    - putIfAbsent (true insertion)
149  *    - get/delete/putIfPresent operations that return NOTFOUND
150  *    - to avoid accumulating unnecessary deletions
151  *
152  * However, some internal operations do currently exploit BIN-deltas to avoid
153  * unnecessary IO.  The following are currently implemented.
154  *
155  *  - The Evictor and Checkpointer log a BIN-delta that is present in the
156  *    cache, without having to fetch the full BIN.
157  *
158  *  - The Cleaner can use the BIN-delta to avoid fetching when processing a BIN
159  *    log entry (delta or full) and the BIN is not present in cache,
160  *
161  * To support BIB-delta-aware operations, the IN.fetchIN() and IN.getTarget()
162  * methods may return a BIN delta. IN.getTarget() will return whatever object
163  * is cached under the parent IN, and IN.fetchIN() will do a single I/O to
164  * fetch the most recently log record for the requested BIN, which may be a
165  * full BIN or a delta. Callers of these methods must be prepared to handle
166  * a BIN delta; either doing their operation directly on the delta, if
167  * possible, or mutating the delta to a full BIN by calling
168  * BIN.mutateToFullBIN().
169  */
170 public class BIN extends IN {
171 
172     private static final String BEGIN_TAG = "<bin>";
173     private static final String END_TAG = "</bin>";
174 
175     /*
176      * The set of cursors that are currently referring to this BIN.
177      * This field is set to null when there are no cursors on this BIN.
178      */
179     private TinyHashSet<CursorImpl> cursorSet;
180 
181     /*
182      * Support for logging BIN deltas. (Partial BIN logging)
183      */
184 
185     /*
186      * If this is a delta, fullBinNEntries stores the number of entries
187      * in the full version of the BIN. This is a persistent field for
188      * BIN-delta logrecs only, and for log versions >= 10.
189      */
190     private int fullBinNEntries = -1;
191 
192     /*
193      * If this is a delta, fullBinMaxEntries stores the max number of
194      * entries (capacity) in the full version of the BIN. This is a
195      * persistent field for BIN-delta logrecs only, and for log versions >= 10.
196      */
197     private int fullBinMaxEntries = -1;
198 
199     /*
200      * If "this" is a BIN-delta, bloomFilter is a bloom-filter representation
201      * of the set of keys in the clean slots of the full version of the same
202      * BIN. It is used to allow blind put operations in deltas, by answering
203      * the question whether the put key is in the full BIN or not. See the
204      * javadoc of the  TREE_BIN_DELTA_BLIND_PUTS config param for more info.
205      * This is a persistent field for BIN-delta logrecs only, and for log
206      * versions >= 10.
207      */
208     byte[] bloomFilter;
209 
210     /*
211      * Let L be the most recently written logrec for this BIN instance. If
212      * L is a full-version logrec, lastDeltaVersion is NULL; otherwise it
213      * is the lsn of L.
214      *
215      * It is used for obsolete tracking.
216      *
217      * It is set in 2 cases:
218      *
219      * (a) after "this" is created via reading a logrec L, lastDeltaVersion
220      * is set to L's lsn, if L is a BIN-delta logrec, or to NULL if L is a
221      * full-BIN logrec (this is done in IN.commonPostFetchInit(), via
222      * BIN.setLastLoggedLsn()).
223      *
224      * (b) After we write a logrec L for this BIN instance, lastDeltaVersion
225      * is set to NULL if L is a full-BIN logrec, or to L's lsn, if L is a
226      * BIN-delta logrec.
227      *
228      * Notice that this is a persistent field, but when reading a logrec L,
229      * it is set not to the value found in L, but to the lsn of L. This is why
230      * its read/write is managed by the INLogEntry class rather than the IN
231      * readFromLog/writeFromLog methods.
232      */
233     private long lastDeltaVersion = DbLsn.NULL_LSN;
234 
235     /*
236      * Disallow delta on next log. Set to true (a) when we we delete a slot
237      * from a BIN, (b) when the cleaner marks a BIN as dirty so that it will
238      * be migrated during the next checkpoint.
239      */
240     private boolean prohibitNextDelta;
241 
242     /*
243      * Caches the VLSN sequence for the LN entries in a BIN, when VLSN
244      * preservation and caching are configured.
245      *
246      * A VLSN is added to the cache when an LN is evicted from a BIN.  When the
247      * LN is resident, there is no need for caching because the LN contains the
248      * VLSN. See BIN.setTarget.  This strategy works because an LN is always
249      * cached during a read or write operation, and only evicted after that,
250      * based on eviction policies.
251      *
252      * An EMPTY_REP is used initially until the need arises to add a non-zero
253      * value.  The cache will remain empty if LNs are never evicted or version
254      * caching is not configured, which is always the case for standalone JE.
255      */
256     private INLongRep vlsnCache = INLongRep.EMPTY_REP;
257 
258     /*
259      * Stores the size of the most recently written logrec of each LN, or zero
260      * if the size is unknown.
261      *
262      * We use INLongRep in spite of the fact that sizes are int not long;
263      * INLongRep will store the minimum number of bytes. An EMPTY_REP is
264      * used initially until the need arises to add a non-zero value.
265      */
266     private INLongRep lastLoggedSizes = INLongRep.EMPTY_REP;
267 
268     /**
269      * Can be set to true by tests to prevent last logged sizes from being
270      * stored.
271      */
272     public static boolean TEST_NO_LAST_LOGGED_SIZES = false;
273 
BIN()274     public BIN() {
275     }
276 
BIN( DatabaseImpl db, byte[] identifierKey, int capacity, int level)277     public BIN(
278         DatabaseImpl db,
279         byte[] identifierKey,
280         int capacity,
281         int level) {
282 
283         super(db, identifierKey, capacity, level);
284     }
285 
286     /**
287      * For Sizeof.
288      */
BIN(@uppressWarningsR) SizeofMarker marker)289     public BIN(@SuppressWarnings("unused") SizeofMarker marker) {
290         super(marker);
291     }
292 
293     /**
294      * Create a new BIN.  Need this because we can't call newInstance()
295      * without getting a 0 for nodeId.
296      */
297     @Override
createNewInstance( byte[] identifierKey, int maxEntries, int level)298     protected IN createNewInstance(
299         byte[] identifierKey,
300         int maxEntries,
301         int level) {
302 
303         return new BIN(getDatabase(), identifierKey, maxEntries, level);
304     }
305 
306     /**
307      * Create a holder object that encapsulates information about this BIN for
308      * the INCompressor.
309      */
createReference()310     public BINReference createReference() {
311       return new BINReference(getNodeId(), getDatabase().getId(),
312                               getIdentifierKey());
313     }
314 
315     @Override
isBIN()316     public boolean isBIN() {
317         return true;
318     }
319 
320     /*
321      * Return whether the shared latch for this kind of node should be of the
322      * "always exclusive" variety.  Presently, only IN's are actually latched
323      * shared.  BINs are latched exclusive only.
324      */
325     @Override
isAlwaysLatchedExclusively()326     boolean isAlwaysLatchedExclusively() {
327         return true;
328     }
329 
330     @Override
shortClassName()331     public String shortClassName() {
332         return "BIN";
333     }
334 
335     @Override
beginTag()336     public String beginTag() {
337         return BEGIN_TAG;
338     }
339 
340     @Override
endTag()341     public String endTag() {
342         return END_TAG;
343     }
344 
setCachedVLSN(int idx, long vlsn)345     private void setCachedVLSN(int idx, long vlsn) {
346 
347         /*
348          * We do not cache the VLSN for dup DBs, because dup DBs are typically
349          * used only for indexes, and the overhead of VLSN maintenance would be
350          * wasted.  Plus, although technically VLSN preservation might apply to
351          * dup DBs, the VLSNs are not reliably available since the LNs are
352          * immediately obsolete.
353          */
354         if (databaseImpl.getSortedDuplicates() || !getEnv().getCacheVLSN()) {
355             return;
356         }
357         setCachedVLSNUnconditional(idx, vlsn);
358     }
359 
setCachedVLSNUnconditional(int idx, long vlsn)360     private void setCachedVLSNUnconditional(int idx, long vlsn) {
361         vlsnCache = vlsnCache.set(
362             idx,
363             (vlsn == VLSN.NULL_VLSN.getSequence()) ? 0 : vlsn,
364             this, getEnv().getCachedVLSNMinLength());
365     }
366 
getCachedVLSN(int idx)367     private long getCachedVLSN(int idx) {
368         final long vlsn = vlsnCache.get(idx);
369         return (vlsn == 0) ? VLSN.NULL_VLSN.getSequence() : vlsn;
370     }
371 
372     /**
373      * Returns the VLSN.  VLSN.NULL_VLSN.getSequence() (-1) is returned in two
374      * cases:
375      * 1) This is a standalone environment.
376      * 2) The VLSN is not cached (perhaps VLSN caching is not configured), and
377      *    the allowFetch param is false.
378      *
379      * WARNING: Because the vlsnCache is only updated when an LN is evicted, it
380      * is critical that getVLSN returns the VLSN for a resident LN before
381      * getting the VLSN from the cache.
382      */
getVLSN(int idx, boolean allowFetch, CacheMode cacheMode)383     public long getVLSN(int idx, boolean allowFetch, CacheMode cacheMode) {
384 
385         /* Must return the VLSN from the LN, if it is resident. */
386         LN ln = (LN) getTarget(idx);
387         if (ln != null) {
388             return ln.getVLSNSequence();
389         }
390 
391         /* Next try the vlsnCache. */
392         final long vlsn = getCachedVLSN(idx);
393         if (!VLSN.isNull(vlsn)) {
394             return vlsn;
395         }
396 
397         /* As the last resort, fetch the LN if fetching is allowed. */
398         if (!allowFetch) {
399             return vlsn;
400         }
401         ln = fetchLN(idx, cacheMode);
402         return ln.getVLSNSequence();
403     }
404 
405     /** For unit testing. */
getVLSNCache()406     public INLongRep getVLSNCache() {
407         return vlsnCache;
408     }
409 
410     /**
411      * The last logged size is never needed when the LN is counted obsolete
412      * immediately, since it is only needed for counting an LN obsolete
413      * during an update or deletion.
414      *
415      * This method may not be called until after the database is initialized,
416      * i,e., it may not be called during readFromLog.
417      */
418     @Override
isLastLoggedSizeStored()419     boolean isLastLoggedSizeStored() {
420 
421         /* Check final static first so all test code is optimized away. */
422         if (DatabaseUtil.TEST) {
423             /* Don't skew test measurements with internal DBs. */
424             if (TEST_NO_LAST_LOGGED_SIZES &&
425                 !databaseImpl.getDbType().isInternal()) {
426                 return false;
427             }
428         }
429 
430         return !databaseImpl.isLNImmediatelyObsolete();
431     }
432 
433     /**
434      * Sets last logged size if necessary.
435      *
436      * This method does not dirty the IN because the caller methods dirty it,
437      * for example, when setting the LSN, key, or node.
438      *
439      * This method is sometimes called to add the logged size for a pre log
440      * version 9 BIN, for example, during fetchTarget and preload.  This makes
441      * the logged size available for obsolete counting but does not dirty the
442      * IN, since that could cause an unexpected write of the IN being read.
443      *
444      * @param lastLoggedSize is positive if the size is known, zero if the size
445      * is unknown, or -1 if the size should not be changed because logging of
446      * the LN was deferred.
447      */
448     @Override
setLastLoggedSize(int idx, int lastLoggedSize)449     public void setLastLoggedSize(int idx, int lastLoggedSize) {
450         if ((lastLoggedSize < 0) || !isLastLoggedSizeStored()) {
451             return;
452         }
453         setLastLoggedSizeUnconditional(idx, lastLoggedSize);
454     }
455 
456     /**
457      * Sets the size without checking whether it is necessary.
458      *
459      * This method is used when reading from the log because the databaseImpl
460      * is not yet initialized and isLastLoggedSizeStored cannot be called.
461      * It is also called for efficiency reasons when it is known that storing
462      * the logged size is necessary, for example, when copying values between
463      * slots.
464      */
465     @Override
setLastLoggedSizeUnconditional(int idx, int lastLoggedSize)466     void setLastLoggedSizeUnconditional(int idx, int lastLoggedSize) {
467         /* minLength (last param) is 1 since log sizes are unpredictable. */
468         lastLoggedSizes = lastLoggedSizes.set(idx, lastLoggedSize, this, 1);
469     }
470 
471     /**
472      * @return a positive value if the size is known, or zero if unknown.
473      */
474     @Override
getLastLoggedSize(int idx)475     public int getLastLoggedSize(int idx) {
476         return (int) lastLoggedSizes.get(idx);
477     }
478 
479     /**
480      * Updates the vlsnCache when an LN target is evicted.  See vlsnCache.
481      */
482     @Override
setTarget(int idx, Node target)483     void setTarget(int idx, Node target) {
484         if (target == null) {
485             final Node oldTarget = getTarget(idx);
486             if (oldTarget instanceof LN) {
487                 setCachedVLSN(idx, ((LN) oldTarget).getVLSNSequence());
488             }
489         }
490         super.setTarget(idx, target);
491     }
492 
493     /**
494      * Overridden to account for vlsnCache and lastLoggedSizes.
495      */
496     @Override
copyEntry(int idx, IN from, int fromIdx)497     void copyEntry(int idx, IN from, int fromIdx) {
498         super.copyEntry(idx, from, fromIdx);
499         setCachedVLSNUnconditional(idx, ((BIN) from).getCachedVLSN(fromIdx));
500         setLastLoggedSizeUnconditional(idx, from.getLastLoggedSize(fromIdx));
501     }
502 
503     /**
504      * Overridden to account for vlsnCache and lastLoggedSizes.
505      */
506     @Override
copyEntries(int from, int to, int n)507     void copyEntries(int from, int to, int n) {
508         super.copyEntries(from, to, n);
509         vlsnCache = vlsnCache.copy(from, to, n);
510         lastLoggedSizes = lastLoggedSizes.copy(from, to, n);
511     }
512 
513     /**
514      * Overridden to account for vlsnCache and lastLoggedSizes.
515      */
516     @Override
clearEntry(int idx)517     void clearEntry(int idx) {
518         super.clearEntry(idx);
519         setCachedVLSNUnconditional(idx, VLSN.NULL_VLSN.getSequence());
520         setLastLoggedSizeUnconditional(idx, 0);
521     }
522 
523     /**
524      * Mark this entry as deleted, using the delete flag. Only BINS may do
525      * this.
526      *
527      * @param index indicates target entry
528      */
529     @Override
setKnownDeleted(int index)530     public void setKnownDeleted(int index) {
531 
532         /*
533          * The target is cleared to save memory, since a known deleted entry
534          * will never be fetched.
535          */
536         super.setKnownDeleted(index);
537 
538         /*
539          * We know it's an LN because we never call setKnownDeleted for
540          * an IN.
541          */
542         LN oldLN = (LN) getTarget(index);
543         updateMemorySize(oldLN, null /* newNode */);
544         if (oldLN != null) {
545             oldLN.releaseMemoryBudget();
546         }
547         setTarget(index, null);
548     }
549 
setKnownDeletedClearAll(int index)550     public void setKnownDeletedClearAll(int index) {
551         setKnownDeleted(index);
552         setLsnInternal(index, DbLsn.NULL_LSN);
553     }
554 
555     /*
556      * Cursors
557      */
558 
559     /* public for the test suite. */
getCursorSet()560     public Set<CursorImpl> getCursorSet() {
561        if (cursorSet == null) {
562            return Collections.emptySet();
563        }
564        return cursorSet.copy();
565     }
566 
567     /**
568      * Register a cursor with this BIN.  Caller has this BIN already latched.
569      * @param cursor Cursor to register.
570      */
addCursor(CursorImpl cursor)571     public void addCursor(CursorImpl cursor) {
572         assert isLatchExclusiveOwner();
573         if (cursorSet == null) {
574             cursorSet = new TinyHashSet<CursorImpl>();
575         }
576         cursorSet.add(cursor);
577     }
578 
579     /**
580      * Unregister a cursor with this bin.  Caller has this BIN already
581      * latched.
582      *
583      * @param cursor Cursor to unregister.
584      */
removeCursor(CursorImpl cursor)585     public void removeCursor(CursorImpl cursor) {
586         assert isLatchExclusiveOwner();
587         if (cursorSet == null) {
588             return;
589         }
590         cursorSet.remove(cursor);
591         if (cursorSet.size() == 0) {
592             cursorSet = null;
593         }
594     }
595 
596     /**
597      * @return the number of cursors currently referring to this BIN.
598      */
nCursors()599     public int nCursors() {
600 
601         /*
602          * Use a local var to concurrent assignment to the cursorSet field by
603          * another thread. This method is called via eviction without latching.
604          * LRU-TODO: with the new evictor this method is called with the node
605          * EX-latched. So, cleanup after the old evictor is scrapped.
606          */
607         final TinyHashSet<CursorImpl> cursors = cursorSet;
608         if (cursors == null) {
609             return 0;
610         }
611         return cursors.size();
612     }
613 
614     /**
615      * Adjust any cursors that are referring to this BIN.  This method is
616      * called during a split operation.  "this" is the BIN being split.
617      * newSibling is the new BIN into which the entries from "this" between
618      * newSiblingLow and newSiblingHigh have been copied.
619      *
620      * @param newSibling - the newSibling into which "this" has been split.
621      * @param newSiblingLow
622      * @param newSiblingHigh - the low and high entry of
623      * "this" that were moved into newSibling.
624      */
625     @Override
adjustCursors( IN newSibling, int newSiblingLow, int newSiblingHigh)626     void adjustCursors(
627         IN newSibling,
628         int newSiblingLow,
629         int newSiblingHigh)
630     {
631         assert newSibling.isLatchExclusiveOwner();
632         assert this.isLatchExclusiveOwner();
633         if (cursorSet == null) {
634             return;
635         }
636         int adjustmentDelta = (newSiblingHigh - newSiblingLow);
637         Iterator<CursorImpl> iter = cursorSet.iterator();
638 
639         while (iter.hasNext()) {
640             CursorImpl cursor = iter.next();
641             int cIdx = cursor.getIndex();
642             cursor.assertBIN(this);
643             assert newSibling instanceof BIN;
644 
645             /*
646              * There are four cases to consider for cursor adjustments,
647              * depending on (1) how the existing node gets split, and (2) where
648              * the cursor points to currently.  In cases 1 and 2, the id key of
649              * the node being split is to the right of the splitindex so the
650              * new sibling gets the node entries to the left of that index.
651              * This is indicated by "new sibling" to the left of the vertical
652              * split line below.  The right side of the node contains entries
653              * that will remain in the existing node (although they've been
654              * shifted to the left).  The vertical bar (^) indicates where the
655              * cursor currently points.
656              *
657              * case 1:
658              *
659              *   We need to set the cursor's "bin" reference to point at the
660              *   new sibling, but we don't need to adjust its index since that
661              *   continues to be correct post-split.
662              *
663              *   +=======================================+
664              *   |  new sibling        |  existing node  |
665              *   +=======================================+
666              *         cursor ^
667              *
668              * case 2:
669              *
670              *   We only need to adjust the cursor's index since it continues
671              *   to point to the current BIN post-split.
672              *
673              *   +=======================================+
674              *   |  new sibling        |  existing node  |
675              *   +=======================================+
676              *                              cursor ^
677              *
678              * case 3:
679              *
680              *   Do nothing.  The cursor continues to point at the correct BIN
681              *   and index.
682              *
683              *   +=======================================+
684              *   |  existing Node        |  new sibling  |
685              *   +=======================================+
686              *         cursor ^
687              *
688              * case 4:
689              *
690              *   Adjust the "bin" pointer to point at the new sibling BIN and
691              *   also adjust the index.
692              *
693              *   +=======================================+
694              *   |  existing Node        |  new sibling  |
695              *   +=======================================+
696              *                                 cursor ^
697              */
698             BIN ns = (BIN) newSibling;
699             if (newSiblingLow == 0) {
700                 if (cIdx < newSiblingHigh) {
701                     /* case 1 */
702                     iter.remove();
703                     cursor.setBIN(ns);
704                     ns.addCursor(cursor);
705                 } else {
706                     /* case 2 */
707                     cursor.setIndex(cIdx - adjustmentDelta);
708                 }
709             } else {
710                 if (cIdx >= newSiblingLow) {
711                     /* case 4 */
712                     cursor.setIndex(cIdx - newSiblingLow);
713                     iter.remove();
714                     cursor.setBIN(ns);
715                     ns.addCursor(cursor);
716                 }
717             }
718         }
719     }
720 
721     /**
722      * For each cursor in this BIN's cursor set, ensure that the cursor is
723      * actually referring to this BIN.
724      */
verifyCursors()725     public void verifyCursors() {
726         if (cursorSet == null) {
727             return;
728         }
729         for (CursorImpl cursor : cursorSet) {
730             cursor.assertBIN(this);
731         }
732     }
733 
734     /**
735      * Adjust cursors referring to this BIN following an insert.
736      *
737      * @param insertIndex - The index of the new entry.
738      */
739     @Override
adjustCursorsForInsert(int insertIndex)740     void adjustCursorsForInsert(int insertIndex) {
741 
742         assert this.isLatchExclusiveOwner();
743         if (cursorSet == null) {
744             return;
745         }
746 
747         for (CursorImpl cursor : cursorSet) {
748             int cIdx = cursor.getIndex();
749             if (insertIndex <= cIdx) {
750                 cursor.setIndex(cIdx + 1);
751             }
752         }
753     }
754 
755     /**
756      * Called when we know we are about to split on behalf of a key that is the
757      * minimum (leftSide) or maximum (!leftSide) of this node.  This is
758      * achieved by just forcing the split to occur either one element in from
759      * the left or the right (i.e. splitIndex is 1 or nEntries - 1).
760      */
761     @Override
splitSpecial( IN parent, int parentIndex, IN grandParent, int maxEntriesPerNode, byte[] key, boolean leftSide, CacheMode cacheMode)762     void splitSpecial(
763         IN parent,
764         int parentIndex,
765         IN grandParent,
766         int maxEntriesPerNode,
767         byte[] key,
768         boolean leftSide,
769         CacheMode cacheMode)
770         throws DatabaseException {
771 
772         int nEntries = getNEntries();
773 
774         int index = findEntry(key, true, false);
775 
776         boolean exact = (index & IN.EXACT_MATCH) != 0;
777         index &= ~IN.EXACT_MATCH;
778 
779         if (leftSide && index < 0) {
780             splitInternal(
781                 parent, parentIndex, grandParent, maxEntriesPerNode, 1,
782                 cacheMode);
783 
784         } else if (!leftSide && !exact && index == (nEntries - 1)) {
785             splitInternal(
786                 parent, parentIndex, grandParent, maxEntriesPerNode,
787                 nEntries - 1, cacheMode);
788 
789         } else {
790             split(
791                 parent, parentIndex, grandParent, maxEntriesPerNode,
792                 cacheMode);
793         }
794     }
795 
796     /**
797      * Compress this BIN by removing any entries that are deleted.  No cursors
798      * may be present on the BIN.  Caller is responsible for latching and
799      * unlatching this node.
800      *
801      * @param localTracker is used only for temporary DBs, and may be specified
802      * to consolidate multiple tracking operations.  If null, the tracking is
803      * performed immediately in this method.
804      *
805      * @return true if all deleted slots were compressed, or false if one or
806      * more slots could not be compressed because we were unable to obtain a
807      * lock.
808      */
compress(LocalUtilizationTracker localTracker)809     public boolean compress(LocalUtilizationTracker localTracker)
810         throws DatabaseException {
811 
812         /*
813          * If the environment is not yet recovered we can't rely on locks
814          * being set up to safeguard active data and so we can't compress
815          * safely.
816          */
817         if (!databaseImpl.getEnv().isValid()) {
818             return false;
819         }
820 
821         if (nCursors() > 0) {
822             throw EnvironmentFailureException.unexpectedState();
823         }
824 
825         if (isBINDelta()) {
826             throw EnvironmentFailureException.unexpectedState();
827         }
828 
829         boolean setNewIdKey = false;
830         boolean anyLocksDenied = false;
831         final DatabaseImpl db = getDatabase();
832         final EnvironmentImpl envImpl = db.getEnv();
833 
834         for (int i = 0; i < getNEntries(); i++) {
835 
836             /* KD and PD determine deletedness. */
837             if (!isEntryPendingDeleted(i) && !isEntryKnownDeleted(i)) {
838                 continue;
839             }
840 
841             /*
842              * We have to be able to lock the LN before we can compress the
843              * entry. If we can't, then skip over it.
844              *
845              * We must lock the LN even if isKnownDeleted is true, because
846              * locks protect the aborts. (Aborts may execute multiple
847              * operations, where each operation latches and unlatches. It's the
848              * LN lock that protects the integrity of the whole multi-step
849              * process.)
850              *
851              * For example, during abort, there may be cases where we have
852              * deleted and then added an LN during the same txn.  This means
853              * that to undo/abort it, we first delete the LN (leaving
854              * knownDeleted set), and then add it back into the tree.  We want
855              * to make sure the entry is in the BIN when we do the insert back
856              * in.
857              */
858             final BasicLocker lockingTxn =
859                 BasicLocker.createBasicLocker(envImpl);
860             /* Don't allow this short-lived lock to be preempted/stolen. */
861             lockingTxn.setPreemptable(false);
862             try {
863                 /* Lock LSN. Can discard a NULL_LSN entry without locking. */
864                 final long lsn = getLsn(i);
865 
866                 if (lsn != DbLsn.NULL_LSN) {
867                     final LockResult lockRet = lockingTxn.nonBlockingLock(
868                         lsn, LockType.READ, false /*jumpAheadOfWaiters*/, db);
869 
870                     if (lockRet.getLockGrant() == LockGrantType.DENIED) {
871                         anyLocksDenied = true;
872                         continue;
873                     }
874                 }
875 
876                 /* At this point, we know we can delete. */
877                 if (Key.compareKeys(getKey(i), getIdentifierKey(),
878                                     getKeyComparator()) == 0) {
879 
880                     /*
881                      * We're about to remove the entry with the idKey so the
882                      * node will need a new idkey.
883                      */
884                     setNewIdKey = true;
885                 }
886 
887                 if (db.isDeferredWriteMode()) {
888 
889                     final LN ln = (LN) getTarget(i);
890 
891                     if (ln != null &&
892                         ln.isDirty() &&
893                         !DbLsn.isTransient(lsn)) {
894 
895                         if (db.isTemporary()) {
896 
897                             /*
898                              * When a previously logged LN in a temporary DB is
899                              * dirty, we can count the LSN of the last logged
900                              * LN as obsolete without logging.  There is no
901                              * requirement for the dirty deleted LN to be
902                              * durable past recovery.  There is no danger of
903                              * the last logged LN being accessed again (after
904                              * log cleaning, for example), since temporary DBs
905                              * do not survive recovery.
906                              */
907                             if (localTracker != null) {
908                                 localTracker.countObsoleteNode(
909                                     lsn, ln.getGenericLogType(),
910                                     getLastLoggedSize(i), db);
911                             } else {
912                                 envImpl.getLogManager().countObsoleteNode(
913                                     lsn, ln.getGenericLogType(),
914                                     getLastLoggedSize(i), db,
915                                     true /*countExact*/);
916                             }
917                         } else {
918 
919                             /*
920                              * When a previously logged deferred-write LN is
921                              * dirty, we log the dirty deleted LN to make the
922                              * deletion durable.  The act of logging will also
923                              * count the last logged LSN as obsolete.
924                              */
925                             logDirtyLN(i, ln, false /*ensureDurableLsn*/,
926                                        true /*allowEviction*/);
927                         }
928                     }
929                 }
930 
931                 boolean deleteSuccess = deleteEntry(i, true);
932                 assert deleteSuccess;
933 
934                 /*
935                  * Since we're deleting the current entry, bump the current
936                  * index back down one.
937                  */
938                 i--;
939             } finally {
940                 lockingTxn.operationEnd();
941             }
942         }
943 
944         if (getNEntries() != 0 && setNewIdKey) {
945             setIdentifierKey(getKey(0));
946         }
947 
948         /* This BIN is empty and expendable. */
949         if (getNEntries() == 0) {
950             setGeneration(CacheMode.MAKE_COLD);
951         }
952 
953         return !anyLocksDenied;
954     }
955 
956     /**
957      * This method is called opporunistically at certain places where a deleted
958      * slot is observed (when the slot's PendingDeleted or KnownDeleted flag is
959      * set), to ensure that the slot is compressed away. This is an attempt to
960      * process slots that were not compressed during the mainstream record
961      * deletion process because of cursors on the BIN during compress, or a
962      * crash prior to compression.
963      *
964      * Called from BIN.afterLog(), CursorImpl.lockAndGetCurrent(),
965      * RecoverManager.redo() and RecoverManager.updo()
966      */
queueSlotDeletion()967     public void queueSlotDeletion() {
968 
969         /*
970          * If the next logrec for this BIN is going to be a BIN-delta, don't
971          * queue the BIN, because no BIN slots should be removed before logging
972          * a BIN-delta.
973          */
974         if (shouldLogDelta()) {
975             return;
976         }
977 
978         getEnv().addToCompressorQueue(this, false/*doWakeup*/);
979     }
980 
981     @Override
isCompressible()982     public boolean isCompressible() {
983         return !isBINDelta();
984     }
985 
986     /* For debugging.  Overrides method in IN. */
987     @Override
validateSubtreeBeforeDelete(int index)988     boolean validateSubtreeBeforeDelete(int index) {
989 
990         assert(!isBINDelta());
991 
992         return true;
993     }
994 
995     /**
996      * Check if this node fits the qualifications for being part of a deletable
997      * subtree. It may not have any LN children.
998      *
999      * We assume that this is only called under an assert.
1000      */
1001     @Override
isValidForDelete()1002     boolean isValidForDelete()
1003         throws DatabaseException {
1004 
1005         assert(isLatchExclusiveOwner());
1006 
1007         if (isBINDelta()) {
1008             return false;
1009         }
1010 
1011         int numValidEntries = 0;
1012 
1013         for (int i = 0; i < getNEntries(); i++) {
1014             if (!isEntryKnownDeleted(i)) {
1015                 numValidEntries++;
1016             }
1017         }
1018 
1019         if (numValidEntries > 0) { // any valid entries, not eligible
1020             return false;
1021         }
1022         if (nCursors() > 0) {      // cursors on BIN, not eligible
1023             return false;
1024         }
1025         return true;               // 0 entries, no cursors
1026     }
1027 
1028     /**
1029      * Adds vlsnCache size to computed memory size.
1030      */
1031     @Override
computeMemorySize()1032     public long computeMemorySize() {
1033 
1034         /*
1035          * These fields are null only when this method is called by the
1036          * superclass constructor, i.e., before this class constructor has
1037          * run.  Luckily the initial representations have a memory size of
1038          * zero, so we can ignore them in this case.
1039          */
1040         long size = super.computeMemorySize();
1041         if (vlsnCache != null) {
1042             size += vlsnCache.getMemorySize();
1043         }
1044 
1045         if (lastLoggedSizes != null) {
1046             size += lastLoggedSizes.getMemorySize();
1047         }
1048 
1049         if (bloomFilter != null) {
1050             size += BINDeltaBloomFilter.getMemorySize(bloomFilter);
1051         }
1052 
1053         return size;
1054     }
1055 
1056     /* Utility method used during unit testing. */
1057     @Override
printMemorySize()1058     protected long printMemorySize() {
1059         final long inTotal = super.printMemorySize();
1060         final long vlsnCacheOverhead = vlsnCache.getMemorySize();
1061         final long logSizesOverhead = lastLoggedSizes.getMemorySize();
1062         final long binTotal = inTotal + vlsnCacheOverhead + logSizesOverhead;
1063         System.out.format(
1064             "BIN: %d vlsns: %d logSizes: %d %n",
1065             binTotal, vlsnCacheOverhead, logSizesOverhead);
1066         return binTotal;
1067     }
1068 
1069     @Override
getFixedMemoryOverhead()1070     protected long getFixedMemoryOverhead() {
1071         return MemoryBudget.BIN_FIXED_OVERHEAD;
1072     }
1073 
1074     /**
1075      * Returns the treeAdmin memory in objects referenced by this BIN.
1076      * Specifically, this refers to the DbFileSummaryMap held by
1077      * MapLNs
1078      */
1079     @Override
getTreeAdminMemorySize()1080     public long getTreeAdminMemorySize() {
1081 
1082         if (getDatabase().getId().equals(DbTree.ID_DB_ID)) {
1083             long treeAdminMem = 0;
1084             for (int i = 0; i < getMaxEntries(); i++) {
1085                 Node n = getTarget(i);
1086                 if (n != null) {
1087                     MapLN mapLN = (MapLN) n;
1088                     treeAdminMem += mapLN.getDatabase().getTreeAdminMemory();
1089                 }
1090             }
1091             return treeAdminMem;
1092         } else {
1093             return 0;
1094         }
1095     }
1096 
1097     /**
1098      * Reduce memory consumption by evicting all LN targets.  If no LNs are
1099      * resident, discard the VLSN cache.  Note that evicting LNs may require
1100      * logging them, which will mark this BIN dirty.
1101      *
1102      * The BIN should be latched by the caller.
1103      *
1104      * @return a long number encoding (a) the number of evicted bytes, and
1105      * (b) whether this BIN  is evictable. (b) will be false if the BIN has
1106      * any cursors on it, or has any non-evictable children.
1107      */
1108     @Override
partialEviction()1109     public long partialEviction() {
1110 
1111         /* First try LN eviction. */
1112         final long lnEvictionBytes = evictLNs();
1113 
1114         /* Return if any were evicted or are non-evictable. */
1115         if (lnEvictionBytes != 0) {
1116             return lnEvictionBytes;
1117         }
1118 
1119         /* If no LNs were resident, try discarding the VLSNCache. */
1120         return discardVLSNCache();
1121     }
1122 
discardVLSNCache()1123     public long discardVLSNCache() {
1124 
1125         final long vlsnBytes = vlsnCache.getMemorySize();
1126 
1127         if (vlsnBytes > 0) {
1128             vlsnCache = INLongRep.EMPTY_REP;
1129             updateMemorySize(0 - vlsnBytes);
1130         }
1131 
1132         return vlsnBytes;
1133     }
1134 
1135     /**
1136      * Reduce memory consumption by evicting all LN targets. Note that this may
1137      * cause LNs to be logged, which will mark this BIN dirty.
1138      *
1139      * The BIN should be latched by the caller.
1140      *
1141      * @return a long number encoding (a) the number of evicted bytes, and
1142      * (b) whether this BIN  is evictable. (b) will be false if the BIN has
1143      * any cursors on it, or has any non-evictable children.
1144      */
evictLNs()1145     public long evictLNs()
1146         throws DatabaseException {
1147 
1148         assert isLatchExclusiveOwner() :
1149             "BIN must be latched before evicting LNs";
1150 
1151         /*
1152          * We can't evict an LN which is pointed to by a cursor, in case that
1153          * cursor has a reference to the LN object. We'll take the cheap choice
1154          * and avoid evicting any LNs if there are cursors on this BIN. We
1155          * could do a more expensive, precise check to see entries have which
1156          * cursors. This is something we might move to later.
1157          */
1158         if (nCursors() > 0) {
1159             return IN.NON_EVICTABLE_IN;
1160         }
1161 
1162         /* Try to evict each child LN. */
1163         long totalRemoved = 0;
1164         long numLNsEvicted = 0;
1165         boolean haveNonEvictableLN = false;
1166 
1167         for (int i = 0; i < getNEntries(); i++) {
1168 
1169             if (getTarget(i) == null) {
1170                 continue;
1171             }
1172 
1173             long lnRemoved = evictInternal(i);
1174             if (lnRemoved < 0) {
1175                 haveNonEvictableLN = true;
1176             } else {
1177                 totalRemoved += lnRemoved;
1178                 ++numLNsEvicted;
1179             }
1180         }
1181 
1182         /*
1183          * compactMemory() may decrease the memory footprint by mutating the
1184          * representations of the target and key sets.
1185          */
1186         if (totalRemoved > 0) {
1187             updateMemorySize(totalRemoved, 0);
1188             totalRemoved += compactMemory();
1189         }
1190 
1191         getEvictor().incNumLNsEvicted(numLNsEvicted);
1192 
1193         if (haveNonEvictableLN) {
1194             return (totalRemoved | IN.NON_EVICTABLE_IN);
1195         } else {
1196             return totalRemoved;
1197         }
1198     }
1199 
1200     /**
1201      * Evict a single LN if allowed and adjust the memory budget.
1202      */
evictLN(int index)1203     public void evictLN(int index)
1204         throws DatabaseException {
1205 
1206         final long removed = evictInternal(index);
1207 
1208         /* May decrease the memory footprint by changing the INTargetRep. */
1209         if (removed > 0) {
1210             updateMemorySize(removed, 0);
1211             compactMemory();
1212         }
1213     }
1214 
1215     /**
1216      * Evict a single LN if allowed. The amount of memory freed is returned
1217      * and must be subtracted from the memory budget by the caller.
1218      *
1219      * @return number of evicted bytes or -1 if the LN is not evictable.
1220      */
evictInternal(int index)1221     private long evictInternal(int index)
1222         throws DatabaseException {
1223 
1224         final Node n = getTarget(index);
1225 
1226         assert(n == null || n instanceof LN);
1227 
1228         if (n == null) {
1229             return 0;
1230         }
1231 
1232         final LN ln = (LN) n;
1233         final long lsn = getLsn(index);
1234 
1235         /*
1236          * Don't evict MapLNs for open databases (LN.isEvictable) [#13415].
1237          */
1238         if (ln.isEvictable(lsn)) {
1239 
1240             /*
1241              * Log target if necessary. Do not allow eviction since we evict
1242              * here and that would cause double-counting of the memory freed.
1243              */
1244             logDirtyLN(index, ln, true /*ensureDurableLsn*/,
1245                        false /*allowEviction*/);
1246 
1247             /* Clear target. */
1248             setTarget(index, null);
1249             ln.releaseMemoryBudget();
1250 
1251             return n.getMemorySizeIncludedByParent();
1252         }
1253 
1254         return -1;
1255     }
1256 
1257     /**
1258      * @see IN#logDirtyChildren
1259      */
1260     @Override
logDirtyChildren()1261     public void logDirtyChildren()
1262         throws DatabaseException {
1263 
1264         /* Look for targets that are dirty. */
1265         for (int i = 0; i < getNEntries(); i++) {
1266             Node node = getTarget(i);
1267             if (node != null) {
1268                 logDirtyLN(i, (LN) node, true /*ensureDurableLsn*/,
1269                            true /*allowEviction*/);
1270             }
1271         }
1272     }
1273 
logDirtyLNs()1274     private void logDirtyLNs()
1275         throws DatabaseException {
1276 
1277         for (int i = 0; i < getNEntries(); i++) {
1278             final Node node = getTarget(i);
1279             if ((node != null) && (node instanceof LN)) {
1280                 logDirtyLN(i, (LN) node, true /*ensureDurableLsn*/,
1281                            true /*allowEviction*/);
1282             }
1283         }
1284     }
1285 
1286     /**
1287      * Logs the LN at the given index if it is dirty.
1288      */
logDirtyLN( int index, LN ln, boolean ensureDurableLsn, boolean allowEviction)1289     private void logDirtyLN(
1290         int index,
1291         LN ln,
1292         boolean ensureDurableLsn,
1293         boolean allowEviction)
1294         throws DatabaseException {
1295 
1296         final long oldLsn = getLsn(index);
1297         final boolean force = ensureDurableLsn &&
1298                               getDatabase().isDeferredWriteMode() &&
1299                               DbLsn.isTransientOrNull(oldLsn);
1300         if (force || ln.isDirty()) {
1301             final DatabaseImpl dbImpl = getDatabase();
1302             final EnvironmentImpl envImpl = dbImpl.getEnv();
1303 
1304             /* Only deferred write databases should have dirty LNs. */
1305             assert dbImpl.isDeferredWriteMode();
1306 
1307             /*
1308              * Do not lock while logging.  Locking of new LSN is performed by
1309              * lockAfterLsnChange. This should never be part of the replication
1310              * stream, because this is a deferred-write DB.
1311              */
1312             final LN.LogResult logResult = ln.log(
1313                 envImpl, dbImpl, getKey(index), oldLsn,
1314                 getLastLoggedSize(index), null /*locker*/,
1315                 null /*writeLockInfo*/, true /*backgroundIO*/,
1316                 ReplicationContext.NO_REPLICATE);
1317 
1318             updateEntry(index, logResult.newLsn, logResult.newSize);
1319             /* Lock new LSN on behalf of existing lockers. */
1320             CursorImpl.lockAfterLsnChange(
1321                 dbImpl, oldLsn, logResult.newLsn, null /*excludeLocker*/);
1322 
1323             /*
1324              * It is desirable to evict a non-dirty LN in a duplicates DB
1325              * because it will never be fetched again.
1326              */
1327             if (allowEviction && databaseImpl.getSortedDuplicates()) {
1328                 evictLN(index);
1329             }
1330         }
1331     }
1332 
1333     /*
1334      * Logging support
1335      */
1336 
1337     /**
1338      * @see IN#getLogType
1339      */
1340     @Override
getLogType()1341     public LogEntryType getLogType() {
1342         return LogEntryType.LOG_BIN;
1343     }
1344 
1345     /**
1346      * Overrides the IN method to account for deltas.
1347      *
1348      * It is called from IN.commonPostFetchInit(). If the logrec we have just
1349      * read was a BINDelta, this.lastFullVersion has already been set (in
1350      * BINDeltaLogEntry.readMainItem() or in OldBinDelta.reconstituteBIN()).
1351      * So, this method will set this.lastDeltaVarsion. Otherwise, if the
1352      * logrec was a full BIN, this.lastFullVersion has not been set yet,
1353      * and it will be set here. In this case, this.lastDeltaVarsion will
1354      * remain NULL.
1355      */
1356     @Override
setLastLoggedLsn(long lsn)1357     public void setLastLoggedLsn(long lsn) {
1358         if (getLastFullVersion() == DbLsn.NULL_LSN) {
1359             setLastFullLsn(lsn);
1360         } else {
1361             lastDeltaVersion = lsn;
1362         }
1363     }
1364 
1365     /**
1366      * Overrides the IN method to account for deltas.
1367      */
1368     @Override
getLastLoggedVersion()1369     public long getLastLoggedVersion() {
1370         return (lastDeltaVersion != DbLsn.NULL_LSN) ?
1371                lastDeltaVersion :
1372                getLastFullVersion();
1373     }
1374 
1375     /**
1376      * Overrides the IN method to account for deltas.
1377      * Public for unit testing.
1378      */
1379     @Override
getLastDeltaVersion()1380     public long getLastDeltaVersion() {
1381         return lastDeltaVersion;
1382     }
1383 
1384     /**
1385      * Overrides the IN method to account for deltas.
1386      */
1387     @Override
beforeLog( LogManager logManager, INLogItem item, INLogContext context)1388     public void beforeLog(
1389         LogManager logManager,
1390         INLogItem item,
1391         INLogContext context) {
1392 
1393         final DatabaseImpl dbImpl = getDatabase();
1394         final EnvironmentImpl envImpl = dbImpl.getEnv();
1395 
1396         /* Determine whether we log a delta rather than full version. */
1397         item.isDelta =
1398             isBINDelta() || (context.allowDeltas && shouldLogDelta());
1399 
1400         /* Be sure that we didn't illegally mutate to a delta. */
1401         assert (!(item.isDelta && isDeltaProhibited()));
1402 
1403         /* Perform lazy compression when logging a full BIN. */
1404         if (context.allowCompress && !item.isDelta) {
1405             envImpl.lazyCompress(this);
1406         }
1407 
1408         /*
1409          * Write dirty LNs in deferred-write databases.  This is done after
1410          * compression to reduce total logging, at least for temp DBs.
1411          */
1412         if (dbImpl.isDeferredWriteMode()) {
1413             logDirtyLNs();
1414         }
1415 
1416         /*
1417          * In the Btree, the parent IN slot contains the latest full version
1418          * LSN or, if a delta was last logged, the delta LSN.  Somewhat
1419          * redundantly, the transient IN.lastFullVersion and
1420          * BIN.lastDeltaVersion fields contain the last logged full version and
1421          * delta version LSNs.
1422          *
1423          * For delta logging:
1424          *  + Count lastDeltaVersion obsolete, if non-null.
1425          *  + Set lastDeltaVersion to newly logged LSN.
1426          *  + Leave lastFullVersion unchanged.
1427          *
1428          * For full version logging:
1429          *  + Count lastFullVersion and lastDeltaVersion obsolete, if non-null.
1430          *  + Set lastFullVersion to newly logged LSN.
1431          *  + Set lastDeltaVersion to null.
1432          */
1433         beforeLogCommon(
1434             item, context,
1435             item.isDelta ? DbLsn.NULL_LSN : getLastFullVersion(),
1436             lastDeltaVersion);
1437 
1438         item.entry = item.isDelta ?
1439             (new BINDeltaLogEntry(this)) :
1440             (new INLogEntry<BIN>(this));
1441     }
1442 
1443     /**
1444      * Overrides the IN method to account for deltas.  See beforeLog.
1445      */
1446     @Override
afterLog( LogManager logManager, INLogItem item, INLogContext context)1447     public void afterLog(
1448         LogManager logManager,
1449         INLogItem item,
1450         INLogContext context) {
1451 
1452         afterLogCommon(logManager, item, context,
1453                        item.isDelta ? DbLsn.NULL_LSN : getLastFullVersion(),
1454                        lastDeltaVersion);
1455 
1456         if (item.isDelta) {
1457             lastDeltaVersion = item.newLsn;
1458         } else {
1459             setLastFullLsn(item.newLsn);
1460             lastDeltaVersion = DbLsn.NULL_LSN;
1461 
1462             /*
1463              * Before logging a full version BIN we attempted to compress it.
1464              * If we could not compress a slot because of the presence of
1465              * cursors, we must re-queue (or at least re-dirty) the BIN so
1466              * that we will compress it later.  The BIN is set non-dirty by
1467              * afterLogCommon above.
1468              */
1469             for (int i = 0; i < getNEntries(); i += 1) {
1470                 if (isEntryKnownDeleted(i) || isEntryPendingDeleted(i)) {
1471                     queueSlotDeletion();
1472                     break;
1473                 }
1474             }
1475         }
1476 
1477         prohibitNextDelta = false;
1478     }
1479 
1480     /*
1481      * BIN delta support
1482      */
1483 
getFullBinNEntries()1484     public int getFullBinNEntries() {
1485         if (isBINDelta()) {
1486             return fullBinNEntries;
1487         } else {
1488             return nEntries;
1489         }
1490     }
1491 
setFullBinNEntries(int n)1492     public void setFullBinNEntries(int n) {
1493         assert(isBINDelta(false));
1494         fullBinNEntries = n;
1495     }
1496 
incFullBinNEntries()1497     void incFullBinNEntries() {
1498         assert(isBINDelta());
1499         ++fullBinNEntries;
1500     }
1501 
getFullBinMaxEntries()1502     public int getFullBinMaxEntries() {
1503         if (isBINDelta()) {
1504             return fullBinMaxEntries;
1505         } else {
1506             return getMaxEntries();
1507         }
1508     }
1509 
setFullBinMaxEntries(int n)1510     public void setFullBinMaxEntries(int n) {
1511         assert(isBINDelta(false));
1512         fullBinMaxEntries = n;
1513     }
1514 
getDeltaCapacity(int numDirtyEntries)1515     int getDeltaCapacity(int numDirtyEntries) {
1516 
1517         boolean blindOps =
1518             (getEnv().allowBlindOps() || getEnv().allowBlindPuts());
1519 
1520         if (isBINDelta()) {
1521             return getMaxEntries();
1522         }
1523 
1524         if (blindOps) {
1525             return (getNEntries() * databaseImpl.getBinDeltaPercent()) / 100;
1526         }
1527 
1528         return numDirtyEntries;
1529     }
1530 
allowBlindPuts()1531     boolean allowBlindPuts() {
1532         boolean res = getEnv().allowBlindPuts();
1533 
1534         if (res) {
1535             res = res && databaseImpl.hasBtreeBinaryEqualityComparator();
1536             res = res && databaseImpl.hasDuplicateBinaryEqualityComparator();
1537         }
1538 
1539         return res;
1540     }
1541 
1542     /*
1543      * It is called in 3 cases listed below. In all cases, if blind puts are
1544      * not allowed, the method returns null.
1545      *
1546      * 1. A full BIN is being mutated to an in-memory delta. A new filter will
1547      *    be created here and will be stored in the delta by the caller.
1548      * 2. A full BIN is being logged as a delta. A new filter will be created
1549      *    here and will be written in the delta logrec by the caller.
1550      * 3. An in-memory BIN-delta is being logged. If the delta has a bloom
1551      *    filter already, that filter will be returned and written into the
1552      *    logrec. The delta may not have a filter already because it was read
1553      *    from an older-version logfile; in this case we return null.
1554      */
createBloomFilter()1555     byte[] createBloomFilter() {
1556 
1557         assert(bloomFilter == null || isBINDelta());
1558 
1559         boolean blindPuts = allowBlindPuts();
1560 
1561         if (!blindPuts) {
1562             assert(bloomFilter == null);
1563             return null;
1564         }
1565 
1566         if (bloomFilter != null) {
1567             /*
1568              * We are here because we are logging a delta that has a filter
1569              * already. We just need to log the existing filter.
1570              */
1571             return bloomFilter;
1572         }
1573 
1574         if (isBINDelta()) {
1575             return null;
1576         }
1577 
1578         int numKeys = getNEntries() - getNDeltas();
1579         int nbytes = BINDeltaBloomFilter.getByteSize(numKeys);
1580 
1581         byte[] bf = new byte[nbytes];
1582 
1583         BINDeltaBloomFilter.HashContext hc =
1584             new BINDeltaBloomFilter.HashContext();
1585 
1586         if (keyPrefix != null) {
1587             hc.hashKeyPrefix(keyPrefix);
1588         }
1589 
1590         for (int i = 0; i < getNEntries(); ++i) {
1591 
1592             if (isDirty(i)) {
1593                 continue;
1594             }
1595 
1596             byte[] suffix = entryKeyVals.get(i);
1597             if (suffix == null) {
1598                 suffix = Key.EMPTY_KEY;
1599             }
1600 
1601             BINDeltaBloomFilter.add(bf, suffix, hc);
1602         }
1603 
1604         return bf;
1605     }
1606 
mayHaveKeyInFullBin(byte[] key)1607     public boolean mayHaveKeyInFullBin(byte[] key) {
1608 
1609         assert(isBINDelta());
1610 
1611         if (bloomFilter == null) {
1612             return true;
1613         }
1614 
1615         return BINDeltaBloomFilter.contains(bloomFilter, key);
1616     }
1617 
1618     /*
1619      * Used in IN.getLogSize() only
1620      */
getBloomFilterLogSize()1621     int getBloomFilterLogSize() {
1622 
1623         if (!allowBlindPuts()) {
1624             return 0;
1625         }
1626 
1627         if (isBINDelta()) {
1628             if (bloomFilter != null) {
1629                 return BINDeltaBloomFilter.getLogSize(bloomFilter);
1630             }
1631 
1632             return 0;
1633 
1634         } else {
1635             assert(bloomFilter == null);
1636             int numKeys = getNEntries() - getNDeltas();
1637             return BINDeltaBloomFilter.getLogSize(numKeys);
1638         }
1639     }
1640 
1641     /**
1642      * If cleaned or compressed, must log full version.
1643      */
1644     @Override
setProhibitNextDelta()1645     public void setProhibitNextDelta() {
1646         prohibitNextDelta = true;
1647     }
1648 
isDeltaProhibited()1649     private boolean isDeltaProhibited() {
1650         final DatabaseImpl dbImpl = getDatabase();
1651         return prohibitNextDelta ||
1652             dbImpl.isDeferredWriteMode() ||
1653             (getLastFullVersion() == DbLsn.NULL_LSN);
1654     }
1655 
1656     /**
1657      * Decide whether to log a full or partial BIN, depending on the ratio of
1658      * the delta size to full BIN size, and the number of deltas that have been
1659      * logged since the last full.
1660      *
1661      * Other factors are taken into account:
1662      * + a delta cannot be logged if the BIN has never been logged before
1663      * + deltas are not currently supported for DeferredWrite databases
1664      * + this particular delta may have been prohibited because the cleaner is
1665      *   migrating the BIN or a slot has been deleted
1666      * + if there are no dirty slots, we might as well log a full BIN
1667      *
1668      * @return true if we should log the deltas of this BIN
1669      */
shouldLogDelta()1670     public boolean shouldLogDelta() {
1671 
1672         if (isBINDelta()) {
1673             assert(!isDeltaProhibited());
1674             return true;
1675         }
1676 
1677         /* Cheapest checks first. */
1678         if (isDeltaProhibited()) {
1679             return false;
1680         }
1681 
1682         /* Must count deltas to check further. */
1683         final int numDeltas = getNDeltas();
1684 
1685         /* A delta with zero items is not valid. */
1686         if (numDeltas <= 0) {
1687             return false;
1688         }
1689 
1690         /* Check the configured BinDeltaPercent. */
1691         final int deltaLimit =
1692             (getNEntries() * databaseImpl.getBinDeltaPercent()) / 100;
1693         if (numDeltas > deltaLimit) {
1694             return false;
1695         }
1696 
1697         return true;
1698     }
1699 
1700     /**
1701      * Returns whether mutateToBINDelta can be called.
1702      */
canMutateToBINDelta()1703     public boolean canMutateToBINDelta() {
1704         return (!isBINDelta() &&
1705                 shouldLogDelta() &&
1706                 (nCursors() == 0));
1707     }
1708 
1709     /**
1710      * Mutate to a delta (discard non-dirty entries and resize arrays).
1711      *
1712      * This method must be called with this node latched exclusively, and
1713      * canMutateToBINDelta must return true.
1714      *
1715      * @return the number of bytes freed.
1716      */
mutateToBINDelta()1717     public long mutateToBINDelta() {
1718 
1719         assert isLatchExclusiveOwner();
1720         assert canMutateToBINDelta();
1721 
1722         if (getInListResident()) {
1723             getEnv().getInMemoryINs().updateBINDeltaStat(1);
1724         }
1725 
1726         final long oldSize = getInMemorySize();
1727         final int nDeltas = getNDeltas();
1728         final int capacity = getDeltaCapacity(nDeltas);
1729 
1730         bloomFilter = createBloomFilter();
1731 
1732         initBINDelta(this, nDeltas, capacity, true);
1733 
1734         return oldSize - getInMemorySize();
1735     }
1736 
1737     /**
1738      * This method assumes that "this" BIN is a delta and creates a clone of
1739      * it. It is currently used by the DiskOrderedScanner only. The method
1740      * does not clone the targets array.
1741      */
cloneBINDelta()1742     public BIN cloneBINDelta() {
1743 
1744         assert(isBINDelta());
1745 
1746         final BIN bin = new BIN(
1747             databaseImpl, getIdentifierKey(), 0/*capacity*/, getLevel());
1748 
1749         bin.nodeId = nodeId;
1750         bin.flags = flags;
1751         bin.lastFullVersion = lastFullVersion;
1752 
1753         final int nDeltas = getNDeltas();
1754         initBINDelta(bin, nDeltas, nDeltas, false);
1755         return bin;
1756     }
1757 
1758     /**
1759      * Replaces the contents of destBIN with the deltas in this BIN.
1760      */
initBINDelta( final BIN destBIN, final int nDeltas, final int capacity, final boolean copyTargets)1761     private void initBINDelta(
1762         final BIN destBIN,
1763         final int nDeltas,
1764         final int capacity,
1765         final boolean copyTargets) {
1766 
1767         long[] longLSNs = null;
1768         byte[] compactLSNs = null;
1769 
1770         if (entryLsnLongArray == null) {
1771             compactLSNs = new byte[nDeltas * 4];
1772         } else {
1773             longLSNs = new long[nDeltas];
1774         }
1775 
1776         final long[] vlsns = new long[nDeltas];
1777         final int[] sizes= new int[nDeltas];
1778         final byte[][] keys = new byte[nDeltas][];
1779         final byte[] states = new byte[nDeltas];
1780         Node[] targets = null;
1781 
1782         if (copyTargets) {
1783             targets = new Node[nDeltas];
1784         }
1785 
1786         int j = 0;
1787         for (int i = 0; i < getNEntries(); i += 1) {
1788 
1789             if (!isDirty(i)) {
1790                 continue;
1791             }
1792 
1793             if (entryLsnLongArray == null) {
1794                 int doff = j << 2;
1795                 int soff = i << 2;
1796                 compactLSNs[doff] = entryLsnByteArray[soff];
1797                 compactLSNs[doff+1] = entryLsnByteArray[soff+1];
1798                 compactLSNs[doff+2] = entryLsnByteArray[soff+2];
1799                 compactLSNs[doff+3] = entryLsnByteArray[soff+3];
1800             } else {
1801                 longLSNs[j] = getLsn(i);
1802             }
1803 
1804             keys[j] = entryKeyVals.get(i);
1805             states[j] = getState(i);
1806 
1807             if (targets != null) {
1808                 targets[j] = getTarget(i);
1809             }
1810 
1811             vlsns[j] = getCachedVLSN(i);
1812             sizes[j] = getLastLoggedSize(i);
1813 
1814             j += 1;
1815         }
1816 
1817         /*
1818          * Do this before resetContent() because destBIN and "this" may be the
1819          * same java obj
1820          */
1821         destBIN.fullBinNEntries = getFullBinNEntries();
1822         destBIN.fullBinMaxEntries = getFullBinMaxEntries();
1823 
1824         destBIN.resetContent(
1825             capacity, nDeltas,
1826             baseFileNumber, compactLSNs, longLSNs,
1827             states, keyPrefix, keys, targets,
1828             sizes, vlsns);
1829 
1830         destBIN.setBINDelta(true);
1831 
1832         destBIN.compactMemory();
1833     }
1834 
1835     /**
1836      * Replaces the contents of this BIN with the given contents.
1837      * Used in mutating a full BIN to a BIN-delta or for creating
1838      * a new BIN delta with the given content.
1839      */
resetContent( final int capacity, final int newNEntries, final long baseFileNumber, final byte[] compactLSNs, final long[] longLSNs, final byte[] states, final byte[] keyPrefix, final byte[][] keys, final Node[] targets, final int[] loggedSizes, final long[] vlsns)1840     private void resetContent(
1841         final int capacity,
1842         final int newNEntries,
1843         final long baseFileNumber,
1844         final byte[] compactLSNs,
1845         final long[] longLSNs,
1846         final byte[] states,
1847         final byte[] keyPrefix,
1848         final byte[][] keys,
1849         final Node[] targets,
1850         final int[] loggedSizes,
1851         final long[] vlsns) {
1852 
1853         updateRepCacheStats(false);
1854 
1855         nEntries = newNEntries;
1856 
1857         this.baseFileNumber = baseFileNumber;
1858         if (longLSNs == null) {
1859             entryLsnByteArray = new byte[capacity << 2];
1860             entryLsnLongArray = null;
1861         } else {
1862             entryLsnByteArray = null;
1863             entryLsnLongArray = new long[capacity];
1864         }
1865 
1866         this.keyPrefix = keyPrefix;
1867         entryKeyVals = new INKeyRep.Default(capacity);
1868 
1869         entryTargets = INTargetRep.NONE;
1870 
1871         updateRepCacheStats(true);
1872 
1873         entryStates = new byte[capacity];
1874 
1875         for (int i = 0; i < newNEntries; i += 1) {
1876 
1877             if (longLSNs == null) {
1878                 int off = i << 2;
1879                 entryLsnByteArray[off] = compactLSNs[off];
1880                 entryLsnByteArray[off+1] = compactLSNs[off+1];
1881                 entryLsnByteArray[off+2] = compactLSNs[off+2];
1882                 entryLsnByteArray[off+3] = compactLSNs[off+3];
1883             } else {
1884                 entryLsnLongArray[i] = longLSNs[i];
1885             }
1886 
1887             entryKeyVals = entryKeyVals.set(i, keys[i], this);
1888             entryStates[i] = states[i];
1889 
1890             if (targets != null) {
1891                 entryTargets = entryTargets.set(i, targets[i], this);
1892             }
1893 
1894             setLastLoggedSizeUnconditional(i, loggedSizes[i]);
1895             setCachedVLSNUnconditional(i, vlsns[i]);
1896         }
1897 
1898         updateMemorySize(inMemorySize, computeMemorySize());
1899     }
1900 
1901     /**
1902      * Fetch the full BIN and apply the deltas in this BIN to it, then use the
1903      * merged result to replace the contents of this BIN.
1904      *
1905      * This method must be called with this node latched exclusively. If 'this'
1906      * is not a delta, this method does nothing.
1907      */
1908     @Override
mutateToFullBIN()1909     public void mutateToFullBIN() {
1910 
1911         if (!isBINDelta()) {
1912             return;
1913         }
1914 
1915         final BIN fullBIN = fetchFullBIN(databaseImpl);
1916 
1917         mutateToFullBIN(fullBIN);
1918 
1919         Evictor e = getEvictor();
1920         if (e == null) {
1921             return;
1922         }
1923         e.incFullBINMissStats();
1924     }
1925 
1926     /**
1927      * Mutates this delta to a full BIN by applying this delta to the fullBIN
1928      * param and then replacing this BIN's contents with it.
1929      *
1930      * This method must be called with this node latched exclusively. 'this'
1931      * must be a delta.
1932      *
1933      * The method is public because it is called directly from FileProcessor
1934      * when it finds a BIN that must be migrated. In that case, fullBIN is a
1935      * full BIN that has just been read from the log, and it is not part of
1936      * the memory-resident tree.
1937      */
mutateToFullBIN(BIN fullBIN)1938     public void mutateToFullBIN(BIN fullBIN) {
1939 
1940         assert isLatchExclusiveOwner();
1941         assert isBINDelta() : this;
1942 
1943         byte[][] keys = null;
1944         int i = 0;
1945 
1946         if (cursorSet != null) {
1947             keys = new byte[cursorSet.size()][];
1948 
1949             for (CursorImpl cursor : cursorSet) {
1950                 final int index = cursor.getIndex();
1951                 if (index >= 0 && index < getNEntries()) {
1952                     keys[i] = cursor.getCurrentKey(true/*isLatched*/);
1953                 }
1954                 ++i;
1955             }
1956         }
1957 
1958         reconstituteBIN(databaseImpl, fullBIN);
1959 
1960         resetContent(fullBIN);
1961 
1962         setBINDelta(false);
1963 
1964         compactMemory();
1965 
1966         if (cursorSet != null) {
1967 
1968             i = 0;
1969             for (CursorImpl cursor : cursorSet) {
1970 
1971                 if (keys[i] != null) {
1972                     /*
1973                      * Do not ask for an exact match from findEntry because if
1974                      * the cursor was on a KD slot, findEntry would return -1.
1975                      */
1976                     int index = findEntry(keys[i], true, false);
1977 
1978                     if ((index & IN.EXACT_MATCH) == 0) {
1979                         throw EnvironmentFailureException.unexpectedState(
1980                             getEnv(), "Failed to reposition cursor during " +
1981                             "mutation of a BIN delta to a full BIN");
1982                     }
1983 
1984                     index &= ~IN.EXACT_MATCH;
1985 
1986                     assert(index >= 0 && index < getNEntries());
1987                     cursor.setIndex(index);
1988                 }
1989                 ++i;
1990             }
1991         }
1992 
1993         if (getInListResident()) {
1994             getEnv().getInMemoryINs().updateBINDeltaStat(-1);
1995         }
1996     }
1997 
fetchFullBIN(DatabaseImpl dbImpl)1998     private BIN fetchFullBIN(DatabaseImpl dbImpl) {
1999         final EnvironmentImpl envImpl = dbImpl.getEnv();
2000         final long lsn = getLastFullVersion();
2001 
2002         try {
2003             return (BIN)
2004                 envImpl.getLogManager().getEntryHandleFileNotFound(lsn);
2005 
2006         } catch (EnvironmentFailureException e) {
2007             e.addErrorMessage(makeFetchErrorMsg(null, this, lsn, (byte) 0));
2008             throw e;
2009 
2010         } catch (RuntimeException e) {
2011             throw new EnvironmentFailureException(
2012                 envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
2013                 makeFetchErrorMsg(e.toString(), this, lsn, (byte) 0), e);
2014         }
2015     }
2016 
2017     /**
2018      * Replaces the contents of this BIN with the contents of the given BIN,
2019      * including lsns, states, keys and targets.  Key prefixing and key/target
2020      * representations will also be those of the given BIN.
2021      */
resetContent(final BIN other)2022     private void resetContent(final BIN other) {
2023 
2024         updateRepCacheStats(false);
2025 
2026         nEntries = other.nEntries;
2027 
2028         baseFileNumber = other.baseFileNumber;
2029         entryLsnByteArray = other.entryLsnByteArray;
2030         entryLsnLongArray = other.entryLsnLongArray;
2031 
2032         keyPrefix = other.keyPrefix;
2033         entryKeyVals = other.entryKeyVals;
2034 
2035         entryTargets = other.entryTargets;
2036 
2037         entryStates = other.entryStates;
2038 
2039         lastLoggedSizes = other.lastLoggedSizes;
2040 
2041         vlsnCache = other.vlsnCache;
2042 
2043         bloomFilter = null;
2044 
2045         updateMemorySize(inMemorySize, computeMemorySize());
2046 
2047         updateRepCacheStats(true);
2048     }
2049 
2050     /**
2051      * Create a BIN by fetching the full version and applying the deltas in
2052      * this BIN to it.  The BIN is not added to the INList.
2053      *
2054      * @return the full BIN with deltas applied.
2055      */
reconstituteBIN(DatabaseImpl dbImpl)2056     public BIN reconstituteBIN(DatabaseImpl dbImpl) {
2057         final BIN fullBIN = fetchFullBIN(dbImpl);
2058         reconstituteBIN(dbImpl, fullBIN);
2059         return fullBIN;
2060     }
2061 
2062     /**
2063      * Given a full version BIN, apply the deltas in this BIN. The fullBIN will
2064      * then be complete, but its memory will not be compacted.
2065      */
reconstituteBIN(DatabaseImpl dbImpl, BIN fullBIN)2066     public void reconstituteBIN(DatabaseImpl dbImpl, BIN fullBIN) {
2067 
2068         fullBIN.setDatabase(dbImpl);
2069         fullBIN.latch(CacheMode.UNCHANGED);
2070 
2071         try {
2072 
2073             /*
2074              * The BIN's lastFullLsn is set here, while its lastLoggedLsn is
2075              * set by postFetchInit or postRecoveryInit.
2076              */
2077             fullBIN.setLastFullLsn(getLastFullVersion());
2078 
2079             /* Process each delta. */
2080             for (int i = 0; i < getNEntries(); i++) {
2081                 assert isDirty(i) : this;
2082                 fullBIN.applyDelta(
2083                     getKey(i), getLsn(i), getState(i), getLastLoggedSize(i),
2084                     getCachedVLSN(i), getTarget(i));
2085             }
2086 
2087             /*
2088              * The applied deltas will leave some slots dirty, which is
2089              * necessary as a record of changes that will be included in the
2090              * next delta.  However, the BIN itself should not be dirty,
2091              * because this delta is a persistent record of those changes.
2092              */
2093             fullBIN.setDirty(false);
2094         } finally {
2095             fullBIN.releaseLatch();
2096         }
2097     }
2098 
2099     /**
2100      * Apply (insert, update) a given delta slot in this full BIN.
2101      */
applyDelta( final byte[] key, final long lsn, final byte state, final int lastLoggedSize, final long vlsn, final Node child)2102     void applyDelta(
2103         final byte[] key,
2104         final long lsn,
2105         final byte state,
2106         final int lastLoggedSize,
2107         final long vlsn,
2108         final Node child) {
2109 
2110         /*
2111          * The delta is the authoritative version of the entry. In all cases,
2112          * it should supersede the entry in the full BIN.  This is true even if
2113          * the BIN Delta's entry is knownDeleted or if the full BIN's version
2114          * is knownDeleted. Therefore we use the flavor of findEntry that will
2115          * return a knownDeleted entry if the entry key matches (i.e. true,
2116          * false) but still indicates exact matches with the return index.
2117          * findEntry only returns deleted entries if third arg is false, but we
2118          * still need to know if it's an exact match or not so indicateExact is
2119          * true.
2120          */
2121         int foundIndex = findEntry(key, true, false);
2122 
2123         if (foundIndex >= 0 && (foundIndex & IN.EXACT_MATCH) != 0) {
2124 
2125             foundIndex &= ~IN.EXACT_MATCH;
2126 
2127             /*
2128              * The entry exists in the full version, update it with the delta
2129              * info.  Note that all state flags should be restored [#22848].
2130              */
2131             updateEntry(foundIndex, child, lsn, lastLoggedSize, state);
2132 
2133         } else {
2134 
2135             /*
2136              * The entry doesn't exist, insert the delta entry. We insert the
2137              * entry even when it is known or pending deleted, since the
2138              * deleted (and dirty) entry will be needed to log the next delta.
2139              * [#20737]
2140              */
2141             final int result = insertEntry1(
2142                 child, key, lsn, state, false/*blindInsertion*/);
2143 
2144             assert (result & INSERT_SUCCESS) != 0;
2145             foundIndex = result & ~IN.INSERT_SUCCESS;
2146 
2147             setLastLoggedSizeUnconditional(foundIndex, lastLoggedSize);
2148         }
2149 
2150         setCachedVLSNUnconditional(foundIndex, vlsn);
2151     }
2152 
2153     /*
2154      * DbStat support.
2155      */
2156     @Override
accumulateStats(TreeWalkerStatsAccumulator acc)2157     void accumulateStats(TreeWalkerStatsAccumulator acc) {
2158         acc.processBIN(this, Long.valueOf(getNodeId()), getLevel());
2159     }
2160 }
2161