1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2002, 2014 Oracle and/or its affiliates.  All rights reserved.
5  *
6  */
7 
8 package com.sleepycat.je.evictor;
9 
10 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_DELTA_BLIND_OPS;
11 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_DELTA_FETCH_MISS;
12 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_FETCH;
13 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_FETCH_MISS;
14 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_FETCH_MISS_RATIO;
15 import static com.sleepycat.je.evictor.EvictorStatDefinition.FULL_BIN_MISS;
16 import static com.sleepycat.je.evictor.EvictorStatDefinition.CACHED_IN_COMPACT_KEY;
17 import static com.sleepycat.je.evictor.EvictorStatDefinition.CACHED_IN_NO_TARGET;
18 import static com.sleepycat.je.evictor.EvictorStatDefinition.CACHED_IN_SPARSE_TARGET;
19 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_MUTATED;
20 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_STRIPPED;
21 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_EVICTION_RUNS;
22 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_EVICTED;
23 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_LNS_EVICTED;
24 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_TARGETED;
25 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_ROOT_NODES_EVICTED;
26 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_PUT_BACK;
27 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_SKIPPED;
28 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_MOVED_TO_DIRTY_LRU;
29 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_SHARED_CACHE_ENVS;
30 import static com.sleepycat.je.evictor.EvictorStatDefinition.GROUP_DESC;
31 import static com.sleepycat.je.evictor.EvictorStatDefinition.GROUP_NAME;
32 import static com.sleepycat.je.evictor.EvictorStatDefinition.LN_FETCH;
33 import static com.sleepycat.je.evictor.EvictorStatDefinition.LN_FETCH_MISS;
34 import static com.sleepycat.je.evictor.EvictorStatDefinition.NUM_BYTES_EVICTED_DESC;
35 import static com.sleepycat.je.evictor.EvictorStatDefinition.UPPER_IN_FETCH;
36 import static com.sleepycat.je.evictor.EvictorStatDefinition.UPPER_IN_FETCH_MISS;
37 import static com.sleepycat.je.evictor.EvictorStatDefinition.THREAD_UNAVAILABLE;
38 
39 
40 import java.lang.AssertionError;
41 import java.util.EnumSet;
42 import java.util.HashMap;
43 import java.util.Map;
44 import java.util.ArrayList;
45 import java.util.List;
46 import java.util.logging.Level;
47 import java.util.concurrent.ArrayBlockingQueue;
48 import java.util.concurrent.ConcurrentHashMap;
49 import java.util.concurrent.RejectedExecutionHandler;
50 import java.util.concurrent.ThreadPoolExecutor;
51 import java.util.concurrent.TimeUnit;
52 import java.util.concurrent.atomic.AtomicBoolean;
53 import java.util.concurrent.atomic.AtomicLong;
54 
55 import java.util.logging.Logger;
56 
57 import com.sleepycat.je.CacheMode;
58 import com.sleepycat.je.DatabaseException;
59 import com.sleepycat.je.EnvironmentFailureException;
60 import com.sleepycat.je.EnvironmentMutableConfig;
61 import com.sleepycat.je.StatsConfig;
62 import com.sleepycat.je.config.EnvironmentParams;
63 import com.sleepycat.je.dbi.DatabaseId;
64 import com.sleepycat.je.dbi.DatabaseImpl;
65 import com.sleepycat.je.dbi.DbConfigManager;
66 import com.sleepycat.je.dbi.DbTree;
67 import com.sleepycat.je.dbi.EnvConfigObserver;
68 import com.sleepycat.je.dbi.EnvironmentImpl;
69 import com.sleepycat.je.dbi.INList;
70 import com.sleepycat.je.dbi.MemoryBudget;
71 import com.sleepycat.je.recovery.Checkpointer;
72 import com.sleepycat.je.tree.BIN;
73 import com.sleepycat.je.tree.IN;
74 import com.sleepycat.je.tree.Tree;
75 import com.sleepycat.je.tree.SearchResult;
76 import com.sleepycat.je.tree.ChildReference;
77 import com.sleepycat.je.tree.WithRootLatched;
78 import com.sleepycat.je.utilint.AtomicLongStat;
79 import com.sleepycat.je.utilint.DbLsn;
80 import com.sleepycat.je.utilint.IntStat;
81 import com.sleepycat.je.utilint.LoggerUtils;
82 import com.sleepycat.je.utilint.FloatStat;
83 import com.sleepycat.je.utilint.LongStat;
84 import com.sleepycat.je.utilint.StatDefinition;
85 import com.sleepycat.je.utilint.StatGroup;
86 import com.sleepycat.je.utilint.StoppableThreadFactory;
87 import com.sleepycat.je.utilint.TestHook;
88 import com.sleepycat.je.utilint.TestHookExecute;
89 
90 /**
91  *
92  * Overview
93  * --------
94  *
95  * The Evictor is responsible for managing the JE cache. The cache is
96  * actually a collection of in-memory btree nodes, implemented by the
97  * com.sleepycat.je.dbi.INList class. A subset of the nodes in te INList
98  * are candidates for eviction. This subset is tracked in one or more
99  * LRULists, which are maintained by the Evictor. When a node is evicted,
100  * it is detached from its contining BTree and then removed from the INList
101  * and from its containig LRUList. Once all references to an evicted node
102  * are removed, it can be GC'd by the JVM.
103  *
104  * The Evictor owns a pool of threads that are available to handle eviction
105  * tasks. The eviction pool is a standard java.util.concurrent thread pool,
106  * and can be mutably configured in terms of core threads, max threads, and
107  * keepalive times.
108  *
109  * Eviction is carried out by three types of threads:
110  * 1. An application thread, in the course of doing critical eviction.
111  * 2. Daemon threads, such as the cleaner or INCompressor, in the course of
112  *    doing their respective duties.
113  * 3. Eviction pool threads.
114  *
115  * Memory consumption is tracked by the MemoryBudget. The Arbiter, which is
116  * also owned by the Evictor, is used to query the MemoryBudget and determine
117  * whether eviction is actually needed, and if so, how many bytes should be
118  * evicted by an evicting thread.
119  *
120  * Multiple threads can do eviction concurrently. As a result, it's important
121  * that eviction is both thread safe and as parallel as possible.  Memory
122  * thresholds are generally accounted for in an unsynchronized fashion, and are
123  * seen as advisory. The only point of true synchronization is around the
124  * selection of a node for eviction. The act of eviction itself can be done
125  * concurrently.
126  *
127  * The eviction method is not reentrant, and a simple concurrent hash map
128  * of threads is used to prevent recursive calls.
129  *
130  * Details on the implementation of the LRU-based eviction policy
131  * --------------------------------------------------------------
132  *
133  * ------------------
134  * Data structures
135  * ------------------
136  *
137  * An LRU eviction policy is approximated by one or more LRULists. An LRUList
138  * is a doubly linked list consisting of BTree nodes. If a node participates
139  * in an LRUList, then whenever it is accessed, it moves to the "back" of the
140  * list. When eviction is needed, the evictor evicts the nodes at the "front"
141  * of the LRULists.
142  *
143  * An LRUList is implemented as 2 IN references: a "front" ref pointing to the
144  * IN at the front of the list and a "back" ref, pointing to the IN at the back
145  * of the list. In addition, each IN has "nextLRUNode" and "prevLRUNode" refs
146  * for participating in an LRUList. This implementation works because an IN can
147  * belong to at most 1 LRUList at a time. Furthermore, it is the responsibility
148  * of the Evictor to know which LRUList a node belongs to at any given time
149  * (more on this below). As a result, each LRUList can assume that a node will
150  * either not be in any list at all, or will belong to "this" list. This way,
151  * membership of a node to an LRUList can be tested by just checking that
152  * either the nextLRUNode or prevLRUNode field of the node is non-null.
153  *
154  * The operations on an LRUList are:
155  *
156  * - addBack(IN) :
157  * Insert an IN at the back of the list. Assert that the node does not belong
158  * to an LRUList already.
159  *
160  * - addFront(IN) :
161  * Insert an IN at the front of the list.  Assert that the node does not belong
162  * to an LRUList already.
163  *
164  * - moveBack(IN) :
165  * Move an IN to the back of the list, if it is in the list already. Noop
166  * if the node is not in the list.
167  *
168  * - moveFront(IN) :
169  * Move an IN to the front of the list, if it is in the list already. Noop
170  * if the node is not in the list.
171  *
172  * - removeFront() :
173  * Remove the IN at the front of the list and return it to the caller.
174  * Return null if the list is empty.
175  *
176  * - remove(IN) :
177  * Remove the IN from the list, if it is there. Return true if the node was
178  * in the list, false otherwise.
179  *
180  * - contains(IN):
181  * Return true if the node is contained in the list, false otherwise.
182  *
183  * All of the above methods are synchronized on the LRUList object. This may
184  * create a synchronization bottleneck. To alleviate this, the Evictor uses
185  * multiple LRULists, which taken together comprise a logical LRU list, called
186  * an LRUSet. The number of LRULists per LRUSet (numLRULists) is fixed and
187  * determined by a config parameter (max of 64). The LRULists are stored in
188  * an array whose length is numLRULists.
189  *
190  * The Evictor actually maintains 2 LRUSets: the "mixed" one and the "dirty"
191  * one. Within an LRUSet, the nodeId is used to place a node to an LRUList: a
192  * node with id N goes to the (N % numLRULists)-th list. In addition, each
193  * node has a flag (isInDirtyLRU) to identify which LRUSet it belongs to.
194  * This way, the Evictor knows which LRUList a node should belong to, and
195  * accesses the appropriate LRUList instance when it needs to add/remove/move
196  * a node within the LRU.
197  *
198  * Access to the isInDirtyLRU flag is synchronized via the SH/EX node latch.
199  *
200  * Justification for the mixed and dirty LRUSets: We would like to keep dirty
201  * INs in memory as much as possible to achieve "write absorption". Ideally,
202  * dirty INs should be logged by the checkpointer only. So, we would like to
203  * have the option in the Evictor to chose a clean IN to evict over a dirty
204  * IN, even if the dirty IN is colder than the clean IN. In this mode, having
205  * a single LRUSet will not perform very well in the situation when most (or
206  * a lot) or the INs are dirty (because each time we get a dirty IN from an
207  * LRUList, we would have to put it back to the list and try another IN until
208  * we find a clean one, thus spending a lot of CPU time trying to select an
209  * eviction target).
210  *
211  * Withinh each LRUSet, picking an LRUList to evict from is done in a round-
212  * robin fashion. To this end, the Evictor maintains 2 int counters:
213  * nextMixedLRUList and nextDirtyLRUList. To evict from the mixed LRUSet, an
214  * evicting thread picks the (nextMixedLRUList % numLRULists)-th list, and
215  * then increments nextMixedLRUList. Similarly, to evict from the dirty LRUSet,
216  * an evicting thread picks the (nextDirtyLRUList % numLRULists)-th list,
217  * and then increments nextDirtyLRUList. This does not have to be done in a
218  * synchronized way.
219  *
220  * A new flag (called hasCachedChildren) is added to each IN to indicate
221  * whether the IN has cached children or not. This flag is used and maintained
222  * for upper INs (UINs) only. The need for this flag is explained below.
223  * Access to this flag is synchronized via the SH/EX node latch.
224  *
225  * ---------------------------------------------------------------------------
226  * LRUSet management: adding/removing/moving INs in/out of/within the LRUSets
227  * ---------------------------------------------------------------------------
228  *
229  * We don't want to track upper IN (UIN) nodes that have cached children.
230  * There are 2 reasons for this: (a) we cannot evict UINs with cached children
231  * (the children must be evicted first) and (b) UINs will normally have high
232  * access rate, and would add a lot of CPU overhead if they were tracked.
233  *
234  * The hasCachedChildren flag is used as a quick way to determine whether a
235  * UIN has cached children or not.
236  *
237  * Adding a node to the LRU.
238  * -------------------------
239  *
240  * A IN N is added in an LRUSet via one of the following Evictor methods:
241  * addBack(IN), addFront(IN), dirtyAddBack(IN), or dirtyAddFront(IN). The
242  * first 2 add the node to the mixed LRUSet and set its isInDirtyLRU flag
243  * to false. The last 2 add the node to the dirty LRUSet and set its
244  * isInDirtyLRU flag to true.
245  *
246  * Note: DINs and DBINs are never added to the LRU.
247  *
248  * A node N is added to the LRU in the following situations:
249  *
250  * 1. N is fetched into memory from the log. Evictor.addBack(N) is called
251  *    inside IN.postfetchInit() (just before N is connected to its parent).
252  *
253  * 2. N is a brand new node created during a split, and either N is a BIN or
254  *    N does not get any cached children from its split sibling.
255  *    Evictor.addFront(N) is called if N is a BIN and the cachemode is
256  *    MAKE_COLD or EVICT_BIN. Otherwise, Evictor.addBack(child) is called.
257  *
258  * 3. N is a UIN that is being split, and before the split it had cached
259  *    children, but all its cached children have now moved to its newly
260  *    created sibling. Evictor.addBack(N) is called in this case.
261  *
262  * 4. N is a UIN that looses its last cached child (either because the child is
263  *    evicted or it is deleted). Evictor.addBack(N) is called inside
264  *    IN.setTarget(), if the target is null, N is a UIN, N's hasCachedChildren
265  *    flag is true, and N after setting the target to null, N has no remaining
266  *    cached children.
267  *
268  * 5. N is the 1st BIN in a brand new tree. In this case, Evictor.addBack(N)
269  *    is called inside Tree.findBinForInsert().
270  *
271  * 6. N is a node visited during IN.rebuildINList() and N is either a BIN or
272  *    a UIN with no cached children.
273  *
274  * 7. An evicting thread T removes N from the LRU, but after T EX-latches N,
275  *    it determines that N is not evictable or should not be evicted, and
276  *    should be put back in the LRU. T puts N back to the LRU using one of
277  *    the above 4 methods (for details, read about the eviction processing
278  *    below), but ONLY IF (a) N is still in the INList, and (b) N is not in
279  *    the LRU already.
280  *
281  *    Case (b) can happen if N is a UIN and after T removed N from the LRU
282  *    but before T could latch N, another thread T1 added a child to N and
283  *    removed that child. Thus, by item 4 above, T1 adds N back to the LRU.
284  *    Furthermore, since N is now back in the LRU, case (a) can now happen
285  *    as well if another thread can evict N before T latches it.
286  *
287  * 8. When the checkpointer (or any other thread/operation) cleans a dirty IN,
288  *    it must move it from the dirty LRUSet (if there) to the mixed one. This
289  *    is done via the Evictor.moveToMixedLRU(N) method: If the isInDirtyLRU
290  *    flag of N is true, LRUList.remove(N) is called to remove the node from
291  *    the dirty LRUSet. If N was indeed in the dirty LRUSet (i.e.,
292  *    LRUList.remove() returns true), addBack(N) is called to put it in the
293  *    mixed LRUSet.
294  *
295  *    By moving N to the mixed LRUSet only after atomically removing it from
296  *    the dirty LRUSet and checking that it was indeed there, we prevent N
297  *    from being added into the LRU if N has been or would be removed from
298  *    the LRU by a concurrently running evicting thread.
299  *
300  * In cases 2, 3, 4, 5, 7, and 8 N is EX-latched. In case 1, the node is not
301  * latched, but it is inaccessible by any other threads because it is not
302  * conected to its parent yet and the parent is EX-latched (but N has already
303  * been inserted in the INList; can this create any problems ?????). In case
304  * 6 there is only one thread running. So, in all cases it's ok to set the
305  * isInDirtyLRU flag of the node.
306  *
307  * Question: can a thread T try to add a node N, seen as a Java obj instance,
308  * into the LRU, while N is already there? I believe not, and LRUList addBack()
309  * and addFront() methods assert that this cannot happen. In cases 1, 2, and 5
310  * above N is newly created node, so it cannot be in the LRU already. In cases
311  * 3 and 4, N is a UIN that has cached children, so it cannot be in the LRU.
312  * In case 6 there is only 1 thread. Finally, in cases 7 and 8, T checks that
313  * N is not in the LRU before attempting to add it (and the situation cannot
314  * change between tis check and the insertion into the LRU because N is EX-
315  * latched).
316  *
317  * Question: can a thread T try to add a node N, seen as a logical entity
318  * represented by its nodeId, into the LRU, while N is already there?
319  * Specifically, (a) can two Java instances, N1 and N2, of the same node
320  * N exist in memory at the same time, and (b) while N1 is in the LRU, can
321  * a thread T try to add N2 in the LRU? The answer to (a) is "yes", and as
322  * far as I can think, the answer to (b) is "no", but there is no explicit
323  * check in the code for this. Consider the following sequence of events:
324  * Initially only N1 is in memory and in the LRU. An evicting thread T1
325  * removes N1 from the LRU, thread T2 adds N1 in the LRU, thread T3 removes
326  * N1 from the LRU and actually evicts it, thread T4 fetches N from the log,
327  * thus creating instance N2 and adding N2 to the LRU, thread T1 finally
328  * EX-latches N1 and has to decide what to do with it. The check in case
329  * 7a above makes sure that N1 will not go back to the LRU. In fact the
330  * same check makes sure that N1 will not be evicted (i.e., logged, if
331  * dirty). T1 will just skip N1, thus allowing it to be GCed.
332  *
333  * Removing a node from the LRU
334  * ----------------------------
335  *
336  * A node is removed from the LRU when it is selected as an eviction target
337  * by an evicting thread. The thread chooses an LRUList list to evict from
338  * and calls removeFront() on it. The node is not latched when it is removed
339  * from the LRU in this case. The evicting thread is going to EX-latch the
340  * node shortly after the removal. But as explain already earlier, between
341  * the removal and the latching, another thread may put the node back to the
342  * LRU, and as a result, another thread may also choose the same node for
343  * eviciton. The node may also be detached from the BTree, or its database
344  * closed, or deleted.
345  *
346  * A node may also be removing from the LRU by a non-evicting thread. This
347  * is done via the Evictor.remove(IN) method. The method checks the node's
348  * isInDrtryLRU flag to determine which LRUSet the node belongs to (if any)
349  * and then calls LRUList.remove(N). The node must be at least SH latched
350  * when the method is called. The method is a noop if the node is not in the
351  * LRU. The node may not belong to any LRUList, because it has been selected
352  * for eviction by another thread (and thus removed from LRU), but the
353  * evicting thread has not yet latched the node. There are 3 cases (listed
354  * below) where Evictor.remove(N) is called. In the first two cases
355  * Evictor.rempove(N) is invoked from INList.removeInternal(N). This makes
356  * sure that N is removed from the LRU whenever it it removed from the
357  * INList (to guarantee that the nodes in the LRU are always a subset of
358  * the nodes in the INList).
359  *
360  * 1. When a tree branch containing N gets detached from its tree. In this
361  *    case, INList.remove(N) is invoked inside accountForSubtreeRemoval() or
362  *    accountForDeferredWriteSubtreeRemoval().
363  *
364  * 2. When the database containing N gets deleted or truncated. In this case,
365  *    INList.iter.remove() is called in DatabaseImpl.finishDeleteProcessing().
366  *
367  * 3. N is a UIN with no cached children (hasCachedChildren flag is false)
368  *    and a new child for N is fetched. The call to Evictor.remove(N) is
369  *    done inside IN.setTarget().
370  *
371  * Moving a node within the LRU
372  * ----------------------------
373  *
374  * A node N is moved within its containing LRUList (if any) via the Evictor
375  * moveBack(IN) and moveFront(IN) methods. The methods check the isInDirtyLRU
376  * flag of the node to determine the LRUSet the node belongs to and then move
377  * the node to the back or to the front of the LRUList. The node will be at
378  * least SH latched when these methods are called. Normally, the IN will be
379  * in an LRUList. However, it may not belong to any LRUList, because it has
380  * been selected for eviction by another thread (and thus removed from LRU),
381  * but the evicting thread has not yet EX-latched the node. In this case,
382  * these methods are is a noop. The methods are called in the following
383  * situations:
384  *
385  * 1. N is latched with cachemode DEFAULT, KEEP_HOT, or EVICT_LN and N is a
386  *    BIN or a UIN with no cached children (the hasCachedChildren flag is
387  *    used to check if the UIN has cached children, so we don't need to
388  *    iterate over all of the node's child entries). In this case,
389  *    Evictor.moveBack(N) .
390  *
391  * 2. N is latched with cachemode MAKE_COLD or EVICT_BIN and N is a BIN.
392  *    In this case, Evictor.moveFront(N) is called.
393  *
394  * -------------------
395  * Eviction Processing
396  * -------------------
397  *
398  * A thread can initiate eviction by invoking the Evictor.doEviction() method.
399  * This method implements an "eviction run". An eviction run consists of a
400  * number of "eviction passes", where each pass is given as input a maximum
401  * number of bytes to evict. An eviction pass is implemented by the
402  * Evictor.evictBatch() method.
403  *
404  * Inside Evictor.evictBatch(), an evicting thread T:
405  *
406  * 1. Picks the mixed LRUset initially as the "current" LRUSet to be processed,
407  *
408  * 2. Initializes the max number of nodes to be processed per LRUSet to the
409  *    current size of the mixed LRUSet,
410  *
411  * 3. Executes the following loop:
412  *
413  * 3.1. Picks a non-empty LRUList from the current LRUSet in a round-robin
414  *      fashion, as explained earlier, and invokes LRUList.removeFront() to
415  *      remove the node N at the front of the list. N becomes the current
416  *      eviction target.
417  *
418  * 3.2. If the DB node N belongs to has been deleted or closed, skips this node,
419  *      i.e., leaves N outside the LRU and goes to 3.4.
420  *
421  * 3.3. Calls ProcessTarget(N) (see below)
422  *
423  * 3.4. If the current LRUset is the mixed one and the number of target nodes
424  *      processed reaches the max number allowed, the dirty LRUSet becomes the
425  *      current one, the max number of nodes to be processed per LRUSet is set
426  *      set to the current size of the dirty LRUSet, and the number of nodes
427  *      processed is reset to 0.
428  *
429  * 3.5. Breaks the loop if the max number of bytes to evict during this pass
430  *      has been reached, or memConsumption is less than (maxMemory - M) (where
431  *      M is a config param), or the number of nodes that have been processed
432  *      in the current LRUSet reaches the max allowed.
433  *
434  * --------------------------
435  * The processTarget() method
436  * --------------------------
437  *
438  * This method is called after a node N has been selected for eviction (and as
439  * result, removed from the LRU). The method EX-latches N and determines
440  * whether it can/should really be evicted, and if not what is the appropriate
441  * action to be taken by the evicting thread. Before returning, the method
442  * unlatches N. Finally, it returns the number of bytes evicted (if any).
443  *
444  * If a decision is taken to evict N or mutate it to a BINDelta, N must first
445  * be unlatched and its parent must be searched within the tree. During this
446  * search, many things can happen to the unlatched N, and as a result, after
447  * the parent is found and the N is relatched, processTarget() calls itself
448  * recursivelly to re-consider all the possible actions for N.
449  *
450  * Let T be an evicting thread running processTarget() to determine what to do
451  * with a target node N. The following is the list of possible outcomes:
452  *
453  * 1. SKIP - Do nothing with N if:
454  *    (a) N is in the LRU. This can happen if N is a UIN and while it is
455  *        unlatched by T, other threads fetch one or more of N's children,
456  *        but then all of N's children are removed again, thus causing N to
457  *        be put back to the LRU.
458  *    (b) N is not in the INList. Given than N can be put back to the LRU while
459  *        it is unlatched by T, it can also be selected as an eviction target
460  *        by another thread and actually be evicted.
461  *    (c) N is a UIN with cached children. N could have acquired children
462  *        after the evicting thread removed it from the LRU, but before the
463  *        evicting thread could EX-latch it.
464  *    (d) N is the root of the DB naming tree or the DBmapping tree.
465  *    (e) N is dirty, but the DB is read-only.
466  *    (f) N's environment used a shared cache and the environment has been
467  *        closed or invalidated.
468  *    (g) If a decision was taken to evict od mutate N, but the tree search
469  *        (using N's keyId) to find N's parent, failed to find the parent, or
470  *        N itself. This can happen if during the search, N was evicted by
471  *        another thread, or a branch containing N was completely removed
472  *        from the tree.
473  *
474  * 2. PUT BACK - Put N to the back of the LRUSet it last belonged to, if:
475  *    (a) It is a BIN that was last accessed with KEEP_HOT cache mode.
476  *    (b) N has an entry with a NULL LSN and a null target.
477  *
478  * 3. PARTIAL EVICT - perform partial eviction on N, if none of the cases
479  *    listed above is true. Currently, paretial eviction applies to BINs only
480  *    and involves the eviction (stripping) of evictable LNs. If a cached LN
481  *    is not  evictable, the whole BIN is not evictable as well. Currently,
482  *    only MapLNs may be non-evictable (see MapLN.isEvictable()).
483  *
484  *    After partial eviction is performed the following outcomes are possible:
485  *
486  * 4. STRIPPED PUT BACK - Put N to the back of the LRUSet it last belonged to,
487  *    if partial eviction did evict any bytes, and N is not a BIN in EVICT_BIN
488  *    or MAKE_COLD cache mode.
489  *
490  * 5. PUT BACK - Put N to the back of the LRUSet it last belonged to, if
491  *    no bytes were stripped, but partial eviction determined that N is not
492  *    evictable.
493  *
494  * 6. MUTATE - Mutate N to a BINDelta, if none of the above apply and N is a
495  *    BIN that can be mutated.
496  *
497  * 7. MOVE TO DIRTY LRU - Move N to the front of the dirty LRUSet, if none of
498  *    the above apply and N is a dirty node that last belonged to the mixed
499  *    LRUSet.
500  *
501  * 8. EVICT - Evict N is none of the above apply.
502  *
503  * -------
504  * TODOs:
505  * -------
506  *
507  * 1. Determine what todo with the KEEP_HOT cache mode
508  *
509  * 2. Decide what to do about assertions (keep, remove, convert to JE excpetions,
510  *    convert to DEBUG-only expensive checks).
511  *
512  */
513 public class Evictor implements EnvConfigObserver {
514 
515     /*
516      * If new eviction source enums are added, a new stat is created, and
517      * EnvironmentStats must be updated to add a getter method.
518      *
519      * CRITICAL eviction is called by operations executed app or daemon
520      * threads which detect that the cache has reached its limits
521      * CACHE_MODE eviction is called by operations that use a specific
522      * Cursor.
523      * EVICTORThread is the eviction pool
524      * MANUAL is the call to Environment.evictMemory, called by recovery or
525      *   application code.
526      */
527     public enum EvictionSource {
528         /* Using ordinal for array values! */
529         EVICTORTHREAD, MANUAL, CRITICAL, CACHEMODE, DAEMON;
530 
getNumBytesEvictedStatDef()531         public StatDefinition getNumBytesEvictedStatDef() {
532             return new StatDefinition("nBytesEvicted" + toString(),
533                                       NUM_BYTES_EVICTED_DESC);
534         }
535     }
536 
537     /*
538      * The purpose of EvictionDebugStats is to capture the stats of a single
539      * eviction run (i.e., an execution of the Evictor.doEviction() method by
540      * a single thread). An instance of EvictionDebugStats is created at the
541      * start of doEviction() and is passed around to the methods called from
542      * doEviction(). At the end of doEviction(), the EvictionDebugStats
543      * instance can be printed out (for debugging), or (TODO) the captured
544      * stats can be loaded to the global Evictor.stats.
545      */
546     static class EvictionDebugStats {
547         boolean inMixedLRU;
548         boolean withParent;
549 
550         long mixedSize;
551         long dirtySize;
552 
553         int numSelectedMixed;
554         int numSelectedDirty;
555 
556         int numPutBackMixed;
557         int numPutBackDirty;
558 
559         int numBINsStripped1Mixed;
560         int numBINsStripped2Mixed;
561         int numBINsStripped1Dirty;
562         int numBINsStripped2Dirty;
563 
564         int numBINsMutatedMixed;
565         int numBINsMutatedDirty;
566 
567         int numUINsMoved1;
568         int numUINsMoved2;
569         int numBINsMoved1;
570         int numBINsMoved2;
571 
572         int numUINsEvictedMixed;
573         int numUINsEvictedDirty;
574         int numBINsEvictedMixed;
575         int numBINsEvictedDirty;
576 
reset()577         void reset() {
578             inMixedLRU = true;
579             withParent = false;
580 
581             mixedSize = 0;
582             dirtySize = 0;
583 
584             numSelectedMixed = 0;
585             numSelectedDirty = 0;
586 
587             numPutBackMixed = 0;
588             numPutBackDirty = 0;
589 
590             numBINsStripped1Mixed = 0;
591             numBINsStripped2Mixed = 0;
592             numBINsStripped1Dirty = 0;
593             numBINsStripped2Dirty = 0;
594 
595             numBINsMutatedMixed = 0;
596             numBINsMutatedDirty = 0;
597 
598             numUINsMoved1 = 0;
599             numUINsMoved2 = 0;
600             numBINsMoved1 = 0;
601             numBINsMoved2 = 0;
602 
603             numUINsEvictedMixed = 0;
604             numUINsEvictedDirty = 0;
605             numBINsEvictedMixed = 0;
606             numBINsEvictedDirty = 0;
607         }
608 
incNumSelected()609         void incNumSelected() {
610             if (inMixedLRU) {
611                 numSelectedMixed++;
612             } else {
613                 numSelectedDirty++;
614             }
615         }
616 
incNumPutBack()617         void incNumPutBack() {
618             if (inMixedLRU) {
619                 numPutBackMixed++;
620             } else {
621                 numPutBackDirty++;
622             }
623         }
624 
incNumStripped()625         void incNumStripped() {
626             if (inMixedLRU) {
627                 if (withParent) {
628                     numBINsStripped2Mixed++;
629                 } else {
630                     numBINsStripped1Mixed++;
631                 }
632             } else {
633                 if (withParent) {
634                     numBINsStripped2Dirty++;
635                 } else {
636                     numBINsStripped1Dirty++;
637                 }
638             }
639         }
640 
incNumMutated()641         void incNumMutated() {
642             if (inMixedLRU) {
643                 numBINsMutatedMixed++;
644             } else {
645                 numBINsMutatedDirty++;
646             }
647         }
648 
incNumMoved(boolean isBIN)649         void incNumMoved(boolean isBIN) {
650             if (withParent) {
651                 if (isBIN) {
652                     numBINsMoved2++;
653                 } else {
654                     numUINsMoved2++;
655                 }
656             } else {
657                 if (isBIN) {
658                     numBINsMoved1++;
659                 } else {
660                     numUINsMoved1++;
661                 }
662             }
663         }
664 
incNumEvicted(boolean isBIN)665         void incNumEvicted(boolean isBIN) {
666             if (inMixedLRU) {
667                 if (isBIN) {
668                     numBINsEvictedMixed++;
669                 } else {
670                     numUINsEvictedMixed++;
671                 }
672             } else {
673                 if (isBIN) {
674                     numBINsEvictedDirty++;
675                 } else {
676                     numUINsEvictedDirty++;
677                 }
678             }
679         }
680 
toString()681         public String toString() {
682             StringBuilder sb = new StringBuilder();
683 
684             sb.append("Eviction stats MIXED: size = ");
685             sb.append(mixedSize);
686             sb.append("\n");
687             sb.append("selected = ");
688             sb.append(numSelectedMixed);
689             sb.append(" | ");
690             sb.append("put back = ");
691             sb.append(numPutBackMixed);
692             sb.append(" | ");
693             sb.append("stripped = ");
694             sb.append(numBINsStripped1Mixed);
695             sb.append("/");
696             sb.append(numBINsStripped2Mixed);
697             sb.append(" | ");
698             sb.append("mutated = ");
699             sb.append(numBINsMutatedMixed);
700             sb.append(" | ");
701             sb.append("moved = ");
702             sb.append(numBINsMoved1);
703             sb.append("/");
704             sb.append(numBINsMoved2);
705             sb.append(" - ");
706             sb.append(numUINsMoved1);
707             sb.append("/");
708             sb.append(numUINsMoved2);
709             sb.append(" | ");
710             sb.append("evicted = ");
711             sb.append(numBINsEvictedMixed);
712             sb.append(" - ");
713             sb.append(numUINsEvictedMixed);
714             sb.append("\n");
715 
716             sb.append("Eviction stats DIRTY: size = ");
717             sb.append(dirtySize);
718             sb.append("\n");
719             sb.append("selected = ");
720             sb.append(numSelectedDirty);
721             sb.append(" | ");
722             sb.append("put back = ");
723             sb.append(numPutBackDirty);
724             sb.append(" | ");
725             sb.append("stripped = ");
726             sb.append(numBINsStripped1Dirty);
727             sb.append("/");
728             sb.append(numBINsStripped2Dirty);
729             sb.append(" | ");
730             sb.append("mutated = ");
731             sb.append(numBINsMutatedDirty);
732             sb.append(" | ");
733             sb.append("evicted = ");
734             sb.append(numBINsEvictedDirty);
735             sb.append(" - ");
736             sb.append(numUINsEvictedDirty);
737             sb.append("\n");
738 
739             return sb.toString();
740         }
741     }
742 
743     /*
744      * The purpose of LRUDebugStats is to capture stats on the current state
745      * of an LRUSet. This is done via a call to LRUEvictor.getMixedLRUStats(),
746      * or LRUEvictor.getDirtyLRUStats(). For now at least, these mehods are
747      * meant to be used for debugging and unit testing only.
748      */
749     static class LRUDebugStats {
750         int size;
751         int dirtySize;
752 
753         int numBINs;
754         int numDirtyBINs;
755 
756         int numStrippedBINs;
757         int numDirtyStrippedBINs;
758 
reset()759         void reset() {
760             size = 0;
761             dirtySize = 0;
762             numBINs = 0;
763             numDirtyBINs = 0;
764             numStrippedBINs = 0;
765             numDirtyStrippedBINs = 0;
766         }
767 
toString()768         public String toString() {
769             StringBuilder sb = new StringBuilder();
770 
771             sb.append("Clean/Dirty INs = ");
772             sb.append(size - dirtySize);
773             sb.append("/");
774             sb.append(dirtySize);
775 
776             sb.append(" BINs = ");
777             sb.append(numBINs - numDirtyBINs);
778             sb.append("/");
779             sb.append(numDirtyBINs);
780 
781             sb.append(" Stripped BINs = ");
782             sb.append(numStrippedBINs - numDirtyStrippedBINs);
783             sb.append("/");
784             sb.append(numDirtyStrippedBINs);
785 
786             return sb.toString();
787         }
788     }
789 
790     /*
791      * LRUList implementation
792      */
793     static class LRUList {
794 
795         private static final boolean doExpensiveCheck = false;
796 
797         private final int id;
798 
799         private int size = 0;
800 
801         private IN front = null;
802         private IN back = null;
803 
LRUList(int id)804         LRUList(int id) {
805             this.id = id;
806         }
807 
addBack(IN node)808         synchronized void addBack(IN node) {
809 
810             /* Make sure node is not in any LRUlist already */
811             if (node.getNextLRUNode() != null ||
812                 node.getPrevLRUNode() != null) {
813 
814                 throw EnvironmentFailureException.unexpectedState(
815                    node.getEnv(),
816                    Thread.currentThread().getId() + "-" +
817                    Thread.currentThread().getName() +
818                    "-" + node.getEnv().getName() +
819                    "Attempting to add node " + node.getNodeId() +
820                    " in the LRU, but node is already in the LRU.");
821             }
822             assert(!node.isDIN() && !node.isDBIN());
823 
824             node.setNextLRUNode(node);
825 
826             if (back != null) {
827                 node.setPrevLRUNode(back);
828                 back.setNextLRUNode(node);
829             } else {
830                 assert(front == null);
831                 node.setPrevLRUNode(node);
832             }
833 
834             back = node;
835 
836             if (front == null) {
837                 front = back;
838             }
839 
840             ++size;
841         }
842 
addFront(IN node)843         synchronized void addFront(IN node) {
844 
845             /* Make sure node is not in any LRUlist already */
846             if (node.getNextLRUNode() != null ||
847                 node.getPrevLRUNode() != null) {
848 
849                 throw EnvironmentFailureException.unexpectedState(
850                    node.getEnv(),
851                    Thread.currentThread().getId() + "-" +
852                    Thread.currentThread().getName() +
853                    "-" + node.getEnv().getName() +
854                    "Attempting to add node " + node.getNodeId() +
855                    " in the LRU, but node is already in the LRU.");
856             }
857             assert(!node.isDIN() && !node.isDBIN());
858 
859             node.setPrevLRUNode(node);
860 
861             if (front != null) {
862                 node.setNextLRUNode(front);
863                 front.setPrevLRUNode(node);
864             } else {
865                 assert(back == null);
866                 node.setNextLRUNode(node);
867             }
868 
869             front = node;
870 
871             if (back == null) {
872                 back = front;
873             }
874 
875             ++size;
876         }
877 
moveBack(IN node)878         synchronized void moveBack(IN node) {
879 
880             /* If the node is not in the list, don't do anything */
881             if (node.getNextLRUNode() == null) {
882                 assert(node.getPrevLRUNode() == null);
883                 return;
884             }
885 
886             if (doExpensiveCheck && !contains2(node)) {
887                 System.out.println("LRUList.moveBack(): list " + id +
888                                    "does not contain node " +
889                                    node.getNodeId() +
890                                    " Thread: " +
891                                    Thread.currentThread().getId() + "-" +
892                                    Thread.currentThread().getName() +
893                                    " isBIN: " + node.isBIN() +
894                                    " inDirtyLRU: " + node.isInDirtyLRU());
895                 assert(false);
896             }
897 
898             if (node.getNextLRUNode() == node) {
899                 /* The node is aready at the back */
900                 assert(back == node);
901                 assert(node.getPrevLRUNode().getNextLRUNode() == node);
902 
903             } else {
904                 assert(front != back);
905                 assert(size > 1);
906 
907                 if (node.getPrevLRUNode() == node) {
908                     /* the node is at the front  */
909                     assert(front == node);
910                     assert(node.getNextLRUNode().getPrevLRUNode() == node);
911 
912                     front = node.getNextLRUNode();
913                     front.setPrevLRUNode(front);
914                 } else {
915                     /* the node is in the "middle" */
916                     assert(front != node && back != node);
917                     assert(node.getPrevLRUNode().getNextLRUNode() == node);
918                     assert(node.getNextLRUNode().getPrevLRUNode() == node);
919 
920                     node.getPrevLRUNode().setNextLRUNode(node.getNextLRUNode());
921                     node.getNextLRUNode().setPrevLRUNode(node.getPrevLRUNode());
922                 }
923 
924                 node.setNextLRUNode(node);
925                 node.setPrevLRUNode(back);
926 
927                 back.setNextLRUNode(node);
928                 back = node;
929             }
930         }
931 
moveFront(IN node)932         synchronized void moveFront(IN node) {
933 
934             /* If the node is not in the list, don't do anything */
935             if (node.getNextLRUNode() == null) {
936                 assert(node.getPrevLRUNode() == null);
937                 return;
938             }
939 
940             if (doExpensiveCheck && !contains2(node)) {
941                 System.out.println("LRUList.moveFront(): list " + id +
942                                    "does not contain node " +
943                                    node.getNodeId() +
944                                    " Thread: " +
945                                    Thread.currentThread().getId() + "-" +
946                                    Thread.currentThread().getName() +
947                                    " isBIN: " + node.isBIN() +
948                                    " inDirtyLRU: " + node.isInDirtyLRU());
949                 assert(false);
950             }
951 
952             if (node.getPrevLRUNode() == node) {
953                 /* the node is aready at the front */
954                 assert(front == node);
955                 assert(node.getNextLRUNode().getPrevLRUNode() == node);
956 
957             } else {
958                 assert(front != back);
959                 assert(size > 1);
960 
961                 if (node.getNextLRUNode() == node) {
962                     /* the node is at the back */
963                     assert(back == node);
964                     assert(node.getPrevLRUNode().getNextLRUNode() == node);
965 
966                     back = node.getPrevLRUNode();
967                     back.setNextLRUNode(back);
968                 } else {
969                     /* the node is in the "middle" */
970                     assert(front != node && back != node);
971                     assert(node.getPrevLRUNode().getNextLRUNode() == node);
972                     assert(node.getNextLRUNode().getPrevLRUNode() == node);
973 
974                     node.getPrevLRUNode().setNextLRUNode(node.getNextLRUNode());
975                     node.getNextLRUNode().setPrevLRUNode(node.getPrevLRUNode());
976                 }
977 
978                 node.setPrevLRUNode(node);
979                 node.setNextLRUNode(front);
980 
981                 front.setPrevLRUNode(node);
982                 front = node;
983             }
984         }
985 
removeFront()986         synchronized IN removeFront() {
987             if (front == null) {
988                 assert(back == null);
989                 return null;
990             }
991 
992             IN res = front;
993 
994             if (front == back) {
995                 assert(front.getNextLRUNode() == front);
996                 assert(front.getPrevLRUNode() == front);
997                 assert(size == 1);
998 
999                 front = null;
1000                 back = null;
1001 
1002             } else {
1003                 assert(size > 1);
1004 
1005                 front = front.getNextLRUNode();
1006                 front.setPrevLRUNode(front);
1007             }
1008 
1009             res.setNextLRUNode(null);
1010             res.setPrevLRUNode(null);
1011             --size;
1012 
1013             return res;
1014         }
1015 
remove(IN node)1016         synchronized boolean remove(IN node) {
1017 
1018             /* If the node is not in the list, don't do anything */
1019             if (node.getNextLRUNode() == null) {
1020                 assert(node.getPrevLRUNode() == null);
1021                 return false;
1022             }
1023 
1024             assert(node.getPrevLRUNode() != null);
1025 
1026             if (doExpensiveCheck && !contains2(node)) {
1027                 System.out.println("LRUList.remove(): list " + id +
1028                                    "does not contain node " +
1029                                    node.getNodeId() +
1030                                    " Thread: " +
1031                                    Thread.currentThread().getId() + "-" +
1032                                    Thread.currentThread().getName() +
1033                                    " isBIN: " + node.isBIN() +
1034                                    " inDirtyLRU: " + node.isInDirtyLRU());
1035                 assert(false);
1036             }
1037 
1038             if (front == back) {
1039                 assert(size == 1);
1040                 assert(front == node);
1041                 assert(front.getNextLRUNode() == front);
1042                 assert(front.getPrevLRUNode() == front);
1043 
1044                 front = null;
1045                 back = null;
1046 
1047             } else if (node.getPrevLRUNode() == node) {
1048                 /* node is at the front */
1049                 assert(front == node);
1050                 assert(node.getNextLRUNode().getPrevLRUNode() == node);
1051 
1052                 front = node.getNextLRUNode();
1053                 front.setPrevLRUNode(front);
1054 
1055             } else if (node.getNextLRUNode() == node) {
1056                 /* the node is at the back */
1057                 assert(back == node);
1058                 assert(node.getPrevLRUNode().getNextLRUNode() == node);
1059 
1060                 back = node.getPrevLRUNode();
1061                 back.setNextLRUNode(back);
1062             } else {
1063                 /* the node is in the "middle" */
1064                 assert(size > 2);
1065                 assert(front != back);
1066                 assert(front != node && back != node);
1067                 assert(node.getPrevLRUNode().getNextLRUNode() == node);
1068                 assert(node.getNextLRUNode().getPrevLRUNode() == node);
1069 
1070                 node.getPrevLRUNode().setNextLRUNode(node.getNextLRUNode());
1071                 node.getNextLRUNode().setPrevLRUNode(node.getPrevLRUNode());
1072             }
1073 
1074             node.setNextLRUNode(null);
1075             node.setPrevLRUNode(null);
1076             --size;
1077 
1078             return true;
1079         }
1080 
removeINsForEnv(EnvironmentImpl env)1081         synchronized void removeINsForEnv(EnvironmentImpl env) {
1082 
1083             if (front == null) {
1084                 assert(back == null);
1085                 return;
1086             }
1087 
1088             IN node = front;
1089 
1090             while (true) {
1091 
1092                 IN nextNode = node.getNextLRUNode();
1093                 IN prevNode = node.getPrevLRUNode();
1094 
1095                 if (node.getDatabase().getEnv() == env) {
1096 
1097                     node.setNextLRUNode(null);
1098                     node.setPrevLRUNode(null);
1099 
1100                     if (front == back) {
1101                         assert(size == 1);
1102                         assert(front == node);
1103                         assert(nextNode == front);
1104                         assert(prevNode == front);
1105 
1106                         front = null;
1107                         back = null;
1108                         --size;
1109                         break;
1110 
1111                     } else if (prevNode == node) {
1112                         /* node is at the front */
1113                         assert(size > 1);
1114                         assert(front == node);
1115                         assert(nextNode.getPrevLRUNode() == node);
1116 
1117                         front = nextNode;
1118                         front.setPrevLRUNode(front);
1119                         node = front;
1120                         --size;
1121 
1122                     } else if (nextNode == node) {
1123                         /* the node is at the back */
1124                         assert(size > 1);
1125                         assert(back == node);
1126                         assert(prevNode.getNextLRUNode() == node);
1127 
1128                         back = prevNode;
1129                         back.setNextLRUNode(back);
1130                         --size;
1131                         break;
1132                     } else {
1133                         /* the node is in the "middle" */
1134                         assert(size > 2);
1135                         assert(front != back);
1136                         assert(front != node && back != node);
1137                         assert(prevNode.getNextLRUNode() == node);
1138                         assert(nextNode.getPrevLRUNode() == node);
1139 
1140                         prevNode.setNextLRUNode(nextNode);
1141                         nextNode.setPrevLRUNode(prevNode);
1142                         node = nextNode;
1143                         --size;
1144                     }
1145                 } else if (nextNode == node) {
1146                     break;
1147                 } else {
1148                     node = nextNode;
1149                 }
1150             }
1151         }
1152 
contains(IN node)1153         synchronized boolean contains(IN node) {
1154             return (node.getNextLRUNode() != null);
1155         }
1156 
contains2(IN node)1157         private boolean contains2(IN node) {
1158 
1159             if (front == null) {
1160                 assert(back == null);
1161                 return false;
1162             }
1163 
1164             IN curr = front;
1165 
1166             while (true) {
1167                 if (curr == node) {
1168                     return true;
1169                 }
1170 
1171                 if (curr.getNextLRUNode() == curr) {
1172                     break;
1173                 }
1174 
1175                 curr = curr.getNextLRUNode();
1176             }
1177 
1178             return false;
1179         }
1180 
getSize()1181         int getSize() {
1182             return size;
1183         }
1184 
getStats(EnvironmentImpl env, LRUDebugStats stats)1185         synchronized void getStats(EnvironmentImpl env, LRUDebugStats stats) {
1186 
1187             if (front == null) {
1188                 assert(back == null);
1189                 return;
1190             }
1191 
1192             IN curr = front;
1193 
1194             while (true) {
1195                 if (env == null || curr.getEnv() == env) {
1196                     stats.size++;
1197 
1198                     if (curr.getDirty()) {
1199                         stats.dirtySize++;
1200                     }
1201 
1202                     if (curr.isBIN()) {
1203                         stats.numBINs++;
1204 
1205                         if (curr.getDirty()) {
1206                             stats.numDirtyBINs++;
1207                         }
1208 
1209                         if (!curr.hasResidentChildren()) {
1210                             stats.numStrippedBINs++;
1211 
1212                             if (curr.getDirty()) {
1213                                 stats.numDirtyStrippedBINs++;
1214                             }
1215                         }
1216                     }
1217                 }
1218 
1219                 if (curr.getNextLRUNode() == curr) {
1220                     break;
1221                 }
1222 
1223                 curr = curr.getNextLRUNode();
1224             }
1225         }
1226     }
1227 
1228     /**
1229      * EnvInfo stores info related to the environments that share this evictor.
1230      */
1231     private static class EnvInfo {
1232         EnvironmentImpl env;
1233         INList ins;
1234     }
1235 
1236     /* Prevent endless eviction loops under extreme resource constraints. */
1237     static final int MAX_BATCHES_PER_RUN = 100;
1238 
1239     private static final boolean traceUINs = false;
1240     private static final boolean traceBINs = false;
1241     private static final Level traceLevel = Level.INFO;
1242 
1243     /* LRU-TODO: remove */
1244     private static final boolean collectEvictionDebugStats = false;
1245 
1246     /**
1247      * Number of LRULists per LRUSet. This is a configuration parameter.
1248      *
1249      * In general, using only one LRUList may create a synchronization
1250      * bottleneck, because all LRUList methods are synchronized and are
1251      * invoked with high frequency from multiple thread. To alleviate
1252      * this bottleneck, we need the option to break a single LRUList
1253      * into multiple ones comprising an "LRUSet" (even though this
1254      * reduces the quality of the LRU approximation).
1255      */
1256     private final int numLRULists;
1257 
1258     /**
1259      * Whether to actually use the dirtyLRUSet or not. This is a configuration
1260      * parameter.
1261      */
1262     private final boolean useDirtyLRUSet;
1263 
1264     /*
1265      * Whether to allow deltas when logging a dirty BIN that is being evicted.
1266      * This is a configuration parameter.
1267      */
1268     private final boolean allowBinDeltas;
1269 
1270     /*
1271      * Whether to mutate BINs to BIN deltas rather than evicting the full node.
1272      * This is a configuration parameter.
1273      */
1274     private final boolean mutateBins;
1275 
1276     /*
1277      * Access count after which we clear the DatabaseImpl cache.
1278      * This is a configuration parameter.
1279      */
1280     private int dbCacheClearCount;
1281 
1282     /*
1283      * This is a configuration parameter. If true, eviction is done by a pool
1284      * of evictor threads, as well as being done inline by application threads.
1285      * Note: runEvictorThreads is needed as a distinct flag, rather than
1286      * setting maxThreads to 0, because the ThreadPoolExecutor does not permit
1287      * maxThreads to be 0.
1288      */
1289     private boolean runEvictorThreads;
1290 
1291     /* This is a configuration parameter. */
1292     private int terminateMillis;
1293 
1294     /* The thread pool used to manage the background evictor threads. */
1295     private final ThreadPoolExecutor evictionPool;
1296 
1297     /* Flag to help shutdown launched eviction tasks. */
1298     private AtomicBoolean shutdownRequested;
1299 
1300     /*
1301      * Whether this evictor (and the memory cache) is shared by multiple
1302      * environments
1303      */
1304     private final boolean isShared;
1305 
1306     /*
1307      * In case of multiple environments sharing a cache (and this Evictor),
1308      * firstEnvImpl references the 1st EnvironmentImpl to be created with
1309      * the shared cache.
1310      */
1311     private final EnvironmentImpl firstEnvImpl;
1312 
1313     private final List<EnvInfo> envInfos;
1314 
1315     /**
1316      * This is used only when this evictor is shared by multiple envs. It
1317      * "points" to the next env to perform "special eviction" in.
1318      */
1319     private int specialEvictionIndex = 0;
1320 
1321     /*
1322      *
1323      */
1324     private final Arbiter arbiter;
1325 
1326     /**
1327      * mixedLRUSet contains both clean and dirty nodes. A freshly cached node
1328      * goes into this LRUSet. A dirty node will go to the dirtyLRUSet if it is
1329      * selected for eviction from the mixedLRUSet. A node will move from the
1330      * dirtyLRUSet to the mixedLRUSet when it gets logged (i.e., cleaned) by
1331      * the checkpointer.
1332      */
1333     private final LRUList[] mixedLRUSet;
1334     private final LRUList[] dirtyLRUSet;
1335 
1336     /**
1337      * nextMixedLRUList is used to implement the traversal of the lists in
1338      * the mixedLRUSet by one or more evicting threads. Such a thread will
1339      * select for eviction the front node from the (nextMixedLRUList %
1340      * numLRULists)-th list, and then increment nextMixedLRUList.
1341      * nextDirtyLRUList plays the same role for the dirty LRUSet.
1342      */
1343     private int nextMixedLRUList = 0;
1344     private int nextDirtyLRUList = 0;
1345 
1346     /*
1347      * The evictor is disabled during the 1st phase of recovery. The
1348      * RecoveryManager enables the evictor after it finishes its 1st
1349      * phase.
1350      */
1351     private boolean isEnabled = false;
1352 
1353     /* Eviction calls cannot be recursive. */
1354     private ReentrancyGuard reentrancyGuard;
1355 
1356     private Logger logger;
1357 
1358     /*
1359      * Stats
1360      */
1361     final StatGroup stats;
1362 
1363     /*
1364      * Number of eviction tasks that were submitted to the background evictor
1365      * pool, but were refused because all eviction threads were busy.
1366      */
1367     final AtomicLongStat nThreadUnavailable;
1368 
1369     /* Number of evictBatch() invocations. */
1370     final LongStat nEvictionRuns;
1371 
1372     /*
1373      * Number of nodes selected as eviction targets. An eviction target may
1374      * actually be evicted, or skipped, or put back to the LRU, potentially
1375      * after partial eviction or BIN-delta mutation is done on it.
1376      */
1377     final LongStat nNodesTargeted;
1378 
1379     /* Number of nodes evicted. */
1380     final LongStat nNodesEvicted;
1381 
1382     /* Number of closed database root nodes evicted. */
1383     final LongStat nRootNodesEvicted;
1384 
1385     /* Number of LNs evicted. */
1386     final LongStat nLNsEvicted;
1387 
1388     /* Number of BINs stripped. */
1389     final LongStat nNodesStripped;
1390 
1391     /* Number of BINs mutated to deltas. */
1392     final LongStat nNodesMutated;
1393 
1394     /* Number of target nodes put back to the LRU w/o any other action taken */
1395     final LongStat nNodesPutBack;
1396 
1397     /* Number of target nodes skipped. */
1398     final LongStat nNodesSkipped;
1399 
1400     /* Number of target nodes moved to the dirty LRU */
1401     final LongStat nNodesMovedToDirtyLRU;
1402 
1403     /* Number of bytes evicted per eviction source. */
1404     final AtomicLongStat[] numBytesEvicted;
1405 
1406     /*
1407      * Tree related cache hit/miss stats. A subset of the cache misses recorded
1408      * by the log manager, in that these only record tree node hits and misses.
1409      * Recorded by IN.fetchIN and IN.fetchLN, but grouped with evictor stats.
1410      * Use AtomicLongStat for multithreading safety.
1411      */
1412     final AtomicLongStat nLNFetch;
1413 
1414     final AtomicLongStat nLNFetchMiss;
1415 
1416     /*
1417      * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called
1418      * to fetch a UIN.
1419      */
1420     final AtomicLongStat nUpperINFetch;
1421 
1422     /*
1423      * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called
1424      * to fetch a UIN and that UIN was not already cached.
1425      */
1426     final AtomicLongStat nUpperINFetchMiss;
1427 
1428     /*
1429      * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called
1430      * to fetch a BIN.
1431      */
1432     final AtomicLongStat nBINFetch;
1433 
1434     /*
1435      * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called
1436      * to fetch a BIN and that BIN was not already cached.
1437      */
1438     final AtomicLongStat nBINFetchMiss;
1439 
1440     /*
1441      * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called
1442      * to fetch a BIN, that BIN was not already cached, and a BIN-delta was
1443      * fetched from disk.
1444      */
1445     final AtomicLongStat nBINDeltaFetchMiss;
1446 
1447     final FloatStat binFetchMissRatio;
1448 
1449     /*
1450      * Number of calls to BIN.mutateToFullBIN()
1451      */
1452     final AtomicLongStat nFullBINMiss;
1453 
1454     /*
1455      * Number of blind operations on BIN deltas
1456      */
1457     final AtomicLongStat nBinDeltaBlindOps;
1458 
1459      /* Stats for IN compact array representations currently in cache. */
1460     final AtomicLong nINSparseTarget;
1461     final AtomicLong nINNoTarget;
1462     final AtomicLong nINCompactKey;
1463 
1464     /* Number of envs sharing the cache. */
1465     final IntStat sharedCacheEnvs;
1466 
1467     /* Debugging and unit test support. */
1468 
1469     /*
1470      * Number of consecutive "no-eviction" events (i.e. when evictBatch()
1471      * returns 0). It is incremented at each "no-eviction" event and reset
1472      * to 0 when eviction does occur. It is used to determine whether to
1473      * log a WARNING for a "no-eviction" event: only 1 warning is logged
1474      * per sequence of consecutive "no-eviction" events (to avoid flooding
1475      * the logger files).
1476      */
1477     int numNoEvictionEvents = 0;
1478 
1479     TestHook<Object> preEvictINHook;
1480     TestHook<IN> evictProfile;
1481 
Evictor(EnvironmentImpl envImpl)1482     public Evictor(EnvironmentImpl envImpl)
1483         throws DatabaseException {
1484 
1485         isShared = envImpl.getSharedCache();
1486 
1487         firstEnvImpl = envImpl;
1488 
1489         /* Do the stats definitions. */
1490         stats = new StatGroup(GROUP_NAME, GROUP_DESC);
1491 
1492         nEvictionRuns = new LongStat(stats, EVICTOR_EVICTION_RUNS);
1493 
1494         nNodesTargeted = new LongStat(stats, EVICTOR_NODES_TARGETED);
1495         nNodesEvicted = new LongStat(stats, EVICTOR_NODES_EVICTED);
1496         nRootNodesEvicted = new LongStat(stats, EVICTOR_ROOT_NODES_EVICTED);
1497         nLNsEvicted = new LongStat(stats, EVICTOR_LNS_EVICTED);
1498         nNodesStripped = new LongStat(stats, EVICTOR_NODES_STRIPPED);
1499         nNodesMutated = new LongStat(stats, EVICTOR_NODES_MUTATED);
1500         nNodesPutBack = new LongStat(stats, EVICTOR_NODES_PUT_BACK);
1501         nNodesSkipped = new LongStat(stats, EVICTOR_NODES_SKIPPED);
1502         nNodesMovedToDirtyLRU = new LongStat(stats, EVICTOR_NODES_MOVED_TO_DIRTY_LRU);
1503 
1504         nLNFetch = new AtomicLongStat(stats, LN_FETCH);
1505         nBINFetch = new AtomicLongStat(stats, BIN_FETCH);
1506         nUpperINFetch = new AtomicLongStat(stats, UPPER_IN_FETCH);
1507         nLNFetchMiss = new AtomicLongStat(stats, LN_FETCH_MISS);
1508         nBINFetchMiss = new AtomicLongStat(stats, BIN_FETCH_MISS);
1509         nBINDeltaFetchMiss = new AtomicLongStat(stats, BIN_DELTA_FETCH_MISS);
1510         nUpperINFetchMiss = new AtomicLongStat(stats, UPPER_IN_FETCH_MISS);
1511         nFullBINMiss = new AtomicLongStat(stats, FULL_BIN_MISS);
1512         nBinDeltaBlindOps = new AtomicLongStat(stats, BIN_DELTA_BLIND_OPS);
1513         binFetchMissRatio = new FloatStat(stats, BIN_FETCH_MISS_RATIO);
1514 
1515         nThreadUnavailable = new AtomicLongStat(stats, THREAD_UNAVAILABLE);
1516 
1517         nINSparseTarget = new AtomicLong(0);
1518         nINNoTarget = new AtomicLong(0);
1519         nINCompactKey = new AtomicLong(0);
1520 
1521         sharedCacheEnvs = new IntStat(stats, EVICTOR_SHARED_CACHE_ENVS);
1522 
1523         EnumSet<EvictionSource> allSources =
1524             EnumSet.allOf(EvictionSource.class);
1525 
1526         int numSources = allSources.size();
1527 
1528         numBytesEvicted = new AtomicLongStat[numSources];
1529 
1530         for (EvictionSource source : allSources) {
1531 
1532             int index = source.ordinal();
1533 
1534             numBytesEvicted[index] = new AtomicLongStat(
1535                 stats, source.getNumBytesEvictedStatDef());
1536         }
1537 
1538         arbiter = new Arbiter(firstEnvImpl);
1539 
1540         logger = LoggerUtils.getLogger(getClass());
1541         reentrancyGuard = new ReentrancyGuard(firstEnvImpl, logger);
1542         shutdownRequested = new AtomicBoolean(false);
1543 
1544         DbConfigManager configManager = firstEnvImpl.getConfigManager();
1545 
1546         int corePoolSize = configManager.getInt(
1547             EnvironmentParams.EVICTOR_CORE_THREADS);
1548         int maxPoolSize = configManager.getInt(
1549             EnvironmentParams.EVICTOR_MAX_THREADS);
1550         long keepAliveTime = configManager.getDuration(
1551             EnvironmentParams.EVICTOR_KEEP_ALIVE);
1552         terminateMillis = configManager.getDuration(
1553             EnvironmentParams.EVICTOR_TERMINATE_TIMEOUT);
1554         dbCacheClearCount = configManager.getInt(
1555             EnvironmentParams.ENV_DB_CACHE_CLEAR_COUNT);
1556         numLRULists = configManager.getInt(
1557             EnvironmentParams.EVICTOR_N_LRU_LISTS);
1558         useDirtyLRUSet = configManager.getBoolean(
1559             EnvironmentParams.EVICTOR_USE_DIRTY_LRU);
1560 
1561         mixedLRUSet = new LRUList[numLRULists];
1562         dirtyLRUSet = new LRUList[numLRULists];
1563 
1564         for (int i = 0; i < numLRULists; ++i) {
1565             mixedLRUSet[i] = new LRUList(i);
1566             dirtyLRUSet[i] = new LRUList(numLRULists + i);
1567         }
1568 
1569         if (isShared) {
1570             envInfos = new ArrayList<EnvInfo>();
1571         } else {
1572             envInfos = null;
1573         }
1574 
1575         RejectedExecutionHandler rejectHandler = new RejectEvictHandler(
1576             nThreadUnavailable);
1577 
1578         evictionPool = new ThreadPoolExecutor(
1579             corePoolSize, maxPoolSize, keepAliveTime,
1580             TimeUnit.MILLISECONDS,
1581             new ArrayBlockingQueue<Runnable>(1),
1582             new StoppableThreadFactory(firstEnvImpl, "JEEvictor", logger),
1583             rejectHandler);
1584 
1585         runEvictorThreads = configManager.getBoolean(
1586             EnvironmentParams.ENV_RUN_EVICTOR);
1587 
1588         allowBinDeltas = configManager.getBoolean(
1589             EnvironmentParams.EVICTOR_ALLOW_BIN_DELTAS);
1590 
1591         mutateBins = configManager.getBoolean(
1592             EnvironmentParams.EVICTOR_MUTATE_BINS);
1593 
1594         /*
1595          * Request notification of mutable property changes. Do this after all
1596          * fields in the evictor have been initialized, in case this is called
1597          * quite soon.
1598          */
1599         firstEnvImpl.addConfigObserver(this);
1600     }
1601 
1602     /**
1603      * Respond to config updates.
1604      */
envConfigUpdate( DbConfigManager configManager, EnvironmentMutableConfig ignore)1605     public void envConfigUpdate(
1606         DbConfigManager configManager,
1607         EnvironmentMutableConfig ignore)
1608         throws DatabaseException {
1609 
1610         int corePoolSize = configManager.getInt(
1611             EnvironmentParams.EVICTOR_CORE_THREADS);
1612         int maxPoolSize = configManager.getInt(
1613             EnvironmentParams.EVICTOR_MAX_THREADS);
1614         long keepAliveTime = configManager.getDuration(
1615             EnvironmentParams.EVICTOR_KEEP_ALIVE);
1616         terminateMillis = configManager.getDuration(
1617             EnvironmentParams.EVICTOR_TERMINATE_TIMEOUT);
1618         dbCacheClearCount = configManager.getInt(
1619             EnvironmentParams.ENV_DB_CACHE_CLEAR_COUNT);
1620 
1621         evictionPool.setCorePoolSize(corePoolSize);
1622         evictionPool.setMaximumPoolSize(maxPoolSize);
1623         evictionPool.setKeepAliveTime(keepAliveTime, TimeUnit.MILLISECONDS);
1624 
1625         runEvictorThreads = configManager.getBoolean(
1626             EnvironmentParams.ENV_RUN_EVICTOR);
1627     }
1628 
useDirtyLRUSet()1629     public boolean useDirtyLRUSet() {
1630         return useDirtyLRUSet;
1631     }
1632 
getMutateToBINDeltas()1633     public boolean getMutateToBINDeltas() {
1634         return mutateBins;
1635     }
1636 
isEnabled()1637     public boolean isEnabled() {
1638         return isEnabled;
1639     }
1640 
setEnabled(boolean v)1641     public void setEnabled(boolean v) {
1642         isEnabled = v;
1643     }
1644 
1645     /**
1646      * @hidden
1647      * Return the ThreadPool, used by unit testing only.
1648      */
getThreadPool()1649     public ThreadPoolExecutor getThreadPool() {
1650         return evictionPool;
1651     }
1652 
1653     /**
1654      * Request and wait for a shutdown of all running eviction tasks.
1655      */
shutdown()1656     public void shutdown() {
1657 
1658         /*
1659          * Set the shutdown flag so that outstanding eviction tasks end
1660          * early. The call to evictionPool.shutdown is a ThreadPoolExecutor
1661          * call, and is an orderly shutdown that waits for and in flight tasks
1662          * to end.
1663          */
1664         shutdownRequested.set(true);
1665         evictionPool.shutdown();
1666 
1667         /*
1668          * AwaitTermination will wait for the timeout period, or will be
1669          * interrupted, but we don't really care which it is. The evictor
1670          * shouldn't be interrupted, but if it is, something urgent is
1671          * happening.
1672          */
1673         boolean shutdownFinished = false;
1674         try {
1675             shutdownFinished =
1676                 evictionPool.awaitTermination(terminateMillis,
1677                                               TimeUnit.MILLISECONDS);
1678         } catch (InterruptedException e) {
1679             /* We've been interrupted, just give up and end. */
1680         } finally {
1681             if (!shutdownFinished) {
1682                 evictionPool.shutdownNow();
1683             }
1684         }
1685     }
1686 
requestShutdownPool()1687     public void requestShutdownPool() {
1688         shutdownRequested.set(true);
1689         evictionPool.shutdown();
1690     }
1691 
addEnvironment(EnvironmentImpl env)1692     public synchronized void addEnvironment(EnvironmentImpl env) {
1693 
1694         if (isShared) {
1695             int numEnvs = envInfos.size();
1696             for (int i = 0; i < numEnvs; i += 1) {
1697                 EnvInfo info = envInfos.get(i);
1698                 if (info.env == env) {
1699                     return;
1700                 }
1701             }
1702 
1703             EnvInfo info = new EnvInfo();
1704             info.env = env;
1705             info.ins = env.getInMemoryINs();
1706             envInfos.add(info);
1707         } else {
1708             throw EnvironmentFailureException.unexpectedState();
1709         }
1710     }
1711 
removeEnvironment(EnvironmentImpl env)1712     public synchronized void removeEnvironment(EnvironmentImpl env) {
1713         if (isShared) {
1714             int numEnvs = envInfos.size();
1715             for (int i = 0; i < numEnvs; i += 1) {
1716                 EnvInfo info = envInfos.get(i);
1717 
1718                 if (info.env == env) {
1719 
1720                     try {
1721                         for (int j = 0; j < numLRULists; ++j) {
1722                             mixedLRUSet[j].removeINsForEnv(env);
1723                             dirtyLRUSet[j].removeINsForEnv(env);
1724                         }
1725                     } catch (AssertionError e) {
1726                         System.out.println("YYYYYYYYYY " + e);
1727                         e.printStackTrace(System.out);
1728                         throw e;
1729                     }
1730 
1731                     envInfos.remove(i);
1732                     return;
1733                 }
1734             }
1735         } else {
1736             throw EnvironmentFailureException.unexpectedState();
1737         }
1738     }
1739 
checkEnv(EnvironmentImpl env)1740     public synchronized boolean checkEnv(EnvironmentImpl env) {
1741         if (isShared) {
1742             int numEnvs = envInfos.size();
1743             for (int i = 0; i < numEnvs; i += 1) {
1744                 EnvInfo info = envInfos.get(i);
1745                 if (env == info.env) {
1746                     return true;
1747                 }
1748             }
1749 
1750             return false;
1751 
1752         } else {
1753             throw EnvironmentFailureException.unexpectedState();
1754         }
1755     }
1756 
1757     /**
1758      * Add the node to the back of the mixed LRUSet. The node is either
1759      * EX-latched already or is inaccessible from other threads.
1760      */
addBack(IN node)1761     public void addBack(IN node) {
1762 
1763         if (isEnabled && node.getEnv().getInMemoryINs().isEnabled()) {
1764 
1765             assert(node.getInListResident());
1766 
1767             node.setInDirtyLRU(false);
1768             mixedLRUSet[(int)(node.getNodeId() % numLRULists)].addBack(node);
1769         }
1770     }
1771 
1772     /**
1773      * Add the node to the front of the mixed LRUSet. The node is either
1774      * EX-latched already or is inaccessible from other threads.
1775      */
addFront(IN node)1776     public void addFront(IN node) {
1777 
1778         if (isEnabled && node.getEnv().getInMemoryINs().isEnabled()) {
1779 
1780             assert(node.getInListResident());
1781 
1782             node.setInDirtyLRU(false);
1783             mixedLRUSet[(int)(node.getNodeId() % numLRULists)].addFront(node);
1784         }
1785     }
1786 
1787     /*
1788      * Add the node to the back of the dirty LRUSet.
1789      */
dirtyAddBack(IN node)1790     private void dirtyAddBack(IN node) {
1791 
1792         assert(node.isLatchExclusiveOwner());
1793         assert(node.getInListResident());
1794         assert(useDirtyLRUSet);
1795 
1796         node.setInDirtyLRU(true);
1797         dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].addBack(node);
1798     }
1799 
1800     /*
1801      * Add the node to the front of the dirty LRUSet.
1802      */
dirtyAddFront(IN node)1803     private void dirtyAddFront(IN node) {
1804 
1805         assert(node.isLatchExclusiveOwner());
1806         assert(node.getInListResident());
1807         assert(useDirtyLRUSet);
1808 
1809         node.setInDirtyLRU(true);
1810         dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].addFront(node);
1811     }
1812 
1813     /**
1814      * Move the node to the back of its contining LRUList, if any.
1815      */
moveBack(IN node)1816     public void moveBack(IN node) {
1817 
1818         assert(node.isLatchOwner());
1819 
1820         if (node.isInDirtyLRU()) {
1821             dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].moveBack(node);
1822         } else {
1823             mixedLRUSet[(int)(node.getNodeId() % numLRULists)].moveBack(node);
1824         }
1825     }
1826 
1827     /**
1828      * Move the node to the front of its contining LRUList, if any.
1829      */
moveFront(IN node)1830     public void moveFront(IN node) {
1831 
1832         assert(node.isLatchOwner());
1833 
1834         if (node.isInDirtyLRU()) {
1835             dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].moveFront(node);
1836         } else {
1837             mixedLRUSet[(int)(node.getNodeId() % numLRULists)].moveFront(node);
1838         }
1839     }
1840 
1841     /**
1842      * Remove a node from its current LRUList, if any.
1843      */
remove(IN node)1844     public void remove(IN node) {
1845 
1846         assert(node.isLatchOwner());
1847 
1848         int listId = (int)(node.getNodeId() % numLRULists);
1849 
1850         if (node.isInDirtyLRU()) {
1851             dirtyLRUSet[listId].remove(node);
1852         } else {
1853             mixedLRUSet[listId].remove(node);
1854         }
1855     }
1856 
1857     /**
1858      * Move the node from the dirty LRUSet to the mixed LRUSet, if the node
1859      * is indeed in the mixed LRUSet.
1860      */
moveToMixedLRU(IN node)1861     public void moveToMixedLRU(IN node) {
1862 
1863         if (useDirtyLRUSet) {
1864 
1865             assert(node.isLatchExclusiveOwner());
1866 
1867             if (node.isInDirtyLRU()) {
1868                 int listId = (int)(node.getNodeId() % numLRULists);
1869 
1870                 if (dirtyLRUSet[listId].remove(node)) {
1871                     assert(node.getInListResident());
1872                     node.setInDirtyLRU(false);
1873                     mixedLRUSet[listId].addBack(node);
1874                 }
1875             }
1876         }
1877     }
1878 
contains(IN node)1879     public boolean contains(IN node) {
1880 
1881         assert(node.isLatchOwner());
1882 
1883         int listId = (int)(node.getNodeId() % numLRULists);
1884 
1885         if (node.isInDirtyLRU()) {
1886             return dirtyLRUSet[listId].contains(node);
1887         }
1888         return mixedLRUSet[listId].contains(node);
1889     }
1890 
getMixedLRUSize()1891     long getMixedLRUSize() {
1892         long size = 0;
1893         for (int i = 0; i < numLRULists; ++i) {
1894             size += mixedLRUSet[i].getSize();
1895         }
1896 
1897         return size;
1898     }
1899 
getDirtyLRUSize()1900     long getDirtyLRUSize() {
1901         long size = 0;
1902         for (int i = 0; i < numLRULists; ++i) {
1903             size += dirtyLRUSet[i].getSize();
1904         }
1905 
1906         return size;
1907     }
1908 
getMixedLRUStats(EnvironmentImpl env, LRUDebugStats stats)1909     void getMixedLRUStats(EnvironmentImpl env, LRUDebugStats stats) {
1910         stats.reset();
1911         for (int i = 0; i < numLRULists; ++i) {
1912             mixedLRUSet[i].getStats(env, stats);
1913         }
1914     }
1915 
getDirtyLRUStats(EnvironmentImpl env, LRUDebugStats stats)1916     void getDirtyLRUStats(EnvironmentImpl env, LRUDebugStats stats) {
1917         stats.reset();
1918         for (int i = 0; i < numLRULists; ++i) {
1919             dirtyLRUSet[i].getStats(env, stats);
1920         }
1921     }
1922 
1923     /**
1924      * This method is called from application threads for every cursor
1925      * operation.
1926      */
doCriticalEviction(boolean backgroundIO)1927     public void doCriticalEviction(boolean backgroundIO) {
1928 
1929         if (arbiter.isOverBudget()) {
1930 
1931             /*
1932              * Any time there's excessive cache usage, let the thread pool know
1933              * there's work to do.
1934              */
1935             alert();
1936 
1937             /*
1938              * Only do eviction if the memory budget overage fulfills the
1939              * critical eviction requirements. We want to avoid having
1940              * application thread do eviction.
1941              */
1942             if (arbiter.needCriticalEviction()) {
1943                 doEvict(EvictionSource.CRITICAL, backgroundIO);
1944             }
1945         }
1946     }
1947 
1948     /**
1949      * This method is called from daemon threads for every operation.
1950      */
doDaemonEviction(boolean backgroundIO)1951     public void doDaemonEviction(boolean backgroundIO) {
1952 
1953         if (arbiter.isOverBudget()) {
1954 
1955             /*
1956              * Any time there's excessive cache usage, let the thread pool know
1957              * there's work to do.
1958              */
1959             alert();
1960 
1961             /*
1962              * Only do eviction if the memory budget overage fulfills the
1963              * critical eviction requirements. This allows evictor threads to
1964              * take the burden of eviction whenever possible, rather than
1965              * slowing other threads and risking a growing cleaner or
1966              * compressor backlog.
1967              */
1968             if (arbiter.needCriticalEviction()) {
1969                 doEvict(EvictionSource.DAEMON, backgroundIO);
1970             }
1971         }
1972     }
1973 
1974     /*
1975      * Eviction invoked by the API
1976      */
doManualEvict()1977     public void doManualEvict()
1978         throws DatabaseException {
1979 
1980         doEvict(EvictionSource.MANUAL, true/*backgroundIO*/);
1981     }
1982 
1983     /**
1984      * Evict a specific IN, used by cache modes.
1985      */
doEvictOneIN(IN target, EvictionSource source)1986     public void doEvictOneIN(IN target, EvictionSource source) {
1987 
1988         if (!reentrancyGuard.enter()) {
1989             return;
1990         }
1991 
1992         try {
1993             assert(target.isBIN());
1994             assert(target.isLatchOwner());
1995 
1996             remove(target);
1997 
1998             target.releaseLatch();
1999 
2000             long evictedBytes = processTarget(
2001                 null /* rootEvictor */, target, null /* parent */,
2002                 -1 /* entry index within parent */,
2003                 false /* backgroundIO */, source, null /* debug stats */);
2004 
2005             numBytesEvicted[source.ordinal()].add(evictedBytes);
2006 
2007         } finally {
2008             reentrancyGuard.leave();
2009         }
2010     }
2011 
2012 
2013     /**
2014      * Let the eviction pool know there's work to do.
2015      */
alert()2016     public void alert() {
2017         if (!runEvictorThreads) {
2018             return;
2019         }
2020 
2021         evictionPool.execute(new BackgroundEvictTask(this));
2022     }
2023 
2024     /**
2025      * This is where the real work is done.
2026      * Can execute concurrently, called by app threads or by background evictor
2027      */
doEvict(EvictionSource source, boolean backgroundIO)2028     void doEvict(EvictionSource source, boolean backgroundIO)
2029         throws DatabaseException {
2030 
2031         if (!isEnabled) {
2032             return;
2033         }
2034 
2035         if (!reentrancyGuard.enter()) {
2036             return;
2037         }
2038 
2039         nEvictionRuns.increment();
2040 
2041         try {
2042 
2043             /*
2044              * Repeat as necessary to keep up with allocations.  Stop if no
2045              * progress is made, to prevent an infinite loop.
2046              */
2047             boolean progress = true;
2048             int nBatches = 0;
2049             long bytesEvicted = 0;
2050 
2051             EvictionDebugStats evictionStats = null;
2052             if (collectEvictionDebugStats) {
2053                 new EvictionDebugStats();
2054                 evictionStats.reset();
2055                 evictionStats.mixedSize = getMixedLRUSize();
2056                 evictionStats.dirtySize = getDirtyLRUSize();
2057             }
2058 
2059             while (progress &&
2060                    nBatches < MAX_BATCHES_PER_RUN &&
2061                    !shutdownRequested.get()) {
2062 
2063                 /*
2064                  * Do eviction only if memory consumption is over budget.
2065                  * If so, try to evict (memoryConsumption + M - maxMemory)
2066                  * bytes, where M is a config param.
2067                  */
2068                 long maxEvictBytes = arbiter.getEvictionPledge();
2069 
2070                 if (maxEvictBytes == 0) {
2071                     break;
2072                 }
2073 
2074                 bytesEvicted = evictBatch(
2075                     source, backgroundIO, maxEvictBytes, evictionStats);
2076 
2077                 numBytesEvicted[source.ordinal()].add(bytesEvicted);
2078 
2079                 if (bytesEvicted == 0) {
2080 
2081                     if (arbiter.stillNeedsEviction() &&
2082                         numNoEvictionEvents == 0 &&
2083                         logger.isLoggable(Level.FINE)) {
2084                         ++numNoEvictionEvents;
2085                         LoggerUtils.fine(
2086                             logger, firstEnvImpl,
2087                             "Eviction pass failed to evict any bytes");
2088                     } else {
2089                         ++numNoEvictionEvents;
2090                     }
2091 
2092                     progress = false;
2093                 } else {
2094                     numNoEvictionEvents = 0;
2095                 }
2096 
2097                 nBatches += 1;
2098             }
2099 
2100             if (evictionStats != null) {
2101                 System.out.println(evictionStats.toString());
2102             }
2103 
2104             /* For debugging. */
2105             if (source == EvictionSource.EVICTORTHREAD) {
2106                 if (logger.isLoggable(Level.FINEST)) {
2107                     LoggerUtils.finest(logger, firstEnvImpl,
2108                                        "Thread evicted " + bytesEvicted +
2109                                        " bytes in " + nBatches + " batches");
2110                 }
2111             }
2112         } finally {
2113             reentrancyGuard.leave();
2114         }
2115     }
2116 
2117 
2118     /**
2119      * Not private because it is used in unit test.
2120      */
evictBatch( Evictor.EvictionSource source, boolean bgIO, long maxEvictBytes, EvictionDebugStats evictionStats)2121     long evictBatch(
2122         Evictor.EvictionSource source,
2123         boolean bgIO,
2124         long maxEvictBytes,
2125         EvictionDebugStats evictionStats)
2126         throws DatabaseException {
2127 
2128         long totalEvictedBytes = 0;
2129         boolean inMixedLRUSet = true;
2130         int numNodesScannedThisBatch = 0;
2131         long maxNodesScannedThisBatch = getMixedLRUSize();
2132         maxNodesScannedThisBatch += numLRULists;
2133 
2134         assert TestHookExecute.doHookSetupIfSet(evictProfile);
2135 
2136         /*
2137          * Perform special eviction,i.e., evict non-tree memory.
2138          *
2139          * TODO: special eviction is done serially. We may want to absolve
2140          * application threads of that responsibility, to avoid blocking, and
2141          * only have evictor threads do special eviction.
2142          */
2143         synchronized (this) {
2144             if (isShared) {
2145                 int numEnvs = envInfos.size();
2146                 if (numEnvs > 0) {
2147                     if (specialEvictionIndex >= numEnvs) {
2148                         specialEvictionIndex = 0;
2149                     }
2150                     EnvInfo info = envInfos.get(specialEvictionIndex);
2151                     specialEvictionIndex++;
2152 
2153                    totalEvictedBytes = info.env.specialEviction();
2154                 }
2155             } else {
2156                 totalEvictedBytes = firstEnvImpl.specialEviction();
2157             }
2158         }
2159 
2160         /* Use local caching to reduce DbTree.getDb overhead. [#21330] */
2161         final DbCache dbCache = new DbCache(isShared, dbCacheClearCount);
2162         final MemoryBudget memBudget = firstEnvImpl.getMemoryBudget();
2163 
2164         try {
2165             while (totalEvictedBytes < maxEvictBytes &&
2166                    numNodesScannedThisBatch < maxNodesScannedThisBatch &&
2167                    arbiter.stillNeedsEviction()) {
2168 
2169                 if (!isShared && !memBudget.isTreeUsageAboveMinimum()) {
2170                     break;
2171                 }
2172 
2173                 final IN target = getNextTarget(inMixedLRUSet);
2174 
2175                 numNodesScannedThisBatch++;
2176 
2177                 if (target != null) {
2178 
2179                     nNodesTargeted.increment();
2180 
2181                     if (evictionStats != null) {
2182                         evictionStats.incNumSelected();
2183                     }
2184 
2185                     assert TestHookExecute.doHookIfSet(evictProfile, target);
2186 
2187                     final DatabaseImpl targetDb = target.getDatabase();
2188                     final EnvironmentImpl dbEnv = targetDb.getEnv();
2189 
2190                     /*
2191                      * Check to make sure the target's DB was not deleted or
2192                      * truncated after selecting the target. Furthermore,
2193                      * prevent the DB from being deleted while we're working
2194                      * with it (this is done by the dbCache.getDb() call).
2195                      *
2196                      * Also check that the refreshedDb is the same instance
2197                      * as the targetDb. If not, then the MapLN associated with
2198                      * targetDb was recently evicted (which can happen after
2199                      * all handles to the DB are closed). In this case,
2200                      * targetDb and its INs are orphaned and cannot be
2201                      * processed; they should simply be removed from the
2202                      * LRU [#21686]
2203                      */
2204                     final DatabaseImpl refreshedDb =
2205                     dbCache.getDb(dbEnv, targetDb.getId());
2206 
2207                     if (refreshedDb != null &&
2208                         !refreshedDb.isDeleted() &&
2209                         refreshedDb == targetDb) {
2210 
2211                         long evictedBytes = 0;
2212 
2213                         if (target.isDbRoot()) {
2214                             RootEvictor rootEvictor = new RootEvictor();
2215                             rootEvictor.target = target;
2216                             rootEvictor.backgroundIO = bgIO;
2217                             rootEvictor.source = source;
2218                             rootEvictor.stats = evictionStats;
2219 
2220                             /* try to evict the root */
2221                             targetDb.getTree().withRootLatchedExclusive(
2222                                 rootEvictor);
2223 
2224                             /*
2225                              * If the root IN was flushed, write the dirtied
2226                              *  MapLN.
2227                              */
2228                             if (rootEvictor.flushed) {
2229                                 dbEnv.getDbTree().modifyDbRoot(targetDb);
2230                             }
2231 
2232                             evictedBytes = rootEvictor.evictedBytes;
2233 
2234                         } else {
2235                             evictedBytes = processTarget(
2236                                 null,  /* rootEvictor */
2237                                 target, null, /* parent */
2238                                 -1, /* parent entry index */
2239                                 bgIO, source, evictionStats);
2240                         }
2241 
2242                         totalEvictedBytes += evictedBytes;
2243 
2244                     } else {
2245                         /*
2246                          * We don't expect to find in the INList an IN whose
2247                          * database that has finished delete processing,
2248                          * because it should have been removed from the
2249                          * INList during post-delete cleanup.
2250                          */
2251                         if (targetDb.isDeleteFinished() &&
2252                             target.getInListResident()) {
2253                             final String inInfo =
2254                                 " IN type=" + target.getLogType() + " id=" +
2255                                 target.getNodeId() + " not expected on INList";
2256                             final String errMsg = (refreshedDb == null) ?
2257                                 inInfo :
2258                                 ("Database " + refreshedDb.getDebugName() +
2259                                  " id=" + refreshedDb.getId() + " rootLsn=" +
2260                                  DbLsn.getNoFormatString
2261                                  (refreshedDb.getTree().getRootLsn()) +
2262                                  ' ' + inInfo);
2263 
2264                             throw EnvironmentFailureException.
2265                                 unexpectedState(errMsg);
2266                         }
2267                     }
2268                 }
2269 
2270                 /*
2271                  * Move to the dirty LRUSet, if we are done processing the
2272                  * mixed LRUSet.
2273                  */
2274                 if (numNodesScannedThisBatch >= maxNodesScannedThisBatch &&
2275                     totalEvictedBytes < maxEvictBytes &&
2276                     inMixedLRUSet) {
2277 
2278                     numNodesScannedThisBatch = 0;
2279                     maxNodesScannedThisBatch = getDirtyLRUSize();
2280                     maxNodesScannedThisBatch += numLRULists;
2281                     inMixedLRUSet = false;
2282 
2283                     if (evictionStats != null) {
2284                         evictionStats.inMixedLRU = false;
2285                     }
2286                 }
2287             }
2288         } finally {
2289             dbCache.releaseDbs(firstEnvImpl);
2290         }
2291 
2292         return totalEvictedBytes;
2293     }
2294 
getNextTarget(boolean inMixedLRUSet)2295     private IN getNextTarget(boolean inMixedLRUSet) {
2296 
2297         if (inMixedLRUSet) {
2298             int listId = Math.abs(nextMixedLRUList++) % numLRULists;
2299             IN target = mixedLRUSet[listId].removeFront();
2300 
2301             if (target != null &&
2302                 ((traceUINs && target.isUpperIN()) ||
2303                  (traceBINs && target.isBIN()))) {
2304                 LoggerUtils.envLogMsg(
2305                     traceLevel, target.getEnv(),
2306                     Thread.currentThread().getId() + "-" +
2307                     Thread.currentThread().getName() +
2308                     "-" + target.getEnv().getName() +
2309                     " XXXX Mixed Eviction target: " +
2310                     target.getNodeId());
2311             }
2312 
2313             return target;
2314         }
2315 
2316         int listId = Math.abs(nextDirtyLRUList++) % numLRULists;
2317         IN target = dirtyLRUSet[listId].removeFront();
2318 
2319         if (target != null &&
2320             ((traceUINs && target.isUpperIN()) ||
2321              (traceBINs && target.isBIN()))) {
2322             LoggerUtils.envLogMsg(
2323                 traceLevel, target.getEnv(),
2324                 Thread.currentThread().getId() + "-" +
2325                 Thread.currentThread().getName() +
2326                 "-" + target.getEnv().getName() +
2327                 " XXXX Dirty Eviction target: " + target.getNodeId());
2328         }
2329 
2330         return target;
2331     }
2332 
2333     class RootEvictor implements WithRootLatched {
2334 
2335         IN target;
2336         boolean backgroundIO;
2337         EvictionSource source;
2338         EvictionDebugStats stats = null;
2339 
2340         ChildReference rootRef;
2341         boolean flushed = false;
2342         long evictedBytes = 0;
2343 
doWork(ChildReference root)2344         public IN doWork(ChildReference root)
2345             throws DatabaseException {
2346 
2347             /*
2348              * Do not call fetchTarget since this root or DB should be
2349              * resident already if it is to be the target of eviction. If
2350              * it is not present, it has been evicted by another thread and
2351              * should not be fetched for two reasons: 1) this would be
2352              * counterproductive, 2) to guard against bringing in a root
2353              * for an evicted DB.
2354              */
2355             IN rootIN = (IN) root.getTarget();
2356             if (rootIN == null) {
2357                 return null;
2358             }
2359 
2360             rootRef = root;
2361 
2362             /*
2363              * Latch the target and re-check that all conditions still hold.
2364              * The latch on the target will be released by processTarget().
2365              */
2366             rootIN.latch(CacheMode.UNCHANGED);
2367 
2368             if (rootIN == target && rootIN.isRoot()) {
2369                 evictedBytes = processTarget(
2370                     this, null, /* target */
2371                     null, /* parent */
2372                     -1, /* entry index within parent */
2373                     backgroundIO, source, stats);
2374             } else {
2375                 rootIN.releaseLatch();
2376             }
2377 
2378             return null;
2379         }
2380     }
2381 
2382     /**
2383      * Decide what to do with an eviction target and carry out the decision.
2384      * Return the number of bytes evicted (if any).
2385      *
2386      * This method is called from evictBatch() after an IN has been selected
2387      * for eviction. It EX-latches the IN and determines whether it can/should
2388      * really be evicted, and if not what is the appropriate action to be
2389      * taken by the evicting thread.
2390      *
2391      * If a decision is taken to evict the target or mutate it to a BINDelta,
2392      * the target must first be unlatched and its parent must be searched
2393      * within the tree. During this search, many things can happen to the
2394      * unlatched target, and as a result, after the parent is found and the
2395      * target is relatched, processTarget() calls itself recursivelly to
2396      * re-consider all the possible actions on the target.
2397      */
processTarget( RootEvictor rootEvictor, IN target, IN parent, int index, boolean bgIO, EvictionSource source, EvictionDebugStats stats)2398     private long processTarget(
2399         RootEvictor rootEvictor,
2400         IN target,
2401         IN parent,
2402         int index,
2403         boolean bgIO,
2404         EvictionSource source,
2405         EvictionDebugStats stats)
2406         throws DatabaseException {
2407 
2408         boolean targetIsLatched = false;
2409         boolean parentIsLatched = false;
2410         long evictedBytes = 0;
2411 
2412         if (stats != null) {
2413             stats.withParent = (parent != null || rootEvictor != null);
2414         }
2415 
2416         try {
2417             if (parent != null) {
2418                 assert(parent.isLatchExclusiveOwner());
2419                 parentIsLatched = true;
2420 
2421                 if (target != parent.getTarget(index)) {
2422                     skip(target, stats);
2423                     return 0;
2424                 }
2425 
2426                 target.latch(CacheMode.UNCHANGED);
2427 
2428             } else if (rootEvictor != null) {
2429                 target = rootEvictor.target;
2430 
2431             } else {
2432                 target.latch(CacheMode.UNCHANGED);
2433             }
2434 
2435             targetIsLatched = true;
2436 
2437             DatabaseImpl db = target.getDatabase();
2438             EnvironmentImpl dbEnv = db.getEnv();
2439 
2440             if (!target.getInListResident() || contains(target)) {
2441                 /*
2442                  * The node was put back to the LRU, and then possibly evicted
2443                  * by other threads before this thread could latch it.
2444                  */
2445                 skip(target, stats);
2446                 return 0;
2447             }
2448 
2449             if (target.getDirty() && dbEnv.isReadOnly()) {
2450                 /* Cannot evict dirty nodes in a read-only environment. */
2451                 skip(target, stats);
2452                 return 0;
2453             }
2454 
2455             /*
2456              * Normally, UINs that have cached children are not in the LRU,
2457              * and as a result, cannot be selected for eviction. However, a
2458              * childless UIN may be selected for eviction and then acquire
2459              * cached children in the time after its removal from its LRUSet
2460              * and before it is EX-latched by the evicting thread.
2461              */
2462             if (target.isUpperIN() && target.hasCachedChildrenFlag()) {
2463                 assert(target.hasResidentChildren());
2464                 skip(target, stats);
2465                 return 0;
2466             }
2467 
2468             /*
2469              * Disallow eviction of the mapping and naming DB roots, because
2470              * their eviction and re-fetching is a special case that is not
2471              * worth supporting.  [#13415]
2472              */
2473             if (target.isRoot()) {
2474                 DatabaseId dbId = db.getId();
2475                 if (dbId.equals(DbTree.ID_DB_ID) ||
2476                     dbId.equals(DbTree.NAME_DB_ID)) {
2477                     skip(target, stats);
2478                     return 0;
2479                 }
2480             }
2481 
2482             if (dbEnv.getSharedCache()) {
2483 
2484                 if (dbEnv.isClosed() || dbEnv.isInvalid()) {
2485                     skip(target, stats);
2486                     return 0;
2487                 }
2488 
2489                 if (!dbEnv.getMemoryBudget().isTreeUsageAboveMinimum()) {
2490                     putBack(target, stats, 1);
2491                     return 0;
2492                 }
2493             }
2494 
2495             if (target.isPinned()) {
2496                 putBack(target, stats, 2);
2497                 return 0;
2498             }
2499 
2500             /*
2501              * LRU-TODO: this is just an approximation of the KEEP_HOT
2502              * cachemode.
2503              */
2504             if (target.isBIN() && target.getGeneration() == Long.MAX_VALUE) {
2505                 target.setGeneration(0);
2506                 putBack(target, stats, 3);
2507                 return 0;
2508             }
2509 
2510             /*
2511              * Target and LSN can be null in DW. Not evictable in that case.
2512              */
2513             for (int i = 0; i < target.getNEntries(); ++i) {
2514                 if (target.getLsn(i) == DbLsn.NULL_LSN &&
2515                     !target.isResident(i)) {
2516                     putBack(target, stats, 4);
2517                     return 0;
2518                 }
2519             }
2520 
2521             /*
2522              * Attempt partial eviction. The partialEviction() method also
2523              * determines whether the IN in evictable or not. For now,
2524              * partialEviction() will consider a node to be non-evictable if
2525              * it is a BIN that (a) has cursors registered on it, or (b) has
2526              * a resident non-evictable LN, which can happen only for MapLNs
2527              * (see MapLN.isEvictable()).
2528              */
2529             evictedBytes = target.partialEviction();
2530 
2531             boolean isEvictable = (evictedBytes & IN.NON_EVICTABLE_IN) == 0;
2532             evictedBytes = evictedBytes & ~IN.NON_EVICTABLE_IN;
2533 
2534             /*
2535              * If we could evict some bytes from this node, put it back in
2536              * the LRU, unless it is a BIN accessed in EVICT_BIN or MAKE_COLD
2537              * cache mode, in which case we should evict it, if possible.
2538              */
2539             if (evictedBytes > 0 &&
2540                 (target.isUpperIN() || source != EvictionSource.CACHEMODE)) {
2541                 strippedPutBack(target, stats);
2542                 return evictedBytes;
2543             }
2544 
2545             if (!isEvictable) {
2546                 /* The node is not evictable */
2547                 putBack(target, stats, 5);
2548                 return evictedBytes;
2549             }
2550 
2551             /*
2552              * Give the node a second chance, if it is a full BIN that can
2553              * be mutated to a BINDelta and the cache mode is not EVICT_BIN
2554              * or MAKE_COLD.
2555              */
2556             if (target.isBIN() &&
2557                 source != EvictionSource.CACHEMODE &&
2558                 getMutateToBINDeltas() &&
2559                 ((BIN)target).canMutateToBINDelta()) {
2560 
2561                 BIN bin = (BIN)target;
2562                 evictedBytes += bin.mutateToBINDelta();
2563                 assert(evictedBytes > 0);
2564                 binDeltaPutBack(target, stats);
2565 
2566                 return evictedBytes;
2567             }
2568 
2569             /*
2570              * Give the node a second chance, if it is dirty, not in the
2571              * dirty LRUSet already, and is either a UIN or the cache mode
2572              * is not EVICT_BIN or MAKE_COLD.
2573              */
2574             if ((target.isUpperIN() ||
2575                  source != EvictionSource.CACHEMODE) &&
2576                 useDirtyLRUSet() &&
2577                 target.getDirty() &&
2578                 !target.isInDirtyLRU()) {
2579 
2580                 moveToDirtyLRU(target, stats);
2581                 return evictedBytes;
2582             }
2583 
2584             /*
2585              * Evict the node. To do so, we must find and latch the
2586              * parent IN first, if we have not done this already.
2587              */
2588             if (rootEvictor != null) {
2589                 evictedBytes += evictRoot(rootEvictor, bgIO, source, stats);
2590 
2591             } else if (parent != null) {
2592                 evictedBytes += evict(
2593                     target, parent, index, bgIO, source, stats);
2594 
2595             } else {
2596                 assert TestHookExecute.doHookIfSet(preEvictINHook);
2597                 targetIsLatched = false;
2598                 evictedBytes += findParentAndRetry(
2599                     target, bgIO, source, stats);
2600             }
2601 
2602             return evictedBytes;
2603 
2604         } finally {
2605             if (targetIsLatched) {
2606                 target.releaseLatch();
2607             }
2608 
2609             if (parentIsLatched) {
2610                 parent.releaseLatch();
2611             }
2612         }
2613     }
2614 
skip(IN target, EvictionDebugStats stats)2615     private void skip(IN target, EvictionDebugStats stats) {
2616 
2617         if ((traceUINs && target.isUpperIN()) ||
2618             (traceBINs && target.isBIN())) {
2619             LoggerUtils.envLogMsg(
2620                 traceLevel, target.getEnv(),
2621                 Thread.currentThread().getId() + "-" +
2622                 Thread.currentThread().getName() +
2623                 "-" + target.getEnv().getName() +
2624                 " XXXX SKIPPED Eviction Target: " +
2625                 target.getNodeId());
2626         }
2627 
2628         nNodesSkipped.increment();
2629     }
2630 
putBack(IN target, EvictionDebugStats stats, int caller)2631     private void putBack(IN target, EvictionDebugStats stats, int caller) {
2632 
2633         if ((traceUINs && target.isUpperIN()) ||
2634             (traceBINs && target.isBIN())) {
2635             LoggerUtils.envLogMsg(
2636                 traceLevel, target.getEnv(),
2637                 Thread.currentThread().getId() + "-" +
2638                 Thread.currentThread().getName() +
2639                 "-" + target.getEnv().getName() +
2640                 " XXXX PUT-BACK-" + caller + " Eviction Target: " +
2641                 target.getNodeId());
2642         }
2643 
2644         if (target.isInDirtyLRU()) {
2645             dirtyAddBack(target);
2646         } else {
2647             addBack(target);
2648         }
2649 
2650         if (stats != null) {
2651             stats.incNumPutBack();
2652         }
2653 
2654         nNodesPutBack.increment();
2655     }
2656 
strippedPutBack(IN target, EvictionDebugStats stats)2657     private void strippedPutBack(IN target, EvictionDebugStats stats) {
2658 
2659         if ((traceUINs && target.isUpperIN()) ||
2660             (traceBINs && target.isBIN())) {
2661             LoggerUtils.envLogMsg(
2662                 traceLevel, target.getEnv(),
2663                 Thread.currentThread().getId() + "-" +
2664                 Thread.currentThread().getName() +
2665                 "-" + target.getEnv().getName() +
2666                 " XXXX STRIPPED Eviction Target: " +
2667                 target.getNodeId());
2668         }
2669 
2670         if (target.isInDirtyLRU()) {
2671             dirtyAddBack(target);
2672         } else {
2673             addBack(target);
2674         }
2675 
2676         if (stats != null) {
2677             stats.incNumStripped();
2678         }
2679 
2680         nNodesStripped.increment();
2681     }
2682 
binDeltaPutBack(IN target, EvictionDebugStats stats)2683     private void binDeltaPutBack(IN target, EvictionDebugStats stats) {
2684 
2685         if ((traceUINs && target.isUpperIN()) ||
2686             (traceBINs && target.isBIN())) {
2687             LoggerUtils.envLogMsg(
2688                 traceLevel, target.getEnv(),
2689                 Thread.currentThread().getId() + "-" +
2690                 Thread.currentThread().getName() +
2691                 "-" + target.getEnv().getName() +
2692                 " XXXX MUTATED Eviction Target: " +
2693                 target.getNodeId());
2694         }
2695 
2696         if (target.isInDirtyLRU()) {
2697             dirtyAddBack(target);
2698         } else {
2699             addBack(target);
2700         }
2701 
2702         if (stats != null) {
2703             stats.incNumMutated();
2704         }
2705 
2706         nNodesMutated.increment();
2707     }
2708 
moveToDirtyLRU(IN target, EvictionDebugStats stats)2709     private void moveToDirtyLRU(IN target, EvictionDebugStats stats) {
2710 
2711         if ((traceUINs && target.isUpperIN()) ||
2712             (traceBINs && target.isBIN())) {
2713             LoggerUtils.envLogMsg(
2714                 traceLevel, target.getEnv(),
2715                 Thread.currentThread().getId() + "-" +
2716                 Thread.currentThread().getName() +
2717                 "-" + target.getEnv().getName() +
2718                 " XXXX MOVED-TO_DIRTY Eviction Target: " +
2719                 target.getNodeId());
2720         }
2721 
2722         if (stats != null) {
2723             stats.incNumMoved(target.isBIN());
2724         }
2725 
2726         dirtyAddFront(target);
2727 
2728         nNodesMovedToDirtyLRU.increment();
2729     }
2730 
findParentAndRetry( IN target, boolean backgroundIO, EvictionSource source, EvictionDebugStats stats)2731     private long findParentAndRetry(
2732         IN target,
2733         boolean backgroundIO,
2734         EvictionSource source,
2735         EvictionDebugStats stats) {
2736 
2737         Tree tree = target.getDatabase().getTree();
2738 
2739         /*
2740          * Pass false for doFetch to avoid fetching a full BIN when a
2741          * delta is in cache. This also avoids a fetch when the node
2742          * was evicted while unlatched, but that should be very rare.
2743          */
2744         SearchResult result = tree.getParentINForChildIN(
2745             target, false, /*useTargetLevel*/
2746             false, /*doFetch*/ CacheMode.UNCHANGED);
2747 
2748         if (result.exactParentFound) {
2749             return processTarget(null, /* rootEvictor */
2750                                  target,
2751                                  result.parent,
2752                                  result.index,
2753                                  backgroundIO,
2754                                  source,
2755                                  stats);
2756         }
2757 
2758         /*
2759          * The target has been detached from the tree and it should stay
2760          * out of the LRU. It should not be in the INList, because whenever
2761          * we detach a node we remove it from the INList, but in case we
2762          * forgot to do this somewhere, we can just remove it here.
2763          */
2764         assert(result.parent == null);
2765 
2766         target.latch(CacheMode.UNCHANGED);
2767 
2768         try {
2769             if (target.getInListResident()) {
2770 
2771                 firstEnvImpl.getInMemoryINs().remove(target);
2772 
2773                 throw EnvironmentFailureException.unexpectedState(
2774                     "Node " + target.getNodeId() +
2775                     " has been detached from the in-memory tree, " +
2776                     "but it is still in the INList");
2777             }
2778         } finally {
2779             target.releaseLatch();
2780         }
2781 
2782         return 0;
2783     }
2784 
evict( IN target, IN parent, int index, boolean backgroundIO, EvictionSource source, EvictionDebugStats stats)2785     private long evict(
2786         IN target,
2787         IN parent,
2788         int index,
2789         boolean backgroundIO,
2790         EvictionSource source,
2791         EvictionDebugStats stats) {
2792 
2793         DatabaseImpl db = target.getDatabase();
2794         EnvironmentImpl dbEnv = db.getEnv();
2795         long targetLsn = DbLsn.NULL_LSN;
2796         boolean newTargetLsn = false;
2797 
2798         if (target.getDirty()) {
2799 
2800             boolean logProvisional = coordinateWithCheckpoint(target, parent);
2801 
2802             targetLsn = target.log(dbEnv.getLogManager(),
2803                                    allowBinDeltas,
2804                                    true, /*allowCompress*/
2805                                    logProvisional,
2806                                    backgroundIO,
2807                                    parent);
2808             newTargetLsn = true;
2809         } else {
2810             targetLsn = parent.getLsn(index);
2811         }
2812 
2813         assert(targetLsn != DbLsn.NULL_LSN);
2814 
2815         if (targetLsn != DbLsn.NULL_LSN) {
2816 
2817             long evictedBytes = target.getBudgetedMemorySize();
2818 
2819             parent.detachNode(index, newTargetLsn/*updateLsn*/, targetLsn);
2820 
2821             nNodesEvicted.increment();
2822             if (stats != null) {
2823                 stats.incNumEvicted(target.isBIN());
2824             }
2825 
2826             return evictedBytes;
2827         } else {
2828             //can we ever really reach here? How? ?????
2829             addBack(target);
2830             return 0;
2831         }
2832     }
2833 
evictRoot( RootEvictor rootEvictor, boolean backgroundIO, EvictionSource source, EvictionDebugStats stats)2834     private long evictRoot(
2835         RootEvictor rootEvictor,
2836         boolean backgroundIO,
2837         EvictionSource source,
2838         EvictionDebugStats stats) {
2839 
2840         final ChildReference rootRef = rootEvictor.rootRef;
2841         final IN target = (IN) rootRef.getTarget();
2842         final DatabaseImpl db = target.getDatabase();
2843         final EnvironmentImpl dbEnv = db.getEnv();
2844         final INList inList = dbEnv.getInMemoryINs();
2845 
2846         boolean logProvisional =
2847             coordinateWithCheckpoint(target, null /*parent*/);
2848 
2849         if (target.getDirty()) {
2850             long newLsn = target.log(dbEnv.getLogManager(),
2851                                      false, // allowDeltas
2852                                      false, // allowCompress
2853                                      logProvisional,
2854                                      backgroundIO,
2855                                      null); // parent
2856 
2857             rootRef.setLsn(newLsn);
2858             rootEvictor.flushed = true;
2859         }
2860 
2861         inList.remove(target);
2862 
2863         long evictBytes = target.getBudgetedMemorySize();
2864 
2865         rootRef.clearTarget();
2866 
2867         nNodesEvicted.increment();
2868         nRootNodesEvicted.increment();
2869         if (stats != null) {
2870             stats.incNumEvicted(false);
2871         }
2872 
2873         return evictBytes;
2874     }
2875 
2876     /**
2877      * Coordinates an eviction with an in-progress checkpoint and returns
2878      * whether provisional logging is needed.
2879      *
2880      * @return true if the target must be logged provisionally.
2881      */
coordinateWithCheckpoint(IN target, IN parent)2882     private boolean coordinateWithCheckpoint(IN target, IN parent) {
2883 
2884         EnvironmentImpl dbEnv = target.getDatabase().getEnv();
2885 
2886         /*
2887          * The checkpointer could be null if it was shutdown or never
2888          * started.
2889          */
2890         Checkpointer ckpter = dbEnv.getCheckpointer();
2891         if (ckpter == null) {
2892             return false;
2893         }
2894         return ckpter.coordinateEvictionWithCheckpoint(target, parent);
2895     }
2896 
isCacheFull()2897     public boolean isCacheFull() {
2898         return arbiter.isCacheFull();
2899     }
2900 
wasCacheEverFull()2901     public boolean wasCacheEverFull() {
2902         return arbiter.wasCacheEverFull();
2903     }
2904 
2905    /* For unit testing only. */
setRunnableHook(TestHook<Boolean> hook)2906     public void setRunnableHook(TestHook<Boolean> hook) {
2907         arbiter.setRunnableHook(hook);
2908     }
2909 
2910     /* For unit testing only. */
setPreEvictINHook(TestHook<Object> hook)2911     public void setPreEvictINHook(TestHook<Object> hook) {
2912         preEvictINHook = hook;
2913     }
2914 
2915     /* For unit testing only. */
setEvictProfileHook(TestHook<IN> hook)2916     public void setEvictProfileHook(TestHook<IN> hook) {
2917         evictProfile = hook;
2918     }
2919 
getStatsGroup()2920     public StatGroup getStatsGroup() {
2921         return stats;
2922     }
2923 
2924     /**
2925      * Load stats.
2926      */
loadStats(StatsConfig config)2927     public StatGroup loadStats(StatsConfig config) {
2928 
2929         if (isShared) {
2930             sharedCacheEnvs.set(envInfos.size());
2931         }
2932 
2933         float binFetchMisses = (float)nBINFetchMiss.get();
2934         float binFetches = (float)nBINFetch.get();
2935 
2936         binFetchMissRatio.set(
2937             (binFetches > 0 ? (binFetchMisses / binFetches) : 0));
2938 
2939         StatGroup copy = stats.cloneGroup(config.getClear());
2940 
2941         /*
2942          * These stats are not cleared. They represent the current state of
2943          * the cache.
2944          */
2945         new LongStat(copy, CACHED_IN_SPARSE_TARGET, nINSparseTarget.get());
2946         new LongStat(copy, CACHED_IN_NO_TARGET, nINNoTarget.get());
2947         new LongStat(copy, CACHED_IN_COMPACT_KEY, nINCompactKey.get());
2948 
2949         copy.addAll(getINListStats(config));
2950 
2951         return copy;
2952     }
2953 
getINListStats(StatsConfig config)2954     private StatGroup getINListStats(StatsConfig config) {
2955 
2956         if (isShared) {
2957 
2958             StatGroup totalINListStats = new StatGroup("temp", "temp");
2959 
2960             if (config.getFast()) {
2961 
2962                 /*
2963                  * This is a slow stat for shared envs, because of the need to
2964                  * synchronize.
2965                  */
2966                 return totalINListStats;
2967             }
2968 
2969             List<EnvInfo> copy = null;
2970             synchronized(this) {
2971                 copy = new ArrayList<EnvInfo>(envInfos);
2972             }
2973 
2974             for (EnvInfo ei: copy) {
2975                 totalINListStats.addAll(ei.env.getInMemoryINs().loadStats());
2976             }
2977 
2978             return totalINListStats;
2979         } else {
2980             return firstEnvImpl.getInMemoryINs().loadStats();
2981         }
2982     }
2983 
incNumLNsEvicted(long inc)2984     public void incNumLNsEvicted(long inc) {
2985         nLNsEvicted.add(inc);
2986     }
2987 
2988     /**
2989      * Update the appropriate fetch stat, based on node type.
2990      */
incLNFetchStats(boolean isMiss)2991     public void incLNFetchStats(boolean isMiss) {
2992         nLNFetch.increment();
2993         if (isMiss) {
2994             nLNFetchMiss.increment();
2995         }
2996     }
2997 
incUINFetchStats(boolean isMiss)2998     public void incUINFetchStats(boolean isMiss) {
2999         nUpperINFetch.increment();
3000         if (isMiss) {
3001             nUpperINFetchMiss.increment();
3002         }
3003     }
3004 
incBINFetchStats(boolean isMiss, boolean isDelta)3005     public void incBINFetchStats(boolean isMiss, boolean isDelta) {
3006         nBINFetch.increment();
3007         if (isMiss) {
3008             nBINFetchMiss.increment();
3009             if (isDelta) {
3010                 nBINDeltaFetchMiss.increment();
3011             }
3012         }
3013     }
3014 
incFullBINMissStats()3015     public void incFullBINMissStats() {
3016         nFullBINMiss.increment();
3017     }
3018 
incBinDeltaBlindOps()3019     public void incBinDeltaBlindOps() {
3020         nBinDeltaBlindOps.increment();
3021     }
3022 
getNINSparseTarget()3023     public AtomicLong getNINSparseTarget() {
3024         return nINSparseTarget;
3025     }
3026 
getNINNoTarget()3027     public AtomicLong getNINNoTarget() {
3028         return nINNoTarget;
3029     }
3030 
getNINCompactKey()3031     public AtomicLong getNINCompactKey() {
3032         return nINCompactKey;
3033     }
3034 
3035 
3036     static class ReentrancyGuard {
3037         private final ConcurrentHashMap<Thread, Thread> activeThreads;
3038         private final EnvironmentImpl envImpl;
3039         private final Logger logger;
3040 
ReentrancyGuard(EnvironmentImpl envImpl, Logger logger)3041         ReentrancyGuard(EnvironmentImpl envImpl, Logger logger) {
3042             this.envImpl = envImpl;
3043             this.logger = logger;
3044             activeThreads = new ConcurrentHashMap<Thread, Thread>();
3045         }
3046 
enter()3047         boolean enter() {
3048             Thread thisThread = Thread.currentThread();
3049             if (activeThreads.containsKey(thisThread)) {
3050                 /* We don't really expect a reentrant call. */
3051                 LoggerUtils.severe(logger, envImpl,
3052                                    "reentrant call to eviction from " +
3053                                    LoggerUtils.getStackTrace());
3054 
3055                 /* If running w/assertions, in testing mode, assert here. */
3056                 assert false: "reentrant call to eviction from " +
3057                     LoggerUtils.getStackTrace();
3058                 return false;
3059             }
3060 
3061             activeThreads.put(thisThread, thisThread);
3062             return true;
3063         }
3064 
leave()3065         void leave() {
3066             assert activeThreads.contains(Thread.currentThread());
3067             activeThreads.remove(Thread.currentThread());
3068         }
3069     }
3070 
3071     static class BackgroundEvictTask implements Runnable {
3072 
3073         private final Evictor evictor;
3074         private final boolean backgroundIO;
3075 
BackgroundEvictTask(Evictor evictor)3076         BackgroundEvictTask(Evictor evictor) {
3077             this.evictor = evictor;
3078             this.backgroundIO = true;
3079         }
3080 
run()3081         public void run() {
3082             evictor.doEvict(EvictionSource.EVICTORTHREAD, backgroundIO);
3083         }
3084     }
3085 
3086     static class RejectEvictHandler implements RejectedExecutionHandler {
3087 
3088         private final AtomicLongStat threadUnavailableStat;
3089 
RejectEvictHandler(AtomicLongStat threadUnavailableStat)3090         RejectEvictHandler(AtomicLongStat threadUnavailableStat) {
3091             this.threadUnavailableStat = threadUnavailableStat;
3092         }
3093 
rejectedExecution(Runnable r, ThreadPoolExecutor executor)3094         public void rejectedExecution(Runnable r,
3095                                       ThreadPoolExecutor executor) {
3096             threadUnavailableStat.increment();
3097         }
3098     }
3099 
3100     /**
3101      * Caches DatabaseImpls to reduce DbTree.getDb overhead.
3102      *
3103      * SharedEvictor, unlike PrivateEvictor, must maintain a cache map for each
3104      * EnvironmentImpl, since each cache map is logically associated with a
3105      * single DbTree instance.
3106      */
3107     static class DbCache {
3108 
3109         boolean shared = false;
3110 
3111         int nOperations = 0;
3112 
3113         int dbCacheClearCount = 0;
3114 
3115         final Map<EnvironmentImpl, Map<DatabaseId, DatabaseImpl>> envMap;
3116 
3117         final Map<DatabaseId, DatabaseImpl> dbMap;
3118 
DbCache(boolean shared, int dbCacheClearCount)3119         DbCache(boolean shared, int dbCacheClearCount) {
3120 
3121             this.shared = shared;
3122             this.dbCacheClearCount = dbCacheClearCount;
3123 
3124             if (shared) {
3125                 envMap =
3126                 new HashMap<EnvironmentImpl, Map<DatabaseId, DatabaseImpl>>();
3127 
3128                 dbMap = null;
3129             } else {
3130                 dbMap = new HashMap<DatabaseId, DatabaseImpl>();
3131                 envMap = null;
3132             }
3133         }
3134 
3135         /**
3136          * Calls DbTree.getDb for the given environment and database ID, and
3137          * caches the result to optimize multiple calls for the same DB.
3138          *
3139          * @param envImpl identifies which environment the dbId parameter
3140          * belongs to.  For PrivateEvictor, it is the same as the
3141          * Evictor.firstEnvImpl field.
3142          *
3143          * @param dbId is the DB to get.
3144          */
getDb(EnvironmentImpl env, DatabaseId dbId)3145         DatabaseImpl getDb(EnvironmentImpl env, DatabaseId dbId) {
3146 
3147             Map<DatabaseId, DatabaseImpl> map;
3148 
3149             if (shared) {
3150                 map = envMap.get(env);
3151                 if (map == null) {
3152                     map = new HashMap<DatabaseId, DatabaseImpl>();
3153                     envMap.put(env, map);
3154                 }
3155             } else {
3156                 map = dbMap;
3157             }
3158 
3159             /*
3160              * Clear DB cache after dbCacheClearCount operations, to
3161              * prevent starving other threads that need exclusive access to
3162              * the MapLN (for example, DbTree.deleteMapLN).  [#21015]
3163              *
3164              * Note that we clear the caches for all environments after
3165              * dbCacheClearCount total operations, rather than after
3166              * dbCacheClearCount operations for a single environment,
3167              * because the total is a more accurate representation of
3168              * elapsed time, during which other threads may be waiting for
3169              * exclusive access to the MapLN.
3170              */
3171             nOperations += 1;
3172             if ((nOperations % dbCacheClearCount) == 0) {
3173                 releaseDbs(env);
3174             }
3175 
3176             return env.getDbTree().getDb(dbId, -1, map);
3177         }
3178 
3179         /**
3180          * Calls DbTree.releaseDb for cached DBs, and clears the cache.
3181          */
releaseDbs(EnvironmentImpl env)3182         void releaseDbs(EnvironmentImpl env) {
3183             if (shared) {
3184                 for (Map.Entry<EnvironmentImpl, Map<DatabaseId, DatabaseImpl>>
3185                      entry : envMap.entrySet()) {
3186 
3187                     final EnvironmentImpl sharingEnv = entry.getKey();
3188                     final Map<DatabaseId, DatabaseImpl> map = entry.getValue();
3189 
3190                     sharingEnv.getDbTree().releaseDbs(map);
3191                     map.clear();
3192                 }
3193             } else {
3194                 env.getDbTree().releaseDbs(dbMap);
3195                 dbMap.clear();
3196             }
3197         }
3198     }
3199 }
3200