1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. 5 * 6 */ 7 8 package com.sleepycat.je.evictor; 9 10 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_DELTA_BLIND_OPS; 11 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_DELTA_FETCH_MISS; 12 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_FETCH; 13 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_FETCH_MISS; 14 import static com.sleepycat.je.evictor.EvictorStatDefinition.BIN_FETCH_MISS_RATIO; 15 import static com.sleepycat.je.evictor.EvictorStatDefinition.FULL_BIN_MISS; 16 import static com.sleepycat.je.evictor.EvictorStatDefinition.CACHED_IN_COMPACT_KEY; 17 import static com.sleepycat.je.evictor.EvictorStatDefinition.CACHED_IN_NO_TARGET; 18 import static com.sleepycat.je.evictor.EvictorStatDefinition.CACHED_IN_SPARSE_TARGET; 19 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_MUTATED; 20 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_STRIPPED; 21 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_EVICTION_RUNS; 22 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_EVICTED; 23 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_LNS_EVICTED; 24 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_TARGETED; 25 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_ROOT_NODES_EVICTED; 26 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_PUT_BACK; 27 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_SKIPPED; 28 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_NODES_MOVED_TO_DIRTY_LRU; 29 import static com.sleepycat.je.evictor.EvictorStatDefinition.EVICTOR_SHARED_CACHE_ENVS; 30 import static com.sleepycat.je.evictor.EvictorStatDefinition.GROUP_DESC; 31 import static com.sleepycat.je.evictor.EvictorStatDefinition.GROUP_NAME; 32 import static com.sleepycat.je.evictor.EvictorStatDefinition.LN_FETCH; 33 import static com.sleepycat.je.evictor.EvictorStatDefinition.LN_FETCH_MISS; 34 import static com.sleepycat.je.evictor.EvictorStatDefinition.NUM_BYTES_EVICTED_DESC; 35 import static com.sleepycat.je.evictor.EvictorStatDefinition.UPPER_IN_FETCH; 36 import static com.sleepycat.je.evictor.EvictorStatDefinition.UPPER_IN_FETCH_MISS; 37 import static com.sleepycat.je.evictor.EvictorStatDefinition.THREAD_UNAVAILABLE; 38 39 40 import java.lang.AssertionError; 41 import java.util.EnumSet; 42 import java.util.HashMap; 43 import java.util.Map; 44 import java.util.ArrayList; 45 import java.util.List; 46 import java.util.logging.Level; 47 import java.util.concurrent.ArrayBlockingQueue; 48 import java.util.concurrent.ConcurrentHashMap; 49 import java.util.concurrent.RejectedExecutionHandler; 50 import java.util.concurrent.ThreadPoolExecutor; 51 import java.util.concurrent.TimeUnit; 52 import java.util.concurrent.atomic.AtomicBoolean; 53 import java.util.concurrent.atomic.AtomicLong; 54 55 import java.util.logging.Logger; 56 57 import com.sleepycat.je.CacheMode; 58 import com.sleepycat.je.DatabaseException; 59 import com.sleepycat.je.EnvironmentFailureException; 60 import com.sleepycat.je.EnvironmentMutableConfig; 61 import com.sleepycat.je.StatsConfig; 62 import com.sleepycat.je.config.EnvironmentParams; 63 import com.sleepycat.je.dbi.DatabaseId; 64 import com.sleepycat.je.dbi.DatabaseImpl; 65 import com.sleepycat.je.dbi.DbConfigManager; 66 import com.sleepycat.je.dbi.DbTree; 67 import com.sleepycat.je.dbi.EnvConfigObserver; 68 import com.sleepycat.je.dbi.EnvironmentImpl; 69 import com.sleepycat.je.dbi.INList; 70 import com.sleepycat.je.dbi.MemoryBudget; 71 import com.sleepycat.je.recovery.Checkpointer; 72 import com.sleepycat.je.tree.BIN; 73 import com.sleepycat.je.tree.IN; 74 import com.sleepycat.je.tree.Tree; 75 import com.sleepycat.je.tree.SearchResult; 76 import com.sleepycat.je.tree.ChildReference; 77 import com.sleepycat.je.tree.WithRootLatched; 78 import com.sleepycat.je.utilint.AtomicLongStat; 79 import com.sleepycat.je.utilint.DbLsn; 80 import com.sleepycat.je.utilint.IntStat; 81 import com.sleepycat.je.utilint.LoggerUtils; 82 import com.sleepycat.je.utilint.FloatStat; 83 import com.sleepycat.je.utilint.LongStat; 84 import com.sleepycat.je.utilint.StatDefinition; 85 import com.sleepycat.je.utilint.StatGroup; 86 import com.sleepycat.je.utilint.StoppableThreadFactory; 87 import com.sleepycat.je.utilint.TestHook; 88 import com.sleepycat.je.utilint.TestHookExecute; 89 90 /** 91 * 92 * Overview 93 * -------- 94 * 95 * The Evictor is responsible for managing the JE cache. The cache is 96 * actually a collection of in-memory btree nodes, implemented by the 97 * com.sleepycat.je.dbi.INList class. A subset of the nodes in te INList 98 * are candidates for eviction. This subset is tracked in one or more 99 * LRULists, which are maintained by the Evictor. When a node is evicted, 100 * it is detached from its contining BTree and then removed from the INList 101 * and from its containig LRUList. Once all references to an evicted node 102 * are removed, it can be GC'd by the JVM. 103 * 104 * The Evictor owns a pool of threads that are available to handle eviction 105 * tasks. The eviction pool is a standard java.util.concurrent thread pool, 106 * and can be mutably configured in terms of core threads, max threads, and 107 * keepalive times. 108 * 109 * Eviction is carried out by three types of threads: 110 * 1. An application thread, in the course of doing critical eviction. 111 * 2. Daemon threads, such as the cleaner or INCompressor, in the course of 112 * doing their respective duties. 113 * 3. Eviction pool threads. 114 * 115 * Memory consumption is tracked by the MemoryBudget. The Arbiter, which is 116 * also owned by the Evictor, is used to query the MemoryBudget and determine 117 * whether eviction is actually needed, and if so, how many bytes should be 118 * evicted by an evicting thread. 119 * 120 * Multiple threads can do eviction concurrently. As a result, it's important 121 * that eviction is both thread safe and as parallel as possible. Memory 122 * thresholds are generally accounted for in an unsynchronized fashion, and are 123 * seen as advisory. The only point of true synchronization is around the 124 * selection of a node for eviction. The act of eviction itself can be done 125 * concurrently. 126 * 127 * The eviction method is not reentrant, and a simple concurrent hash map 128 * of threads is used to prevent recursive calls. 129 * 130 * Details on the implementation of the LRU-based eviction policy 131 * -------------------------------------------------------------- 132 * 133 * ------------------ 134 * Data structures 135 * ------------------ 136 * 137 * An LRU eviction policy is approximated by one or more LRULists. An LRUList 138 * is a doubly linked list consisting of BTree nodes. If a node participates 139 * in an LRUList, then whenever it is accessed, it moves to the "back" of the 140 * list. When eviction is needed, the evictor evicts the nodes at the "front" 141 * of the LRULists. 142 * 143 * An LRUList is implemented as 2 IN references: a "front" ref pointing to the 144 * IN at the front of the list and a "back" ref, pointing to the IN at the back 145 * of the list. In addition, each IN has "nextLRUNode" and "prevLRUNode" refs 146 * for participating in an LRUList. This implementation works because an IN can 147 * belong to at most 1 LRUList at a time. Furthermore, it is the responsibility 148 * of the Evictor to know which LRUList a node belongs to at any given time 149 * (more on this below). As a result, each LRUList can assume that a node will 150 * either not be in any list at all, or will belong to "this" list. This way, 151 * membership of a node to an LRUList can be tested by just checking that 152 * either the nextLRUNode or prevLRUNode field of the node is non-null. 153 * 154 * The operations on an LRUList are: 155 * 156 * - addBack(IN) : 157 * Insert an IN at the back of the list. Assert that the node does not belong 158 * to an LRUList already. 159 * 160 * - addFront(IN) : 161 * Insert an IN at the front of the list. Assert that the node does not belong 162 * to an LRUList already. 163 * 164 * - moveBack(IN) : 165 * Move an IN to the back of the list, if it is in the list already. Noop 166 * if the node is not in the list. 167 * 168 * - moveFront(IN) : 169 * Move an IN to the front of the list, if it is in the list already. Noop 170 * if the node is not in the list. 171 * 172 * - removeFront() : 173 * Remove the IN at the front of the list and return it to the caller. 174 * Return null if the list is empty. 175 * 176 * - remove(IN) : 177 * Remove the IN from the list, if it is there. Return true if the node was 178 * in the list, false otherwise. 179 * 180 * - contains(IN): 181 * Return true if the node is contained in the list, false otherwise. 182 * 183 * All of the above methods are synchronized on the LRUList object. This may 184 * create a synchronization bottleneck. To alleviate this, the Evictor uses 185 * multiple LRULists, which taken together comprise a logical LRU list, called 186 * an LRUSet. The number of LRULists per LRUSet (numLRULists) is fixed and 187 * determined by a config parameter (max of 64). The LRULists are stored in 188 * an array whose length is numLRULists. 189 * 190 * The Evictor actually maintains 2 LRUSets: the "mixed" one and the "dirty" 191 * one. Within an LRUSet, the nodeId is used to place a node to an LRUList: a 192 * node with id N goes to the (N % numLRULists)-th list. In addition, each 193 * node has a flag (isInDirtyLRU) to identify which LRUSet it belongs to. 194 * This way, the Evictor knows which LRUList a node should belong to, and 195 * accesses the appropriate LRUList instance when it needs to add/remove/move 196 * a node within the LRU. 197 * 198 * Access to the isInDirtyLRU flag is synchronized via the SH/EX node latch. 199 * 200 * Justification for the mixed and dirty LRUSets: We would like to keep dirty 201 * INs in memory as much as possible to achieve "write absorption". Ideally, 202 * dirty INs should be logged by the checkpointer only. So, we would like to 203 * have the option in the Evictor to chose a clean IN to evict over a dirty 204 * IN, even if the dirty IN is colder than the clean IN. In this mode, having 205 * a single LRUSet will not perform very well in the situation when most (or 206 * a lot) or the INs are dirty (because each time we get a dirty IN from an 207 * LRUList, we would have to put it back to the list and try another IN until 208 * we find a clean one, thus spending a lot of CPU time trying to select an 209 * eviction target). 210 * 211 * Withinh each LRUSet, picking an LRUList to evict from is done in a round- 212 * robin fashion. To this end, the Evictor maintains 2 int counters: 213 * nextMixedLRUList and nextDirtyLRUList. To evict from the mixed LRUSet, an 214 * evicting thread picks the (nextMixedLRUList % numLRULists)-th list, and 215 * then increments nextMixedLRUList. Similarly, to evict from the dirty LRUSet, 216 * an evicting thread picks the (nextDirtyLRUList % numLRULists)-th list, 217 * and then increments nextDirtyLRUList. This does not have to be done in a 218 * synchronized way. 219 * 220 * A new flag (called hasCachedChildren) is added to each IN to indicate 221 * whether the IN has cached children or not. This flag is used and maintained 222 * for upper INs (UINs) only. The need for this flag is explained below. 223 * Access to this flag is synchronized via the SH/EX node latch. 224 * 225 * --------------------------------------------------------------------------- 226 * LRUSet management: adding/removing/moving INs in/out of/within the LRUSets 227 * --------------------------------------------------------------------------- 228 * 229 * We don't want to track upper IN (UIN) nodes that have cached children. 230 * There are 2 reasons for this: (a) we cannot evict UINs with cached children 231 * (the children must be evicted first) and (b) UINs will normally have high 232 * access rate, and would add a lot of CPU overhead if they were tracked. 233 * 234 * The hasCachedChildren flag is used as a quick way to determine whether a 235 * UIN has cached children or not. 236 * 237 * Adding a node to the LRU. 238 * ------------------------- 239 * 240 * A IN N is added in an LRUSet via one of the following Evictor methods: 241 * addBack(IN), addFront(IN), dirtyAddBack(IN), or dirtyAddFront(IN). The 242 * first 2 add the node to the mixed LRUSet and set its isInDirtyLRU flag 243 * to false. The last 2 add the node to the dirty LRUSet and set its 244 * isInDirtyLRU flag to true. 245 * 246 * Note: DINs and DBINs are never added to the LRU. 247 * 248 * A node N is added to the LRU in the following situations: 249 * 250 * 1. N is fetched into memory from the log. Evictor.addBack(N) is called 251 * inside IN.postfetchInit() (just before N is connected to its parent). 252 * 253 * 2. N is a brand new node created during a split, and either N is a BIN or 254 * N does not get any cached children from its split sibling. 255 * Evictor.addFront(N) is called if N is a BIN and the cachemode is 256 * MAKE_COLD or EVICT_BIN. Otherwise, Evictor.addBack(child) is called. 257 * 258 * 3. N is a UIN that is being split, and before the split it had cached 259 * children, but all its cached children have now moved to its newly 260 * created sibling. Evictor.addBack(N) is called in this case. 261 * 262 * 4. N is a UIN that looses its last cached child (either because the child is 263 * evicted or it is deleted). Evictor.addBack(N) is called inside 264 * IN.setTarget(), if the target is null, N is a UIN, N's hasCachedChildren 265 * flag is true, and N after setting the target to null, N has no remaining 266 * cached children. 267 * 268 * 5. N is the 1st BIN in a brand new tree. In this case, Evictor.addBack(N) 269 * is called inside Tree.findBinForInsert(). 270 * 271 * 6. N is a node visited during IN.rebuildINList() and N is either a BIN or 272 * a UIN with no cached children. 273 * 274 * 7. An evicting thread T removes N from the LRU, but after T EX-latches N, 275 * it determines that N is not evictable or should not be evicted, and 276 * should be put back in the LRU. T puts N back to the LRU using one of 277 * the above 4 methods (for details, read about the eviction processing 278 * below), but ONLY IF (a) N is still in the INList, and (b) N is not in 279 * the LRU already. 280 * 281 * Case (b) can happen if N is a UIN and after T removed N from the LRU 282 * but before T could latch N, another thread T1 added a child to N and 283 * removed that child. Thus, by item 4 above, T1 adds N back to the LRU. 284 * Furthermore, since N is now back in the LRU, case (a) can now happen 285 * as well if another thread can evict N before T latches it. 286 * 287 * 8. When the checkpointer (or any other thread/operation) cleans a dirty IN, 288 * it must move it from the dirty LRUSet (if there) to the mixed one. This 289 * is done via the Evictor.moveToMixedLRU(N) method: If the isInDirtyLRU 290 * flag of N is true, LRUList.remove(N) is called to remove the node from 291 * the dirty LRUSet. If N was indeed in the dirty LRUSet (i.e., 292 * LRUList.remove() returns true), addBack(N) is called to put it in the 293 * mixed LRUSet. 294 * 295 * By moving N to the mixed LRUSet only after atomically removing it from 296 * the dirty LRUSet and checking that it was indeed there, we prevent N 297 * from being added into the LRU if N has been or would be removed from 298 * the LRU by a concurrently running evicting thread. 299 * 300 * In cases 2, 3, 4, 5, 7, and 8 N is EX-latched. In case 1, the node is not 301 * latched, but it is inaccessible by any other threads because it is not 302 * conected to its parent yet and the parent is EX-latched (but N has already 303 * been inserted in the INList; can this create any problems ?????). In case 304 * 6 there is only one thread running. So, in all cases it's ok to set the 305 * isInDirtyLRU flag of the node. 306 * 307 * Question: can a thread T try to add a node N, seen as a Java obj instance, 308 * into the LRU, while N is already there? I believe not, and LRUList addBack() 309 * and addFront() methods assert that this cannot happen. In cases 1, 2, and 5 310 * above N is newly created node, so it cannot be in the LRU already. In cases 311 * 3 and 4, N is a UIN that has cached children, so it cannot be in the LRU. 312 * In case 6 there is only 1 thread. Finally, in cases 7 and 8, T checks that 313 * N is not in the LRU before attempting to add it (and the situation cannot 314 * change between tis check and the insertion into the LRU because N is EX- 315 * latched). 316 * 317 * Question: can a thread T try to add a node N, seen as a logical entity 318 * represented by its nodeId, into the LRU, while N is already there? 319 * Specifically, (a) can two Java instances, N1 and N2, of the same node 320 * N exist in memory at the same time, and (b) while N1 is in the LRU, can 321 * a thread T try to add N2 in the LRU? The answer to (a) is "yes", and as 322 * far as I can think, the answer to (b) is "no", but there is no explicit 323 * check in the code for this. Consider the following sequence of events: 324 * Initially only N1 is in memory and in the LRU. An evicting thread T1 325 * removes N1 from the LRU, thread T2 adds N1 in the LRU, thread T3 removes 326 * N1 from the LRU and actually evicts it, thread T4 fetches N from the log, 327 * thus creating instance N2 and adding N2 to the LRU, thread T1 finally 328 * EX-latches N1 and has to decide what to do with it. The check in case 329 * 7a above makes sure that N1 will not go back to the LRU. In fact the 330 * same check makes sure that N1 will not be evicted (i.e., logged, if 331 * dirty). T1 will just skip N1, thus allowing it to be GCed. 332 * 333 * Removing a node from the LRU 334 * ---------------------------- 335 * 336 * A node is removed from the LRU when it is selected as an eviction target 337 * by an evicting thread. The thread chooses an LRUList list to evict from 338 * and calls removeFront() on it. The node is not latched when it is removed 339 * from the LRU in this case. The evicting thread is going to EX-latch the 340 * node shortly after the removal. But as explain already earlier, between 341 * the removal and the latching, another thread may put the node back to the 342 * LRU, and as a result, another thread may also choose the same node for 343 * eviciton. The node may also be detached from the BTree, or its database 344 * closed, or deleted. 345 * 346 * A node may also be removing from the LRU by a non-evicting thread. This 347 * is done via the Evictor.remove(IN) method. The method checks the node's 348 * isInDrtryLRU flag to determine which LRUSet the node belongs to (if any) 349 * and then calls LRUList.remove(N). The node must be at least SH latched 350 * when the method is called. The method is a noop if the node is not in the 351 * LRU. The node may not belong to any LRUList, because it has been selected 352 * for eviction by another thread (and thus removed from LRU), but the 353 * evicting thread has not yet latched the node. There are 3 cases (listed 354 * below) where Evictor.remove(N) is called. In the first two cases 355 * Evictor.rempove(N) is invoked from INList.removeInternal(N). This makes 356 * sure that N is removed from the LRU whenever it it removed from the 357 * INList (to guarantee that the nodes in the LRU are always a subset of 358 * the nodes in the INList). 359 * 360 * 1. When a tree branch containing N gets detached from its tree. In this 361 * case, INList.remove(N) is invoked inside accountForSubtreeRemoval() or 362 * accountForDeferredWriteSubtreeRemoval(). 363 * 364 * 2. When the database containing N gets deleted or truncated. In this case, 365 * INList.iter.remove() is called in DatabaseImpl.finishDeleteProcessing(). 366 * 367 * 3. N is a UIN with no cached children (hasCachedChildren flag is false) 368 * and a new child for N is fetched. The call to Evictor.remove(N) is 369 * done inside IN.setTarget(). 370 * 371 * Moving a node within the LRU 372 * ---------------------------- 373 * 374 * A node N is moved within its containing LRUList (if any) via the Evictor 375 * moveBack(IN) and moveFront(IN) methods. The methods check the isInDirtyLRU 376 * flag of the node to determine the LRUSet the node belongs to and then move 377 * the node to the back or to the front of the LRUList. The node will be at 378 * least SH latched when these methods are called. Normally, the IN will be 379 * in an LRUList. However, it may not belong to any LRUList, because it has 380 * been selected for eviction by another thread (and thus removed from LRU), 381 * but the evicting thread has not yet EX-latched the node. In this case, 382 * these methods are is a noop. The methods are called in the following 383 * situations: 384 * 385 * 1. N is latched with cachemode DEFAULT, KEEP_HOT, or EVICT_LN and N is a 386 * BIN or a UIN with no cached children (the hasCachedChildren flag is 387 * used to check if the UIN has cached children, so we don't need to 388 * iterate over all of the node's child entries). In this case, 389 * Evictor.moveBack(N) . 390 * 391 * 2. N is latched with cachemode MAKE_COLD or EVICT_BIN and N is a BIN. 392 * In this case, Evictor.moveFront(N) is called. 393 * 394 * ------------------- 395 * Eviction Processing 396 * ------------------- 397 * 398 * A thread can initiate eviction by invoking the Evictor.doEviction() method. 399 * This method implements an "eviction run". An eviction run consists of a 400 * number of "eviction passes", where each pass is given as input a maximum 401 * number of bytes to evict. An eviction pass is implemented by the 402 * Evictor.evictBatch() method. 403 * 404 * Inside Evictor.evictBatch(), an evicting thread T: 405 * 406 * 1. Picks the mixed LRUset initially as the "current" LRUSet to be processed, 407 * 408 * 2. Initializes the max number of nodes to be processed per LRUSet to the 409 * current size of the mixed LRUSet, 410 * 411 * 3. Executes the following loop: 412 * 413 * 3.1. Picks a non-empty LRUList from the current LRUSet in a round-robin 414 * fashion, as explained earlier, and invokes LRUList.removeFront() to 415 * remove the node N at the front of the list. N becomes the current 416 * eviction target. 417 * 418 * 3.2. If the DB node N belongs to has been deleted or closed, skips this node, 419 * i.e., leaves N outside the LRU and goes to 3.4. 420 * 421 * 3.3. Calls ProcessTarget(N) (see below) 422 * 423 * 3.4. If the current LRUset is the mixed one and the number of target nodes 424 * processed reaches the max number allowed, the dirty LRUSet becomes the 425 * current one, the max number of nodes to be processed per LRUSet is set 426 * set to the current size of the dirty LRUSet, and the number of nodes 427 * processed is reset to 0. 428 * 429 * 3.5. Breaks the loop if the max number of bytes to evict during this pass 430 * has been reached, or memConsumption is less than (maxMemory - M) (where 431 * M is a config param), or the number of nodes that have been processed 432 * in the current LRUSet reaches the max allowed. 433 * 434 * -------------------------- 435 * The processTarget() method 436 * -------------------------- 437 * 438 * This method is called after a node N has been selected for eviction (and as 439 * result, removed from the LRU). The method EX-latches N and determines 440 * whether it can/should really be evicted, and if not what is the appropriate 441 * action to be taken by the evicting thread. Before returning, the method 442 * unlatches N. Finally, it returns the number of bytes evicted (if any). 443 * 444 * If a decision is taken to evict N or mutate it to a BINDelta, N must first 445 * be unlatched and its parent must be searched within the tree. During this 446 * search, many things can happen to the unlatched N, and as a result, after 447 * the parent is found and the N is relatched, processTarget() calls itself 448 * recursivelly to re-consider all the possible actions for N. 449 * 450 * Let T be an evicting thread running processTarget() to determine what to do 451 * with a target node N. The following is the list of possible outcomes: 452 * 453 * 1. SKIP - Do nothing with N if: 454 * (a) N is in the LRU. This can happen if N is a UIN and while it is 455 * unlatched by T, other threads fetch one or more of N's children, 456 * but then all of N's children are removed again, thus causing N to 457 * be put back to the LRU. 458 * (b) N is not in the INList. Given than N can be put back to the LRU while 459 * it is unlatched by T, it can also be selected as an eviction target 460 * by another thread and actually be evicted. 461 * (c) N is a UIN with cached children. N could have acquired children 462 * after the evicting thread removed it from the LRU, but before the 463 * evicting thread could EX-latch it. 464 * (d) N is the root of the DB naming tree or the DBmapping tree. 465 * (e) N is dirty, but the DB is read-only. 466 * (f) N's environment used a shared cache and the environment has been 467 * closed or invalidated. 468 * (g) If a decision was taken to evict od mutate N, but the tree search 469 * (using N's keyId) to find N's parent, failed to find the parent, or 470 * N itself. This can happen if during the search, N was evicted by 471 * another thread, or a branch containing N was completely removed 472 * from the tree. 473 * 474 * 2. PUT BACK - Put N to the back of the LRUSet it last belonged to, if: 475 * (a) It is a BIN that was last accessed with KEEP_HOT cache mode. 476 * (b) N has an entry with a NULL LSN and a null target. 477 * 478 * 3. PARTIAL EVICT - perform partial eviction on N, if none of the cases 479 * listed above is true. Currently, paretial eviction applies to BINs only 480 * and involves the eviction (stripping) of evictable LNs. If a cached LN 481 * is not evictable, the whole BIN is not evictable as well. Currently, 482 * only MapLNs may be non-evictable (see MapLN.isEvictable()). 483 * 484 * After partial eviction is performed the following outcomes are possible: 485 * 486 * 4. STRIPPED PUT BACK - Put N to the back of the LRUSet it last belonged to, 487 * if partial eviction did evict any bytes, and N is not a BIN in EVICT_BIN 488 * or MAKE_COLD cache mode. 489 * 490 * 5. PUT BACK - Put N to the back of the LRUSet it last belonged to, if 491 * no bytes were stripped, but partial eviction determined that N is not 492 * evictable. 493 * 494 * 6. MUTATE - Mutate N to a BINDelta, if none of the above apply and N is a 495 * BIN that can be mutated. 496 * 497 * 7. MOVE TO DIRTY LRU - Move N to the front of the dirty LRUSet, if none of 498 * the above apply and N is a dirty node that last belonged to the mixed 499 * LRUSet. 500 * 501 * 8. EVICT - Evict N is none of the above apply. 502 * 503 * ------- 504 * TODOs: 505 * ------- 506 * 507 * 1. Determine what todo with the KEEP_HOT cache mode 508 * 509 * 2. Decide what to do about assertions (keep, remove, convert to JE excpetions, 510 * convert to DEBUG-only expensive checks). 511 * 512 */ 513 public class Evictor implements EnvConfigObserver { 514 515 /* 516 * If new eviction source enums are added, a new stat is created, and 517 * EnvironmentStats must be updated to add a getter method. 518 * 519 * CRITICAL eviction is called by operations executed app or daemon 520 * threads which detect that the cache has reached its limits 521 * CACHE_MODE eviction is called by operations that use a specific 522 * Cursor. 523 * EVICTORThread is the eviction pool 524 * MANUAL is the call to Environment.evictMemory, called by recovery or 525 * application code. 526 */ 527 public enum EvictionSource { 528 /* Using ordinal for array values! */ 529 EVICTORTHREAD, MANUAL, CRITICAL, CACHEMODE, DAEMON; 530 getNumBytesEvictedStatDef()531 public StatDefinition getNumBytesEvictedStatDef() { 532 return new StatDefinition("nBytesEvicted" + toString(), 533 NUM_BYTES_EVICTED_DESC); 534 } 535 } 536 537 /* 538 * The purpose of EvictionDebugStats is to capture the stats of a single 539 * eviction run (i.e., an execution of the Evictor.doEviction() method by 540 * a single thread). An instance of EvictionDebugStats is created at the 541 * start of doEviction() and is passed around to the methods called from 542 * doEviction(). At the end of doEviction(), the EvictionDebugStats 543 * instance can be printed out (for debugging), or (TODO) the captured 544 * stats can be loaded to the global Evictor.stats. 545 */ 546 static class EvictionDebugStats { 547 boolean inMixedLRU; 548 boolean withParent; 549 550 long mixedSize; 551 long dirtySize; 552 553 int numSelectedMixed; 554 int numSelectedDirty; 555 556 int numPutBackMixed; 557 int numPutBackDirty; 558 559 int numBINsStripped1Mixed; 560 int numBINsStripped2Mixed; 561 int numBINsStripped1Dirty; 562 int numBINsStripped2Dirty; 563 564 int numBINsMutatedMixed; 565 int numBINsMutatedDirty; 566 567 int numUINsMoved1; 568 int numUINsMoved2; 569 int numBINsMoved1; 570 int numBINsMoved2; 571 572 int numUINsEvictedMixed; 573 int numUINsEvictedDirty; 574 int numBINsEvictedMixed; 575 int numBINsEvictedDirty; 576 reset()577 void reset() { 578 inMixedLRU = true; 579 withParent = false; 580 581 mixedSize = 0; 582 dirtySize = 0; 583 584 numSelectedMixed = 0; 585 numSelectedDirty = 0; 586 587 numPutBackMixed = 0; 588 numPutBackDirty = 0; 589 590 numBINsStripped1Mixed = 0; 591 numBINsStripped2Mixed = 0; 592 numBINsStripped1Dirty = 0; 593 numBINsStripped2Dirty = 0; 594 595 numBINsMutatedMixed = 0; 596 numBINsMutatedDirty = 0; 597 598 numUINsMoved1 = 0; 599 numUINsMoved2 = 0; 600 numBINsMoved1 = 0; 601 numBINsMoved2 = 0; 602 603 numUINsEvictedMixed = 0; 604 numUINsEvictedDirty = 0; 605 numBINsEvictedMixed = 0; 606 numBINsEvictedDirty = 0; 607 } 608 incNumSelected()609 void incNumSelected() { 610 if (inMixedLRU) { 611 numSelectedMixed++; 612 } else { 613 numSelectedDirty++; 614 } 615 } 616 incNumPutBack()617 void incNumPutBack() { 618 if (inMixedLRU) { 619 numPutBackMixed++; 620 } else { 621 numPutBackDirty++; 622 } 623 } 624 incNumStripped()625 void incNumStripped() { 626 if (inMixedLRU) { 627 if (withParent) { 628 numBINsStripped2Mixed++; 629 } else { 630 numBINsStripped1Mixed++; 631 } 632 } else { 633 if (withParent) { 634 numBINsStripped2Dirty++; 635 } else { 636 numBINsStripped1Dirty++; 637 } 638 } 639 } 640 incNumMutated()641 void incNumMutated() { 642 if (inMixedLRU) { 643 numBINsMutatedMixed++; 644 } else { 645 numBINsMutatedDirty++; 646 } 647 } 648 incNumMoved(boolean isBIN)649 void incNumMoved(boolean isBIN) { 650 if (withParent) { 651 if (isBIN) { 652 numBINsMoved2++; 653 } else { 654 numUINsMoved2++; 655 } 656 } else { 657 if (isBIN) { 658 numBINsMoved1++; 659 } else { 660 numUINsMoved1++; 661 } 662 } 663 } 664 incNumEvicted(boolean isBIN)665 void incNumEvicted(boolean isBIN) { 666 if (inMixedLRU) { 667 if (isBIN) { 668 numBINsEvictedMixed++; 669 } else { 670 numUINsEvictedMixed++; 671 } 672 } else { 673 if (isBIN) { 674 numBINsEvictedDirty++; 675 } else { 676 numUINsEvictedDirty++; 677 } 678 } 679 } 680 toString()681 public String toString() { 682 StringBuilder sb = new StringBuilder(); 683 684 sb.append("Eviction stats MIXED: size = "); 685 sb.append(mixedSize); 686 sb.append("\n"); 687 sb.append("selected = "); 688 sb.append(numSelectedMixed); 689 sb.append(" | "); 690 sb.append("put back = "); 691 sb.append(numPutBackMixed); 692 sb.append(" | "); 693 sb.append("stripped = "); 694 sb.append(numBINsStripped1Mixed); 695 sb.append("/"); 696 sb.append(numBINsStripped2Mixed); 697 sb.append(" | "); 698 sb.append("mutated = "); 699 sb.append(numBINsMutatedMixed); 700 sb.append(" | "); 701 sb.append("moved = "); 702 sb.append(numBINsMoved1); 703 sb.append("/"); 704 sb.append(numBINsMoved2); 705 sb.append(" - "); 706 sb.append(numUINsMoved1); 707 sb.append("/"); 708 sb.append(numUINsMoved2); 709 sb.append(" | "); 710 sb.append("evicted = "); 711 sb.append(numBINsEvictedMixed); 712 sb.append(" - "); 713 sb.append(numUINsEvictedMixed); 714 sb.append("\n"); 715 716 sb.append("Eviction stats DIRTY: size = "); 717 sb.append(dirtySize); 718 sb.append("\n"); 719 sb.append("selected = "); 720 sb.append(numSelectedDirty); 721 sb.append(" | "); 722 sb.append("put back = "); 723 sb.append(numPutBackDirty); 724 sb.append(" | "); 725 sb.append("stripped = "); 726 sb.append(numBINsStripped1Dirty); 727 sb.append("/"); 728 sb.append(numBINsStripped2Dirty); 729 sb.append(" | "); 730 sb.append("mutated = "); 731 sb.append(numBINsMutatedDirty); 732 sb.append(" | "); 733 sb.append("evicted = "); 734 sb.append(numBINsEvictedDirty); 735 sb.append(" - "); 736 sb.append(numUINsEvictedDirty); 737 sb.append("\n"); 738 739 return sb.toString(); 740 } 741 } 742 743 /* 744 * The purpose of LRUDebugStats is to capture stats on the current state 745 * of an LRUSet. This is done via a call to LRUEvictor.getMixedLRUStats(), 746 * or LRUEvictor.getDirtyLRUStats(). For now at least, these mehods are 747 * meant to be used for debugging and unit testing only. 748 */ 749 static class LRUDebugStats { 750 int size; 751 int dirtySize; 752 753 int numBINs; 754 int numDirtyBINs; 755 756 int numStrippedBINs; 757 int numDirtyStrippedBINs; 758 reset()759 void reset() { 760 size = 0; 761 dirtySize = 0; 762 numBINs = 0; 763 numDirtyBINs = 0; 764 numStrippedBINs = 0; 765 numDirtyStrippedBINs = 0; 766 } 767 toString()768 public String toString() { 769 StringBuilder sb = new StringBuilder(); 770 771 sb.append("Clean/Dirty INs = "); 772 sb.append(size - dirtySize); 773 sb.append("/"); 774 sb.append(dirtySize); 775 776 sb.append(" BINs = "); 777 sb.append(numBINs - numDirtyBINs); 778 sb.append("/"); 779 sb.append(numDirtyBINs); 780 781 sb.append(" Stripped BINs = "); 782 sb.append(numStrippedBINs - numDirtyStrippedBINs); 783 sb.append("/"); 784 sb.append(numDirtyStrippedBINs); 785 786 return sb.toString(); 787 } 788 } 789 790 /* 791 * LRUList implementation 792 */ 793 static class LRUList { 794 795 private static final boolean doExpensiveCheck = false; 796 797 private final int id; 798 799 private int size = 0; 800 801 private IN front = null; 802 private IN back = null; 803 LRUList(int id)804 LRUList(int id) { 805 this.id = id; 806 } 807 addBack(IN node)808 synchronized void addBack(IN node) { 809 810 /* Make sure node is not in any LRUlist already */ 811 if (node.getNextLRUNode() != null || 812 node.getPrevLRUNode() != null) { 813 814 throw EnvironmentFailureException.unexpectedState( 815 node.getEnv(), 816 Thread.currentThread().getId() + "-" + 817 Thread.currentThread().getName() + 818 "-" + node.getEnv().getName() + 819 "Attempting to add node " + node.getNodeId() + 820 " in the LRU, but node is already in the LRU."); 821 } 822 assert(!node.isDIN() && !node.isDBIN()); 823 824 node.setNextLRUNode(node); 825 826 if (back != null) { 827 node.setPrevLRUNode(back); 828 back.setNextLRUNode(node); 829 } else { 830 assert(front == null); 831 node.setPrevLRUNode(node); 832 } 833 834 back = node; 835 836 if (front == null) { 837 front = back; 838 } 839 840 ++size; 841 } 842 addFront(IN node)843 synchronized void addFront(IN node) { 844 845 /* Make sure node is not in any LRUlist already */ 846 if (node.getNextLRUNode() != null || 847 node.getPrevLRUNode() != null) { 848 849 throw EnvironmentFailureException.unexpectedState( 850 node.getEnv(), 851 Thread.currentThread().getId() + "-" + 852 Thread.currentThread().getName() + 853 "-" + node.getEnv().getName() + 854 "Attempting to add node " + node.getNodeId() + 855 " in the LRU, but node is already in the LRU."); 856 } 857 assert(!node.isDIN() && !node.isDBIN()); 858 859 node.setPrevLRUNode(node); 860 861 if (front != null) { 862 node.setNextLRUNode(front); 863 front.setPrevLRUNode(node); 864 } else { 865 assert(back == null); 866 node.setNextLRUNode(node); 867 } 868 869 front = node; 870 871 if (back == null) { 872 back = front; 873 } 874 875 ++size; 876 } 877 moveBack(IN node)878 synchronized void moveBack(IN node) { 879 880 /* If the node is not in the list, don't do anything */ 881 if (node.getNextLRUNode() == null) { 882 assert(node.getPrevLRUNode() == null); 883 return; 884 } 885 886 if (doExpensiveCheck && !contains2(node)) { 887 System.out.println("LRUList.moveBack(): list " + id + 888 "does not contain node " + 889 node.getNodeId() + 890 " Thread: " + 891 Thread.currentThread().getId() + "-" + 892 Thread.currentThread().getName() + 893 " isBIN: " + node.isBIN() + 894 " inDirtyLRU: " + node.isInDirtyLRU()); 895 assert(false); 896 } 897 898 if (node.getNextLRUNode() == node) { 899 /* The node is aready at the back */ 900 assert(back == node); 901 assert(node.getPrevLRUNode().getNextLRUNode() == node); 902 903 } else { 904 assert(front != back); 905 assert(size > 1); 906 907 if (node.getPrevLRUNode() == node) { 908 /* the node is at the front */ 909 assert(front == node); 910 assert(node.getNextLRUNode().getPrevLRUNode() == node); 911 912 front = node.getNextLRUNode(); 913 front.setPrevLRUNode(front); 914 } else { 915 /* the node is in the "middle" */ 916 assert(front != node && back != node); 917 assert(node.getPrevLRUNode().getNextLRUNode() == node); 918 assert(node.getNextLRUNode().getPrevLRUNode() == node); 919 920 node.getPrevLRUNode().setNextLRUNode(node.getNextLRUNode()); 921 node.getNextLRUNode().setPrevLRUNode(node.getPrevLRUNode()); 922 } 923 924 node.setNextLRUNode(node); 925 node.setPrevLRUNode(back); 926 927 back.setNextLRUNode(node); 928 back = node; 929 } 930 } 931 moveFront(IN node)932 synchronized void moveFront(IN node) { 933 934 /* If the node is not in the list, don't do anything */ 935 if (node.getNextLRUNode() == null) { 936 assert(node.getPrevLRUNode() == null); 937 return; 938 } 939 940 if (doExpensiveCheck && !contains2(node)) { 941 System.out.println("LRUList.moveFront(): list " + id + 942 "does not contain node " + 943 node.getNodeId() + 944 " Thread: " + 945 Thread.currentThread().getId() + "-" + 946 Thread.currentThread().getName() + 947 " isBIN: " + node.isBIN() + 948 " inDirtyLRU: " + node.isInDirtyLRU()); 949 assert(false); 950 } 951 952 if (node.getPrevLRUNode() == node) { 953 /* the node is aready at the front */ 954 assert(front == node); 955 assert(node.getNextLRUNode().getPrevLRUNode() == node); 956 957 } else { 958 assert(front != back); 959 assert(size > 1); 960 961 if (node.getNextLRUNode() == node) { 962 /* the node is at the back */ 963 assert(back == node); 964 assert(node.getPrevLRUNode().getNextLRUNode() == node); 965 966 back = node.getPrevLRUNode(); 967 back.setNextLRUNode(back); 968 } else { 969 /* the node is in the "middle" */ 970 assert(front != node && back != node); 971 assert(node.getPrevLRUNode().getNextLRUNode() == node); 972 assert(node.getNextLRUNode().getPrevLRUNode() == node); 973 974 node.getPrevLRUNode().setNextLRUNode(node.getNextLRUNode()); 975 node.getNextLRUNode().setPrevLRUNode(node.getPrevLRUNode()); 976 } 977 978 node.setPrevLRUNode(node); 979 node.setNextLRUNode(front); 980 981 front.setPrevLRUNode(node); 982 front = node; 983 } 984 } 985 removeFront()986 synchronized IN removeFront() { 987 if (front == null) { 988 assert(back == null); 989 return null; 990 } 991 992 IN res = front; 993 994 if (front == back) { 995 assert(front.getNextLRUNode() == front); 996 assert(front.getPrevLRUNode() == front); 997 assert(size == 1); 998 999 front = null; 1000 back = null; 1001 1002 } else { 1003 assert(size > 1); 1004 1005 front = front.getNextLRUNode(); 1006 front.setPrevLRUNode(front); 1007 } 1008 1009 res.setNextLRUNode(null); 1010 res.setPrevLRUNode(null); 1011 --size; 1012 1013 return res; 1014 } 1015 remove(IN node)1016 synchronized boolean remove(IN node) { 1017 1018 /* If the node is not in the list, don't do anything */ 1019 if (node.getNextLRUNode() == null) { 1020 assert(node.getPrevLRUNode() == null); 1021 return false; 1022 } 1023 1024 assert(node.getPrevLRUNode() != null); 1025 1026 if (doExpensiveCheck && !contains2(node)) { 1027 System.out.println("LRUList.remove(): list " + id + 1028 "does not contain node " + 1029 node.getNodeId() + 1030 " Thread: " + 1031 Thread.currentThread().getId() + "-" + 1032 Thread.currentThread().getName() + 1033 " isBIN: " + node.isBIN() + 1034 " inDirtyLRU: " + node.isInDirtyLRU()); 1035 assert(false); 1036 } 1037 1038 if (front == back) { 1039 assert(size == 1); 1040 assert(front == node); 1041 assert(front.getNextLRUNode() == front); 1042 assert(front.getPrevLRUNode() == front); 1043 1044 front = null; 1045 back = null; 1046 1047 } else if (node.getPrevLRUNode() == node) { 1048 /* node is at the front */ 1049 assert(front == node); 1050 assert(node.getNextLRUNode().getPrevLRUNode() == node); 1051 1052 front = node.getNextLRUNode(); 1053 front.setPrevLRUNode(front); 1054 1055 } else if (node.getNextLRUNode() == node) { 1056 /* the node is at the back */ 1057 assert(back == node); 1058 assert(node.getPrevLRUNode().getNextLRUNode() == node); 1059 1060 back = node.getPrevLRUNode(); 1061 back.setNextLRUNode(back); 1062 } else { 1063 /* the node is in the "middle" */ 1064 assert(size > 2); 1065 assert(front != back); 1066 assert(front != node && back != node); 1067 assert(node.getPrevLRUNode().getNextLRUNode() == node); 1068 assert(node.getNextLRUNode().getPrevLRUNode() == node); 1069 1070 node.getPrevLRUNode().setNextLRUNode(node.getNextLRUNode()); 1071 node.getNextLRUNode().setPrevLRUNode(node.getPrevLRUNode()); 1072 } 1073 1074 node.setNextLRUNode(null); 1075 node.setPrevLRUNode(null); 1076 --size; 1077 1078 return true; 1079 } 1080 removeINsForEnv(EnvironmentImpl env)1081 synchronized void removeINsForEnv(EnvironmentImpl env) { 1082 1083 if (front == null) { 1084 assert(back == null); 1085 return; 1086 } 1087 1088 IN node = front; 1089 1090 while (true) { 1091 1092 IN nextNode = node.getNextLRUNode(); 1093 IN prevNode = node.getPrevLRUNode(); 1094 1095 if (node.getDatabase().getEnv() == env) { 1096 1097 node.setNextLRUNode(null); 1098 node.setPrevLRUNode(null); 1099 1100 if (front == back) { 1101 assert(size == 1); 1102 assert(front == node); 1103 assert(nextNode == front); 1104 assert(prevNode == front); 1105 1106 front = null; 1107 back = null; 1108 --size; 1109 break; 1110 1111 } else if (prevNode == node) { 1112 /* node is at the front */ 1113 assert(size > 1); 1114 assert(front == node); 1115 assert(nextNode.getPrevLRUNode() == node); 1116 1117 front = nextNode; 1118 front.setPrevLRUNode(front); 1119 node = front; 1120 --size; 1121 1122 } else if (nextNode == node) { 1123 /* the node is at the back */ 1124 assert(size > 1); 1125 assert(back == node); 1126 assert(prevNode.getNextLRUNode() == node); 1127 1128 back = prevNode; 1129 back.setNextLRUNode(back); 1130 --size; 1131 break; 1132 } else { 1133 /* the node is in the "middle" */ 1134 assert(size > 2); 1135 assert(front != back); 1136 assert(front != node && back != node); 1137 assert(prevNode.getNextLRUNode() == node); 1138 assert(nextNode.getPrevLRUNode() == node); 1139 1140 prevNode.setNextLRUNode(nextNode); 1141 nextNode.setPrevLRUNode(prevNode); 1142 node = nextNode; 1143 --size; 1144 } 1145 } else if (nextNode == node) { 1146 break; 1147 } else { 1148 node = nextNode; 1149 } 1150 } 1151 } 1152 contains(IN node)1153 synchronized boolean contains(IN node) { 1154 return (node.getNextLRUNode() != null); 1155 } 1156 contains2(IN node)1157 private boolean contains2(IN node) { 1158 1159 if (front == null) { 1160 assert(back == null); 1161 return false; 1162 } 1163 1164 IN curr = front; 1165 1166 while (true) { 1167 if (curr == node) { 1168 return true; 1169 } 1170 1171 if (curr.getNextLRUNode() == curr) { 1172 break; 1173 } 1174 1175 curr = curr.getNextLRUNode(); 1176 } 1177 1178 return false; 1179 } 1180 getSize()1181 int getSize() { 1182 return size; 1183 } 1184 getStats(EnvironmentImpl env, LRUDebugStats stats)1185 synchronized void getStats(EnvironmentImpl env, LRUDebugStats stats) { 1186 1187 if (front == null) { 1188 assert(back == null); 1189 return; 1190 } 1191 1192 IN curr = front; 1193 1194 while (true) { 1195 if (env == null || curr.getEnv() == env) { 1196 stats.size++; 1197 1198 if (curr.getDirty()) { 1199 stats.dirtySize++; 1200 } 1201 1202 if (curr.isBIN()) { 1203 stats.numBINs++; 1204 1205 if (curr.getDirty()) { 1206 stats.numDirtyBINs++; 1207 } 1208 1209 if (!curr.hasResidentChildren()) { 1210 stats.numStrippedBINs++; 1211 1212 if (curr.getDirty()) { 1213 stats.numDirtyStrippedBINs++; 1214 } 1215 } 1216 } 1217 } 1218 1219 if (curr.getNextLRUNode() == curr) { 1220 break; 1221 } 1222 1223 curr = curr.getNextLRUNode(); 1224 } 1225 } 1226 } 1227 1228 /** 1229 * EnvInfo stores info related to the environments that share this evictor. 1230 */ 1231 private static class EnvInfo { 1232 EnvironmentImpl env; 1233 INList ins; 1234 } 1235 1236 /* Prevent endless eviction loops under extreme resource constraints. */ 1237 static final int MAX_BATCHES_PER_RUN = 100; 1238 1239 private static final boolean traceUINs = false; 1240 private static final boolean traceBINs = false; 1241 private static final Level traceLevel = Level.INFO; 1242 1243 /* LRU-TODO: remove */ 1244 private static final boolean collectEvictionDebugStats = false; 1245 1246 /** 1247 * Number of LRULists per LRUSet. This is a configuration parameter. 1248 * 1249 * In general, using only one LRUList may create a synchronization 1250 * bottleneck, because all LRUList methods are synchronized and are 1251 * invoked with high frequency from multiple thread. To alleviate 1252 * this bottleneck, we need the option to break a single LRUList 1253 * into multiple ones comprising an "LRUSet" (even though this 1254 * reduces the quality of the LRU approximation). 1255 */ 1256 private final int numLRULists; 1257 1258 /** 1259 * Whether to actually use the dirtyLRUSet or not. This is a configuration 1260 * parameter. 1261 */ 1262 private final boolean useDirtyLRUSet; 1263 1264 /* 1265 * Whether to allow deltas when logging a dirty BIN that is being evicted. 1266 * This is a configuration parameter. 1267 */ 1268 private final boolean allowBinDeltas; 1269 1270 /* 1271 * Whether to mutate BINs to BIN deltas rather than evicting the full node. 1272 * This is a configuration parameter. 1273 */ 1274 private final boolean mutateBins; 1275 1276 /* 1277 * Access count after which we clear the DatabaseImpl cache. 1278 * This is a configuration parameter. 1279 */ 1280 private int dbCacheClearCount; 1281 1282 /* 1283 * This is a configuration parameter. If true, eviction is done by a pool 1284 * of evictor threads, as well as being done inline by application threads. 1285 * Note: runEvictorThreads is needed as a distinct flag, rather than 1286 * setting maxThreads to 0, because the ThreadPoolExecutor does not permit 1287 * maxThreads to be 0. 1288 */ 1289 private boolean runEvictorThreads; 1290 1291 /* This is a configuration parameter. */ 1292 private int terminateMillis; 1293 1294 /* The thread pool used to manage the background evictor threads. */ 1295 private final ThreadPoolExecutor evictionPool; 1296 1297 /* Flag to help shutdown launched eviction tasks. */ 1298 private AtomicBoolean shutdownRequested; 1299 1300 /* 1301 * Whether this evictor (and the memory cache) is shared by multiple 1302 * environments 1303 */ 1304 private final boolean isShared; 1305 1306 /* 1307 * In case of multiple environments sharing a cache (and this Evictor), 1308 * firstEnvImpl references the 1st EnvironmentImpl to be created with 1309 * the shared cache. 1310 */ 1311 private final EnvironmentImpl firstEnvImpl; 1312 1313 private final List<EnvInfo> envInfos; 1314 1315 /** 1316 * This is used only when this evictor is shared by multiple envs. It 1317 * "points" to the next env to perform "special eviction" in. 1318 */ 1319 private int specialEvictionIndex = 0; 1320 1321 /* 1322 * 1323 */ 1324 private final Arbiter arbiter; 1325 1326 /** 1327 * mixedLRUSet contains both clean and dirty nodes. A freshly cached node 1328 * goes into this LRUSet. A dirty node will go to the dirtyLRUSet if it is 1329 * selected for eviction from the mixedLRUSet. A node will move from the 1330 * dirtyLRUSet to the mixedLRUSet when it gets logged (i.e., cleaned) by 1331 * the checkpointer. 1332 */ 1333 private final LRUList[] mixedLRUSet; 1334 private final LRUList[] dirtyLRUSet; 1335 1336 /** 1337 * nextMixedLRUList is used to implement the traversal of the lists in 1338 * the mixedLRUSet by one or more evicting threads. Such a thread will 1339 * select for eviction the front node from the (nextMixedLRUList % 1340 * numLRULists)-th list, and then increment nextMixedLRUList. 1341 * nextDirtyLRUList plays the same role for the dirty LRUSet. 1342 */ 1343 private int nextMixedLRUList = 0; 1344 private int nextDirtyLRUList = 0; 1345 1346 /* 1347 * The evictor is disabled during the 1st phase of recovery. The 1348 * RecoveryManager enables the evictor after it finishes its 1st 1349 * phase. 1350 */ 1351 private boolean isEnabled = false; 1352 1353 /* Eviction calls cannot be recursive. */ 1354 private ReentrancyGuard reentrancyGuard; 1355 1356 private Logger logger; 1357 1358 /* 1359 * Stats 1360 */ 1361 final StatGroup stats; 1362 1363 /* 1364 * Number of eviction tasks that were submitted to the background evictor 1365 * pool, but were refused because all eviction threads were busy. 1366 */ 1367 final AtomicLongStat nThreadUnavailable; 1368 1369 /* Number of evictBatch() invocations. */ 1370 final LongStat nEvictionRuns; 1371 1372 /* 1373 * Number of nodes selected as eviction targets. An eviction target may 1374 * actually be evicted, or skipped, or put back to the LRU, potentially 1375 * after partial eviction or BIN-delta mutation is done on it. 1376 */ 1377 final LongStat nNodesTargeted; 1378 1379 /* Number of nodes evicted. */ 1380 final LongStat nNodesEvicted; 1381 1382 /* Number of closed database root nodes evicted. */ 1383 final LongStat nRootNodesEvicted; 1384 1385 /* Number of LNs evicted. */ 1386 final LongStat nLNsEvicted; 1387 1388 /* Number of BINs stripped. */ 1389 final LongStat nNodesStripped; 1390 1391 /* Number of BINs mutated to deltas. */ 1392 final LongStat nNodesMutated; 1393 1394 /* Number of target nodes put back to the LRU w/o any other action taken */ 1395 final LongStat nNodesPutBack; 1396 1397 /* Number of target nodes skipped. */ 1398 final LongStat nNodesSkipped; 1399 1400 /* Number of target nodes moved to the dirty LRU */ 1401 final LongStat nNodesMovedToDirtyLRU; 1402 1403 /* Number of bytes evicted per eviction source. */ 1404 final AtomicLongStat[] numBytesEvicted; 1405 1406 /* 1407 * Tree related cache hit/miss stats. A subset of the cache misses recorded 1408 * by the log manager, in that these only record tree node hits and misses. 1409 * Recorded by IN.fetchIN and IN.fetchLN, but grouped with evictor stats. 1410 * Use AtomicLongStat for multithreading safety. 1411 */ 1412 final AtomicLongStat nLNFetch; 1413 1414 final AtomicLongStat nLNFetchMiss; 1415 1416 /* 1417 * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called 1418 * to fetch a UIN. 1419 */ 1420 final AtomicLongStat nUpperINFetch; 1421 1422 /* 1423 * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called 1424 * to fetch a UIN and that UIN was not already cached. 1425 */ 1426 final AtomicLongStat nUpperINFetchMiss; 1427 1428 /* 1429 * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called 1430 * to fetch a BIN. 1431 */ 1432 final AtomicLongStat nBINFetch; 1433 1434 /* 1435 * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called 1436 * to fetch a BIN and that BIN was not already cached. 1437 */ 1438 final AtomicLongStat nBINFetchMiss; 1439 1440 /* 1441 * Number of times IN.fetchIN() or IN.fetchINWithNoLatch() was called 1442 * to fetch a BIN, that BIN was not already cached, and a BIN-delta was 1443 * fetched from disk. 1444 */ 1445 final AtomicLongStat nBINDeltaFetchMiss; 1446 1447 final FloatStat binFetchMissRatio; 1448 1449 /* 1450 * Number of calls to BIN.mutateToFullBIN() 1451 */ 1452 final AtomicLongStat nFullBINMiss; 1453 1454 /* 1455 * Number of blind operations on BIN deltas 1456 */ 1457 final AtomicLongStat nBinDeltaBlindOps; 1458 1459 /* Stats for IN compact array representations currently in cache. */ 1460 final AtomicLong nINSparseTarget; 1461 final AtomicLong nINNoTarget; 1462 final AtomicLong nINCompactKey; 1463 1464 /* Number of envs sharing the cache. */ 1465 final IntStat sharedCacheEnvs; 1466 1467 /* Debugging and unit test support. */ 1468 1469 /* 1470 * Number of consecutive "no-eviction" events (i.e. when evictBatch() 1471 * returns 0). It is incremented at each "no-eviction" event and reset 1472 * to 0 when eviction does occur. It is used to determine whether to 1473 * log a WARNING for a "no-eviction" event: only 1 warning is logged 1474 * per sequence of consecutive "no-eviction" events (to avoid flooding 1475 * the logger files). 1476 */ 1477 int numNoEvictionEvents = 0; 1478 1479 TestHook<Object> preEvictINHook; 1480 TestHook<IN> evictProfile; 1481 Evictor(EnvironmentImpl envImpl)1482 public Evictor(EnvironmentImpl envImpl) 1483 throws DatabaseException { 1484 1485 isShared = envImpl.getSharedCache(); 1486 1487 firstEnvImpl = envImpl; 1488 1489 /* Do the stats definitions. */ 1490 stats = new StatGroup(GROUP_NAME, GROUP_DESC); 1491 1492 nEvictionRuns = new LongStat(stats, EVICTOR_EVICTION_RUNS); 1493 1494 nNodesTargeted = new LongStat(stats, EVICTOR_NODES_TARGETED); 1495 nNodesEvicted = new LongStat(stats, EVICTOR_NODES_EVICTED); 1496 nRootNodesEvicted = new LongStat(stats, EVICTOR_ROOT_NODES_EVICTED); 1497 nLNsEvicted = new LongStat(stats, EVICTOR_LNS_EVICTED); 1498 nNodesStripped = new LongStat(stats, EVICTOR_NODES_STRIPPED); 1499 nNodesMutated = new LongStat(stats, EVICTOR_NODES_MUTATED); 1500 nNodesPutBack = new LongStat(stats, EVICTOR_NODES_PUT_BACK); 1501 nNodesSkipped = new LongStat(stats, EVICTOR_NODES_SKIPPED); 1502 nNodesMovedToDirtyLRU = new LongStat(stats, EVICTOR_NODES_MOVED_TO_DIRTY_LRU); 1503 1504 nLNFetch = new AtomicLongStat(stats, LN_FETCH); 1505 nBINFetch = new AtomicLongStat(stats, BIN_FETCH); 1506 nUpperINFetch = new AtomicLongStat(stats, UPPER_IN_FETCH); 1507 nLNFetchMiss = new AtomicLongStat(stats, LN_FETCH_MISS); 1508 nBINFetchMiss = new AtomicLongStat(stats, BIN_FETCH_MISS); 1509 nBINDeltaFetchMiss = new AtomicLongStat(stats, BIN_DELTA_FETCH_MISS); 1510 nUpperINFetchMiss = new AtomicLongStat(stats, UPPER_IN_FETCH_MISS); 1511 nFullBINMiss = new AtomicLongStat(stats, FULL_BIN_MISS); 1512 nBinDeltaBlindOps = new AtomicLongStat(stats, BIN_DELTA_BLIND_OPS); 1513 binFetchMissRatio = new FloatStat(stats, BIN_FETCH_MISS_RATIO); 1514 1515 nThreadUnavailable = new AtomicLongStat(stats, THREAD_UNAVAILABLE); 1516 1517 nINSparseTarget = new AtomicLong(0); 1518 nINNoTarget = new AtomicLong(0); 1519 nINCompactKey = new AtomicLong(0); 1520 1521 sharedCacheEnvs = new IntStat(stats, EVICTOR_SHARED_CACHE_ENVS); 1522 1523 EnumSet<EvictionSource> allSources = 1524 EnumSet.allOf(EvictionSource.class); 1525 1526 int numSources = allSources.size(); 1527 1528 numBytesEvicted = new AtomicLongStat[numSources]; 1529 1530 for (EvictionSource source : allSources) { 1531 1532 int index = source.ordinal(); 1533 1534 numBytesEvicted[index] = new AtomicLongStat( 1535 stats, source.getNumBytesEvictedStatDef()); 1536 } 1537 1538 arbiter = new Arbiter(firstEnvImpl); 1539 1540 logger = LoggerUtils.getLogger(getClass()); 1541 reentrancyGuard = new ReentrancyGuard(firstEnvImpl, logger); 1542 shutdownRequested = new AtomicBoolean(false); 1543 1544 DbConfigManager configManager = firstEnvImpl.getConfigManager(); 1545 1546 int corePoolSize = configManager.getInt( 1547 EnvironmentParams.EVICTOR_CORE_THREADS); 1548 int maxPoolSize = configManager.getInt( 1549 EnvironmentParams.EVICTOR_MAX_THREADS); 1550 long keepAliveTime = configManager.getDuration( 1551 EnvironmentParams.EVICTOR_KEEP_ALIVE); 1552 terminateMillis = configManager.getDuration( 1553 EnvironmentParams.EVICTOR_TERMINATE_TIMEOUT); 1554 dbCacheClearCount = configManager.getInt( 1555 EnvironmentParams.ENV_DB_CACHE_CLEAR_COUNT); 1556 numLRULists = configManager.getInt( 1557 EnvironmentParams.EVICTOR_N_LRU_LISTS); 1558 useDirtyLRUSet = configManager.getBoolean( 1559 EnvironmentParams.EVICTOR_USE_DIRTY_LRU); 1560 1561 mixedLRUSet = new LRUList[numLRULists]; 1562 dirtyLRUSet = new LRUList[numLRULists]; 1563 1564 for (int i = 0; i < numLRULists; ++i) { 1565 mixedLRUSet[i] = new LRUList(i); 1566 dirtyLRUSet[i] = new LRUList(numLRULists + i); 1567 } 1568 1569 if (isShared) { 1570 envInfos = new ArrayList<EnvInfo>(); 1571 } else { 1572 envInfos = null; 1573 } 1574 1575 RejectedExecutionHandler rejectHandler = new RejectEvictHandler( 1576 nThreadUnavailable); 1577 1578 evictionPool = new ThreadPoolExecutor( 1579 corePoolSize, maxPoolSize, keepAliveTime, 1580 TimeUnit.MILLISECONDS, 1581 new ArrayBlockingQueue<Runnable>(1), 1582 new StoppableThreadFactory(firstEnvImpl, "JEEvictor", logger), 1583 rejectHandler); 1584 1585 runEvictorThreads = configManager.getBoolean( 1586 EnvironmentParams.ENV_RUN_EVICTOR); 1587 1588 allowBinDeltas = configManager.getBoolean( 1589 EnvironmentParams.EVICTOR_ALLOW_BIN_DELTAS); 1590 1591 mutateBins = configManager.getBoolean( 1592 EnvironmentParams.EVICTOR_MUTATE_BINS); 1593 1594 /* 1595 * Request notification of mutable property changes. Do this after all 1596 * fields in the evictor have been initialized, in case this is called 1597 * quite soon. 1598 */ 1599 firstEnvImpl.addConfigObserver(this); 1600 } 1601 1602 /** 1603 * Respond to config updates. 1604 */ envConfigUpdate( DbConfigManager configManager, EnvironmentMutableConfig ignore)1605 public void envConfigUpdate( 1606 DbConfigManager configManager, 1607 EnvironmentMutableConfig ignore) 1608 throws DatabaseException { 1609 1610 int corePoolSize = configManager.getInt( 1611 EnvironmentParams.EVICTOR_CORE_THREADS); 1612 int maxPoolSize = configManager.getInt( 1613 EnvironmentParams.EVICTOR_MAX_THREADS); 1614 long keepAliveTime = configManager.getDuration( 1615 EnvironmentParams.EVICTOR_KEEP_ALIVE); 1616 terminateMillis = configManager.getDuration( 1617 EnvironmentParams.EVICTOR_TERMINATE_TIMEOUT); 1618 dbCacheClearCount = configManager.getInt( 1619 EnvironmentParams.ENV_DB_CACHE_CLEAR_COUNT); 1620 1621 evictionPool.setCorePoolSize(corePoolSize); 1622 evictionPool.setMaximumPoolSize(maxPoolSize); 1623 evictionPool.setKeepAliveTime(keepAliveTime, TimeUnit.MILLISECONDS); 1624 1625 runEvictorThreads = configManager.getBoolean( 1626 EnvironmentParams.ENV_RUN_EVICTOR); 1627 } 1628 useDirtyLRUSet()1629 public boolean useDirtyLRUSet() { 1630 return useDirtyLRUSet; 1631 } 1632 getMutateToBINDeltas()1633 public boolean getMutateToBINDeltas() { 1634 return mutateBins; 1635 } 1636 isEnabled()1637 public boolean isEnabled() { 1638 return isEnabled; 1639 } 1640 setEnabled(boolean v)1641 public void setEnabled(boolean v) { 1642 isEnabled = v; 1643 } 1644 1645 /** 1646 * @hidden 1647 * Return the ThreadPool, used by unit testing only. 1648 */ getThreadPool()1649 public ThreadPoolExecutor getThreadPool() { 1650 return evictionPool; 1651 } 1652 1653 /** 1654 * Request and wait for a shutdown of all running eviction tasks. 1655 */ shutdown()1656 public void shutdown() { 1657 1658 /* 1659 * Set the shutdown flag so that outstanding eviction tasks end 1660 * early. The call to evictionPool.shutdown is a ThreadPoolExecutor 1661 * call, and is an orderly shutdown that waits for and in flight tasks 1662 * to end. 1663 */ 1664 shutdownRequested.set(true); 1665 evictionPool.shutdown(); 1666 1667 /* 1668 * AwaitTermination will wait for the timeout period, or will be 1669 * interrupted, but we don't really care which it is. The evictor 1670 * shouldn't be interrupted, but if it is, something urgent is 1671 * happening. 1672 */ 1673 boolean shutdownFinished = false; 1674 try { 1675 shutdownFinished = 1676 evictionPool.awaitTermination(terminateMillis, 1677 TimeUnit.MILLISECONDS); 1678 } catch (InterruptedException e) { 1679 /* We've been interrupted, just give up and end. */ 1680 } finally { 1681 if (!shutdownFinished) { 1682 evictionPool.shutdownNow(); 1683 } 1684 } 1685 } 1686 requestShutdownPool()1687 public void requestShutdownPool() { 1688 shutdownRequested.set(true); 1689 evictionPool.shutdown(); 1690 } 1691 addEnvironment(EnvironmentImpl env)1692 public synchronized void addEnvironment(EnvironmentImpl env) { 1693 1694 if (isShared) { 1695 int numEnvs = envInfos.size(); 1696 for (int i = 0; i < numEnvs; i += 1) { 1697 EnvInfo info = envInfos.get(i); 1698 if (info.env == env) { 1699 return; 1700 } 1701 } 1702 1703 EnvInfo info = new EnvInfo(); 1704 info.env = env; 1705 info.ins = env.getInMemoryINs(); 1706 envInfos.add(info); 1707 } else { 1708 throw EnvironmentFailureException.unexpectedState(); 1709 } 1710 } 1711 removeEnvironment(EnvironmentImpl env)1712 public synchronized void removeEnvironment(EnvironmentImpl env) { 1713 if (isShared) { 1714 int numEnvs = envInfos.size(); 1715 for (int i = 0; i < numEnvs; i += 1) { 1716 EnvInfo info = envInfos.get(i); 1717 1718 if (info.env == env) { 1719 1720 try { 1721 for (int j = 0; j < numLRULists; ++j) { 1722 mixedLRUSet[j].removeINsForEnv(env); 1723 dirtyLRUSet[j].removeINsForEnv(env); 1724 } 1725 } catch (AssertionError e) { 1726 System.out.println("YYYYYYYYYY " + e); 1727 e.printStackTrace(System.out); 1728 throw e; 1729 } 1730 1731 envInfos.remove(i); 1732 return; 1733 } 1734 } 1735 } else { 1736 throw EnvironmentFailureException.unexpectedState(); 1737 } 1738 } 1739 checkEnv(EnvironmentImpl env)1740 public synchronized boolean checkEnv(EnvironmentImpl env) { 1741 if (isShared) { 1742 int numEnvs = envInfos.size(); 1743 for (int i = 0; i < numEnvs; i += 1) { 1744 EnvInfo info = envInfos.get(i); 1745 if (env == info.env) { 1746 return true; 1747 } 1748 } 1749 1750 return false; 1751 1752 } else { 1753 throw EnvironmentFailureException.unexpectedState(); 1754 } 1755 } 1756 1757 /** 1758 * Add the node to the back of the mixed LRUSet. The node is either 1759 * EX-latched already or is inaccessible from other threads. 1760 */ addBack(IN node)1761 public void addBack(IN node) { 1762 1763 if (isEnabled && node.getEnv().getInMemoryINs().isEnabled()) { 1764 1765 assert(node.getInListResident()); 1766 1767 node.setInDirtyLRU(false); 1768 mixedLRUSet[(int)(node.getNodeId() % numLRULists)].addBack(node); 1769 } 1770 } 1771 1772 /** 1773 * Add the node to the front of the mixed LRUSet. The node is either 1774 * EX-latched already or is inaccessible from other threads. 1775 */ addFront(IN node)1776 public void addFront(IN node) { 1777 1778 if (isEnabled && node.getEnv().getInMemoryINs().isEnabled()) { 1779 1780 assert(node.getInListResident()); 1781 1782 node.setInDirtyLRU(false); 1783 mixedLRUSet[(int)(node.getNodeId() % numLRULists)].addFront(node); 1784 } 1785 } 1786 1787 /* 1788 * Add the node to the back of the dirty LRUSet. 1789 */ dirtyAddBack(IN node)1790 private void dirtyAddBack(IN node) { 1791 1792 assert(node.isLatchExclusiveOwner()); 1793 assert(node.getInListResident()); 1794 assert(useDirtyLRUSet); 1795 1796 node.setInDirtyLRU(true); 1797 dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].addBack(node); 1798 } 1799 1800 /* 1801 * Add the node to the front of the dirty LRUSet. 1802 */ dirtyAddFront(IN node)1803 private void dirtyAddFront(IN node) { 1804 1805 assert(node.isLatchExclusiveOwner()); 1806 assert(node.getInListResident()); 1807 assert(useDirtyLRUSet); 1808 1809 node.setInDirtyLRU(true); 1810 dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].addFront(node); 1811 } 1812 1813 /** 1814 * Move the node to the back of its contining LRUList, if any. 1815 */ moveBack(IN node)1816 public void moveBack(IN node) { 1817 1818 assert(node.isLatchOwner()); 1819 1820 if (node.isInDirtyLRU()) { 1821 dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].moveBack(node); 1822 } else { 1823 mixedLRUSet[(int)(node.getNodeId() % numLRULists)].moveBack(node); 1824 } 1825 } 1826 1827 /** 1828 * Move the node to the front of its contining LRUList, if any. 1829 */ moveFront(IN node)1830 public void moveFront(IN node) { 1831 1832 assert(node.isLatchOwner()); 1833 1834 if (node.isInDirtyLRU()) { 1835 dirtyLRUSet[(int)(node.getNodeId() % numLRULists)].moveFront(node); 1836 } else { 1837 mixedLRUSet[(int)(node.getNodeId() % numLRULists)].moveFront(node); 1838 } 1839 } 1840 1841 /** 1842 * Remove a node from its current LRUList, if any. 1843 */ remove(IN node)1844 public void remove(IN node) { 1845 1846 assert(node.isLatchOwner()); 1847 1848 int listId = (int)(node.getNodeId() % numLRULists); 1849 1850 if (node.isInDirtyLRU()) { 1851 dirtyLRUSet[listId].remove(node); 1852 } else { 1853 mixedLRUSet[listId].remove(node); 1854 } 1855 } 1856 1857 /** 1858 * Move the node from the dirty LRUSet to the mixed LRUSet, if the node 1859 * is indeed in the mixed LRUSet. 1860 */ moveToMixedLRU(IN node)1861 public void moveToMixedLRU(IN node) { 1862 1863 if (useDirtyLRUSet) { 1864 1865 assert(node.isLatchExclusiveOwner()); 1866 1867 if (node.isInDirtyLRU()) { 1868 int listId = (int)(node.getNodeId() % numLRULists); 1869 1870 if (dirtyLRUSet[listId].remove(node)) { 1871 assert(node.getInListResident()); 1872 node.setInDirtyLRU(false); 1873 mixedLRUSet[listId].addBack(node); 1874 } 1875 } 1876 } 1877 } 1878 contains(IN node)1879 public boolean contains(IN node) { 1880 1881 assert(node.isLatchOwner()); 1882 1883 int listId = (int)(node.getNodeId() % numLRULists); 1884 1885 if (node.isInDirtyLRU()) { 1886 return dirtyLRUSet[listId].contains(node); 1887 } 1888 return mixedLRUSet[listId].contains(node); 1889 } 1890 getMixedLRUSize()1891 long getMixedLRUSize() { 1892 long size = 0; 1893 for (int i = 0; i < numLRULists; ++i) { 1894 size += mixedLRUSet[i].getSize(); 1895 } 1896 1897 return size; 1898 } 1899 getDirtyLRUSize()1900 long getDirtyLRUSize() { 1901 long size = 0; 1902 for (int i = 0; i < numLRULists; ++i) { 1903 size += dirtyLRUSet[i].getSize(); 1904 } 1905 1906 return size; 1907 } 1908 getMixedLRUStats(EnvironmentImpl env, LRUDebugStats stats)1909 void getMixedLRUStats(EnvironmentImpl env, LRUDebugStats stats) { 1910 stats.reset(); 1911 for (int i = 0; i < numLRULists; ++i) { 1912 mixedLRUSet[i].getStats(env, stats); 1913 } 1914 } 1915 getDirtyLRUStats(EnvironmentImpl env, LRUDebugStats stats)1916 void getDirtyLRUStats(EnvironmentImpl env, LRUDebugStats stats) { 1917 stats.reset(); 1918 for (int i = 0; i < numLRULists; ++i) { 1919 dirtyLRUSet[i].getStats(env, stats); 1920 } 1921 } 1922 1923 /** 1924 * This method is called from application threads for every cursor 1925 * operation. 1926 */ doCriticalEviction(boolean backgroundIO)1927 public void doCriticalEviction(boolean backgroundIO) { 1928 1929 if (arbiter.isOverBudget()) { 1930 1931 /* 1932 * Any time there's excessive cache usage, let the thread pool know 1933 * there's work to do. 1934 */ 1935 alert(); 1936 1937 /* 1938 * Only do eviction if the memory budget overage fulfills the 1939 * critical eviction requirements. We want to avoid having 1940 * application thread do eviction. 1941 */ 1942 if (arbiter.needCriticalEviction()) { 1943 doEvict(EvictionSource.CRITICAL, backgroundIO); 1944 } 1945 } 1946 } 1947 1948 /** 1949 * This method is called from daemon threads for every operation. 1950 */ doDaemonEviction(boolean backgroundIO)1951 public void doDaemonEviction(boolean backgroundIO) { 1952 1953 if (arbiter.isOverBudget()) { 1954 1955 /* 1956 * Any time there's excessive cache usage, let the thread pool know 1957 * there's work to do. 1958 */ 1959 alert(); 1960 1961 /* 1962 * Only do eviction if the memory budget overage fulfills the 1963 * critical eviction requirements. This allows evictor threads to 1964 * take the burden of eviction whenever possible, rather than 1965 * slowing other threads and risking a growing cleaner or 1966 * compressor backlog. 1967 */ 1968 if (arbiter.needCriticalEviction()) { 1969 doEvict(EvictionSource.DAEMON, backgroundIO); 1970 } 1971 } 1972 } 1973 1974 /* 1975 * Eviction invoked by the API 1976 */ doManualEvict()1977 public void doManualEvict() 1978 throws DatabaseException { 1979 1980 doEvict(EvictionSource.MANUAL, true/*backgroundIO*/); 1981 } 1982 1983 /** 1984 * Evict a specific IN, used by cache modes. 1985 */ doEvictOneIN(IN target, EvictionSource source)1986 public void doEvictOneIN(IN target, EvictionSource source) { 1987 1988 if (!reentrancyGuard.enter()) { 1989 return; 1990 } 1991 1992 try { 1993 assert(target.isBIN()); 1994 assert(target.isLatchOwner()); 1995 1996 remove(target); 1997 1998 target.releaseLatch(); 1999 2000 long evictedBytes = processTarget( 2001 null /* rootEvictor */, target, null /* parent */, 2002 -1 /* entry index within parent */, 2003 false /* backgroundIO */, source, null /* debug stats */); 2004 2005 numBytesEvicted[source.ordinal()].add(evictedBytes); 2006 2007 } finally { 2008 reentrancyGuard.leave(); 2009 } 2010 } 2011 2012 2013 /** 2014 * Let the eviction pool know there's work to do. 2015 */ alert()2016 public void alert() { 2017 if (!runEvictorThreads) { 2018 return; 2019 } 2020 2021 evictionPool.execute(new BackgroundEvictTask(this)); 2022 } 2023 2024 /** 2025 * This is where the real work is done. 2026 * Can execute concurrently, called by app threads or by background evictor 2027 */ doEvict(EvictionSource source, boolean backgroundIO)2028 void doEvict(EvictionSource source, boolean backgroundIO) 2029 throws DatabaseException { 2030 2031 if (!isEnabled) { 2032 return; 2033 } 2034 2035 if (!reentrancyGuard.enter()) { 2036 return; 2037 } 2038 2039 nEvictionRuns.increment(); 2040 2041 try { 2042 2043 /* 2044 * Repeat as necessary to keep up with allocations. Stop if no 2045 * progress is made, to prevent an infinite loop. 2046 */ 2047 boolean progress = true; 2048 int nBatches = 0; 2049 long bytesEvicted = 0; 2050 2051 EvictionDebugStats evictionStats = null; 2052 if (collectEvictionDebugStats) { 2053 new EvictionDebugStats(); 2054 evictionStats.reset(); 2055 evictionStats.mixedSize = getMixedLRUSize(); 2056 evictionStats.dirtySize = getDirtyLRUSize(); 2057 } 2058 2059 while (progress && 2060 nBatches < MAX_BATCHES_PER_RUN && 2061 !shutdownRequested.get()) { 2062 2063 /* 2064 * Do eviction only if memory consumption is over budget. 2065 * If so, try to evict (memoryConsumption + M - maxMemory) 2066 * bytes, where M is a config param. 2067 */ 2068 long maxEvictBytes = arbiter.getEvictionPledge(); 2069 2070 if (maxEvictBytes == 0) { 2071 break; 2072 } 2073 2074 bytesEvicted = evictBatch( 2075 source, backgroundIO, maxEvictBytes, evictionStats); 2076 2077 numBytesEvicted[source.ordinal()].add(bytesEvicted); 2078 2079 if (bytesEvicted == 0) { 2080 2081 if (arbiter.stillNeedsEviction() && 2082 numNoEvictionEvents == 0 && 2083 logger.isLoggable(Level.FINE)) { 2084 ++numNoEvictionEvents; 2085 LoggerUtils.fine( 2086 logger, firstEnvImpl, 2087 "Eviction pass failed to evict any bytes"); 2088 } else { 2089 ++numNoEvictionEvents; 2090 } 2091 2092 progress = false; 2093 } else { 2094 numNoEvictionEvents = 0; 2095 } 2096 2097 nBatches += 1; 2098 } 2099 2100 if (evictionStats != null) { 2101 System.out.println(evictionStats.toString()); 2102 } 2103 2104 /* For debugging. */ 2105 if (source == EvictionSource.EVICTORTHREAD) { 2106 if (logger.isLoggable(Level.FINEST)) { 2107 LoggerUtils.finest(logger, firstEnvImpl, 2108 "Thread evicted " + bytesEvicted + 2109 " bytes in " + nBatches + " batches"); 2110 } 2111 } 2112 } finally { 2113 reentrancyGuard.leave(); 2114 } 2115 } 2116 2117 2118 /** 2119 * Not private because it is used in unit test. 2120 */ evictBatch( Evictor.EvictionSource source, boolean bgIO, long maxEvictBytes, EvictionDebugStats evictionStats)2121 long evictBatch( 2122 Evictor.EvictionSource source, 2123 boolean bgIO, 2124 long maxEvictBytes, 2125 EvictionDebugStats evictionStats) 2126 throws DatabaseException { 2127 2128 long totalEvictedBytes = 0; 2129 boolean inMixedLRUSet = true; 2130 int numNodesScannedThisBatch = 0; 2131 long maxNodesScannedThisBatch = getMixedLRUSize(); 2132 maxNodesScannedThisBatch += numLRULists; 2133 2134 assert TestHookExecute.doHookSetupIfSet(evictProfile); 2135 2136 /* 2137 * Perform special eviction,i.e., evict non-tree memory. 2138 * 2139 * TODO: special eviction is done serially. We may want to absolve 2140 * application threads of that responsibility, to avoid blocking, and 2141 * only have evictor threads do special eviction. 2142 */ 2143 synchronized (this) { 2144 if (isShared) { 2145 int numEnvs = envInfos.size(); 2146 if (numEnvs > 0) { 2147 if (specialEvictionIndex >= numEnvs) { 2148 specialEvictionIndex = 0; 2149 } 2150 EnvInfo info = envInfos.get(specialEvictionIndex); 2151 specialEvictionIndex++; 2152 2153 totalEvictedBytes = info.env.specialEviction(); 2154 } 2155 } else { 2156 totalEvictedBytes = firstEnvImpl.specialEviction(); 2157 } 2158 } 2159 2160 /* Use local caching to reduce DbTree.getDb overhead. [#21330] */ 2161 final DbCache dbCache = new DbCache(isShared, dbCacheClearCount); 2162 final MemoryBudget memBudget = firstEnvImpl.getMemoryBudget(); 2163 2164 try { 2165 while (totalEvictedBytes < maxEvictBytes && 2166 numNodesScannedThisBatch < maxNodesScannedThisBatch && 2167 arbiter.stillNeedsEviction()) { 2168 2169 if (!isShared && !memBudget.isTreeUsageAboveMinimum()) { 2170 break; 2171 } 2172 2173 final IN target = getNextTarget(inMixedLRUSet); 2174 2175 numNodesScannedThisBatch++; 2176 2177 if (target != null) { 2178 2179 nNodesTargeted.increment(); 2180 2181 if (evictionStats != null) { 2182 evictionStats.incNumSelected(); 2183 } 2184 2185 assert TestHookExecute.doHookIfSet(evictProfile, target); 2186 2187 final DatabaseImpl targetDb = target.getDatabase(); 2188 final EnvironmentImpl dbEnv = targetDb.getEnv(); 2189 2190 /* 2191 * Check to make sure the target's DB was not deleted or 2192 * truncated after selecting the target. Furthermore, 2193 * prevent the DB from being deleted while we're working 2194 * with it (this is done by the dbCache.getDb() call). 2195 * 2196 * Also check that the refreshedDb is the same instance 2197 * as the targetDb. If not, then the MapLN associated with 2198 * targetDb was recently evicted (which can happen after 2199 * all handles to the DB are closed). In this case, 2200 * targetDb and its INs are orphaned and cannot be 2201 * processed; they should simply be removed from the 2202 * LRU [#21686] 2203 */ 2204 final DatabaseImpl refreshedDb = 2205 dbCache.getDb(dbEnv, targetDb.getId()); 2206 2207 if (refreshedDb != null && 2208 !refreshedDb.isDeleted() && 2209 refreshedDb == targetDb) { 2210 2211 long evictedBytes = 0; 2212 2213 if (target.isDbRoot()) { 2214 RootEvictor rootEvictor = new RootEvictor(); 2215 rootEvictor.target = target; 2216 rootEvictor.backgroundIO = bgIO; 2217 rootEvictor.source = source; 2218 rootEvictor.stats = evictionStats; 2219 2220 /* try to evict the root */ 2221 targetDb.getTree().withRootLatchedExclusive( 2222 rootEvictor); 2223 2224 /* 2225 * If the root IN was flushed, write the dirtied 2226 * MapLN. 2227 */ 2228 if (rootEvictor.flushed) { 2229 dbEnv.getDbTree().modifyDbRoot(targetDb); 2230 } 2231 2232 evictedBytes = rootEvictor.evictedBytes; 2233 2234 } else { 2235 evictedBytes = processTarget( 2236 null, /* rootEvictor */ 2237 target, null, /* parent */ 2238 -1, /* parent entry index */ 2239 bgIO, source, evictionStats); 2240 } 2241 2242 totalEvictedBytes += evictedBytes; 2243 2244 } else { 2245 /* 2246 * We don't expect to find in the INList an IN whose 2247 * database that has finished delete processing, 2248 * because it should have been removed from the 2249 * INList during post-delete cleanup. 2250 */ 2251 if (targetDb.isDeleteFinished() && 2252 target.getInListResident()) { 2253 final String inInfo = 2254 " IN type=" + target.getLogType() + " id=" + 2255 target.getNodeId() + " not expected on INList"; 2256 final String errMsg = (refreshedDb == null) ? 2257 inInfo : 2258 ("Database " + refreshedDb.getDebugName() + 2259 " id=" + refreshedDb.getId() + " rootLsn=" + 2260 DbLsn.getNoFormatString 2261 (refreshedDb.getTree().getRootLsn()) + 2262 ' ' + inInfo); 2263 2264 throw EnvironmentFailureException. 2265 unexpectedState(errMsg); 2266 } 2267 } 2268 } 2269 2270 /* 2271 * Move to the dirty LRUSet, if we are done processing the 2272 * mixed LRUSet. 2273 */ 2274 if (numNodesScannedThisBatch >= maxNodesScannedThisBatch && 2275 totalEvictedBytes < maxEvictBytes && 2276 inMixedLRUSet) { 2277 2278 numNodesScannedThisBatch = 0; 2279 maxNodesScannedThisBatch = getDirtyLRUSize(); 2280 maxNodesScannedThisBatch += numLRULists; 2281 inMixedLRUSet = false; 2282 2283 if (evictionStats != null) { 2284 evictionStats.inMixedLRU = false; 2285 } 2286 } 2287 } 2288 } finally { 2289 dbCache.releaseDbs(firstEnvImpl); 2290 } 2291 2292 return totalEvictedBytes; 2293 } 2294 getNextTarget(boolean inMixedLRUSet)2295 private IN getNextTarget(boolean inMixedLRUSet) { 2296 2297 if (inMixedLRUSet) { 2298 int listId = Math.abs(nextMixedLRUList++) % numLRULists; 2299 IN target = mixedLRUSet[listId].removeFront(); 2300 2301 if (target != null && 2302 ((traceUINs && target.isUpperIN()) || 2303 (traceBINs && target.isBIN()))) { 2304 LoggerUtils.envLogMsg( 2305 traceLevel, target.getEnv(), 2306 Thread.currentThread().getId() + "-" + 2307 Thread.currentThread().getName() + 2308 "-" + target.getEnv().getName() + 2309 " XXXX Mixed Eviction target: " + 2310 target.getNodeId()); 2311 } 2312 2313 return target; 2314 } 2315 2316 int listId = Math.abs(nextDirtyLRUList++) % numLRULists; 2317 IN target = dirtyLRUSet[listId].removeFront(); 2318 2319 if (target != null && 2320 ((traceUINs && target.isUpperIN()) || 2321 (traceBINs && target.isBIN()))) { 2322 LoggerUtils.envLogMsg( 2323 traceLevel, target.getEnv(), 2324 Thread.currentThread().getId() + "-" + 2325 Thread.currentThread().getName() + 2326 "-" + target.getEnv().getName() + 2327 " XXXX Dirty Eviction target: " + target.getNodeId()); 2328 } 2329 2330 return target; 2331 } 2332 2333 class RootEvictor implements WithRootLatched { 2334 2335 IN target; 2336 boolean backgroundIO; 2337 EvictionSource source; 2338 EvictionDebugStats stats = null; 2339 2340 ChildReference rootRef; 2341 boolean flushed = false; 2342 long evictedBytes = 0; 2343 doWork(ChildReference root)2344 public IN doWork(ChildReference root) 2345 throws DatabaseException { 2346 2347 /* 2348 * Do not call fetchTarget since this root or DB should be 2349 * resident already if it is to be the target of eviction. If 2350 * it is not present, it has been evicted by another thread and 2351 * should not be fetched for two reasons: 1) this would be 2352 * counterproductive, 2) to guard against bringing in a root 2353 * for an evicted DB. 2354 */ 2355 IN rootIN = (IN) root.getTarget(); 2356 if (rootIN == null) { 2357 return null; 2358 } 2359 2360 rootRef = root; 2361 2362 /* 2363 * Latch the target and re-check that all conditions still hold. 2364 * The latch on the target will be released by processTarget(). 2365 */ 2366 rootIN.latch(CacheMode.UNCHANGED); 2367 2368 if (rootIN == target && rootIN.isRoot()) { 2369 evictedBytes = processTarget( 2370 this, null, /* target */ 2371 null, /* parent */ 2372 -1, /* entry index within parent */ 2373 backgroundIO, source, stats); 2374 } else { 2375 rootIN.releaseLatch(); 2376 } 2377 2378 return null; 2379 } 2380 } 2381 2382 /** 2383 * Decide what to do with an eviction target and carry out the decision. 2384 * Return the number of bytes evicted (if any). 2385 * 2386 * This method is called from evictBatch() after an IN has been selected 2387 * for eviction. It EX-latches the IN and determines whether it can/should 2388 * really be evicted, and if not what is the appropriate action to be 2389 * taken by the evicting thread. 2390 * 2391 * If a decision is taken to evict the target or mutate it to a BINDelta, 2392 * the target must first be unlatched and its parent must be searched 2393 * within the tree. During this search, many things can happen to the 2394 * unlatched target, and as a result, after the parent is found and the 2395 * target is relatched, processTarget() calls itself recursivelly to 2396 * re-consider all the possible actions on the target. 2397 */ processTarget( RootEvictor rootEvictor, IN target, IN parent, int index, boolean bgIO, EvictionSource source, EvictionDebugStats stats)2398 private long processTarget( 2399 RootEvictor rootEvictor, 2400 IN target, 2401 IN parent, 2402 int index, 2403 boolean bgIO, 2404 EvictionSource source, 2405 EvictionDebugStats stats) 2406 throws DatabaseException { 2407 2408 boolean targetIsLatched = false; 2409 boolean parentIsLatched = false; 2410 long evictedBytes = 0; 2411 2412 if (stats != null) { 2413 stats.withParent = (parent != null || rootEvictor != null); 2414 } 2415 2416 try { 2417 if (parent != null) { 2418 assert(parent.isLatchExclusiveOwner()); 2419 parentIsLatched = true; 2420 2421 if (target != parent.getTarget(index)) { 2422 skip(target, stats); 2423 return 0; 2424 } 2425 2426 target.latch(CacheMode.UNCHANGED); 2427 2428 } else if (rootEvictor != null) { 2429 target = rootEvictor.target; 2430 2431 } else { 2432 target.latch(CacheMode.UNCHANGED); 2433 } 2434 2435 targetIsLatched = true; 2436 2437 DatabaseImpl db = target.getDatabase(); 2438 EnvironmentImpl dbEnv = db.getEnv(); 2439 2440 if (!target.getInListResident() || contains(target)) { 2441 /* 2442 * The node was put back to the LRU, and then possibly evicted 2443 * by other threads before this thread could latch it. 2444 */ 2445 skip(target, stats); 2446 return 0; 2447 } 2448 2449 if (target.getDirty() && dbEnv.isReadOnly()) { 2450 /* Cannot evict dirty nodes in a read-only environment. */ 2451 skip(target, stats); 2452 return 0; 2453 } 2454 2455 /* 2456 * Normally, UINs that have cached children are not in the LRU, 2457 * and as a result, cannot be selected for eviction. However, a 2458 * childless UIN may be selected for eviction and then acquire 2459 * cached children in the time after its removal from its LRUSet 2460 * and before it is EX-latched by the evicting thread. 2461 */ 2462 if (target.isUpperIN() && target.hasCachedChildrenFlag()) { 2463 assert(target.hasResidentChildren()); 2464 skip(target, stats); 2465 return 0; 2466 } 2467 2468 /* 2469 * Disallow eviction of the mapping and naming DB roots, because 2470 * their eviction and re-fetching is a special case that is not 2471 * worth supporting. [#13415] 2472 */ 2473 if (target.isRoot()) { 2474 DatabaseId dbId = db.getId(); 2475 if (dbId.equals(DbTree.ID_DB_ID) || 2476 dbId.equals(DbTree.NAME_DB_ID)) { 2477 skip(target, stats); 2478 return 0; 2479 } 2480 } 2481 2482 if (dbEnv.getSharedCache()) { 2483 2484 if (dbEnv.isClosed() || dbEnv.isInvalid()) { 2485 skip(target, stats); 2486 return 0; 2487 } 2488 2489 if (!dbEnv.getMemoryBudget().isTreeUsageAboveMinimum()) { 2490 putBack(target, stats, 1); 2491 return 0; 2492 } 2493 } 2494 2495 if (target.isPinned()) { 2496 putBack(target, stats, 2); 2497 return 0; 2498 } 2499 2500 /* 2501 * LRU-TODO: this is just an approximation of the KEEP_HOT 2502 * cachemode. 2503 */ 2504 if (target.isBIN() && target.getGeneration() == Long.MAX_VALUE) { 2505 target.setGeneration(0); 2506 putBack(target, stats, 3); 2507 return 0; 2508 } 2509 2510 /* 2511 * Target and LSN can be null in DW. Not evictable in that case. 2512 */ 2513 for (int i = 0; i < target.getNEntries(); ++i) { 2514 if (target.getLsn(i) == DbLsn.NULL_LSN && 2515 !target.isResident(i)) { 2516 putBack(target, stats, 4); 2517 return 0; 2518 } 2519 } 2520 2521 /* 2522 * Attempt partial eviction. The partialEviction() method also 2523 * determines whether the IN in evictable or not. For now, 2524 * partialEviction() will consider a node to be non-evictable if 2525 * it is a BIN that (a) has cursors registered on it, or (b) has 2526 * a resident non-evictable LN, which can happen only for MapLNs 2527 * (see MapLN.isEvictable()). 2528 */ 2529 evictedBytes = target.partialEviction(); 2530 2531 boolean isEvictable = (evictedBytes & IN.NON_EVICTABLE_IN) == 0; 2532 evictedBytes = evictedBytes & ~IN.NON_EVICTABLE_IN; 2533 2534 /* 2535 * If we could evict some bytes from this node, put it back in 2536 * the LRU, unless it is a BIN accessed in EVICT_BIN or MAKE_COLD 2537 * cache mode, in which case we should evict it, if possible. 2538 */ 2539 if (evictedBytes > 0 && 2540 (target.isUpperIN() || source != EvictionSource.CACHEMODE)) { 2541 strippedPutBack(target, stats); 2542 return evictedBytes; 2543 } 2544 2545 if (!isEvictable) { 2546 /* The node is not evictable */ 2547 putBack(target, stats, 5); 2548 return evictedBytes; 2549 } 2550 2551 /* 2552 * Give the node a second chance, if it is a full BIN that can 2553 * be mutated to a BINDelta and the cache mode is not EVICT_BIN 2554 * or MAKE_COLD. 2555 */ 2556 if (target.isBIN() && 2557 source != EvictionSource.CACHEMODE && 2558 getMutateToBINDeltas() && 2559 ((BIN)target).canMutateToBINDelta()) { 2560 2561 BIN bin = (BIN)target; 2562 evictedBytes += bin.mutateToBINDelta(); 2563 assert(evictedBytes > 0); 2564 binDeltaPutBack(target, stats); 2565 2566 return evictedBytes; 2567 } 2568 2569 /* 2570 * Give the node a second chance, if it is dirty, not in the 2571 * dirty LRUSet already, and is either a UIN or the cache mode 2572 * is not EVICT_BIN or MAKE_COLD. 2573 */ 2574 if ((target.isUpperIN() || 2575 source != EvictionSource.CACHEMODE) && 2576 useDirtyLRUSet() && 2577 target.getDirty() && 2578 !target.isInDirtyLRU()) { 2579 2580 moveToDirtyLRU(target, stats); 2581 return evictedBytes; 2582 } 2583 2584 /* 2585 * Evict the node. To do so, we must find and latch the 2586 * parent IN first, if we have not done this already. 2587 */ 2588 if (rootEvictor != null) { 2589 evictedBytes += evictRoot(rootEvictor, bgIO, source, stats); 2590 2591 } else if (parent != null) { 2592 evictedBytes += evict( 2593 target, parent, index, bgIO, source, stats); 2594 2595 } else { 2596 assert TestHookExecute.doHookIfSet(preEvictINHook); 2597 targetIsLatched = false; 2598 evictedBytes += findParentAndRetry( 2599 target, bgIO, source, stats); 2600 } 2601 2602 return evictedBytes; 2603 2604 } finally { 2605 if (targetIsLatched) { 2606 target.releaseLatch(); 2607 } 2608 2609 if (parentIsLatched) { 2610 parent.releaseLatch(); 2611 } 2612 } 2613 } 2614 skip(IN target, EvictionDebugStats stats)2615 private void skip(IN target, EvictionDebugStats stats) { 2616 2617 if ((traceUINs && target.isUpperIN()) || 2618 (traceBINs && target.isBIN())) { 2619 LoggerUtils.envLogMsg( 2620 traceLevel, target.getEnv(), 2621 Thread.currentThread().getId() + "-" + 2622 Thread.currentThread().getName() + 2623 "-" + target.getEnv().getName() + 2624 " XXXX SKIPPED Eviction Target: " + 2625 target.getNodeId()); 2626 } 2627 2628 nNodesSkipped.increment(); 2629 } 2630 putBack(IN target, EvictionDebugStats stats, int caller)2631 private void putBack(IN target, EvictionDebugStats stats, int caller) { 2632 2633 if ((traceUINs && target.isUpperIN()) || 2634 (traceBINs && target.isBIN())) { 2635 LoggerUtils.envLogMsg( 2636 traceLevel, target.getEnv(), 2637 Thread.currentThread().getId() + "-" + 2638 Thread.currentThread().getName() + 2639 "-" + target.getEnv().getName() + 2640 " XXXX PUT-BACK-" + caller + " Eviction Target: " + 2641 target.getNodeId()); 2642 } 2643 2644 if (target.isInDirtyLRU()) { 2645 dirtyAddBack(target); 2646 } else { 2647 addBack(target); 2648 } 2649 2650 if (stats != null) { 2651 stats.incNumPutBack(); 2652 } 2653 2654 nNodesPutBack.increment(); 2655 } 2656 strippedPutBack(IN target, EvictionDebugStats stats)2657 private void strippedPutBack(IN target, EvictionDebugStats stats) { 2658 2659 if ((traceUINs && target.isUpperIN()) || 2660 (traceBINs && target.isBIN())) { 2661 LoggerUtils.envLogMsg( 2662 traceLevel, target.getEnv(), 2663 Thread.currentThread().getId() + "-" + 2664 Thread.currentThread().getName() + 2665 "-" + target.getEnv().getName() + 2666 " XXXX STRIPPED Eviction Target: " + 2667 target.getNodeId()); 2668 } 2669 2670 if (target.isInDirtyLRU()) { 2671 dirtyAddBack(target); 2672 } else { 2673 addBack(target); 2674 } 2675 2676 if (stats != null) { 2677 stats.incNumStripped(); 2678 } 2679 2680 nNodesStripped.increment(); 2681 } 2682 binDeltaPutBack(IN target, EvictionDebugStats stats)2683 private void binDeltaPutBack(IN target, EvictionDebugStats stats) { 2684 2685 if ((traceUINs && target.isUpperIN()) || 2686 (traceBINs && target.isBIN())) { 2687 LoggerUtils.envLogMsg( 2688 traceLevel, target.getEnv(), 2689 Thread.currentThread().getId() + "-" + 2690 Thread.currentThread().getName() + 2691 "-" + target.getEnv().getName() + 2692 " XXXX MUTATED Eviction Target: " + 2693 target.getNodeId()); 2694 } 2695 2696 if (target.isInDirtyLRU()) { 2697 dirtyAddBack(target); 2698 } else { 2699 addBack(target); 2700 } 2701 2702 if (stats != null) { 2703 stats.incNumMutated(); 2704 } 2705 2706 nNodesMutated.increment(); 2707 } 2708 moveToDirtyLRU(IN target, EvictionDebugStats stats)2709 private void moveToDirtyLRU(IN target, EvictionDebugStats stats) { 2710 2711 if ((traceUINs && target.isUpperIN()) || 2712 (traceBINs && target.isBIN())) { 2713 LoggerUtils.envLogMsg( 2714 traceLevel, target.getEnv(), 2715 Thread.currentThread().getId() + "-" + 2716 Thread.currentThread().getName() + 2717 "-" + target.getEnv().getName() + 2718 " XXXX MOVED-TO_DIRTY Eviction Target: " + 2719 target.getNodeId()); 2720 } 2721 2722 if (stats != null) { 2723 stats.incNumMoved(target.isBIN()); 2724 } 2725 2726 dirtyAddFront(target); 2727 2728 nNodesMovedToDirtyLRU.increment(); 2729 } 2730 findParentAndRetry( IN target, boolean backgroundIO, EvictionSource source, EvictionDebugStats stats)2731 private long findParentAndRetry( 2732 IN target, 2733 boolean backgroundIO, 2734 EvictionSource source, 2735 EvictionDebugStats stats) { 2736 2737 Tree tree = target.getDatabase().getTree(); 2738 2739 /* 2740 * Pass false for doFetch to avoid fetching a full BIN when a 2741 * delta is in cache. This also avoids a fetch when the node 2742 * was evicted while unlatched, but that should be very rare. 2743 */ 2744 SearchResult result = tree.getParentINForChildIN( 2745 target, false, /*useTargetLevel*/ 2746 false, /*doFetch*/ CacheMode.UNCHANGED); 2747 2748 if (result.exactParentFound) { 2749 return processTarget(null, /* rootEvictor */ 2750 target, 2751 result.parent, 2752 result.index, 2753 backgroundIO, 2754 source, 2755 stats); 2756 } 2757 2758 /* 2759 * The target has been detached from the tree and it should stay 2760 * out of the LRU. It should not be in the INList, because whenever 2761 * we detach a node we remove it from the INList, but in case we 2762 * forgot to do this somewhere, we can just remove it here. 2763 */ 2764 assert(result.parent == null); 2765 2766 target.latch(CacheMode.UNCHANGED); 2767 2768 try { 2769 if (target.getInListResident()) { 2770 2771 firstEnvImpl.getInMemoryINs().remove(target); 2772 2773 throw EnvironmentFailureException.unexpectedState( 2774 "Node " + target.getNodeId() + 2775 " has been detached from the in-memory tree, " + 2776 "but it is still in the INList"); 2777 } 2778 } finally { 2779 target.releaseLatch(); 2780 } 2781 2782 return 0; 2783 } 2784 evict( IN target, IN parent, int index, boolean backgroundIO, EvictionSource source, EvictionDebugStats stats)2785 private long evict( 2786 IN target, 2787 IN parent, 2788 int index, 2789 boolean backgroundIO, 2790 EvictionSource source, 2791 EvictionDebugStats stats) { 2792 2793 DatabaseImpl db = target.getDatabase(); 2794 EnvironmentImpl dbEnv = db.getEnv(); 2795 long targetLsn = DbLsn.NULL_LSN; 2796 boolean newTargetLsn = false; 2797 2798 if (target.getDirty()) { 2799 2800 boolean logProvisional = coordinateWithCheckpoint(target, parent); 2801 2802 targetLsn = target.log(dbEnv.getLogManager(), 2803 allowBinDeltas, 2804 true, /*allowCompress*/ 2805 logProvisional, 2806 backgroundIO, 2807 parent); 2808 newTargetLsn = true; 2809 } else { 2810 targetLsn = parent.getLsn(index); 2811 } 2812 2813 assert(targetLsn != DbLsn.NULL_LSN); 2814 2815 if (targetLsn != DbLsn.NULL_LSN) { 2816 2817 long evictedBytes = target.getBudgetedMemorySize(); 2818 2819 parent.detachNode(index, newTargetLsn/*updateLsn*/, targetLsn); 2820 2821 nNodesEvicted.increment(); 2822 if (stats != null) { 2823 stats.incNumEvicted(target.isBIN()); 2824 } 2825 2826 return evictedBytes; 2827 } else { 2828 //can we ever really reach here? How? ????? 2829 addBack(target); 2830 return 0; 2831 } 2832 } 2833 evictRoot( RootEvictor rootEvictor, boolean backgroundIO, EvictionSource source, EvictionDebugStats stats)2834 private long evictRoot( 2835 RootEvictor rootEvictor, 2836 boolean backgroundIO, 2837 EvictionSource source, 2838 EvictionDebugStats stats) { 2839 2840 final ChildReference rootRef = rootEvictor.rootRef; 2841 final IN target = (IN) rootRef.getTarget(); 2842 final DatabaseImpl db = target.getDatabase(); 2843 final EnvironmentImpl dbEnv = db.getEnv(); 2844 final INList inList = dbEnv.getInMemoryINs(); 2845 2846 boolean logProvisional = 2847 coordinateWithCheckpoint(target, null /*parent*/); 2848 2849 if (target.getDirty()) { 2850 long newLsn = target.log(dbEnv.getLogManager(), 2851 false, // allowDeltas 2852 false, // allowCompress 2853 logProvisional, 2854 backgroundIO, 2855 null); // parent 2856 2857 rootRef.setLsn(newLsn); 2858 rootEvictor.flushed = true; 2859 } 2860 2861 inList.remove(target); 2862 2863 long evictBytes = target.getBudgetedMemorySize(); 2864 2865 rootRef.clearTarget(); 2866 2867 nNodesEvicted.increment(); 2868 nRootNodesEvicted.increment(); 2869 if (stats != null) { 2870 stats.incNumEvicted(false); 2871 } 2872 2873 return evictBytes; 2874 } 2875 2876 /** 2877 * Coordinates an eviction with an in-progress checkpoint and returns 2878 * whether provisional logging is needed. 2879 * 2880 * @return true if the target must be logged provisionally. 2881 */ coordinateWithCheckpoint(IN target, IN parent)2882 private boolean coordinateWithCheckpoint(IN target, IN parent) { 2883 2884 EnvironmentImpl dbEnv = target.getDatabase().getEnv(); 2885 2886 /* 2887 * The checkpointer could be null if it was shutdown or never 2888 * started. 2889 */ 2890 Checkpointer ckpter = dbEnv.getCheckpointer(); 2891 if (ckpter == null) { 2892 return false; 2893 } 2894 return ckpter.coordinateEvictionWithCheckpoint(target, parent); 2895 } 2896 isCacheFull()2897 public boolean isCacheFull() { 2898 return arbiter.isCacheFull(); 2899 } 2900 wasCacheEverFull()2901 public boolean wasCacheEverFull() { 2902 return arbiter.wasCacheEverFull(); 2903 } 2904 2905 /* For unit testing only. */ setRunnableHook(TestHook<Boolean> hook)2906 public void setRunnableHook(TestHook<Boolean> hook) { 2907 arbiter.setRunnableHook(hook); 2908 } 2909 2910 /* For unit testing only. */ setPreEvictINHook(TestHook<Object> hook)2911 public void setPreEvictINHook(TestHook<Object> hook) { 2912 preEvictINHook = hook; 2913 } 2914 2915 /* For unit testing only. */ setEvictProfileHook(TestHook<IN> hook)2916 public void setEvictProfileHook(TestHook<IN> hook) { 2917 evictProfile = hook; 2918 } 2919 getStatsGroup()2920 public StatGroup getStatsGroup() { 2921 return stats; 2922 } 2923 2924 /** 2925 * Load stats. 2926 */ loadStats(StatsConfig config)2927 public StatGroup loadStats(StatsConfig config) { 2928 2929 if (isShared) { 2930 sharedCacheEnvs.set(envInfos.size()); 2931 } 2932 2933 float binFetchMisses = (float)nBINFetchMiss.get(); 2934 float binFetches = (float)nBINFetch.get(); 2935 2936 binFetchMissRatio.set( 2937 (binFetches > 0 ? (binFetchMisses / binFetches) : 0)); 2938 2939 StatGroup copy = stats.cloneGroup(config.getClear()); 2940 2941 /* 2942 * These stats are not cleared. They represent the current state of 2943 * the cache. 2944 */ 2945 new LongStat(copy, CACHED_IN_SPARSE_TARGET, nINSparseTarget.get()); 2946 new LongStat(copy, CACHED_IN_NO_TARGET, nINNoTarget.get()); 2947 new LongStat(copy, CACHED_IN_COMPACT_KEY, nINCompactKey.get()); 2948 2949 copy.addAll(getINListStats(config)); 2950 2951 return copy; 2952 } 2953 getINListStats(StatsConfig config)2954 private StatGroup getINListStats(StatsConfig config) { 2955 2956 if (isShared) { 2957 2958 StatGroup totalINListStats = new StatGroup("temp", "temp"); 2959 2960 if (config.getFast()) { 2961 2962 /* 2963 * This is a slow stat for shared envs, because of the need to 2964 * synchronize. 2965 */ 2966 return totalINListStats; 2967 } 2968 2969 List<EnvInfo> copy = null; 2970 synchronized(this) { 2971 copy = new ArrayList<EnvInfo>(envInfos); 2972 } 2973 2974 for (EnvInfo ei: copy) { 2975 totalINListStats.addAll(ei.env.getInMemoryINs().loadStats()); 2976 } 2977 2978 return totalINListStats; 2979 } else { 2980 return firstEnvImpl.getInMemoryINs().loadStats(); 2981 } 2982 } 2983 incNumLNsEvicted(long inc)2984 public void incNumLNsEvicted(long inc) { 2985 nLNsEvicted.add(inc); 2986 } 2987 2988 /** 2989 * Update the appropriate fetch stat, based on node type. 2990 */ incLNFetchStats(boolean isMiss)2991 public void incLNFetchStats(boolean isMiss) { 2992 nLNFetch.increment(); 2993 if (isMiss) { 2994 nLNFetchMiss.increment(); 2995 } 2996 } 2997 incUINFetchStats(boolean isMiss)2998 public void incUINFetchStats(boolean isMiss) { 2999 nUpperINFetch.increment(); 3000 if (isMiss) { 3001 nUpperINFetchMiss.increment(); 3002 } 3003 } 3004 incBINFetchStats(boolean isMiss, boolean isDelta)3005 public void incBINFetchStats(boolean isMiss, boolean isDelta) { 3006 nBINFetch.increment(); 3007 if (isMiss) { 3008 nBINFetchMiss.increment(); 3009 if (isDelta) { 3010 nBINDeltaFetchMiss.increment(); 3011 } 3012 } 3013 } 3014 incFullBINMissStats()3015 public void incFullBINMissStats() { 3016 nFullBINMiss.increment(); 3017 } 3018 incBinDeltaBlindOps()3019 public void incBinDeltaBlindOps() { 3020 nBinDeltaBlindOps.increment(); 3021 } 3022 getNINSparseTarget()3023 public AtomicLong getNINSparseTarget() { 3024 return nINSparseTarget; 3025 } 3026 getNINNoTarget()3027 public AtomicLong getNINNoTarget() { 3028 return nINNoTarget; 3029 } 3030 getNINCompactKey()3031 public AtomicLong getNINCompactKey() { 3032 return nINCompactKey; 3033 } 3034 3035 3036 static class ReentrancyGuard { 3037 private final ConcurrentHashMap<Thread, Thread> activeThreads; 3038 private final EnvironmentImpl envImpl; 3039 private final Logger logger; 3040 ReentrancyGuard(EnvironmentImpl envImpl, Logger logger)3041 ReentrancyGuard(EnvironmentImpl envImpl, Logger logger) { 3042 this.envImpl = envImpl; 3043 this.logger = logger; 3044 activeThreads = new ConcurrentHashMap<Thread, Thread>(); 3045 } 3046 enter()3047 boolean enter() { 3048 Thread thisThread = Thread.currentThread(); 3049 if (activeThreads.containsKey(thisThread)) { 3050 /* We don't really expect a reentrant call. */ 3051 LoggerUtils.severe(logger, envImpl, 3052 "reentrant call to eviction from " + 3053 LoggerUtils.getStackTrace()); 3054 3055 /* If running w/assertions, in testing mode, assert here. */ 3056 assert false: "reentrant call to eviction from " + 3057 LoggerUtils.getStackTrace(); 3058 return false; 3059 } 3060 3061 activeThreads.put(thisThread, thisThread); 3062 return true; 3063 } 3064 leave()3065 void leave() { 3066 assert activeThreads.contains(Thread.currentThread()); 3067 activeThreads.remove(Thread.currentThread()); 3068 } 3069 } 3070 3071 static class BackgroundEvictTask implements Runnable { 3072 3073 private final Evictor evictor; 3074 private final boolean backgroundIO; 3075 BackgroundEvictTask(Evictor evictor)3076 BackgroundEvictTask(Evictor evictor) { 3077 this.evictor = evictor; 3078 this.backgroundIO = true; 3079 } 3080 run()3081 public void run() { 3082 evictor.doEvict(EvictionSource.EVICTORTHREAD, backgroundIO); 3083 } 3084 } 3085 3086 static class RejectEvictHandler implements RejectedExecutionHandler { 3087 3088 private final AtomicLongStat threadUnavailableStat; 3089 RejectEvictHandler(AtomicLongStat threadUnavailableStat)3090 RejectEvictHandler(AtomicLongStat threadUnavailableStat) { 3091 this.threadUnavailableStat = threadUnavailableStat; 3092 } 3093 rejectedExecution(Runnable r, ThreadPoolExecutor executor)3094 public void rejectedExecution(Runnable r, 3095 ThreadPoolExecutor executor) { 3096 threadUnavailableStat.increment(); 3097 } 3098 } 3099 3100 /** 3101 * Caches DatabaseImpls to reduce DbTree.getDb overhead. 3102 * 3103 * SharedEvictor, unlike PrivateEvictor, must maintain a cache map for each 3104 * EnvironmentImpl, since each cache map is logically associated with a 3105 * single DbTree instance. 3106 */ 3107 static class DbCache { 3108 3109 boolean shared = false; 3110 3111 int nOperations = 0; 3112 3113 int dbCacheClearCount = 0; 3114 3115 final Map<EnvironmentImpl, Map<DatabaseId, DatabaseImpl>> envMap; 3116 3117 final Map<DatabaseId, DatabaseImpl> dbMap; 3118 DbCache(boolean shared, int dbCacheClearCount)3119 DbCache(boolean shared, int dbCacheClearCount) { 3120 3121 this.shared = shared; 3122 this.dbCacheClearCount = dbCacheClearCount; 3123 3124 if (shared) { 3125 envMap = 3126 new HashMap<EnvironmentImpl, Map<DatabaseId, DatabaseImpl>>(); 3127 3128 dbMap = null; 3129 } else { 3130 dbMap = new HashMap<DatabaseId, DatabaseImpl>(); 3131 envMap = null; 3132 } 3133 } 3134 3135 /** 3136 * Calls DbTree.getDb for the given environment and database ID, and 3137 * caches the result to optimize multiple calls for the same DB. 3138 * 3139 * @param envImpl identifies which environment the dbId parameter 3140 * belongs to. For PrivateEvictor, it is the same as the 3141 * Evictor.firstEnvImpl field. 3142 * 3143 * @param dbId is the DB to get. 3144 */ getDb(EnvironmentImpl env, DatabaseId dbId)3145 DatabaseImpl getDb(EnvironmentImpl env, DatabaseId dbId) { 3146 3147 Map<DatabaseId, DatabaseImpl> map; 3148 3149 if (shared) { 3150 map = envMap.get(env); 3151 if (map == null) { 3152 map = new HashMap<DatabaseId, DatabaseImpl>(); 3153 envMap.put(env, map); 3154 } 3155 } else { 3156 map = dbMap; 3157 } 3158 3159 /* 3160 * Clear DB cache after dbCacheClearCount operations, to 3161 * prevent starving other threads that need exclusive access to 3162 * the MapLN (for example, DbTree.deleteMapLN). [#21015] 3163 * 3164 * Note that we clear the caches for all environments after 3165 * dbCacheClearCount total operations, rather than after 3166 * dbCacheClearCount operations for a single environment, 3167 * because the total is a more accurate representation of 3168 * elapsed time, during which other threads may be waiting for 3169 * exclusive access to the MapLN. 3170 */ 3171 nOperations += 1; 3172 if ((nOperations % dbCacheClearCount) == 0) { 3173 releaseDbs(env); 3174 } 3175 3176 return env.getDbTree().getDb(dbId, -1, map); 3177 } 3178 3179 /** 3180 * Calls DbTree.releaseDb for cached DBs, and clears the cache. 3181 */ releaseDbs(EnvironmentImpl env)3182 void releaseDbs(EnvironmentImpl env) { 3183 if (shared) { 3184 for (Map.Entry<EnvironmentImpl, Map<DatabaseId, DatabaseImpl>> 3185 entry : envMap.entrySet()) { 3186 3187 final EnvironmentImpl sharingEnv = entry.getKey(); 3188 final Map<DatabaseId, DatabaseImpl> map = entry.getValue(); 3189 3190 sharingEnv.getDbTree().releaseDbs(map); 3191 map.clear(); 3192 } 3193 } else { 3194 env.getDbTree().releaseDbs(dbMap); 3195 dbMap.clear(); 3196 } 3197 } 3198 } 3199 } 3200