1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2002, 2010 Oracle and/or its affiliates.  All rights reserved.
5  *
6  */
7 
8 package com.sleepycat.je.cleaner;
9 
10 import java.util.ArrayList;
11 import java.util.LinkedList;
12 import java.util.Map;
13 import java.util.Set;
14 import java.util.SortedMap;
15 import java.util.SortedSet;
16 import java.util.logging.Level;
17 import java.util.logging.Logger;
18 
19 import com.sleepycat.je.config.EnvironmentParams;
20 import com.sleepycat.je.dbi.DbConfigManager;
21 import com.sleepycat.je.dbi.EnvironmentImpl;
22 import com.sleepycat.je.utilint.DbLsn;
23 import com.sleepycat.je.utilint.TestHook;
24 import com.sleepycat.je.utilint.TestHookExecute;
25 import com.sleepycat.je.utilint.LoggerUtils;
26 
27 /**
28  * Contains methods for calculating utilization and for selecting files to
29  * clean.
30  *
31  * In most cases the methods in this class are called by FileSelector methods,
32  * and synchronization order is always FileSelector then UtilizationCalculator.
33  * In some cases methods in this class are called directly by FileProcessor,
34  * and such methods must take care not to call FileSelector methods.
35  *
36  * This class maintains the size correction factor for LNs whose size is not
37  * counted, and related info, as explained in detail below. [#18633]
38  *
39  * Note that as of JE 6.0, utilization adjustments should no longer be needed
40  * because the last logged size is stored in the BIN.  Adjustments are
41  * therefore disabled by default.  Adjustments are supported (can be enabled
42  * explicitly) in the current release, to be safe, but will be removed entirely
43  * in a future release as described in the JE 6 change log and the javadoc for
44  * {@link com.sleepycat.je.EnvironmentConfig#CLEANER_ADJUST_UTILIZATION}.
45  * See [#22275] for more information.
46  *
47  * (Historical note:  Originally in JE 5, the corrected average LN size was
48  * used to adjust utilization.  This was changed to a correction factor since
49  * different log files may have different average LN sizes. [#21106])
50  *
51  * LN size adjustments are performed because the obsolete size of LNs may be
52  * unknown (if it was written prior to JE 6.0) when we delete or update an LN
53  * that is not resident in cache.  We do not wish to fetch the LN just to
54  * obtain the size of the previous version. Therefore we record in the
55  * FileSummary that an LN became obsolete (by incrementing obsoleteLNCount) and
56  * also that its size was not counted (by not incrementing
57  * obsoleteLNSizeCounted and not adding a size to obsoleteLNSize).
58  *
59  * Later we estimate utilization by assuming that such obsolete LNs have sizes
60  * that are roughly the average LN size for obsolete LNs whose size was not
61  * counted in the file, as calculated by FileSummary.getObsoleteLNSize and
62  * getAverageObsoleteLNSizeNotCounted.  This is fairly accurate when the sizes
63  * of obsolete and non-obsolete LNs in a file are uniformly distributed.  But
64  * it can be inaccurate when they are not.
65  *
66  * If obsolete LNs are generally smaller than non-obsolete LNs, the average
67  * will be larger than the true size and estimated utilization will be lower
68  * then true utilization; in this case there is danger of over-cleaning and
69  * wasted resources.  If obsolete LNs are generally larger, the average will be
70  * smaller than the true size and estimated utilization will be higher than
71  * true utilization; in this case there is danger of unbounded log growth and a
72  * "disk space leak".
73  *
74  * To correct the inaccurate estimate we take advantage of the fact that the
75  * true LN sizes and true average LN size can be determined when the
76  * FileProcessor reads a log file.  We save the true average LN size as the
77  * result of a FileProcessor run in the adjustUtilization method.  The true
78  * average LN size is then used to calculate a correction factor -- a factor
79  * that, when multiplied by the estimated average LN size, results in the
80  * true average LN size.  The correction factor is used when determining the
81  * utilization of other log files; this is done by the getBestFile method.
82  *
83  * To guard against over-cleaning (the first case described above), the
84  * FileProcessor will naturally select a file for cleaning because estimated
85  * utilization is lower than true utilization.  When the file is cleaned, the
86  * LN size correction factor will be determined.  However, to guard against
87  * unbounded log growth (the second case above), this mechanism is not
88  * sufficient because estimated utilization is higher than true utilization and
89  * a file may not be selected for cleaning.  To handle this case, a log file
90  * must sometimes be read by the FileProcessor, even though estimated
91  * utilization is above the minUtilization threshold, to "probe" the true
92  * utilization and determine the true average LN size.  Probing is a read-only
93  * operation and does not migrate the active LNs in the log file or attempt to
94  * delete the file.
95  *
96  * Probing a log file is extra work and we strive to do it as infrequently as
97  * possible.  The following algorithm is used.
98  *
99  * 1) We determine the number of new log files N that have been written since
100  * the last time adjustUtilization was called.  Recall that adjustUtilization
101  * is called when FileProcessor reads a file, for cleaning a file or probing
102  * it.  N is determined by the shouldPerformProbe method, which is called via
103  * FileSelector.selectFileForCorrection by the FileProcessor when the
104  * FileProcessor is woken to do cleaning but no file is selected for cleaning.
105  *
106  * 2) shouldPerformProbe returns true only if N is greater than a threshold
107  * value.  The threshold has a high value (20 by default) if the minimum number
108  * of initial adjustments (files cleaned or probed, 5 by default) has not been
109  * reached, and has a lower value (5 by default) after the initial adjustment
110  * minimum has been reached.  In other words, we do probes more frequently when
111  * we haven't collected a lot of input data.
112  *
113  * 3) If shouldPerformProbe returns true, then selectFileForCorrection
114  * in the FileSelector calls the getBestFile method here, passing true for the
115  * isProbe argument.  In this mode, getBestFile will select a file based on
116  * worst case (lowest possible) utilization.  The maximum LN size in the file
117  * is used, rather than the average LN size, for this worst case calculation.
118  * See the FileSummary getMaxObsoleteLNSize method.  If no file is selected
119  * using worst case utilization, then probing is not needed because total
120  * utilization cannot be lower than the configured minUtilization.
121  *
122  * 4) If FileSelector.selectFileForCorrection returns a non-null file number,
123  * then FileProcessor will read the file in calcUtilizationOnly mode.  True
124  * utilization will be determined and adjustUtilization will be called, but
125  * active LNs will not be migrated and the file will not be deleted.
126  *
127  * To summarize, a file is read in probing mode only if both of the following
128  * conditions are satisfied:
129  *
130  *  a) The number of new files written since the last correction, N, is greater
131  *     than the threshold, which is 5 or 20 depending on the whether the last
132  *     correction is greater or less than 0.9.
133  *
134  *  b) Using a worst case utilization calculation that uses the maximum
135  *     obsolete LN size in a file rather than the average, the total log
136  *     utilization is under the configured minUtilization.
137  *
138  * To determine the LN size correction factor that is applied when estimating
139  * utilization, rather than using the size for a single log file, the sizes for
140  * the last several log files are saved and the average value is used.  The
141  * average over the last 10 adjustments is computed when adjustUtilization is
142  * called.
143  *
144  * If the mix of LN sizes for obsolete and non-obsolete LNs is roughly the same
145  * for all log files, and this mix does not change over time, then using a
146  * running average would not be necessary and a single size would be
147  * sufficient.  However, some applications have access patterns that are not
148  * uniform over time, and the running average size will help to compensate for
149  * this.
150  *
151  * It is important to avoid premature use of the LN size correction factor in
152  * selecting files for cleaning, before enough input data has been collected to
153  * make the computed value meaningful.  To accomplish this, the correction
154  * factor is not computed or used until the average size data for a minimum
155  * number of LNs with uncounted sizes (1000 by default) is available.
156  *
157  * Because it is expensive to compute, the recently computed values are stored
158  * persistently as part of the CheckpointEnd log entry.  Flushing this info
159  * once per checkpoint is sufficient, and no special handling is necessary
160  * after a crash; losing the new info since the last checkpoint is acceptable.
161  *
162  * Note that we do clean files that are protected from deletion by HA/DataSync.
163  * If we did not clean them and a large number of files were to become
164  * unprotected at once, a large amount of log cleaning may suddenly be
165  * necessary.  Cleaning the files avoids this.  Better still would be to delete
166  * the metadata, but that would require writing a log entry to indicate the
167  * file is ready to be deleted, to avoid cleaning from scratch after a crash.
168  * [#16643] [#19221]
169  */
170 public class UtilizationCalculator {
171 
172     private static final boolean DEBUG = false;
173 
174     /*
175      * Configuration properties.  The default values are probably appropriate
176      * for all applications and these parameters are currently not exposed in
177      * EnvironmentConfig.  However, in case exposing them or experimenting with
178      * them is necessary in the future, they are represented as hidden
179      * properties in the EnvironmentParams class.
180      */
181     private final int configRecentLNSizes;      // default: 10
182     private final int configMinUncountedLNs;    // default: 1000
183     private final int configInitialAdjustments; // default: 5
184     private final int configMinProbeSkipFiles;  // default: 5
185     private final int configMaxProbeSkipFiles;  // default: 20
186     private final boolean configAdjustUtilization;
187 
188     /* Min/max correction factors used to prohibit unreasonable values. */
189     private static final float MIN_CORRECTION_FACTOR = 0.01f;
190     private static final float MAX_CORRECTION_FACTOR = 100f;
191 
192     private final EnvironmentImpl env;
193     private final Cleaner cleaner;
194     private final Logger logger;
195     private final FilesToMigrate filesToMigrate;
196     private float lnSizeCorrectionFactor;
197     private final LinkedList<AverageSize> recentAvgLNSizes;
198     private long endFileNumAtLastAdjustment;
199     private int initialAdjustments;
200     private TestHook<TestAdjustment> adjustmentHook;
201     private volatile int lastKnownUtilization = -1;
202 
UtilizationCalculator(EnvironmentImpl env, Cleaner cleaner)203     UtilizationCalculator(EnvironmentImpl env, Cleaner cleaner) {
204         this.env = env;
205         this.cleaner = cleaner;
206         logger = LoggerUtils.getLogger(getClass());
207 
208         /*
209          * Set default values. Some are overridden by setLogSummary when
210          * opening an existing environment.
211          */
212         lnSizeCorrectionFactor = Float.NaN;
213         recentAvgLNSizes = new LinkedList<AverageSize>();
214         endFileNumAtLastAdjustment = -1;
215         filesToMigrate = new FilesToMigrate(env);
216 
217         /* Configuration properties.  All are immutable. */
218         final DbConfigManager configManager = env.getConfigManager();
219         configAdjustUtilization = configManager.getBoolean
220             (EnvironmentParams.CLEANER_ADJUST_UTILIZATION);
221         configRecentLNSizes = configManager.getInt
222             (EnvironmentParams.CLEANER_CALC_RECENT_LN_SIZES);
223         configMinUncountedLNs = configManager.getInt
224             (EnvironmentParams.CLEANER_CALC_MIN_UNCOUNTED_LNS);
225         configInitialAdjustments = configManager.getInt
226             (EnvironmentParams.CLEANER_CALC_INITIAL_ADJUSTMENTS);
227         configMinProbeSkipFiles = configManager.getInt
228             (EnvironmentParams.CLEANER_CALC_MIN_PROBE_SKIP_FILES);
229         configMaxProbeSkipFiles = configManager.getInt
230             (EnvironmentParams.CLEANER_CALC_MAX_PROBE_SKIP_FILES);
231     }
232 
233     /**
234      * Returns log summary info that should be saved persistently.
235      */
getLogSummary()236     public synchronized CleanerLogSummary getLogSummary() {
237         return new CleanerLogSummary(new ArrayList(recentAvgLNSizes),
238                                      endFileNumAtLastAdjustment,
239                                      initialAdjustments);
240     }
241 
242     /**
243      * Restores log summary info that was read from persistent storage.
244      */
setLogSummary(CleanerLogSummary logSummary)245     public synchronized void setLogSummary(CleanerLogSummary logSummary) {
246         endFileNumAtLastAdjustment =
247             logSummary.getEndFileNumAtLastAdjustment();
248         initialAdjustments = logSummary.getInitialAdjustments();
249         recentAvgLNSizes.clear();
250         recentAvgLNSizes.addAll(logSummary.getRecentAvgLNSizes());
251         /* Call this last, as it uses the fields set above. */
252         updateObsoleteLNSizeCorrectionFactor();
253     }
254 
255     /**
256      * See UtilizationCorrectionTest.
257      */
setAdjustmentHook(TestHook<TestAdjustment> hook)258     synchronized void setAdjustmentHook(TestHook<TestAdjustment> hook) {
259         adjustmentHook = hook;
260     }
261 
262     /**
263      * Returns the factor to be multiplied by the average LN size (for LNs with
264      * uncounted sizes) to correct for differences between obsolete and active
265      * LN sizes.  This method is intentionally unsynchronized to provide fast
266      * access for stats.
267      */
getLNSizeCorrectionFactor()268     public float getLNSizeCorrectionFactor() {
269         return lnSizeCorrectionFactor;
270     }
271 
272     /**
273      * For stats.
274      */
getLastKnownUtilization()275     int getLastKnownUtilization() {
276         return lastKnownUtilization;
277     }
278 
279     /**
280      * Returns EnvironmentConfig.CLEANER_ADJUST_UTILIZATION value.
281      */
isAdjustUtilizationEnabled()282     public boolean isAdjustUtilizationEnabled() {
283         return configAdjustUtilization;
284     }
285 
286     /**
287      * Returns the obsolete size for a FileSummary, and applies the LN size
288      * correction factor if adjustment is configured.  This method should be
289      * used rather than calling FileSummary.getObsoleteSize directly, so that
290      * the configuration setting can be honored.
291      */
getObsoleteSize(FileSummary summary)292     public int getObsoleteSize(FileSummary summary) {
293         if (configAdjustUtilization) {
294             return summary.getObsoleteSize(lnSizeCorrectionFactor);
295         }
296         return summary.getObsoleteSize();
297     }
298 
299     /**
300      * Returns the best file that qualifies for cleaning or probing, or null
301      * if no file qualifies.
302      *
303      * @param fileSummaryMap the map containing file summary info.
304      *
305      * @param forceCleaning is true to always select a file, even if its
306      * utilization is above the minimum utilization threshold.
307      *
308      * @param isBacklog is true if there is currently a backlog, in which case
309      * FilesToMigrate won't be used to return a file.
310      *
311      * @param isProbe is true to use the maximum LN obsolete size to determine
312      * file utilization.  It should be false when selecting a file cleaning a
313      * file normally, to use the average LN size for uncounted sizes along with
314      * correction factor.  It should be true when selecting a file to calculate
315      * utilization without cleaning, to determine the worst case (lowest
316      * possible) utilization and to ignore the correction factor.
317      *
318      * @param excludeFiles is a set of files to exclude from being selected.
319      * Used to prevent probing the same files repeatedly.
320      */
getBestFile( SortedMap<Long, FileSummary> fileSummaryMap, boolean forceCleaning, boolean isBacklog, boolean isProbe, Set<Long> excludeFiles)321     synchronized Long getBestFile(
322         SortedMap<Long, FileSummary> fileSummaryMap,
323         boolean forceCleaning,
324         boolean isBacklog,
325         boolean isProbe,
326         Set<Long> excludeFiles) {
327 
328         /* Paranoia.  There should always be 1 file. */
329         if (fileSummaryMap.size() == 0) {
330             LoggerUtils.logMsg(logger, env, Level.SEVERE,
331                                "Can't clean, map is empty.");
332             return null;
333         }
334 
335         /* Used to avoid checking for Level.FINE on every iteration. */
336         final boolean isLoggingLevelFine = logger.isLoggable(Level.FINE);
337 
338         /*
339          * Use local variables for mutable properties.  Using values that are
340          * changing during a single file selection pass would not produce a
341          * well defined result.
342          *
343          * Note that age is a distance between files not a number of files,
344          * that is, deleted files are counted in the age.
345          */
346         final int useMinUtilization = cleaner.minUtilization;
347         final int useMinFileUtilization = cleaner.minFileUtilization;
348         final int useMinAge = cleaner.minAge;
349 
350         /*
351          * Cleaning must refrain from rearranging the portion of log processed
352          * as recovery time. Do not clean a file greater or equal to the first
353          * active file used in recovery, which is either the last log file or
354          * the file of the first active LSN in an active transaction, whichever
355          * is earlier.
356          *
357          * TxnManager.getFirstActiveLsn() (firstActiveTxnLsn below) is
358          * guaranteed to be earlier or equal to the first active LSN of the
359          * checkpoint that will be performed before deleting the selected log
360          * file. By selecting a file prior to this point we ensure that will
361          * not clean any entry that may be replayed by recovery.
362          *
363          * For example:
364          * 200 ckptA start, determines that ckpt's firstActiveLsn = 100
365          * 400 ckptA end
366          * 600 ckptB start, determines that ckpt's firstActiveLsn = 300
367          * 800 ckptB end
368          *
369          * Any cleaning that executes before ckpt A start will be constrained
370          * to files <= lsn 100, because it will have checked the TxnManager.
371          * If cleaning executes after ckptA start, it may indeed clean after
372          * ckptA's firstActiveLsn, but the cleaning run will wait to ckptB end
373          * to delete files.
374          */
375         long firstActiveFile = fileSummaryMap.lastKey().longValue();
376         final long firstActiveTxnLsn = env.getTxnManager().getFirstActiveLsn();
377         if (firstActiveTxnLsn != DbLsn.NULL_LSN) {
378             long firstActiveTxnFile =
379                 DbLsn.getFileNumber(firstActiveTxnLsn);
380             if (firstActiveFile > firstActiveTxnFile) {
381                 firstActiveFile = firstActiveTxnFile;
382             }
383         }
384 
385         /*
386          * Note that minAge is at least one and may be configured to a higher
387          * value to prevent cleaning recently active files.
388          */
389         final long lastFileToClean = firstActiveFile - useMinAge;
390 
391         /* Calculate totals and find the best file. */
392         Long bestFile = null;
393         int bestUtilization = 101;
394         long totalSize = 0;
395         long totalObsoleteSize = 0;
396         long lastKnownSize = 0;
397         long lastKnownObsoleteSize = 0;
398 
399         for (final Map.Entry<Long, FileSummary> entry :
400              fileSummaryMap.entrySet()) {
401 
402             final Long file = entry.getKey();
403             final long fileNum = file.longValue();
404 
405             final FileSummary summary = entry.getValue();
406             final int obsoleteSize = isProbe ?
407                 summary.getMaxObsoleteSize() :
408                 getObsoleteSize(summary);
409 
410             lastKnownSize += summary.totalSize;
411             lastKnownObsoleteSize += obsoleteSize;
412 
413             /*
414              * If the file is already being cleaned, only total the
415              * non-obsolete amount.  This is an optimistic prediction of the
416              * results of cleaning, and is used to prevent over-cleaning.
417              */
418             if (cleaner.getFileSelector().isFileCleaningInProgress(file)) {
419                 final int utilizedSize = summary.totalSize - obsoleteSize;
420                 totalSize += utilizedSize;
421                 if (isLoggingLevelFine) {
422                     LoggerUtils.logMsg
423                         (logger, env, Level.FINE,
424                          "Skip file previously selected for cleaning: 0x" +
425                          Long.toHexString(fileNum) + " utilizedSize: " +
426                          utilizedSize + " " + summary);
427                 }
428                 continue;
429             }
430 
431             /* Add this file's value to the totals. */
432             totalSize += summary.totalSize;
433             totalObsoleteSize += obsoleteSize;
434 
435             /* Don't select an explicitly excluded file. */
436             if (excludeFiles.contains(file)) {
437                 continue;
438             }
439 
440             /* Skip files that are too young to be cleaned. */
441             if (fileNum > lastFileToClean) {
442                 continue;
443             }
444 
445             /* Select this file if it has the lowest utilization so far. */
446             final int thisUtilization =
447                 FileSummary.utilization(obsoleteSize, summary.totalSize);
448             if (bestFile == null || thisUtilization < bestUtilization) {
449                 bestFile = file;
450                 bestUtilization = thisUtilization;
451             }
452         }
453 
454         /*
455          * The first priority is to clean the log up to the minimum utilization
456          * level, so if we're below the minimum (or an individual file is below
457          * the minimum for any file), then we clean the lowest utilization
458          * (best) file.  Otherwise, if there are more files to migrate, we
459          * clean the next file to be migrated.  Otherwise, if cleaning is
460          * forced (for unit testing), we clean the lowest utilization file.
461          */
462         final Long fileChosen;
463         final String loggingMsg;
464         final int totalUtilization =
465             FileSummary.utilization(totalObsoleteSize, totalSize);
466         lastKnownUtilization =
467             FileSummary.utilization(lastKnownObsoleteSize, lastKnownSize);
468 
469         if (totalUtilization < useMinUtilization ||
470             bestUtilization < useMinFileUtilization) {
471             fileChosen = bestFile;
472             loggingMsg = "Chose lowest utilized file for cleaning.";
473         } else if (!isBacklog &&
474                    filesToMigrate.hasNext(fileSummaryMap)) {
475             fileChosen = filesToMigrate.next(fileSummaryMap);
476             loggingMsg = "Chose file from files-to-migrate for cleaning.";
477         } else if (forceCleaning) {
478             fileChosen = bestFile;
479             loggingMsg = "Chose file for forced cleaning (during testing).";
480         } else {
481             fileChosen = null;
482             loggingMsg = "No file selected for cleaning.";
483         }
484 
485         final Level logLevel = (fileChosen != null) ? Level.INFO : Level.FINE;
486         if (logger.isLoggable(logLevel)) {
487             final String fileChosenString = (fileChosen != null) ?
488                 (" fileChosen: 0x" + Long.toHexString(fileChosen)) :
489                 "";
490             final String correctionString = configAdjustUtilization ?
491                 ( " lnSizeCorrectionFactor: " + lnSizeCorrectionFactor) :
492                 " (adjustment disabled)";
493             LoggerUtils.logMsg
494                 (logger, env, logLevel,
495                  loggingMsg + fileChosenString + correctionString +
496                  " totalUtilization: " + totalUtilization +
497                  " bestFileUtilization: " + bestUtilization +
498                  " isProbe: " + isProbe);
499         }
500 
501         return fileChosen;
502     }
503 
504     /**
505      * Returns the cheapest file to clean from the given list of files.
506      *
507      * The cheapest file is considered to be the one with least number of
508      * active (non-obsolete) entries, since each active entry requires a Btree
509      * lookup and must be logged.
510      *
511      * This method is used to select the first file to be cleaned in the batch
512      * of to-be-cleaned files.  If there is no backlog, then this method always
513      * returns the first and only file in the candidate set, so the cost
514      * algorithm has no impact.
515      *
516      * Returns null iff the candidate set is empty.
517      */
518     synchronized Long
getCheapestFileToClean(SortedMap<Long, FileSummary> fileSummaryMap, SortedSet<Long> candidateFiles)519         getCheapestFileToClean(SortedMap<Long, FileSummary> fileSummaryMap,
520                                SortedSet<Long> candidateFiles) {
521 
522         if (candidateFiles.size() == 0) {
523             return null;
524         }
525 
526         if (candidateFiles.size() == 1) {
527             return candidateFiles.first();
528         }
529 
530         Long bestFile = null;
531         int bestCost = Integer.MAX_VALUE;
532 
533         for (final Long file : candidateFiles) {
534             final FileSummary summary = fileSummaryMap.get(file);
535 
536             /*
537              * Return a file in the given set if it does not exist.  Deleted
538              * files should be selected ASAP to remove them from the backlog.
539              * [#18179] For details, see where FileProcessor.doClean handles
540              * LogFileNotFoundException.
541              */
542             if (summary == null) {
543                 return file;
544             }
545 
546             /* Calculate this file's cost to clean. */
547             final int thisCost = summary.getNonObsoleteCount();
548 
549             /* Select this file if it has the lowest cost so far. */
550             if (bestFile == null || thisCost < bestCost) {
551                 bestFile = file;
552                 bestCost = thisCost;
553             }
554         }
555 
556         return bestFile;
557     }
558 
559     /**
560      * Saves the true average LN size for use in calculating utilization.  The
561      * true FileSummary info is determined by FileProcessor when cleaning or
562      * probing a file.
563      *
564      * The following example shows how the true average LN size (for LNs with
565      * their size uncounted) is calculated, given the estimated and the true
566      * FileSummary info.
567      *
568      * Estimated FileSummary
569      * ---------------------
570      * This calculation is performed by FileSummary
571      * getAverageObsoleteLNSizeNotCounted and documented there.
572      *
573      * estimatedFileSummary.totalLNCount: 1,000
574      * estimatedFileSummary.totalLNSize: 100,000
575      * estimatedFileSummary.obsoleteLNCount: 500
576      * estimatedFileSummary.obsoleteLNSizeCounted: 250
577      * estimatedFileSummary.obsoleteLNSize: 12,500
578      * average counted obsolete LN size: 12,500/250 == 50
579      * remainder LN count: 1,000 - 250 == 750
580      * remainder LN size: 100,000 - 12,500 = 87,500
581      * average uncounted obsolete LN size: 87,500 / 750 == 116.67
582      * estimated obsolete LN size: (116.67 * 250) + 12,500 == 41,667.5
583      *
584      * True FileSummary
585      * ----------------
586      * This calculation is perform in this method, given the obsoleteLNSize
587      * amount in the true FileSummary and the information from the estimated
588      * FileSummary above.
589      *
590      * trueFileSummary.obsoleteLNSize: 60,000
591      *
592      * average uncounted obsolete LN size:
593      *  (60,000 - 12,500) / (500 - 250) == 190
594      *
595      * which is:
596      * (trueFileSummary.obsoleteLNSize -
597      *  estimatedFileSummary.obsoleteLNSize)
598      *  /
599      * (estimatedFileSummary.obsoleteLNCount -
600      *  estimatedFileSummary.obsoleteLNSizeCounted)
601      *
602      * Note that these values represent only the OBSOLETE uncounted amounts,
603      * while the estimate further above uses ALL uncounted amounts.
604      *
605      * As a check, if 190 were used in the estimation further above, rather
606      * than using 116.67, then the result would be 60,000:
607      *
608      * estimated obsolete LN size: (190 * 250) + 12,500 == 60,000
609      *
610      * @return whether the adjustment was valid, or no adjustment was needed.
611      * False is returned if the adjustment was discarded because it is invalid.
612      */
adjustUtilization( long fileNum, long endFileNum, FileSummary estimatedFileSummary, FileSummary trueFileSummary)613     synchronized boolean adjustUtilization(
614         long fileNum,
615         long endFileNum,
616         FileSummary estimatedFileSummary,
617         FileSummary trueFileSummary) {
618 
619         /* Save last file num at the time of the update. */
620         endFileNumAtLastAdjustment = endFileNum;
621 
622         /*
623          * If estimated and true obsolete counts are unequal then comparing the
624          * sizes would lead to incorrect conclusions, since the true obsolete
625          * size that corresponds to the estimated obsolete count is unknown.
626          * The obsolete counts might be unequal if estimated values are
627          * double-counted by recovery, for example, or if the file's
628          * utilization changes in between the estimate and the cleaning/probe.
629          * [#22814]
630          */
631         if (estimatedFileSummary.obsoleteLNCount !=
632             trueFileSummary.obsoleteLNCount) {
633             return false;
634         }
635 
636         /*
637          * Keep track of the number of adjustments, which is the number of
638          * files cleaned or probed.
639          */
640         if (initialAdjustments < configInitialAdjustments) {
641             initialAdjustments += 1;
642         }
643 
644         /* Normalize obsolete amounts to account for double-counting. */
645         final int estObsLNCount = Math.min(
646             estimatedFileSummary.obsoleteLNCount,
647             estimatedFileSummary.totalLNCount);
648         final int estObsLNSize = Math.min(
649             estimatedFileSummary.obsoleteLNSize,
650             estimatedFileSummary.totalLNSize);
651         final int estObsLNSizeCounted = Math.min(
652             estimatedFileSummary.obsoleteLNSizeCounted, estObsLNCount);
653         final int trueObsLNSize = Math.min(
654             trueFileSummary.obsoleteLNSize, trueFileSummary.totalLNSize);
655 
656         /* Estimated average for this adjustment, used for debugging. */
657         final float estimatedAvgSize =
658             estimatedFileSummary.getAvgObsoleteLNSizeNotCounted();
659 
660         /*
661          * Calculate true average as described in the method comment above.
662          * The uncountedSize and uncountedCount values represent only the
663          * OBSOLETE uncounted amounts, while estUncountedSize and
664          * estUncountedCount represented ALL uncounted amounts.
665          */
666         final float trueAvgSize;
667         final float correctionFactor;
668         final int uncountedSize = trueObsLNSize - estObsLNSize;
669         final int uncountedCount = estObsLNCount - estObsLNSizeCounted;
670 
671         /* If no LN sizes are uncounted, don't apply this adjustment. */
672         if (uncountedSize > 0 && uncountedCount > 0) {
673 
674             /* Estimated average size for this adjustment. */
675             final int estUncountedSize =
676                 estimatedFileSummary.totalLNSize - estObsLNSize;
677             final int estUncountedCount =
678                 estimatedFileSummary.totalLNCount - estObsLNSizeCounted;
679             assert estimatedAvgSize ==
680                 estUncountedSize / ((float) estUncountedCount) :
681                 "expected=" + estimatedAvgSize +
682                 "got=" + (estUncountedSize / ((float) estUncountedCount));
683 
684             /*
685              * Calc this file's correction factor and apply this adjustment
686              * only when it is a reasonable value.
687              */
688             trueAvgSize = uncountedSize / ((float) uncountedCount);
689             correctionFactor = trueAvgSize / estimatedAvgSize;
690 
691             if (correctionFactor < MIN_CORRECTION_FACTOR ||
692                 correctionFactor > MAX_CORRECTION_FACTOR) {
693                 return false;
694             }
695 
696             /* Add new average to recent values, remove oldest value. */
697             recentAvgLNSizes.addLast(
698                 new AverageSize(uncountedSize, uncountedCount,
699                                 estUncountedSize, estUncountedCount));
700 
701             while (recentAvgLNSizes.size() > configRecentLNSizes) {
702                 recentAvgLNSizes.removeFirst();
703             }
704 
705             /* Update current correction factor. */
706             updateObsoleteLNSizeCorrectionFactor();
707         } else {
708             trueAvgSize = Float.NaN;
709             correctionFactor = Float.NaN;
710         }
711 
712         /* Output debug info. */
713         if (DEBUG) {
714             System.out.println("estimatedAvgSize=" + estimatedAvgSize +
715                                " trueAvgSize=" + trueAvgSize +
716                                " fileNum=" + fileNum +
717                                "\nestimatedFileSummary=\n" +
718                                estimatedFileSummary +
719                                "\ntrueFileSummary=\n" + trueFileSummary);
720         }
721 
722         /* Call test hook. */
723         boolean assertionsEnabled = false;
724         assert (assertionsEnabled = true);
725         if (assertionsEnabled && adjustmentHook != null) {
726             final TestAdjustment testAdjustment = new TestAdjustment(
727                 fileNum, endFileNum, estimatedFileSummary, trueFileSummary,
728                 estimatedAvgSize, trueAvgSize, correctionFactor);
729             TestHookExecute.doHookIfSet(adjustmentHook, testAdjustment);
730         }
731 
732         return true;
733     }
734 
735     /**
736      * Sets lnSizeCorrectionFactor using recent average LN sizes.
737      */
updateObsoleteLNSizeCorrectionFactor()738     private void updateObsoleteLNSizeCorrectionFactor() {
739 
740         /* Calculate totals. */
741         long totalSize = 0;
742         long totalCount = 0;
743         long estTotalSize = 0;
744         long estTotalCount = 0;
745 
746         for (AverageSize avg : recentAvgLNSizes) {
747             totalSize += avg.size;
748             totalCount += avg.count;
749             estTotalSize += avg.estSize;
750             estTotalCount += avg.estCount;
751         }
752 
753         /*
754          * The correction factor is updated only if the number of LNs is high
755          * enough to be representative.  lnSizeCorrectionFactor may have been
756          * previously calculated and will remain unchanged if the recent
757          * AverageSize info does not have configMinUncountedLNs.
758          */
759         if (totalCount < configMinUncountedLNs) {
760             return;
761         }
762 
763         /*
764          * The result of the following calculation is that:
765          *
766          *  estimatedAvgLNSize * lnSizeCorrectionFactor == correctedAvgLNSize
767          *
768          * which is how lnSizeCorrectionFactor is used in the FileSummary class
769          * to calculate corrected LN sizes and utilization.
770          */
771         final float correctedAvgLNSize =
772             (float) (totalSize / ((double) totalCount));
773 
774         final float estimatedAvgLNSize =
775             (float) (estTotalSize / ((double) estTotalCount));
776 
777         lnSizeCorrectionFactor = correctedAvgLNSize / estimatedAvgLNSize;
778     }
779 
780     /**
781      * Returns whether enough adjustments have been made to conclude that the
782      * the LN size correction factor has been established, or at least
783      * unnecessary because very few LN sizes are uncounted.  When false is
784      * returned, a backlog of to-be-cleaned files should not be created,
785      * because when the correction factor is established we may find no need to
786      * clean the files in the backlog.  Note that we do not clear the backlog
787      * when we establish the size.
788      */
isCorrectionEstablished()789     synchronized boolean isCorrectionEstablished() {
790         if (!configAdjustUtilization) {
791             return true;
792         }
793         return (initialAdjustments >= configInitialAdjustments);
794     }
795 
796     /**
797      * Returns whether a correction probe should be attempted, if worst case
798      * utilization also indicates that cleaning may be needed.
799      *
800      * As a side effect whenever true is returned, we update
801      * endFileNumAtLastAdjustment, which is somewhat redundant because
802      * adjustUtilization will also update this field after the probe is
803      * complete.  Updating it here ensures that only a single probe will be
804      * performed at the scheduled interval, and that multiple cleaner threads
805      * will not do a probe at the same time.  This method is synchronized to
806      * guarantee that.
807      */
shouldPerformProbe(long endFileNum)808     synchronized boolean shouldPerformProbe(long endFileNum) {
809 
810         if (!configAdjustUtilization) {
811             return false;
812         }
813 
814         final int filesBeforeProbe = isCorrectionEstablished() ?
815             configMaxProbeSkipFiles : configMinProbeSkipFiles;
816 
817         if (endFileNum - endFileNumAtLastAdjustment < filesBeforeProbe) {
818             return false;
819         }
820 
821         endFileNumAtLastAdjustment = endFileNum;
822         return true;
823     }
824 
825     /**
826      * Bundles a count of LNs and their total size, for use in calculating a
827      * running average.
828      */
829     static class AverageSize {
830         final int size;
831         final int count;
832         final int estSize;
833         final int estCount;
834 
AverageSize(int size, int count, int estSize, int estCount)835         AverageSize(int size, int count, int estSize, int estCount) {
836             this.size = size;
837             this.count = count;
838             this.estSize = estSize;
839             this.estCount = estCount;
840         }
841 
842         @Override
equals(Object other)843         public boolean equals(Object other) {
844             if (!(other instanceof AverageSize)) {
845                 return false;
846             }
847             final AverageSize o = (AverageSize) other;
848             return size == o.size && count == o.count &&
849                 estSize == o.estSize && estCount == o.estCount;
850         }
851 
852         @Override
hashCode()853         public int hashCode() {
854             return size + count + estSize + estCount;
855         }
856 
857         @Override
toString()858         public String toString() {
859             return "size=" + size + " count=" + count +
860                 " estSize=" + estSize + " estCount=" + estCount;
861         }
862     }
863 
864     /**
865      * For passing adjustment information to a test hook.
866      */
867     static class TestAdjustment {
868         final long fileNum;
869         final long endFileNum;
870         final FileSummary estimatedFileSummary;
871         final FileSummary trueFileSummary;
872         final float estimatedAvgSize;
873         final float trueAvgSize;
874         final float correctionFactor;
875 
TestAdjustment(long fileNum, long endFileNum, FileSummary estimatedFileSummary, FileSummary trueFileSummary, float estimatedAvgSize, float trueAvgSize, float correctionFactor)876         TestAdjustment(long fileNum,
877                        long endFileNum,
878                        FileSummary estimatedFileSummary,
879                        FileSummary trueFileSummary,
880                        float estimatedAvgSize,
881                        float trueAvgSize,
882                        float correctionFactor) {
883             this.fileNum = fileNum;
884             this.endFileNum = endFileNum;
885             this.estimatedFileSummary = estimatedFileSummary;
886             this.trueFileSummary = trueFileSummary;
887             this.estimatedAvgSize = estimatedAvgSize;
888             this.trueAvgSize = trueAvgSize;
889             this.correctionFactor = correctionFactor;
890         }
891     }
892 }
893