1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #pragma once
11 #include "db/version_set.h"
12 #include "memory/arena.h"
13 #include "options/cf_options.h"
14 #include "util/autovector.h"
15 
16 namespace rocksdb {
17 // The file contains class Compaction, as well as some helper functions
18 // and data structures used by the class.
19 
20 // Utility for comparing sstable boundary keys. Returns -1 if either a or b is
21 // null which provides the property that a==null indicates a key that is less
22 // than any key and b==null indicates a key that is greater than any key. Note
23 // that the comparison is performed primarily on the user-key portion of the
24 // key. If the user-keys compare equal, an additional test is made to sort
25 // range tombstone sentinel keys before other keys with the same user-key. The
26 // result is that 2 user-keys will compare equal if they differ purely on
27 // their sequence number and value, but the range tombstone sentinel for that
28 // user-key will compare not equal. This is necessary because the range
29 // tombstone sentinel key is set as the largest key for an sstable even though
30 // that key never appears in the database. We don't want adjacent sstables to
31 // be considered overlapping if they are separated by the range tombstone
32 // sentinel.
33 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
34                       const InternalKey& b);
35 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a,
36                       const InternalKey& b);
37 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
38                       const InternalKey* b);
39 
40 // An AtomicCompactionUnitBoundary represents a range of keys [smallest,
41 // largest] that exactly spans one ore more neighbouring SSTs on the same
42 // level. Every pair of  SSTs in this range "overlap" (i.e., the largest
43 // user key of one file is the smallest user key of the next file). These
44 // boundaries are propagated down to RangeDelAggregator during compaction
45 // to provide safe truncation boundaries for range tombstones.
46 struct AtomicCompactionUnitBoundary {
47   const InternalKey* smallest = nullptr;
48   const InternalKey* largest = nullptr;
49 };
50 
51 // The structure that manages compaction input files associated
52 // with the same physical level.
53 struct CompactionInputFiles {
54   int level;
55   std::vector<FileMetaData*> files;
56   std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries;
emptyCompactionInputFiles57   inline bool empty() const { return files.empty(); }
sizeCompactionInputFiles58   inline size_t size() const { return files.size(); }
clearCompactionInputFiles59   inline void clear() { files.clear(); }
60   inline FileMetaData* operator[](size_t i) const { return files[i]; }
61 };
62 
63 class Version;
64 class ColumnFamilyData;
65 class VersionStorageInfo;
66 class CompactionFilter;
67 
68 // A Compaction encapsulates metadata about a compaction.
69 class Compaction {
70  public:
71   Compaction(VersionStorageInfo* input_version,
72              const ImmutableCFOptions& immutable_cf_options,
73              const MutableCFOptions& mutable_cf_options,
74              std::vector<CompactionInputFiles> inputs, int output_level,
75              uint64_t target_file_size, uint64_t max_compaction_bytes,
76              uint32_t output_path_id, CompressionType compression,
77              CompressionOptions compression_opts, uint32_t max_subcompactions,
78              std::vector<FileMetaData*> grandparents,
79              bool manual_compaction = false, double score = -1,
80              bool deletion_compaction = false,
81              CompactionReason compaction_reason = CompactionReason::kUnknown);
82 
83   // No copying allowed
84   Compaction(const Compaction&) = delete;
85   void operator=(const Compaction&) = delete;
86 
87   ~Compaction();
88 
89   // Returns the level associated to the specified compaction input level.
90   // If compaction_input_level is not specified, then input_level is set to 0.
91   int level(size_t compaction_input_level = 0) const {
92     return inputs_[compaction_input_level].level;
93   }
94 
start_level()95   int start_level() const { return start_level_; }
96 
97   // Outputs will go to this level
output_level()98   int output_level() const { return output_level_; }
99 
100   // Returns the number of input levels in this compaction.
num_input_levels()101   size_t num_input_levels() const { return inputs_.size(); }
102 
103   // Return the object that holds the edits to the descriptor done
104   // by this compaction.
edit()105   VersionEdit* edit() { return &edit_; }
106 
107   // Returns the number of input files associated to the specified
108   // compaction input level.
109   // The function will return 0 if when "compaction_input_level" < 0
110   // or "compaction_input_level" >= "num_input_levels()".
num_input_files(size_t compaction_input_level)111   size_t num_input_files(size_t compaction_input_level) const {
112     if (compaction_input_level < inputs_.size()) {
113       return inputs_[compaction_input_level].size();
114     }
115     return 0;
116   }
117 
118   // Returns input version of the compaction
input_version()119   Version* input_version() const { return input_version_; }
120 
121   // Returns the ColumnFamilyData associated with the compaction.
column_family_data()122   ColumnFamilyData* column_family_data() const { return cfd_; }
123 
124   // Returns the file meta data of the 'i'th input file at the
125   // specified compaction input level.
126   // REQUIREMENT: "compaction_input_level" must be >= 0 and
127   //              < "input_levels()"
input(size_t compaction_input_level,size_t i)128   FileMetaData* input(size_t compaction_input_level, size_t i) const {
129     assert(compaction_input_level < inputs_.size());
130     return inputs_[compaction_input_level][i];
131   }
132 
boundaries(size_t compaction_input_level)133   const std::vector<AtomicCompactionUnitBoundary>* boundaries(
134       size_t compaction_input_level) const {
135     assert(compaction_input_level < inputs_.size());
136     return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries;
137   }
138 
139   // Returns the list of file meta data of the specified compaction
140   // input level.
141   // REQUIREMENT: "compaction_input_level" must be >= 0 and
142   //              < "input_levels()"
inputs(size_t compaction_input_level)143   const std::vector<FileMetaData*>* inputs(
144       size_t compaction_input_level) const {
145     assert(compaction_input_level < inputs_.size());
146     return &inputs_[compaction_input_level].files;
147   }
148 
inputs()149   const std::vector<CompactionInputFiles>* inputs() { return &inputs_; }
150 
151   // Returns the LevelFilesBrief of the specified compaction input level.
input_levels(size_t compaction_input_level)152   const LevelFilesBrief* input_levels(size_t compaction_input_level) const {
153     return &input_levels_[compaction_input_level];
154   }
155 
156   // Maximum size of files to build during this compaction.
max_output_file_size()157   uint64_t max_output_file_size() const { return max_output_file_size_; }
158 
159   // What compression for output
output_compression()160   CompressionType output_compression() const { return output_compression_; }
161 
162   // What compression options for output
output_compression_opts()163   CompressionOptions output_compression_opts() const {
164     return output_compression_opts_;
165   }
166 
167   // Whether need to write output file to second DB path.
output_path_id()168   uint32_t output_path_id() const { return output_path_id_; }
169 
170   // Is this a trivial compaction that can be implemented by just
171   // moving a single input file to the next level (no merging or splitting)
172   bool IsTrivialMove() const;
173 
174   // If true, then the compaction can be done by simply deleting input files.
deletion_compaction()175   bool deletion_compaction() const { return deletion_compaction_; }
176 
177   // Add all inputs to this compaction as delete operations to *edit.
178   void AddInputDeletions(VersionEdit* edit);
179 
180   // Returns true if the available information we have guarantees that
181   // the input "user_key" does not exist in any level beyond "output_level()".
182   bool KeyNotExistsBeyondOutputLevel(const Slice& user_key,
183                                      std::vector<size_t>* level_ptrs) const;
184 
185   // Clear all files to indicate that they are not being compacted
186   // Delete this compaction from the list of running compactions.
187   //
188   // Requirement: DB mutex held
189   void ReleaseCompactionFiles(Status status);
190 
191   // Returns the summary of the compaction in "output" with maximum "len"
192   // in bytes.  The caller is responsible for the memory management of
193   // "output".
194   void Summary(char* output, int len);
195 
196   // Return the score that was used to pick this compaction run.
score()197   double score() const { return score_; }
198 
199   // Is this compaction creating a file in the bottom most level?
bottommost_level()200   bool bottommost_level() const { return bottommost_level_; }
201 
202   // Does this compaction include all sst files?
is_full_compaction()203   bool is_full_compaction() const { return is_full_compaction_; }
204 
205   // Was this compaction triggered manually by the client?
is_manual_compaction()206   bool is_manual_compaction() const { return is_manual_compaction_; }
207 
208   // Used when allow_trivial_move option is set in
209   // Universal compaction. If all the input files are
210   // non overlapping, then is_trivial_move_ variable
211   // will be set true, else false
set_is_trivial_move(bool trivial_move)212   void set_is_trivial_move(bool trivial_move) {
213     is_trivial_move_ = trivial_move;
214   }
215 
216   // Used when allow_trivial_move option is set in
217   // Universal compaction. Returns true, if the input files
218   // are non-overlapping and can be trivially moved.
is_trivial_move()219   bool is_trivial_move() const { return is_trivial_move_; }
220 
221   // How many total levels are there?
number_levels()222   int number_levels() const { return number_levels_; }
223 
224   // Return the ImmutableCFOptions that should be used throughout the compaction
225   // procedure
immutable_cf_options()226   const ImmutableCFOptions* immutable_cf_options() const {
227     return &immutable_cf_options_;
228   }
229 
230   // Return the MutableCFOptions that should be used throughout the compaction
231   // procedure
mutable_cf_options()232   const MutableCFOptions* mutable_cf_options() const {
233     return &mutable_cf_options_;
234   }
235 
236   // Returns the size in bytes that the output file should be preallocated to.
237   // In level compaction, that is max_file_size_. In universal compaction, that
238   // is the sum of all input file sizes.
239   uint64_t OutputFilePreallocationSize() const;
240 
241   void SetInputVersion(Version* input_version);
242 
243   struct InputLevelSummaryBuffer {
244     char buffer[128];
245   };
246 
247   const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const;
248 
249   uint64_t CalculateTotalInputSize() const;
250 
251   // In case of compaction error, reset the nextIndex that is used
252   // to pick up the next file to be compacted from files_by_size_
253   void ResetNextCompactionIndex();
254 
255   // Create a CompactionFilter from compaction_filter_factory
256   std::unique_ptr<CompactionFilter> CreateCompactionFilter() const;
257 
258   // Is the input level corresponding to output_level_ empty?
259   bool IsOutputLevelEmpty() const;
260 
261   // Should this compaction be broken up into smaller ones run in parallel?
262   bool ShouldFormSubcompactions() const;
263 
264   // test function to validate the functionality of IsBottommostLevel()
265   // function -- determines if compaction with inputs and storage is bottommost
266   static bool TEST_IsBottommostLevel(
267       int output_level, VersionStorageInfo* vstorage,
268       const std::vector<CompactionInputFiles>& inputs);
269 
GetOutputTableProperties()270   TablePropertiesCollection GetOutputTableProperties() const {
271     return output_table_properties_;
272   }
273 
SetOutputTableProperties(TablePropertiesCollection tp)274   void SetOutputTableProperties(TablePropertiesCollection tp) {
275     output_table_properties_ = std::move(tp);
276   }
277 
GetSmallestUserKey()278   Slice GetSmallestUserKey() const { return smallest_user_key_; }
279 
GetLargestUserKey()280   Slice GetLargestUserKey() const { return largest_user_key_; }
281 
282   int GetInputBaseLevel() const;
283 
compaction_reason()284   CompactionReason compaction_reason() { return compaction_reason_; }
285 
grandparents()286   const std::vector<FileMetaData*>& grandparents() const {
287     return grandparents_;
288   }
289 
max_compaction_bytes()290   uint64_t max_compaction_bytes() const { return max_compaction_bytes_; }
291 
max_subcompactions()292   uint32_t max_subcompactions() const { return max_subcompactions_; }
293 
294   uint64_t MinInputFileOldestAncesterTime() const;
295 
296  private:
297   // mark (or clear) all files that are being compacted
298   void MarkFilesBeingCompacted(bool mark_as_compacted);
299 
300   // get the smallest and largest key present in files to be compacted
301   static void GetBoundaryKeys(VersionStorageInfo* vstorage,
302                               const std::vector<CompactionInputFiles>& inputs,
303                               Slice* smallest_key, Slice* largest_key);
304 
305   // Get the atomic file boundaries for all files in the compaction. Necessary
306   // in order to avoid the scenario described in
307   // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb
308   // down appropriate key boundaries to RangeDelAggregator during compaction.
309   static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries(
310       VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs);
311 
312   // helper function to determine if compaction with inputs and storage is
313   // bottommost
314   static bool IsBottommostLevel(
315       int output_level, VersionStorageInfo* vstorage,
316       const std::vector<CompactionInputFiles>& inputs);
317 
318   static bool IsFullCompaction(VersionStorageInfo* vstorage,
319                                const std::vector<CompactionInputFiles>& inputs);
320 
321   VersionStorageInfo* input_vstorage_;
322 
323   const int start_level_;    // the lowest level to be compacted
324   const int output_level_;  // levels to which output files are stored
325   uint64_t max_output_file_size_;
326   uint64_t max_compaction_bytes_;
327   uint32_t max_subcompactions_;
328   const ImmutableCFOptions immutable_cf_options_;
329   const MutableCFOptions mutable_cf_options_;
330   Version* input_version_;
331   VersionEdit edit_;
332   const int number_levels_;
333   ColumnFamilyData* cfd_;
334   Arena arena_;          // Arena used to allocate space for file_levels_
335 
336   const uint32_t output_path_id_;
337   CompressionType output_compression_;
338   CompressionOptions output_compression_opts_;
339   // If true, then the comaction can be done by simply deleting input files.
340   const bool deletion_compaction_;
341 
342   // Compaction input files organized by level. Constant after construction
343   const std::vector<CompactionInputFiles> inputs_;
344 
345   // A copy of inputs_, organized more closely in memory
346   autovector<LevelFilesBrief, 2> input_levels_;
347 
348   // State used to check for number of overlapping grandparent files
349   // (grandparent == "output_level_ + 1")
350   std::vector<FileMetaData*> grandparents_;
351   const double score_;         // score that was used to pick this compaction.
352 
353   // Is this compaction creating a file in the bottom most level?
354   const bool bottommost_level_;
355   // Does this compaction include all sst files?
356   const bool is_full_compaction_;
357 
358   // Is this compaction requested by the client?
359   const bool is_manual_compaction_;
360 
361   // True if we can do trivial move in Universal multi level
362   // compaction
363   bool is_trivial_move_;
364 
365   // Does input compression match the output compression?
366   bool InputCompressionMatchesOutput() const;
367 
368   // table properties of output files
369   TablePropertiesCollection output_table_properties_;
370 
371   // smallest user keys in compaction
372   Slice smallest_user_key_;
373 
374   // largest user keys in compaction
375   Slice largest_user_key_;
376 
377   // Reason for compaction
378   CompactionReason compaction_reason_;
379 };
380 
381 // Return sum of sizes of all files in `files`.
382 extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files);
383 
384 }  // namespace rocksdb
385