1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #pragma once
11 #include "db/version_set.h"
12 #include "memory/arena.h"
13 #include "options/cf_options.h"
14 #include "rocksdb/sst_partitioner.h"
15 #include "util/autovector.h"
16 
17 namespace ROCKSDB_NAMESPACE {
18 // The file contains class Compaction, as well as some helper functions
19 // and data structures used by the class.
20 
21 // Utility for comparing sstable boundary keys. Returns -1 if either a or b is
22 // null which provides the property that a==null indicates a key that is less
23 // than any key and b==null indicates a key that is greater than any key. Note
24 // that the comparison is performed primarily on the user-key portion of the
25 // key. If the user-keys compare equal, an additional test is made to sort
26 // range tombstone sentinel keys before other keys with the same user-key. The
27 // result is that 2 user-keys will compare equal if they differ purely on
28 // their sequence number and value, but the range tombstone sentinel for that
29 // user-key will compare not equal. This is necessary because the range
30 // tombstone sentinel key is set as the largest key for an sstable even though
31 // that key never appears in the database. We don't want adjacent sstables to
32 // be considered overlapping if they are separated by the range tombstone
33 // sentinel.
34 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
35                       const InternalKey& b);
36 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a,
37                       const InternalKey& b);
38 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
39                       const InternalKey* b);
40 
41 // An AtomicCompactionUnitBoundary represents a range of keys [smallest,
42 // largest] that exactly spans one ore more neighbouring SSTs on the same
43 // level. Every pair of  SSTs in this range "overlap" (i.e., the largest
44 // user key of one file is the smallest user key of the next file). These
45 // boundaries are propagated down to RangeDelAggregator during compaction
46 // to provide safe truncation boundaries for range tombstones.
47 struct AtomicCompactionUnitBoundary {
48   const InternalKey* smallest = nullptr;
49   const InternalKey* largest = nullptr;
50 };
51 
52 // The structure that manages compaction input files associated
53 // with the same physical level.
54 struct CompactionInputFiles {
55   int level;
56   std::vector<FileMetaData*> files;
57   std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries;
emptyCompactionInputFiles58   inline bool empty() const { return files.empty(); }
sizeCompactionInputFiles59   inline size_t size() const { return files.size(); }
clearCompactionInputFiles60   inline void clear() { files.clear(); }
61   inline FileMetaData* operator[](size_t i) const { return files[i]; }
62 };
63 
64 class Version;
65 class ColumnFamilyData;
66 class VersionStorageInfo;
67 class CompactionFilter;
68 
69 // A Compaction encapsulates metadata about a compaction.
70 class Compaction {
71  public:
72   Compaction(VersionStorageInfo* input_version,
73              const ImmutableOptions& immutable_options,
74              const MutableCFOptions& mutable_cf_options,
75              const MutableDBOptions& mutable_db_options,
76              std::vector<CompactionInputFiles> inputs, int output_level,
77              uint64_t target_file_size, uint64_t max_compaction_bytes,
78              uint32_t output_path_id, CompressionType compression,
79              CompressionOptions compression_opts, uint32_t max_subcompactions,
80              std::vector<FileMetaData*> grandparents,
81              bool manual_compaction = false, double score = -1,
82              bool deletion_compaction = false,
83              CompactionReason compaction_reason = CompactionReason::kUnknown);
84 
85   // No copying allowed
86   Compaction(const Compaction&) = delete;
87   void operator=(const Compaction&) = delete;
88 
89   ~Compaction();
90 
91   // Returns the level associated to the specified compaction input level.
92   // If compaction_input_level is not specified, then input_level is set to 0.
93   int level(size_t compaction_input_level = 0) const {
94     return inputs_[compaction_input_level].level;
95   }
96 
start_level()97   int start_level() const { return start_level_; }
98 
99   // Outputs will go to this level
output_level()100   int output_level() const { return output_level_; }
101 
102   // Returns the number of input levels in this compaction.
num_input_levels()103   size_t num_input_levels() const { return inputs_.size(); }
104 
105   // Return the object that holds the edits to the descriptor done
106   // by this compaction.
edit()107   VersionEdit* edit() { return &edit_; }
108 
109   // Returns the number of input files associated to the specified
110   // compaction input level.
111   // The function will return 0 if when "compaction_input_level" < 0
112   // or "compaction_input_level" >= "num_input_levels()".
num_input_files(size_t compaction_input_level)113   size_t num_input_files(size_t compaction_input_level) const {
114     if (compaction_input_level < inputs_.size()) {
115       return inputs_[compaction_input_level].size();
116     }
117     return 0;
118   }
119 
120   // Returns input version of the compaction
input_version()121   Version* input_version() const { return input_version_; }
122 
123   // Returns the ColumnFamilyData associated with the compaction.
column_family_data()124   ColumnFamilyData* column_family_data() const { return cfd_; }
125 
126   // Returns the file meta data of the 'i'th input file at the
127   // specified compaction input level.
128   // REQUIREMENT: "compaction_input_level" must be >= 0 and
129   //              < "input_levels()"
input(size_t compaction_input_level,size_t i)130   FileMetaData* input(size_t compaction_input_level, size_t i) const {
131     assert(compaction_input_level < inputs_.size());
132     return inputs_[compaction_input_level][i];
133   }
134 
boundaries(size_t compaction_input_level)135   const std::vector<AtomicCompactionUnitBoundary>* boundaries(
136       size_t compaction_input_level) const {
137     assert(compaction_input_level < inputs_.size());
138     return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries;
139   }
140 
141   // Returns the list of file meta data of the specified compaction
142   // input level.
143   // REQUIREMENT: "compaction_input_level" must be >= 0 and
144   //              < "input_levels()"
inputs(size_t compaction_input_level)145   const std::vector<FileMetaData*>* inputs(
146       size_t compaction_input_level) const {
147     assert(compaction_input_level < inputs_.size());
148     return &inputs_[compaction_input_level].files;
149   }
150 
inputs()151   const std::vector<CompactionInputFiles>* inputs() { return &inputs_; }
152 
153   // Returns the LevelFilesBrief of the specified compaction input level.
input_levels(size_t compaction_input_level)154   const LevelFilesBrief* input_levels(size_t compaction_input_level) const {
155     return &input_levels_[compaction_input_level];
156   }
157 
158   // Maximum size of files to build during this compaction.
max_output_file_size()159   uint64_t max_output_file_size() const { return max_output_file_size_; }
160 
161   // What compression for output
output_compression()162   CompressionType output_compression() const { return output_compression_; }
163 
164   // What compression options for output
output_compression_opts()165   const CompressionOptions& output_compression_opts() const {
166     return output_compression_opts_;
167   }
168 
169   // Whether need to write output file to second DB path.
output_path_id()170   uint32_t output_path_id() const { return output_path_id_; }
171 
172   // Is this a trivial compaction that can be implemented by just
173   // moving a single input file to the next level (no merging or splitting)
174   bool IsTrivialMove() const;
175 
176   // If true, then the compaction can be done by simply deleting input files.
deletion_compaction()177   bool deletion_compaction() const { return deletion_compaction_; }
178 
179   // Add all inputs to this compaction as delete operations to *edit.
180   void AddInputDeletions(VersionEdit* edit);
181 
182   // Returns true if the available information we have guarantees that
183   // the input "user_key" does not exist in any level beyond "output_level()".
184   bool KeyNotExistsBeyondOutputLevel(const Slice& user_key,
185                                      std::vector<size_t>* level_ptrs) const;
186 
187   // Clear all files to indicate that they are not being compacted
188   // Delete this compaction from the list of running compactions.
189   //
190   // Requirement: DB mutex held
191   void ReleaseCompactionFiles(Status status);
192 
193   // Returns the summary of the compaction in "output" with maximum "len"
194   // in bytes.  The caller is responsible for the memory management of
195   // "output".
196   void Summary(char* output, int len);
197 
198   // Return the score that was used to pick this compaction run.
score()199   double score() const { return score_; }
200 
201   // Is this compaction creating a file in the bottom most level?
bottommost_level()202   bool bottommost_level() const { return bottommost_level_; }
203 
204   // Does this compaction include all sst files?
is_full_compaction()205   bool is_full_compaction() const { return is_full_compaction_; }
206 
207   // Was this compaction triggered manually by the client?
is_manual_compaction()208   bool is_manual_compaction() const { return is_manual_compaction_; }
209 
210   // Used when allow_trivial_move option is set in
211   // Universal compaction. If all the input files are
212   // non overlapping, then is_trivial_move_ variable
213   // will be set true, else false
set_is_trivial_move(bool trivial_move)214   void set_is_trivial_move(bool trivial_move) {
215     is_trivial_move_ = trivial_move;
216   }
217 
218   // Used when allow_trivial_move option is set in
219   // Universal compaction. Returns true, if the input files
220   // are non-overlapping and can be trivially moved.
is_trivial_move()221   bool is_trivial_move() const { return is_trivial_move_; }
222 
223   // How many total levels are there?
number_levels()224   int number_levels() const { return number_levels_; }
225 
226   // Return the ImmutableCFOptions that should be used throughout the compaction
227   // procedure
immutable_cf_options()228   const ImmutableOptions* immutable_cf_options() const {
229     return &immutable_options_;
230   }
231 
232   // Return the MutableCFOptions that should be used throughout the compaction
233   // procedure
mutable_cf_options()234   const MutableCFOptions* mutable_cf_options() const {
235     return &mutable_cf_options_;
236   }
237 
238   // Returns the size in bytes that the output file should be preallocated to.
239   // In level compaction, that is max_file_size_. In universal compaction, that
240   // is the sum of all input file sizes.
241   uint64_t OutputFilePreallocationSize() const;
242 
243   void SetInputVersion(Version* input_version);
244 
245   struct InputLevelSummaryBuffer {
246     char buffer[128];
247   };
248 
249   const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const;
250 
251   uint64_t CalculateTotalInputSize() const;
252 
253   // In case of compaction error, reset the nextIndex that is used
254   // to pick up the next file to be compacted from files_by_size_
255   void ResetNextCompactionIndex();
256 
257   // Create a CompactionFilter from compaction_filter_factory
258   std::unique_ptr<CompactionFilter> CreateCompactionFilter() const;
259 
260   // Create a SstPartitioner from sst_partitioner_factory
261   std::unique_ptr<SstPartitioner> CreateSstPartitioner() const;
262 
263   // Is the input level corresponding to output_level_ empty?
264   bool IsOutputLevelEmpty() const;
265 
266   // Should this compaction be broken up into smaller ones run in parallel?
267   bool ShouldFormSubcompactions() const;
268 
269   // test function to validate the functionality of IsBottommostLevel()
270   // function -- determines if compaction with inputs and storage is bottommost
271   static bool TEST_IsBottommostLevel(
272       int output_level, VersionStorageInfo* vstorage,
273       const std::vector<CompactionInputFiles>& inputs);
274 
GetOutputTableProperties()275   TablePropertiesCollection GetOutputTableProperties() const {
276     return output_table_properties_;
277   }
278 
SetOutputTableProperties(TablePropertiesCollection tp)279   void SetOutputTableProperties(TablePropertiesCollection tp) {
280     output_table_properties_ = std::move(tp);
281   }
282 
GetSmallestUserKey()283   Slice GetSmallestUserKey() const { return smallest_user_key_; }
284 
GetLargestUserKey()285   Slice GetLargestUserKey() const { return largest_user_key_; }
286 
287   int GetInputBaseLevel() const;
288 
compaction_reason()289   CompactionReason compaction_reason() { return compaction_reason_; }
290 
grandparents()291   const std::vector<FileMetaData*>& grandparents() const {
292     return grandparents_;
293   }
294 
max_compaction_bytes()295   uint64_t max_compaction_bytes() const { return max_compaction_bytes_; }
296 
max_subcompactions()297   uint32_t max_subcompactions() const { return max_subcompactions_; }
298 
299   uint64_t MinInputFileOldestAncesterTime() const;
300 
301  private:
302   // mark (or clear) all files that are being compacted
303   void MarkFilesBeingCompacted(bool mark_as_compacted);
304 
305   // get the smallest and largest key present in files to be compacted
306   static void GetBoundaryKeys(VersionStorageInfo* vstorage,
307                               const std::vector<CompactionInputFiles>& inputs,
308                               Slice* smallest_key, Slice* largest_key);
309 
310   // Get the atomic file boundaries for all files in the compaction. Necessary
311   // in order to avoid the scenario described in
312   // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb
313   // down appropriate key boundaries to RangeDelAggregator during compaction.
314   static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries(
315       VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs);
316 
317   // helper function to determine if compaction with inputs and storage is
318   // bottommost
319   static bool IsBottommostLevel(
320       int output_level, VersionStorageInfo* vstorage,
321       const std::vector<CompactionInputFiles>& inputs);
322 
323   static bool IsFullCompaction(VersionStorageInfo* vstorage,
324                                const std::vector<CompactionInputFiles>& inputs);
325 
326   VersionStorageInfo* input_vstorage_;
327 
328   const int start_level_;    // the lowest level to be compacted
329   const int output_level_;  // levels to which output files are stored
330   uint64_t max_output_file_size_;
331   uint64_t max_compaction_bytes_;
332   uint32_t max_subcompactions_;
333   const ImmutableOptions immutable_options_;
334   const MutableCFOptions mutable_cf_options_;
335   Version* input_version_;
336   VersionEdit edit_;
337   const int number_levels_;
338   ColumnFamilyData* cfd_;
339   Arena arena_;          // Arena used to allocate space for file_levels_
340 
341   const uint32_t output_path_id_;
342   CompressionType output_compression_;
343   CompressionOptions output_compression_opts_;
344   // If true, then the compaction can be done by simply deleting input files.
345   const bool deletion_compaction_;
346 
347   // Compaction input files organized by level. Constant after construction
348   const std::vector<CompactionInputFiles> inputs_;
349 
350   // A copy of inputs_, organized more closely in memory
351   autovector<LevelFilesBrief, 2> input_levels_;
352 
353   // State used to check for number of overlapping grandparent files
354   // (grandparent == "output_level_ + 1")
355   std::vector<FileMetaData*> grandparents_;
356   const double score_;         // score that was used to pick this compaction.
357 
358   // Is this compaction creating a file in the bottom most level?
359   const bool bottommost_level_;
360   // Does this compaction include all sst files?
361   const bool is_full_compaction_;
362 
363   // Is this compaction requested by the client?
364   const bool is_manual_compaction_;
365 
366   // True if we can do trivial move in Universal multi level
367   // compaction
368   bool is_trivial_move_;
369 
370   // Does input compression match the output compression?
371   bool InputCompressionMatchesOutput() const;
372 
373   // table properties of output files
374   TablePropertiesCollection output_table_properties_;
375 
376   // smallest user keys in compaction
377   Slice smallest_user_key_;
378 
379   // largest user keys in compaction
380   Slice largest_user_key_;
381 
382   // Reason for compaction
383   CompactionReason compaction_reason_;
384 };
385 
386 // Return sum of sizes of all files in `files`.
387 extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files);
388 
389 }  // namespace ROCKSDB_NAMESPACE
390