1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // 6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 7 // Use of this source code is governed by a BSD-style license that can be 8 // found in the LICENSE file. See the AUTHORS file for names of contributors. 9 10 #pragma once 11 #include "db/version_set.h" 12 #include "memory/arena.h" 13 #include "options/cf_options.h" 14 #include "rocksdb/sst_partitioner.h" 15 #include "util/autovector.h" 16 17 namespace ROCKSDB_NAMESPACE { 18 // The file contains class Compaction, as well as some helper functions 19 // and data structures used by the class. 20 21 // Utility for comparing sstable boundary keys. Returns -1 if either a or b is 22 // null which provides the property that a==null indicates a key that is less 23 // than any key and b==null indicates a key that is greater than any key. Note 24 // that the comparison is performed primarily on the user-key portion of the 25 // key. If the user-keys compare equal, an additional test is made to sort 26 // range tombstone sentinel keys before other keys with the same user-key. The 27 // result is that 2 user-keys will compare equal if they differ purely on 28 // their sequence number and value, but the range tombstone sentinel for that 29 // user-key will compare not equal. This is necessary because the range 30 // tombstone sentinel key is set as the largest key for an sstable even though 31 // that key never appears in the database. We don't want adjacent sstables to 32 // be considered overlapping if they are separated by the range tombstone 33 // sentinel. 34 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, 35 const InternalKey& b); 36 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a, 37 const InternalKey& b); 38 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, 39 const InternalKey* b); 40 41 // An AtomicCompactionUnitBoundary represents a range of keys [smallest, 42 // largest] that exactly spans one ore more neighbouring SSTs on the same 43 // level. Every pair of SSTs in this range "overlap" (i.e., the largest 44 // user key of one file is the smallest user key of the next file). These 45 // boundaries are propagated down to RangeDelAggregator during compaction 46 // to provide safe truncation boundaries for range tombstones. 47 struct AtomicCompactionUnitBoundary { 48 const InternalKey* smallest = nullptr; 49 const InternalKey* largest = nullptr; 50 }; 51 52 // The structure that manages compaction input files associated 53 // with the same physical level. 54 struct CompactionInputFiles { 55 int level; 56 std::vector<FileMetaData*> files; 57 std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries; emptyCompactionInputFiles58 inline bool empty() const { return files.empty(); } sizeCompactionInputFiles59 inline size_t size() const { return files.size(); } clearCompactionInputFiles60 inline void clear() { files.clear(); } 61 inline FileMetaData* operator[](size_t i) const { return files[i]; } 62 }; 63 64 class Version; 65 class ColumnFamilyData; 66 class VersionStorageInfo; 67 class CompactionFilter; 68 69 // A Compaction encapsulates metadata about a compaction. 70 class Compaction { 71 public: 72 Compaction(VersionStorageInfo* input_version, 73 const ImmutableOptions& immutable_options, 74 const MutableCFOptions& mutable_cf_options, 75 const MutableDBOptions& mutable_db_options, 76 std::vector<CompactionInputFiles> inputs, int output_level, 77 uint64_t target_file_size, uint64_t max_compaction_bytes, 78 uint32_t output_path_id, CompressionType compression, 79 CompressionOptions compression_opts, uint32_t max_subcompactions, 80 std::vector<FileMetaData*> grandparents, 81 bool manual_compaction = false, double score = -1, 82 bool deletion_compaction = false, 83 CompactionReason compaction_reason = CompactionReason::kUnknown); 84 85 // No copying allowed 86 Compaction(const Compaction&) = delete; 87 void operator=(const Compaction&) = delete; 88 89 ~Compaction(); 90 91 // Returns the level associated to the specified compaction input level. 92 // If compaction_input_level is not specified, then input_level is set to 0. 93 int level(size_t compaction_input_level = 0) const { 94 return inputs_[compaction_input_level].level; 95 } 96 start_level()97 int start_level() const { return start_level_; } 98 99 // Outputs will go to this level output_level()100 int output_level() const { return output_level_; } 101 102 // Returns the number of input levels in this compaction. num_input_levels()103 size_t num_input_levels() const { return inputs_.size(); } 104 105 // Return the object that holds the edits to the descriptor done 106 // by this compaction. edit()107 VersionEdit* edit() { return &edit_; } 108 109 // Returns the number of input files associated to the specified 110 // compaction input level. 111 // The function will return 0 if when "compaction_input_level" < 0 112 // or "compaction_input_level" >= "num_input_levels()". num_input_files(size_t compaction_input_level)113 size_t num_input_files(size_t compaction_input_level) const { 114 if (compaction_input_level < inputs_.size()) { 115 return inputs_[compaction_input_level].size(); 116 } 117 return 0; 118 } 119 120 // Returns input version of the compaction input_version()121 Version* input_version() const { return input_version_; } 122 123 // Returns the ColumnFamilyData associated with the compaction. column_family_data()124 ColumnFamilyData* column_family_data() const { return cfd_; } 125 126 // Returns the file meta data of the 'i'th input file at the 127 // specified compaction input level. 128 // REQUIREMENT: "compaction_input_level" must be >= 0 and 129 // < "input_levels()" input(size_t compaction_input_level,size_t i)130 FileMetaData* input(size_t compaction_input_level, size_t i) const { 131 assert(compaction_input_level < inputs_.size()); 132 return inputs_[compaction_input_level][i]; 133 } 134 boundaries(size_t compaction_input_level)135 const std::vector<AtomicCompactionUnitBoundary>* boundaries( 136 size_t compaction_input_level) const { 137 assert(compaction_input_level < inputs_.size()); 138 return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries; 139 } 140 141 // Returns the list of file meta data of the specified compaction 142 // input level. 143 // REQUIREMENT: "compaction_input_level" must be >= 0 and 144 // < "input_levels()" inputs(size_t compaction_input_level)145 const std::vector<FileMetaData*>* inputs( 146 size_t compaction_input_level) const { 147 assert(compaction_input_level < inputs_.size()); 148 return &inputs_[compaction_input_level].files; 149 } 150 inputs()151 const std::vector<CompactionInputFiles>* inputs() { return &inputs_; } 152 153 // Returns the LevelFilesBrief of the specified compaction input level. input_levels(size_t compaction_input_level)154 const LevelFilesBrief* input_levels(size_t compaction_input_level) const { 155 return &input_levels_[compaction_input_level]; 156 } 157 158 // Maximum size of files to build during this compaction. max_output_file_size()159 uint64_t max_output_file_size() const { return max_output_file_size_; } 160 161 // What compression for output output_compression()162 CompressionType output_compression() const { return output_compression_; } 163 164 // What compression options for output output_compression_opts()165 const CompressionOptions& output_compression_opts() const { 166 return output_compression_opts_; 167 } 168 169 // Whether need to write output file to second DB path. output_path_id()170 uint32_t output_path_id() const { return output_path_id_; } 171 172 // Is this a trivial compaction that can be implemented by just 173 // moving a single input file to the next level (no merging or splitting) 174 bool IsTrivialMove() const; 175 176 // If true, then the compaction can be done by simply deleting input files. deletion_compaction()177 bool deletion_compaction() const { return deletion_compaction_; } 178 179 // Add all inputs to this compaction as delete operations to *edit. 180 void AddInputDeletions(VersionEdit* edit); 181 182 // Returns true if the available information we have guarantees that 183 // the input "user_key" does not exist in any level beyond "output_level()". 184 bool KeyNotExistsBeyondOutputLevel(const Slice& user_key, 185 std::vector<size_t>* level_ptrs) const; 186 187 // Clear all files to indicate that they are not being compacted 188 // Delete this compaction from the list of running compactions. 189 // 190 // Requirement: DB mutex held 191 void ReleaseCompactionFiles(Status status); 192 193 // Returns the summary of the compaction in "output" with maximum "len" 194 // in bytes. The caller is responsible for the memory management of 195 // "output". 196 void Summary(char* output, int len); 197 198 // Return the score that was used to pick this compaction run. score()199 double score() const { return score_; } 200 201 // Is this compaction creating a file in the bottom most level? bottommost_level()202 bool bottommost_level() const { return bottommost_level_; } 203 204 // Does this compaction include all sst files? is_full_compaction()205 bool is_full_compaction() const { return is_full_compaction_; } 206 207 // Was this compaction triggered manually by the client? is_manual_compaction()208 bool is_manual_compaction() const { return is_manual_compaction_; } 209 210 // Used when allow_trivial_move option is set in 211 // Universal compaction. If all the input files are 212 // non overlapping, then is_trivial_move_ variable 213 // will be set true, else false set_is_trivial_move(bool trivial_move)214 void set_is_trivial_move(bool trivial_move) { 215 is_trivial_move_ = trivial_move; 216 } 217 218 // Used when allow_trivial_move option is set in 219 // Universal compaction. Returns true, if the input files 220 // are non-overlapping and can be trivially moved. is_trivial_move()221 bool is_trivial_move() const { return is_trivial_move_; } 222 223 // How many total levels are there? number_levels()224 int number_levels() const { return number_levels_; } 225 226 // Return the ImmutableCFOptions that should be used throughout the compaction 227 // procedure immutable_cf_options()228 const ImmutableOptions* immutable_cf_options() const { 229 return &immutable_options_; 230 } 231 232 // Return the MutableCFOptions that should be used throughout the compaction 233 // procedure mutable_cf_options()234 const MutableCFOptions* mutable_cf_options() const { 235 return &mutable_cf_options_; 236 } 237 238 // Returns the size in bytes that the output file should be preallocated to. 239 // In level compaction, that is max_file_size_. In universal compaction, that 240 // is the sum of all input file sizes. 241 uint64_t OutputFilePreallocationSize() const; 242 243 void SetInputVersion(Version* input_version); 244 245 struct InputLevelSummaryBuffer { 246 char buffer[128]; 247 }; 248 249 const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const; 250 251 uint64_t CalculateTotalInputSize() const; 252 253 // In case of compaction error, reset the nextIndex that is used 254 // to pick up the next file to be compacted from files_by_size_ 255 void ResetNextCompactionIndex(); 256 257 // Create a CompactionFilter from compaction_filter_factory 258 std::unique_ptr<CompactionFilter> CreateCompactionFilter() const; 259 260 // Create a SstPartitioner from sst_partitioner_factory 261 std::unique_ptr<SstPartitioner> CreateSstPartitioner() const; 262 263 // Is the input level corresponding to output_level_ empty? 264 bool IsOutputLevelEmpty() const; 265 266 // Should this compaction be broken up into smaller ones run in parallel? 267 bool ShouldFormSubcompactions() const; 268 269 // test function to validate the functionality of IsBottommostLevel() 270 // function -- determines if compaction with inputs and storage is bottommost 271 static bool TEST_IsBottommostLevel( 272 int output_level, VersionStorageInfo* vstorage, 273 const std::vector<CompactionInputFiles>& inputs); 274 GetOutputTableProperties()275 TablePropertiesCollection GetOutputTableProperties() const { 276 return output_table_properties_; 277 } 278 SetOutputTableProperties(TablePropertiesCollection tp)279 void SetOutputTableProperties(TablePropertiesCollection tp) { 280 output_table_properties_ = std::move(tp); 281 } 282 GetSmallestUserKey()283 Slice GetSmallestUserKey() const { return smallest_user_key_; } 284 GetLargestUserKey()285 Slice GetLargestUserKey() const { return largest_user_key_; } 286 287 int GetInputBaseLevel() const; 288 compaction_reason()289 CompactionReason compaction_reason() { return compaction_reason_; } 290 grandparents()291 const std::vector<FileMetaData*>& grandparents() const { 292 return grandparents_; 293 } 294 max_compaction_bytes()295 uint64_t max_compaction_bytes() const { return max_compaction_bytes_; } 296 max_subcompactions()297 uint32_t max_subcompactions() const { return max_subcompactions_; } 298 299 uint64_t MinInputFileOldestAncesterTime() const; 300 301 private: 302 // mark (or clear) all files that are being compacted 303 void MarkFilesBeingCompacted(bool mark_as_compacted); 304 305 // get the smallest and largest key present in files to be compacted 306 static void GetBoundaryKeys(VersionStorageInfo* vstorage, 307 const std::vector<CompactionInputFiles>& inputs, 308 Slice* smallest_key, Slice* largest_key); 309 310 // Get the atomic file boundaries for all files in the compaction. Necessary 311 // in order to avoid the scenario described in 312 // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb 313 // down appropriate key boundaries to RangeDelAggregator during compaction. 314 static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries( 315 VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs); 316 317 // helper function to determine if compaction with inputs and storage is 318 // bottommost 319 static bool IsBottommostLevel( 320 int output_level, VersionStorageInfo* vstorage, 321 const std::vector<CompactionInputFiles>& inputs); 322 323 static bool IsFullCompaction(VersionStorageInfo* vstorage, 324 const std::vector<CompactionInputFiles>& inputs); 325 326 VersionStorageInfo* input_vstorage_; 327 328 const int start_level_; // the lowest level to be compacted 329 const int output_level_; // levels to which output files are stored 330 uint64_t max_output_file_size_; 331 uint64_t max_compaction_bytes_; 332 uint32_t max_subcompactions_; 333 const ImmutableOptions immutable_options_; 334 const MutableCFOptions mutable_cf_options_; 335 Version* input_version_; 336 VersionEdit edit_; 337 const int number_levels_; 338 ColumnFamilyData* cfd_; 339 Arena arena_; // Arena used to allocate space for file_levels_ 340 341 const uint32_t output_path_id_; 342 CompressionType output_compression_; 343 CompressionOptions output_compression_opts_; 344 // If true, then the compaction can be done by simply deleting input files. 345 const bool deletion_compaction_; 346 347 // Compaction input files organized by level. Constant after construction 348 const std::vector<CompactionInputFiles> inputs_; 349 350 // A copy of inputs_, organized more closely in memory 351 autovector<LevelFilesBrief, 2> input_levels_; 352 353 // State used to check for number of overlapping grandparent files 354 // (grandparent == "output_level_ + 1") 355 std::vector<FileMetaData*> grandparents_; 356 const double score_; // score that was used to pick this compaction. 357 358 // Is this compaction creating a file in the bottom most level? 359 const bool bottommost_level_; 360 // Does this compaction include all sst files? 361 const bool is_full_compaction_; 362 363 // Is this compaction requested by the client? 364 const bool is_manual_compaction_; 365 366 // True if we can do trivial move in Universal multi level 367 // compaction 368 bool is_trivial_move_; 369 370 // Does input compression match the output compression? 371 bool InputCompressionMatchesOutput() const; 372 373 // table properties of output files 374 TablePropertiesCollection output_table_properties_; 375 376 // smallest user keys in compaction 377 Slice smallest_user_key_; 378 379 // largest user keys in compaction 380 Slice largest_user_key_; 381 382 // Reason for compaction 383 CompactionReason compaction_reason_; 384 }; 385 386 // Return sum of sizes of all files in `files`. 387 extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files); 388 389 } // namespace ROCKSDB_NAMESPACE 390