1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // 6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 7 // Use of this source code is governed by a BSD-style license that can be 8 // found in the LICENSE file. See the AUTHORS file for names of contributors. 9 10 #pragma once 11 #include "db/version_set.h" 12 #include "memory/arena.h" 13 #include "options/cf_options.h" 14 #include "util/autovector.h" 15 16 namespace ROCKSDB_NAMESPACE { 17 // The file contains class Compaction, as well as some helper functions 18 // and data structures used by the class. 19 20 // Utility for comparing sstable boundary keys. Returns -1 if either a or b is 21 // null which provides the property that a==null indicates a key that is less 22 // than any key and b==null indicates a key that is greater than any key. Note 23 // that the comparison is performed primarily on the user-key portion of the 24 // key. If the user-keys compare equal, an additional test is made to sort 25 // range tombstone sentinel keys before other keys with the same user-key. The 26 // result is that 2 user-keys will compare equal if they differ purely on 27 // their sequence number and value, but the range tombstone sentinel for that 28 // user-key will compare not equal. This is necessary because the range 29 // tombstone sentinel key is set as the largest key for an sstable even though 30 // that key never appears in the database. We don't want adjacent sstables to 31 // be considered overlapping if they are separated by the range tombstone 32 // sentinel. 33 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, 34 const InternalKey& b); 35 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a, 36 const InternalKey& b); 37 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, 38 const InternalKey* b); 39 40 // An AtomicCompactionUnitBoundary represents a range of keys [smallest, 41 // largest] that exactly spans one ore more neighbouring SSTs on the same 42 // level. Every pair of SSTs in this range "overlap" (i.e., the largest 43 // user key of one file is the smallest user key of the next file). These 44 // boundaries are propagated down to RangeDelAggregator during compaction 45 // to provide safe truncation boundaries for range tombstones. 46 struct AtomicCompactionUnitBoundary { 47 const InternalKey* smallest = nullptr; 48 const InternalKey* largest = nullptr; 49 }; 50 51 // The structure that manages compaction input files associated 52 // with the same physical level. 53 struct CompactionInputFiles { 54 int level; 55 std::vector<FileMetaData*> files; 56 std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries; emptyCompactionInputFiles57 inline bool empty() const { return files.empty(); } sizeCompactionInputFiles58 inline size_t size() const { return files.size(); } clearCompactionInputFiles59 inline void clear() { files.clear(); } 60 inline FileMetaData* operator[](size_t i) const { return files[i]; } 61 }; 62 63 class Version; 64 class ColumnFamilyData; 65 class VersionStorageInfo; 66 class CompactionFilter; 67 68 // A Compaction encapsulates metadata about a compaction. 69 class Compaction { 70 public: 71 Compaction(VersionStorageInfo* input_version, 72 const ImmutableCFOptions& immutable_cf_options, 73 const MutableCFOptions& mutable_cf_options, 74 std::vector<CompactionInputFiles> inputs, int output_level, 75 uint64_t target_file_size, uint64_t max_compaction_bytes, 76 uint32_t output_path_id, CompressionType compression, 77 CompressionOptions compression_opts, uint32_t max_subcompactions, 78 std::vector<FileMetaData*> grandparents, 79 bool manual_compaction = false, double score = -1, 80 bool deletion_compaction = false, 81 CompactionReason compaction_reason = CompactionReason::kUnknown); 82 83 // No copying allowed 84 Compaction(const Compaction&) = delete; 85 void operator=(const Compaction&) = delete; 86 87 ~Compaction(); 88 89 // Returns the level associated to the specified compaction input level. 90 // If compaction_input_level is not specified, then input_level is set to 0. 91 int level(size_t compaction_input_level = 0) const { 92 return inputs_[compaction_input_level].level; 93 } 94 start_level()95 int start_level() const { return start_level_; } 96 97 // Outputs will go to this level output_level()98 int output_level() const { return output_level_; } 99 100 // Returns the number of input levels in this compaction. num_input_levels()101 size_t num_input_levels() const { return inputs_.size(); } 102 103 // Return the object that holds the edits to the descriptor done 104 // by this compaction. edit()105 VersionEdit* edit() { return &edit_; } 106 107 // Returns the number of input files associated to the specified 108 // compaction input level. 109 // The function will return 0 if when "compaction_input_level" < 0 110 // or "compaction_input_level" >= "num_input_levels()". num_input_files(size_t compaction_input_level)111 size_t num_input_files(size_t compaction_input_level) const { 112 if (compaction_input_level < inputs_.size()) { 113 return inputs_[compaction_input_level].size(); 114 } 115 return 0; 116 } 117 118 // Returns input version of the compaction input_version()119 Version* input_version() const { return input_version_; } 120 121 // Returns the ColumnFamilyData associated with the compaction. column_family_data()122 ColumnFamilyData* column_family_data() const { return cfd_; } 123 124 // Returns the file meta data of the 'i'th input file at the 125 // specified compaction input level. 126 // REQUIREMENT: "compaction_input_level" must be >= 0 and 127 // < "input_levels()" input(size_t compaction_input_level,size_t i)128 FileMetaData* input(size_t compaction_input_level, size_t i) const { 129 assert(compaction_input_level < inputs_.size()); 130 return inputs_[compaction_input_level][i]; 131 } 132 boundaries(size_t compaction_input_level)133 const std::vector<AtomicCompactionUnitBoundary>* boundaries( 134 size_t compaction_input_level) const { 135 assert(compaction_input_level < inputs_.size()); 136 return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries; 137 } 138 139 // Returns the list of file meta data of the specified compaction 140 // input level. 141 // REQUIREMENT: "compaction_input_level" must be >= 0 and 142 // < "input_levels()" inputs(size_t compaction_input_level)143 const std::vector<FileMetaData*>* inputs( 144 size_t compaction_input_level) const { 145 assert(compaction_input_level < inputs_.size()); 146 return &inputs_[compaction_input_level].files; 147 } 148 inputs()149 const std::vector<CompactionInputFiles>* inputs() { return &inputs_; } 150 151 // Returns the LevelFilesBrief of the specified compaction input level. input_levels(size_t compaction_input_level)152 const LevelFilesBrief* input_levels(size_t compaction_input_level) const { 153 return &input_levels_[compaction_input_level]; 154 } 155 156 // Maximum size of files to build during this compaction. max_output_file_size()157 uint64_t max_output_file_size() const { return max_output_file_size_; } 158 159 // What compression for output output_compression()160 CompressionType output_compression() const { return output_compression_; } 161 162 // What compression options for output output_compression_opts()163 CompressionOptions output_compression_opts() const { 164 return output_compression_opts_; 165 } 166 167 // Whether need to write output file to second DB path. output_path_id()168 uint32_t output_path_id() const { return output_path_id_; } 169 170 // Is this a trivial compaction that can be implemented by just 171 // moving a single input file to the next level (no merging or splitting) 172 bool IsTrivialMove() const; 173 174 // If true, then the compaction can be done by simply deleting input files. deletion_compaction()175 bool deletion_compaction() const { return deletion_compaction_; } 176 177 // Add all inputs to this compaction as delete operations to *edit. 178 void AddInputDeletions(VersionEdit* edit); 179 180 // Returns true if the available information we have guarantees that 181 // the input "user_key" does not exist in any level beyond "output_level()". 182 bool KeyNotExistsBeyondOutputLevel(const Slice& user_key, 183 std::vector<size_t>* level_ptrs) const; 184 185 // Clear all files to indicate that they are not being compacted 186 // Delete this compaction from the list of running compactions. 187 // 188 // Requirement: DB mutex held 189 void ReleaseCompactionFiles(Status status); 190 191 // Returns the summary of the compaction in "output" with maximum "len" 192 // in bytes. The caller is responsible for the memory management of 193 // "output". 194 void Summary(char* output, int len); 195 196 // Return the score that was used to pick this compaction run. score()197 double score() const { return score_; } 198 199 // Is this compaction creating a file in the bottom most level? bottommost_level()200 bool bottommost_level() const { return bottommost_level_; } 201 202 // Does this compaction include all sst files? is_full_compaction()203 bool is_full_compaction() const { return is_full_compaction_; } 204 205 // Was this compaction triggered manually by the client? is_manual_compaction()206 bool is_manual_compaction() const { return is_manual_compaction_; } 207 208 // Used when allow_trivial_move option is set in 209 // Universal compaction. If all the input files are 210 // non overlapping, then is_trivial_move_ variable 211 // will be set true, else false set_is_trivial_move(bool trivial_move)212 void set_is_trivial_move(bool trivial_move) { 213 is_trivial_move_ = trivial_move; 214 } 215 216 // Used when allow_trivial_move option is set in 217 // Universal compaction. Returns true, if the input files 218 // are non-overlapping and can be trivially moved. is_trivial_move()219 bool is_trivial_move() const { return is_trivial_move_; } 220 221 // How many total levels are there? number_levels()222 int number_levels() const { return number_levels_; } 223 224 // Return the ImmutableCFOptions that should be used throughout the compaction 225 // procedure immutable_cf_options()226 const ImmutableCFOptions* immutable_cf_options() const { 227 return &immutable_cf_options_; 228 } 229 230 // Return the MutableCFOptions that should be used throughout the compaction 231 // procedure mutable_cf_options()232 const MutableCFOptions* mutable_cf_options() const { 233 return &mutable_cf_options_; 234 } 235 236 // Returns the size in bytes that the output file should be preallocated to. 237 // In level compaction, that is max_file_size_. In universal compaction, that 238 // is the sum of all input file sizes. 239 uint64_t OutputFilePreallocationSize() const; 240 241 void SetInputVersion(Version* input_version); 242 243 struct InputLevelSummaryBuffer { 244 char buffer[128]; 245 }; 246 247 const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const; 248 249 uint64_t CalculateTotalInputSize() const; 250 251 // In case of compaction error, reset the nextIndex that is used 252 // to pick up the next file to be compacted from files_by_size_ 253 void ResetNextCompactionIndex(); 254 255 // Create a CompactionFilter from compaction_filter_factory 256 std::unique_ptr<CompactionFilter> CreateCompactionFilter() const; 257 258 // Is the input level corresponding to output_level_ empty? 259 bool IsOutputLevelEmpty() const; 260 261 // Should this compaction be broken up into smaller ones run in parallel? 262 bool ShouldFormSubcompactions() const; 263 264 // test function to validate the functionality of IsBottommostLevel() 265 // function -- determines if compaction with inputs and storage is bottommost 266 static bool TEST_IsBottommostLevel( 267 int output_level, VersionStorageInfo* vstorage, 268 const std::vector<CompactionInputFiles>& inputs); 269 GetOutputTableProperties()270 TablePropertiesCollection GetOutputTableProperties() const { 271 return output_table_properties_; 272 } 273 SetOutputTableProperties(TablePropertiesCollection tp)274 void SetOutputTableProperties(TablePropertiesCollection tp) { 275 output_table_properties_ = std::move(tp); 276 } 277 GetSmallestUserKey()278 Slice GetSmallestUserKey() const { return smallest_user_key_; } 279 GetLargestUserKey()280 Slice GetLargestUserKey() const { return largest_user_key_; } 281 282 int GetInputBaseLevel() const; 283 compaction_reason()284 CompactionReason compaction_reason() { return compaction_reason_; } 285 grandparents()286 const std::vector<FileMetaData*>& grandparents() const { 287 return grandparents_; 288 } 289 max_compaction_bytes()290 uint64_t max_compaction_bytes() const { return max_compaction_bytes_; } 291 max_subcompactions()292 uint32_t max_subcompactions() const { return max_subcompactions_; } 293 294 uint64_t MinInputFileOldestAncesterTime() const; 295 296 private: 297 // mark (or clear) all files that are being compacted 298 void MarkFilesBeingCompacted(bool mark_as_compacted); 299 300 // get the smallest and largest key present in files to be compacted 301 static void GetBoundaryKeys(VersionStorageInfo* vstorage, 302 const std::vector<CompactionInputFiles>& inputs, 303 Slice* smallest_key, Slice* largest_key); 304 305 // Get the atomic file boundaries for all files in the compaction. Necessary 306 // in order to avoid the scenario described in 307 // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb 308 // down appropriate key boundaries to RangeDelAggregator during compaction. 309 static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries( 310 VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs); 311 312 // helper function to determine if compaction with inputs and storage is 313 // bottommost 314 static bool IsBottommostLevel( 315 int output_level, VersionStorageInfo* vstorage, 316 const std::vector<CompactionInputFiles>& inputs); 317 318 static bool IsFullCompaction(VersionStorageInfo* vstorage, 319 const std::vector<CompactionInputFiles>& inputs); 320 321 VersionStorageInfo* input_vstorage_; 322 323 const int start_level_; // the lowest level to be compacted 324 const int output_level_; // levels to which output files are stored 325 uint64_t max_output_file_size_; 326 uint64_t max_compaction_bytes_; 327 uint32_t max_subcompactions_; 328 const ImmutableCFOptions immutable_cf_options_; 329 const MutableCFOptions mutable_cf_options_; 330 Version* input_version_; 331 VersionEdit edit_; 332 const int number_levels_; 333 ColumnFamilyData* cfd_; 334 Arena arena_; // Arena used to allocate space for file_levels_ 335 336 const uint32_t output_path_id_; 337 CompressionType output_compression_; 338 CompressionOptions output_compression_opts_; 339 // If true, then the comaction can be done by simply deleting input files. 340 const bool deletion_compaction_; 341 342 // Compaction input files organized by level. Constant after construction 343 const std::vector<CompactionInputFiles> inputs_; 344 345 // A copy of inputs_, organized more closely in memory 346 autovector<LevelFilesBrief, 2> input_levels_; 347 348 // State used to check for number of overlapping grandparent files 349 // (grandparent == "output_level_ + 1") 350 std::vector<FileMetaData*> grandparents_; 351 const double score_; // score that was used to pick this compaction. 352 353 // Is this compaction creating a file in the bottom most level? 354 const bool bottommost_level_; 355 // Does this compaction include all sst files? 356 const bool is_full_compaction_; 357 358 // Is this compaction requested by the client? 359 const bool is_manual_compaction_; 360 361 // True if we can do trivial move in Universal multi level 362 // compaction 363 bool is_trivial_move_; 364 365 // Does input compression match the output compression? 366 bool InputCompressionMatchesOutput() const; 367 368 // table properties of output files 369 TablePropertiesCollection output_table_properties_; 370 371 // smallest user keys in compaction 372 Slice smallest_user_key_; 373 374 // largest user keys in compaction 375 Slice largest_user_key_; 376 377 // Reason for compaction 378 CompactionReason compaction_reason_; 379 }; 380 381 // Return sum of sizes of all files in `files`. 382 extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files); 383 384 } // namespace ROCKSDB_NAMESPACE 385