1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 5 #pragma once 6 7 #include <stdint.h> 8 9 #include <map> 10 #include <memory> 11 #include <string> 12 13 #include "rocksdb/customizable.h" 14 #include "rocksdb/status.h" 15 #include "rocksdb/types.h" 16 17 namespace ROCKSDB_NAMESPACE { 18 19 // -- Table Properties 20 // Other than basic table properties, each table may also have the user 21 // collected properties. 22 // The value of the user-collected properties are encoded as raw bytes -- 23 // users have to interpret these values by themselves. 24 // Note: To do prefix seek/scan in `UserCollectedProperties`, you can do 25 // something similar to: 26 // 27 // UserCollectedProperties props = ...; 28 // for (auto pos = props.lower_bound(prefix); 29 // pos != props.end() && pos->first.compare(0, prefix.size(), prefix) == 0; 30 // ++pos) { 31 // ... 32 // } 33 using UserCollectedProperties = std::map<std::string, std::string>; 34 35 // table properties' human-readable names in the property block. 
// Holds the names under which each built-in table property is stored. Every
// member is a declaration only; the string values are defined out of line,
// outside this header.
struct TablePropertiesNames {
  static const std::string kDbId;
  static const std::string kDbSessionId;
  static const std::string kDbHostId;
  static const std::string kOriginalFileNumber;
  static const std::string kDataSize;
  static const std::string kIndexSize;
  static const std::string kIndexPartitions;
  static const std::string kTopLevelIndexSize;
  static const std::string kIndexKeyIsUserKey;
  static const std::string kIndexValueIsDeltaEncoded;
  static const std::string kFilterSize;
  static const std::string kRawKeySize;
  static const std::string kRawValueSize;
  static const std::string kNumDataBlocks;
  static const std::string kNumEntries;
  static const std::string kNumFilterEntries;
  static const std::string kDeletedKeys;
  static const std::string kMergeOperands;
  static const std::string kNumRangeDeletions;
  static const std::string kFormatVersion;
  static const std::string kFixedKeyLen;
  static const std::string kFilterPolicy;
  static const std::string kColumnFamilyName;
  static const std::string kColumnFamilyId;
  static const std::string kComparator;
  static const std::string kMergeOperator;
  static const std::string kPrefixExtractorName;
  static const std::string kPropertyCollectors;
  static const std::string kCompression;
  static const std::string kCompressionOptions;
  static const std::string kCreationTime;
  static const std::string kOldestKeyTime;
  static const std::string kFileCreationTime;
  static const std::string kSlowCompressionEstimatedDataSize;
  static const std::string kFastCompressionEstimatedDataSize;
};

// Block-name constants, declared here and defined elsewhere.
extern const std::string kPropertiesBlock;
extern const std::string kCompressionDictBlock;
extern const std::string kRangeDelBlock;

// `TablePropertiesCollector` provides the mechanism for users to collect
// their own properties that they are interested in.
This class is essentially 80 // a collection of callback functions that will be invoked during table 81 // building. It is constructed with TablePropertiesCollectorFactory. The methods 82 // don't need to be thread-safe, as we will create exactly one 83 // TablePropertiesCollector object per table and then call it sequentially 84 class TablePropertiesCollector { 85 public: ~TablePropertiesCollector()86 virtual ~TablePropertiesCollector() {} 87 88 // DEPRECATE User defined collector should implement AddUserKey(), though 89 // this old function still works for backward compatible reason. 90 // Add() will be called when a new key/value pair is inserted into the table. 91 // @params key the user key that is inserted into the table. 92 // @params value the value that is inserted into the table. Add(const Slice &,const Slice &)93 virtual Status Add(const Slice& /*key*/, const Slice& /*value*/) { 94 return Status::InvalidArgument( 95 "TablePropertiesCollector::Add() deprecated."); 96 } 97 98 // AddUserKey() will be called when a new key/value pair is inserted into the 99 // table. 100 // @params key the user key that is inserted into the table. 101 // @params value the value that is inserted into the table. AddUserKey(const Slice & key,const Slice & value,EntryType,SequenceNumber,uint64_t)102 virtual Status AddUserKey(const Slice& key, const Slice& value, 103 EntryType /*type*/, SequenceNumber /*seq*/, 104 uint64_t /*file_size*/) { 105 // For backwards-compatibility. 106 return Add(key, value); 107 } 108 109 // Called after each new block is cut BlockAdd(uint64_t,uint64_t,uint64_t)110 virtual void BlockAdd(uint64_t /* block_raw_bytes */, 111 uint64_t /* block_compressed_bytes_fast */, 112 uint64_t /* block_compressed_bytes_slow */) { 113 // Nothing to do here. Callback registers can override. 114 return; 115 } 116 117 // Finish() will be called when a table has already been built and is ready 118 // for writing the properties block. 
119 // @params properties User will add their collected statistics to 120 // `properties`. 121 virtual Status Finish(UserCollectedProperties* properties) = 0; 122 123 // Return the human-readable properties, where the key is property name and 124 // the value is the human-readable form of value. 125 virtual UserCollectedProperties GetReadableProperties() const = 0; 126 127 // The name of the properties collector can be used for debugging purpose. 128 virtual const char* Name() const = 0; 129 130 // EXPERIMENTAL Return whether the output file should be further compacted NeedCompact()131 virtual bool NeedCompact() const { return false; } 132 }; 133 134 // Constructs TablePropertiesCollector. Internals create a new 135 // TablePropertiesCollector for each new table 136 class TablePropertiesCollectorFactory : public Customizable { 137 public: 138 struct Context { 139 uint32_t column_family_id; 140 // The level at creating the SST file (i.e, table), of which the 141 // properties are being collected. 142 int level_at_creation = kUnknownLevelAtCreation; 143 static const uint32_t kUnknownColumnFamily; 144 static const int kUnknownLevelAtCreation = -1; 145 }; 146 ~TablePropertiesCollectorFactory()147 virtual ~TablePropertiesCollectorFactory() {} Type()148 static const char* Type() { return "TablePropertiesCollectorFactory"; } 149 static Status CreateFromString( 150 const ConfigOptions& options, const std::string& value, 151 std::shared_ptr<TablePropertiesCollectorFactory>* result); 152 153 // has to be thread-safe 154 virtual TablePropertiesCollector* CreateTablePropertiesCollector( 155 TablePropertiesCollectorFactory::Context context) = 0; 156 157 // The name of the properties collector can be used for debugging purpose. 
158 virtual const char* Name() const = 0; 159 160 // Can be overridden by sub-classes to return the Name, followed by 161 // configuration info that will // be logged to the info log when the 162 // DB is opened ToString()163 virtual std::string ToString() const { return Name(); } 164 }; 165 166 // TableProperties contains a bunch of read-only properties of its associated 167 // table. 168 struct TableProperties { 169 public: 170 // the file number at creation time, or 0 for unknown. When known, 171 // combining with db_session_id must uniquely identify an SST file. 172 uint64_t orig_file_number = 0; 173 // the total size of all data blocks. 174 uint64_t data_size = 0; 175 // the size of index block. 176 uint64_t index_size = 0; 177 // Total number of index partitions if kTwoLevelIndexSearch is used 178 uint64_t index_partitions = 0; 179 // Size of the top-level index if kTwoLevelIndexSearch is used 180 uint64_t top_level_index_size = 0; 181 // Whether the index key is user key. Otherwise it includes 8 byte of sequence 182 // number added by internal key format. 183 uint64_t index_key_is_user_key = 0; 184 // Whether delta encoding is used to encode the index values. 185 uint64_t index_value_is_delta_encoded = 0; 186 // the size of filter block. 
187 uint64_t filter_size = 0; 188 // total raw key size 189 uint64_t raw_key_size = 0; 190 // total raw value size 191 uint64_t raw_value_size = 0; 192 // the number of blocks in this table 193 uint64_t num_data_blocks = 0; 194 // the number of entries in this table 195 uint64_t num_entries = 0; 196 // the number of unique entries (keys or prefixes) added to filters 197 uint64_t num_filter_entries = 0; 198 // the number of deletions in the table 199 uint64_t num_deletions = 0; 200 // the number of merge operands in the table 201 uint64_t num_merge_operands = 0; 202 // the number of range deletions in this table 203 uint64_t num_range_deletions = 0; 204 // format version, reserved for backward compatibility 205 uint64_t format_version = 0; 206 // If 0, key is variable length. Otherwise number of bytes for each key. 207 uint64_t fixed_key_len = 0; 208 // ID of column family for this SST file, corresponding to the CF identified 209 // by column_family_name. 210 uint64_t column_family_id = ROCKSDB_NAMESPACE:: 211 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; 212 // Timestamp of the latest key. 0 means unknown. 213 // TODO(sagar0): Should be changed to latest_key_time ... but don't know the 214 // full implications of backward compatibility. Hence retaining for now. 215 uint64_t creation_time = 0; 216 // Timestamp of the earliest key. 0 means unknown. 217 uint64_t oldest_key_time = 0; 218 // Actual SST file creation time. 0 means unknown. 219 uint64_t file_creation_time = 0; 220 // Estimated size of data blocks if compressed using a relatively slower 221 // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). 222 // 0 means unknown. 223 uint64_t slow_compression_estimated_data_size = 0; 224 // Estimated size of data blocks if compressed using a relatively faster 225 // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). 226 // 0 means unknown. 
227 uint64_t fast_compression_estimated_data_size = 0; 228 229 // DB identity 230 // db_id is an identifier generated the first time the DB is created 231 // If DB identity is unset or unassigned, `db_id` will be an empty string. 232 std::string db_id; 233 234 // DB session identity 235 // db_session_id is an identifier that gets reset every time the DB is opened 236 // If DB session identity is unset or unassigned, `db_session_id` will be an 237 // empty string. 238 std::string db_session_id; 239 240 // Location of the machine hosting the DB instance 241 // db_host_id identifies the location of the host in some form 242 // (hostname by default, but can also be any string of the user's choosing). 243 // It can potentially change whenever the DB is opened 244 std::string db_host_id; 245 246 // Name of the column family with which this SST file is associated. 247 // If column family is unknown, `column_family_name` will be an empty string. 248 std::string column_family_name; 249 250 // The name of the filter policy used in this table. 251 // If no filter policy is used, `filter_policy_name` will be an empty string. 252 std::string filter_policy_name; 253 254 // The name of the comparator used in this table. 255 std::string comparator_name; 256 257 // The name of the merge operator used in this table. 258 // If no merge operator is used, `merge_operator_name` will be "nullptr". 259 std::string merge_operator_name; 260 261 // The name of the prefix extractor used in this table 262 // If no prefix extractor is used, `prefix_extractor_name` will be "nullptr". 263 std::string prefix_extractor_name; 264 265 // The names of the property collectors factories used in this table 266 // separated by commas 267 // {collector_name[1]},{collector_name[2]},{collector_name[3]} .. 268 std::string property_collectors_names; 269 270 // The compression algo used to compress the SST files. 271 std::string compression_name; 272 273 // Compression options used to compress the SST files. 
274 std::string compression_options; 275 276 // user collected properties 277 UserCollectedProperties user_collected_properties; 278 UserCollectedProperties readable_properties; 279 280 // The offset of the value of each property in the file. 281 std::map<std::string, uint64_t> properties_offsets; 282 283 // convert this object to a human readable form 284 // @prop_delim: delimiter for each property. 285 std::string ToString(const std::string& prop_delim = "; ", 286 const std::string& kv_delim = "=") const; 287 288 // Aggregate the numerical member variables of the specified 289 // TableProperties. 290 void Add(const TableProperties& tp); 291 292 // Subset of properties that make sense when added together 293 // between tables. Keys match field names in this class instead 294 // of using full property names. 295 std::map<std::string, uint64_t> GetAggregatablePropertiesAsMap() const; 296 }; 297 298 // Extra properties 299 // Below is a list of non-basic properties that are collected by database 300 // itself. Especially some properties regarding to the internal keys (which 301 // is unknown to `table`). 302 // 303 // DEPRECATED: these properties now belong as TableProperties members. Please 304 // use TableProperties::num_deletions and TableProperties::num_merge_operands, 305 // respectively. 306 extern uint64_t GetDeletedKeys(const UserCollectedProperties& props); 307 extern uint64_t GetMergeOperands(const UserCollectedProperties& props, 308 bool* property_present); 309 310 } // namespace ROCKSDB_NAMESPACE 311