// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
5 #pragma once
6 
7 #include <stdint.h>
8 
9 #include <map>
10 #include <memory>
11 #include <string>
12 
13 #include "rocksdb/customizable.h"
14 #include "rocksdb/status.h"
15 #include "rocksdb/types.h"
16 
17 namespace ROCKSDB_NAMESPACE {
18 
// -- Table Properties
// Besides the basic table properties, each table may also carry
// user-collected properties.
// The values of the user-collected properties are encoded as raw bytes --
// users have to interpret these values by themselves.
// Note: To do a prefix seek/scan in `UserCollectedProperties`, you can do
// something similar to:
//
// UserCollectedProperties props = ...;
// for (auto pos = props.lower_bound(prefix);
//      pos != props.end() && pos->first.compare(0, prefix.size(), prefix) == 0;
//      ++pos) {
//   ...
// }
using UserCollectedProperties = std::map<std::string, std::string>;
34 
// Human-readable names of the table properties as they appear in the
// property block.
// Every constant is only declared here (static data member without an
// initializer); the definitions that supply the actual strings live outside
// this header.
struct TablePropertiesNames {
  static const std::string kDbId;
  static const std::string kDbSessionId;
  static const std::string kDbHostId;
  static const std::string kOriginalFileNumber;
  static const std::string kDataSize;
  static const std::string kIndexSize;
  static const std::string kIndexPartitions;
  static const std::string kTopLevelIndexSize;
  static const std::string kIndexKeyIsUserKey;
  static const std::string kIndexValueIsDeltaEncoded;
  static const std::string kFilterSize;
  static const std::string kRawKeySize;
  static const std::string kRawValueSize;
  static const std::string kNumDataBlocks;
  static const std::string kNumEntries;
  static const std::string kNumFilterEntries;
  static const std::string kDeletedKeys;
  static const std::string kMergeOperands;
  static const std::string kNumRangeDeletions;
  static const std::string kFormatVersion;
  static const std::string kFixedKeyLen;
  static const std::string kFilterPolicy;
  static const std::string kColumnFamilyName;
  static const std::string kColumnFamilyId;
  static const std::string kComparator;
  static const std::string kMergeOperator;
  static const std::string kPrefixExtractorName;
  static const std::string kPropertyCollectors;
  static const std::string kCompression;
  static const std::string kCompressionOptions;
  static const std::string kCreationTime;
  static const std::string kOldestKeyTime;
  static const std::string kFileCreationTime;
  static const std::string kSlowCompressionEstimatedDataSize;
  static const std::string kFastCompressionEstimatedDataSize;
};
73 
// String constants that are declared here and defined elsewhere.
// NOTE(review): judging by their names these presumably identify the
// properties, compression-dictionary and range-deletion blocks of a table --
// confirm against the definitions.
extern const std::string kPropertiesBlock;
extern const std::string kCompressionDictBlock;
extern const std::string kRangeDelBlock;
77 
78 // `TablePropertiesCollector` provides the mechanism for users to collect
79 // their own properties that they are interested in. This class is essentially
80 // a collection of callback functions that will be invoked during table
81 // building. It is constructed with TablePropertiesCollectorFactory. The methods
82 // don't need to be thread-safe, as we will create exactly one
83 // TablePropertiesCollector object per table and then call it sequentially
84 class TablePropertiesCollector {
85  public:
~TablePropertiesCollector()86   virtual ~TablePropertiesCollector() {}
87 
88   // DEPRECATE User defined collector should implement AddUserKey(), though
89   //           this old function still works for backward compatible reason.
90   // Add() will be called when a new key/value pair is inserted into the table.
91   // @params key    the user key that is inserted into the table.
92   // @params value  the value that is inserted into the table.
Add(const Slice &,const Slice &)93   virtual Status Add(const Slice& /*key*/, const Slice& /*value*/) {
94     return Status::InvalidArgument(
95         "TablePropertiesCollector::Add() deprecated.");
96   }
97 
98   // AddUserKey() will be called when a new key/value pair is inserted into the
99   // table.
100   // @params key    the user key that is inserted into the table.
101   // @params value  the value that is inserted into the table.
AddUserKey(const Slice & key,const Slice & value,EntryType,SequenceNumber,uint64_t)102   virtual Status AddUserKey(const Slice& key, const Slice& value,
103                             EntryType /*type*/, SequenceNumber /*seq*/,
104                             uint64_t /*file_size*/) {
105     // For backwards-compatibility.
106     return Add(key, value);
107   }
108 
109   // Called after each new block is cut
BlockAdd(uint64_t,uint64_t,uint64_t)110   virtual void BlockAdd(uint64_t /* block_raw_bytes */,
111                         uint64_t /* block_compressed_bytes_fast */,
112                         uint64_t /* block_compressed_bytes_slow */) {
113     // Nothing to do here. Callback registers can override.
114     return;
115   }
116 
117   // Finish() will be called when a table has already been built and is ready
118   // for writing the properties block.
119   // @params properties  User will add their collected statistics to
120   // `properties`.
121   virtual Status Finish(UserCollectedProperties* properties) = 0;
122 
123   // Return the human-readable properties, where the key is property name and
124   // the value is the human-readable form of value.
125   virtual UserCollectedProperties GetReadableProperties() const = 0;
126 
127   // The name of the properties collector can be used for debugging purpose.
128   virtual const char* Name() const = 0;
129 
130   // EXPERIMENTAL Return whether the output file should be further compacted
NeedCompact()131   virtual bool NeedCompact() const { return false; }
132 };
133 
134 // Constructs TablePropertiesCollector. Internals create a new
135 // TablePropertiesCollector for each new table
136 class TablePropertiesCollectorFactory : public Customizable {
137  public:
138   struct Context {
139     uint32_t column_family_id;
140     // The level at creating the SST file (i.e, table), of which the
141     // properties are being collected.
142     int level_at_creation = kUnknownLevelAtCreation;
143     static const uint32_t kUnknownColumnFamily;
144     static const int kUnknownLevelAtCreation = -1;
145   };
146 
~TablePropertiesCollectorFactory()147   virtual ~TablePropertiesCollectorFactory() {}
Type()148   static const char* Type() { return "TablePropertiesCollectorFactory"; }
149   static Status CreateFromString(
150       const ConfigOptions& options, const std::string& value,
151       std::shared_ptr<TablePropertiesCollectorFactory>* result);
152 
153   // has to be thread-safe
154   virtual TablePropertiesCollector* CreateTablePropertiesCollector(
155       TablePropertiesCollectorFactory::Context context) = 0;
156 
157   // The name of the properties collector can be used for debugging purpose.
158   virtual const char* Name() const = 0;
159 
160   // Can be overridden by sub-classes to return the Name, followed by
161   // configuration info that will // be logged to the info log when the
162   // DB is opened
ToString()163   virtual std::string ToString() const { return Name(); }
164 };
165 
166 // TableProperties contains a bunch of read-only properties of its associated
167 // table.
168 struct TableProperties {
169  public:
170   // the file number at creation time, or 0 for unknown. When known,
171   // combining with db_session_id must uniquely identify an SST file.
172   uint64_t orig_file_number = 0;
173   // the total size of all data blocks.
174   uint64_t data_size = 0;
175   // the size of index block.
176   uint64_t index_size = 0;
177   // Total number of index partitions if kTwoLevelIndexSearch is used
178   uint64_t index_partitions = 0;
179   // Size of the top-level index if kTwoLevelIndexSearch is used
180   uint64_t top_level_index_size = 0;
181   // Whether the index key is user key. Otherwise it includes 8 byte of sequence
182   // number added by internal key format.
183   uint64_t index_key_is_user_key = 0;
184   // Whether delta encoding is used to encode the index values.
185   uint64_t index_value_is_delta_encoded = 0;
186   // the size of filter block.
187   uint64_t filter_size = 0;
188   // total raw key size
189   uint64_t raw_key_size = 0;
190   // total raw value size
191   uint64_t raw_value_size = 0;
192   // the number of blocks in this table
193   uint64_t num_data_blocks = 0;
194   // the number of entries in this table
195   uint64_t num_entries = 0;
196   // the number of unique entries (keys or prefixes) added to filters
197   uint64_t num_filter_entries = 0;
198   // the number of deletions in the table
199   uint64_t num_deletions = 0;
200   // the number of merge operands in the table
201   uint64_t num_merge_operands = 0;
202   // the number of range deletions in this table
203   uint64_t num_range_deletions = 0;
204   // format version, reserved for backward compatibility
205   uint64_t format_version = 0;
206   // If 0, key is variable length. Otherwise number of bytes for each key.
207   uint64_t fixed_key_len = 0;
208   // ID of column family for this SST file, corresponding to the CF identified
209   // by column_family_name.
210   uint64_t column_family_id = ROCKSDB_NAMESPACE::
211       TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
212   // Timestamp of the latest key. 0 means unknown.
213   // TODO(sagar0): Should be changed to latest_key_time ... but don't know the
214   // full implications of backward compatibility. Hence retaining for now.
215   uint64_t creation_time = 0;
216   // Timestamp of the earliest key. 0 means unknown.
217   uint64_t oldest_key_time = 0;
218   // Actual SST file creation time. 0 means unknown.
219   uint64_t file_creation_time = 0;
220   // Estimated size of data blocks if compressed using a relatively slower
221   // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`).
222   // 0 means unknown.
223   uint64_t slow_compression_estimated_data_size = 0;
224   // Estimated size of data blocks if compressed using a relatively faster
225   // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`).
226   // 0 means unknown.
227   uint64_t fast_compression_estimated_data_size = 0;
228 
229   // DB identity
230   // db_id is an identifier generated the first time the DB is created
231   // If DB identity is unset or unassigned, `db_id` will be an empty string.
232   std::string db_id;
233 
234   // DB session identity
235   // db_session_id is an identifier that gets reset every time the DB is opened
236   // If DB session identity is unset or unassigned, `db_session_id` will be an
237   // empty string.
238   std::string db_session_id;
239 
240   // Location of the machine hosting the DB instance
241   // db_host_id identifies the location of the host in some form
242   // (hostname by default, but can also be any string of the user's choosing).
243   // It can potentially change whenever the DB is opened
244   std::string db_host_id;
245 
246   // Name of the column family with which this SST file is associated.
247   // If column family is unknown, `column_family_name` will be an empty string.
248   std::string column_family_name;
249 
250   // The name of the filter policy used in this table.
251   // If no filter policy is used, `filter_policy_name` will be an empty string.
252   std::string filter_policy_name;
253 
254   // The name of the comparator used in this table.
255   std::string comparator_name;
256 
257   // The name of the merge operator used in this table.
258   // If no merge operator is used, `merge_operator_name` will be "nullptr".
259   std::string merge_operator_name;
260 
261   // The name of the prefix extractor used in this table
262   // If no prefix extractor is used, `prefix_extractor_name` will be "nullptr".
263   std::string prefix_extractor_name;
264 
265   // The names of the property collectors factories used in this table
266   // separated by commas
267   // {collector_name[1]},{collector_name[2]},{collector_name[3]} ..
268   std::string property_collectors_names;
269 
270   // The compression algo used to compress the SST files.
271   std::string compression_name;
272 
273   // Compression options used to compress the SST files.
274   std::string compression_options;
275 
276   // user collected properties
277   UserCollectedProperties user_collected_properties;
278   UserCollectedProperties readable_properties;
279 
280   // The offset of the value of each property in the file.
281   std::map<std::string, uint64_t> properties_offsets;
282 
283   // convert this object to a human readable form
284   //   @prop_delim: delimiter for each property.
285   std::string ToString(const std::string& prop_delim = "; ",
286                        const std::string& kv_delim = "=") const;
287 
288   // Aggregate the numerical member variables of the specified
289   // TableProperties.
290   void Add(const TableProperties& tp);
291 
292   // Subset of properties that make sense when added together
293   // between tables. Keys match field names in this class instead
294   // of using full property names.
295   std::map<std::string, uint64_t> GetAggregatablePropertiesAsMap() const;
296 };
297 
298 // Extra properties
299 // Below is a list of non-basic properties that are collected by database
300 // itself. Especially some properties regarding to the internal keys (which
301 // is unknown to `table`).
302 //
303 // DEPRECATED: these properties now belong as TableProperties members. Please
304 // use TableProperties::num_deletions and TableProperties::num_merge_operands,
305 // respectively.
306 extern uint64_t GetDeletedKeys(const UserCollectedProperties& props);
307 extern uint64_t GetMergeOperands(const UserCollectedProperties& props,
308                                  bool* property_present);
309 
310 }  // namespace ROCKSDB_NAMESPACE
311