1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #include <map>
9 #include <set>
10 #include <vector>
11 
12 #include "db/dbformat.h"
13 #include "rocksdb/env.h"
14 #include "rocksdb/utilities/sim_cache.h"
15 #include "trace_replay/block_cache_tracer.h"
16 #include "utilities/simulator_cache/cache_simulator.h"
17 
18 namespace ROCKSDB_NAMESPACE {
19 
20 // Statistics of a key refereneced by a Get.
21 struct GetKeyInfo {
22   uint64_t key_id = 0;
23   std::vector<uint64_t> access_sequence_number_timeline;
24   std::vector<uint64_t> access_timeline;
25 
26   void AddAccess(const BlockCacheTraceRecord& access,
27                  uint64_t access_sequnce_number) {
28     access_sequence_number_timeline.push_back(access_sequnce_number);
29     access_timeline.push_back(access.access_timestamp);
30   }
31 };
32 
33 // Statistics of a block.
34 struct BlockAccessInfo {
35   uint64_t block_id = 0;
36   uint64_t table_id = 0;
37   uint64_t block_offset = 0;
38   uint64_t num_accesses = 0;
39   uint64_t block_size = 0;
pairingheap_SpGistSearchItem_cmp(const pairingheap_node * a,const pairingheap_node * b,void * arg)40   uint64_t first_access_time = 0;
41   uint64_t last_access_time = 0;
42   uint64_t num_keys = 0;
43   std::map<std::string, std::map<TableReaderCaller, uint64_t>>
44       key_num_access_map;  // for keys exist in this block.
45   std::map<std::string, std::map<TableReaderCaller, uint64_t>>
46       non_exist_key_num_access_map;  // for keys do not exist in this block.
47   uint64_t num_referenced_key_exist_in_block = 0;
48   uint64_t referenced_data_size = 0;
49   std::map<TableReaderCaller, uint64_t> caller_num_access_map;
50   // caller:timestamp:number_of_accesses. The granularity of the timestamp is
51   // seconds.
52   std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
53       caller_num_accesses_timeline;
54   // Unique blocks since the last access.
55   std::set<std::string> unique_blocks_since_last_access;
56   // Number of reuses grouped by reuse distance.
57   std::map<uint64_t, uint64_t> reuse_distance_count;
58 
59   // The access sequence numbers of this block.
60   std::vector<uint64_t> access_sequence_number_timeline;
61   std::map<TableReaderCaller, std::vector<uint64_t>>
62       caller_access_sequence__number_timeline;
63   // The access timestamp in microseconds of this block.
64   std::vector<uint64_t> access_timeline;
65   std::map<TableReaderCaller, std::vector<uint64_t>> caller_access_timeline;
66 
67   void AddAccess(const BlockCacheTraceRecord& access,
68                  uint64_t access_sequnce_number) {
69     if (block_size != 0 && access.block_size != 0) {
70       assert(block_size == access.block_size);
71     }
72     if (num_keys != 0 && access.num_keys_in_block != 0) {
73       assert(num_keys == access.num_keys_in_block);
74     }
75     if (first_access_time == 0) {
76       first_access_time = access.access_timestamp;
77     }
78     table_id = BlockCacheTraceHelper::GetTableId(access);
79     block_offset = BlockCacheTraceHelper::GetBlockOffsetInFile(access);
80     last_access_time = access.access_timestamp;
81     block_size = access.block_size;
82     caller_num_access_map[access.caller]++;
spgFreeSearchItem(SpGistScanOpaque so,SpGistSearchItem * item)83     num_accesses++;
84     // access.access_timestamp is in microsecond.
85     const uint64_t timestamp_in_seconds =
86         access.access_timestamp / kMicrosInSecond;
87     caller_num_accesses_timeline[access.caller][timestamp_in_seconds] += 1;
88     // Populate the feature vectors.
89     access_sequence_number_timeline.push_back(access_sequnce_number);
90     caller_access_sequence__number_timeline[access.caller].push_back(
91         access_sequnce_number);
92     access_timeline.push_back(access.access_timestamp);
93     caller_access_timeline[access.caller].push_back(access.access_timestamp);
94     if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(access.block_type,
95                                                           access.caller)) {
96       num_keys = access.num_keys_in_block;
97       if (access.referenced_key_exist_in_block == Boolean::kTrue) {
98         if (key_num_access_map.find(access.referenced_key) ==
99             key_num_access_map.end()) {
100           referenced_data_size += access.referenced_data_size;
spgAddSearchItemToQueue(SpGistScanOpaque so,SpGistSearchItem * item)101         }
102         key_num_access_map[access.referenced_key][access.caller]++;
103         num_referenced_key_exist_in_block++;
104         if (referenced_data_size > block_size && block_size != 0) {
105           ParsedInternalKey internal_key;
106           ParseInternalKey(access.referenced_key, &internal_key);
spgAllocSearchItem(SpGistScanOpaque so,bool isnull,double * distances)107         }
108       } else {
109         non_exist_key_num_access_map[access.referenced_key][access.caller]++;
110       }
111     }
112   }
113 };
114 
115 // Aggregates stats of a block given a block type.
116 struct BlockTypeAccessInfoAggregate {
117   std::map<std::string, BlockAccessInfo> block_access_info_map;
118 };
119 
120 // Aggregates BlockTypeAggregate given a SST file.
121 struct SSTFileAccessInfoAggregate {
122   uint32_t level;
spgAddStartItem(SpGistScanOpaque so,bool isnull)123   std::map<TraceType, BlockTypeAccessInfoAggregate> block_type_aggregates_map;
124 };
125 
126 // Aggregates SSTFileAggregate given a column family.
127 struct ColumnFamilyAccessInfoAggregate {
128   std::map<uint64_t, SSTFileAccessInfoAggregate> fd_aggregates_map;
129 };
130 
// Three parallel columns of feature values.
// NOTE(review): presumably filled by
// BlockCacheTraceAnalyzer::UpdateFeatureVectors with one entry per observed
// access; confirm there.
struct Features {
  // Time elapsed since the previous access.
  std::vector<uint64_t> elapsed_time_since_last_access;
  // Number of accesses observed since the previous access.
  std::vector<uint64_t> num_accesses_since_last_access;
  // Number of accesses seen before this one.
  std::vector<uint64_t> num_past_accesses;
};
136 
// Two parallel columns of prediction targets.
// NOTE(review): presumably filled alongside Features by
// BlockCacheTraceAnalyzer::UpdateFeatureVectors; confirm there.
struct Predictions {
  // Time that passes until the next access.
  std::vector<uint64_t> elapsed_time_till_next_access;
  // Number of accesses that happen until the next access.
  std::vector<uint64_t> num_accesses_till_next_access;
};
141 
142 class BlockCacheTraceAnalyzer {
143  public:
144   BlockCacheTraceAnalyzer(
145       const std::string& trace_file_path, const std::string& output_dir,
resetSpGistScanOpaque(SpGistScanOpaque so)146       const std::string& human_readable_trace_file_path,
147       bool compute_reuse_distance, bool mrc_only,
148       bool is_human_readable_trace_file,
149       std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator);
150   ~BlockCacheTraceAnalyzer() = default;
151   // No copy and move.
152   BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete;
153   BlockCacheTraceAnalyzer& operator=(const BlockCacheTraceAnalyzer&) = delete;
154   BlockCacheTraceAnalyzer(BlockCacheTraceAnalyzer&&) = delete;
155   BlockCacheTraceAnalyzer& operator=(BlockCacheTraceAnalyzer&&) = delete;
156 
157   // Read all access records in the given trace_file, maintains the stats of
158   // a block, and aggregates the information by block type, sst file, and column
159   // family. Subsequently, the caller may call Print* functions to print
160   // statistics.
161   Status Analyze();
162 
163   // Print a summary of statistics of the trace, e.g.,
164   // Number of files: 2 Number of blocks: 50 Number of accesses: 50
165   // Number of Index blocks: 10
166   // Number of Filter blocks: 10
167   // Number of Data blocks: 10
168   // Number of UncompressionDict blocks: 10
169   // Number of RangeDeletion blocks: 10
170   // ***************************************************************
171   // Caller Get: Number of accesses 10
172   // Caller Get: Number of accesses per level break down
173   //          Level 0: Number of accesses: 10
174   // Caller Get: Number of accesses per block type break down
175   //          Block Type Index: Number of accesses: 2
176   //          Block Type Filter: Number of accesses: 2
177   //          Block Type Data: Number of accesses: 2
178   //          Block Type UncompressionDict: Number of accesses: 2
179   //          Block Type RangeDeletion: Number of accesses: 2
180   void PrintStatsSummary() const;
181 
182   // Print block size distribution and the distribution break down by block type
183   // and column family.
184   void PrintBlockSizeStats() const;
185 
186   // Print access count distribution and the distribution break down by block
187   // type and column family.
188   void PrintAccessCountStats(bool user_access_only, uint32_t bottom_k,
189                              uint32_t top_k) const;
190 
191   // Print data block accesses by user Get and Multi-Get.
192   // It prints out 1) A histogram on the percentage of keys accessed in a data
193   // block break down by if a referenced key exists in the data block andthe
194   // histogram break down by column family. 2) A histogram on the percentage of
195   // accesses on keys exist in a data block and its break down by column family.
196   void PrintDataBlockAccessStats() const;
197 
198   // Write the percentage of accesses break down by column family into a csv
199   // file saved in 'output_dir'.
spgPrepareScanKeys(IndexScanDesc scan)200   //
201   // The file is named "percentage_of_accesses_summary". The file format is
202   // caller,cf_0,cf_1,...,cf_n where the cf_i is the column family name found in
203   // the trace.
204   void WritePercentAccessSummaryStats() const;
205 
206   // Write the percentage of accesses for the given caller break down by column
207   // family, level, and block type into a csv file saved in 'output_dir'.
208   //
209   // It generates two files: 1) caller_level_percentage_of_accesses_summary and
210   // 2) caller_bt_percentage_of_accesses_summary which break down by the level
211   // and block type, respectively. The file format is
212   // level/bt,cf_0,cf_1,...,cf_n where cf_i is the column family name found in
213   // the trace.
214   void WriteDetailedPercentAccessSummaryStats(TableReaderCaller caller) const;
215 
216   // Write the access count summary into a csv file saved in 'output_dir'.
217   // It groups blocks by their access count.
218   //
219   // It generates two files: 1) cf_access_count_summary and 2)
220   // bt_access_count_summary which break down the access count by column family
221   // and block type, respectively. The file format is
222   // cf/bt,bucket_0,bucket_1,...,bucket_N.
223   void WriteAccessCountSummaryStats(
224       const std::vector<uint64_t>& access_count_buckets,
225       bool user_access_only) const;
226 
227   // Write miss ratio curves of simulated cache configurations into a csv file
228   // named "mrc" saved in 'output_dir'.
229   //
230   // The file format is
231   // "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses".
232   void WriteMissRatioCurves() const;
233 
234   // Write miss ratio timeline of simulated cache configurations into several
235   // csv files, one per cache capacity saved in 'output_dir'.
236   //
237   // The file format is
238   // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
239   // where N is the number of unique cache names
240   // (cache_name+num_shard_bits+ghost_capacity).
241   void WriteMissRatioTimeline(uint64_t time_unit) const;
242 
243   // Write misses timeline of simulated cache configurations into several
244   // csv files, one per cache capacity saved in 'output_dir'.
245   //
246   // The file format is
247   // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
248   // where N is the number of unique cache names
249   // (cache_name+num_shard_bits+ghost_capacity).
250   void WriteMissTimeline(uint64_t time_unit) const;
251 
252   // Write the access timeline into a csv file saved in 'output_dir'.
253   //
254   // The file is named "label_access_timeline".The file format is
255   // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
256   // where N is the number of unique labels found in the trace.
257   void WriteAccessTimeline(const std::string& label, uint64_t time_unit,
258                            bool user_access_only) const;
259 
260   // Write the reuse distance into a csv file saved in 'output_dir'. Reuse
261   // distance is defined as the cumulated size of unique blocks read between two
262   // consective accesses on the same block.
263   //
264   // The file is named "label_reuse_distance". The file format is
265   // bucket,label_1,label_2,...,label_N.
266   void WriteReuseDistance(const std::string& label_str,
267                           const std::vector<uint64_t>& distance_buckets) const;
268 
269   // Write the reuse interval into a csv file saved in 'output_dir'. Reuse
270   // interval is defined as the time between two consecutive accesses on the
271   // same block.
272   //
273   // The file is named "label_reuse_interval". The file format is
274   // bucket,label_1,label_2,...,label_N.
275   void WriteReuseInterval(const std::string& label_str,
276                           const std::vector<uint64_t>& time_buckets) const;
277 
278   // Write the reuse lifetime into a csv file saved in 'output_dir'. Reuse
279   // lifetime is defined as the time interval between the first access of a
280   // block and its last access.
281   //
282   // The file is named "label_reuse_lifetime". The file format is
283   // bucket,label_1,label_2,...,label_N.
284   void WriteReuseLifetime(const std::string& label_str,
285                           const std::vector<uint64_t>& time_buckets) const;
286 
287   // Write the reuse timeline into a csv file saved in 'output_dir'.
288   //
289   // The file is named
290   // "block_type_user_access_only_reuse_window_reuse_timeline". The file format
291   // is start_time,0,1,...,N where N equals trace_duration / reuse_window.
292   void WriteBlockReuseTimeline(const uint64_t reuse_window, bool user_access_only,
293                                TraceType block_type) const;
294 
295   // Write the Get spatical locality into csv files saved in 'output_dir'.
spgbeginscan(Relation rel,int keysz,int orderbysz)296   //
297   // It generates three csv files. label_percent_ref_keys,
298   // label_percent_accesses_on_ref_keys, and
299   // label_percent_data_size_on_ref_keys.
300   void WriteGetSpatialLocality(
301       const std::string& label_str,
302       const std::vector<uint64_t>& percent_buckets) const;
303 
304   void WriteCorrelationFeatures(const std::string& label_str,
305                                 uint32_t max_number_of_values) const;
306 
307   void WriteCorrelationFeaturesForGet(uint32_t max_number_of_values) const;
308 
309   void WriteSkewness(const std::string& label_str,
310                      const std::vector<uint64_t>& percent_buckets,
311                      TraceType target_block_type) const;
312 
313   const std::map<std::string, ColumnFamilyAccessInfoAggregate>&
314   TEST_cf_aggregates_map() const {
315     return cf_aggregates_map_;
316   }
317 
318  private:
319   std::set<std::string> ParseLabelStr(const std::string& label_str) const;
320 
321   std::string BuildLabel(const std::set<std::string>& labels,
322                          const std::string& cf_name, uint64_t fd,
323                          uint32_t level, TraceType type,
324                          TableReaderCaller caller, uint64_t block_key,
325                          const BlockAccessInfo& block) const;
326 
327   void ComputeReuseDistance(BlockAccessInfo* info) const;
328 
329   Status RecordAccess(const BlockCacheTraceRecord& access);
330 
331   void UpdateReuseIntervalStats(
332       const std::string& label, const std::vector<uint64_t>& time_buckets,
333       const std::map<uint64_t, uint64_t> timeline,
334       std::map<std::string, std::map<uint64_t, uint64_t>>*
335           label_time_num_reuses,
336       uint64_t* total_num_reuses) const;
337 
338   std::string OutputPercentAccessStats(
339       uint64_t total_accesses,
340       const std::map<std::string, uint64_t>& cf_access_count) const;
341 
342   void WriteStatsToFile(
343       const std::string& label_str, const std::vector<uint64_t>& time_buckets,
344       const std::string& filename_suffix,
345       const std::map<std::string, std::map<uint64_t, uint64_t>>& label_data,
346       uint64_t ntotal) const;
347 
348   void TraverseBlocks(
349       std::function<void(const std::string& /*cf_name*/, uint64_t /*fd*/,
350                          uint32_t /*level*/, TraceType /*block_type*/,
351                          const std::string& /*block_key*/,
352                          uint64_t /*block_key_id*/,
353                          const BlockAccessInfo& /*block_access_info*/)>
354           block_callback,
355       std::set<std::string>* labels = nullptr) const;
356 
357   void UpdateFeatureVectors(
358       const std::vector<uint64_t>& access_sequence_number_timeline,
359       const std::vector<uint64_t>& access_timeline, const std::string& label,
360       std::map<std::string, Features>* label_features,
361       std::map<std::string, Predictions>* label_predictions) const;
362 
363   void WriteCorrelationFeaturesToFile(
364       const std::string& label,
365       const std::map<std::string, Features>& label_features,
spgrescan(IndexScanDesc scan,ScanKey scankey,int nscankeys,ScanKey orderbys,int norderbys)366       const std::map<std::string, Predictions>& label_predictions,
367       uint32_t max_number_of_values) const;
368 
369   ROCKSDB_NAMESPACE::Env* env_;
370   const std::string trace_file_path_;
371   const std::string output_dir_;
372   std::string human_readable_trace_file_path_;
373   const bool compute_reuse_distance_;
374   const bool mrc_only_;
375   const bool is_human_readable_trace_file_;
376 
377   BlockCacheTraceHeader header_;
378   std::unique_ptr<BlockCacheTraceSimulator> cache_simulator_;
379   std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_;
380   std::map<std::string, BlockAccessInfo*> block_info_map_;
381   std::unordered_map<std::string, GetKeyInfo> get_key_info_map_;
382   uint64_t access_sequence_number_ = 0;
383   uint64_t trace_start_timestamp_in_seconds_ = 0;
384   uint64_t trace_end_timestamp_in_seconds_ = 0;
385   MissRatioStats miss_ratio_stats_;
386   uint64_t unique_block_id_ = 1;
387   uint64_t unique_get_key_id_ = 1;
388   BlockCacheHumanReadableTraceWriter human_readable_trace_writer_;
389 };
390 
// Entry point of the block cache trace analyzer tool. Takes the command line
// in the usual argc/argv form.
// NOTE(review): the return value is presumably the process exit code; confirm
// against the definition.
int block_cache_trace_analyzer_tool(int argc, char** argv);
392 
393 }  // namespace ROCKSDB_NAMESPACE
394