1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 #pragma once 7 8 #include <map> 9 #include <set> 10 #include <vector> 11 12 #include "db/dbformat.h" 13 #include "rocksdb/env.h" 14 #include "rocksdb/utilities/sim_cache.h" 15 #include "trace_replay/block_cache_tracer.h" 16 #include "utilities/simulator_cache/cache_simulator.h" 17 18 namespace ROCKSDB_NAMESPACE { 19 20 // Statistics of a key refereneced by a Get. 21 struct GetKeyInfo { 22 uint64_t key_id = 0; 23 std::vector<uint64_t> access_sequence_number_timeline; 24 std::vector<uint64_t> access_timeline; 25 26 void AddAccess(const BlockCacheTraceRecord& access, 27 uint64_t access_sequnce_number) { 28 access_sequence_number_timeline.push_back(access_sequnce_number); 29 access_timeline.push_back(access.access_timestamp); 30 } 31 }; 32 33 // Statistics of a block. 34 struct BlockAccessInfo { 35 uint64_t block_id = 0; 36 uint64_t table_id = 0; 37 uint64_t block_offset = 0; 38 uint64_t num_accesses = 0; 39 uint64_t block_size = 0; pairingheap_SpGistSearchItem_cmp(const pairingheap_node * a,const pairingheap_node * b,void * arg)40 uint64_t first_access_time = 0; 41 uint64_t last_access_time = 0; 42 uint64_t num_keys = 0; 43 std::map<std::string, std::map<TableReaderCaller, uint64_t>> 44 key_num_access_map; // for keys exist in this block. 45 std::map<std::string, std::map<TableReaderCaller, uint64_t>> 46 non_exist_key_num_access_map; // for keys do not exist in this block. 47 uint64_t num_referenced_key_exist_in_block = 0; 48 uint64_t referenced_data_size = 0; 49 std::map<TableReaderCaller, uint64_t> caller_num_access_map; 50 // caller:timestamp:number_of_accesses. The granularity of the timestamp is 51 // seconds. 52 std::map<TableReaderCaller, std::map<uint64_t, uint64_t>> 53 caller_num_accesses_timeline; 54 // Unique blocks since the last access. 55 std::set<std::string> unique_blocks_since_last_access; 56 // Number of reuses grouped by reuse distance. 57 std::map<uint64_t, uint64_t> reuse_distance_count; 58 59 // The access sequence numbers of this block. 60 std::vector<uint64_t> access_sequence_number_timeline; 61 std::map<TableReaderCaller, std::vector<uint64_t>> 62 caller_access_sequence__number_timeline; 63 // The access timestamp in microseconds of this block. 64 std::vector<uint64_t> access_timeline; 65 std::map<TableReaderCaller, std::vector<uint64_t>> caller_access_timeline; 66 67 void AddAccess(const BlockCacheTraceRecord& access, 68 uint64_t access_sequnce_number) { 69 if (block_size != 0 && access.block_size != 0) { 70 assert(block_size == access.block_size); 71 } 72 if (num_keys != 0 && access.num_keys_in_block != 0) { 73 assert(num_keys == access.num_keys_in_block); 74 } 75 if (first_access_time == 0) { 76 first_access_time = access.access_timestamp; 77 } 78 table_id = BlockCacheTraceHelper::GetTableId(access); 79 block_offset = BlockCacheTraceHelper::GetBlockOffsetInFile(access); 80 last_access_time = access.access_timestamp; 81 block_size = access.block_size; 82 caller_num_access_map[access.caller]++; spgFreeSearchItem(SpGistScanOpaque so,SpGistSearchItem * item)83 num_accesses++; 84 // access.access_timestamp is in microsecond. 85 const uint64_t timestamp_in_seconds = 86 access.access_timestamp / kMicrosInSecond; 87 caller_num_accesses_timeline[access.caller][timestamp_in_seconds] += 1; 88 // Populate the feature vectors. 89 access_sequence_number_timeline.push_back(access_sequnce_number); 90 caller_access_sequence__number_timeline[access.caller].push_back( 91 access_sequnce_number); 92 access_timeline.push_back(access.access_timestamp); 93 caller_access_timeline[access.caller].push_back(access.access_timestamp); 94 if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(access.block_type, 95 access.caller)) { 96 num_keys = access.num_keys_in_block; 97 if (access.referenced_key_exist_in_block == Boolean::kTrue) { 98 if (key_num_access_map.find(access.referenced_key) == 99 key_num_access_map.end()) { 100 referenced_data_size += access.referenced_data_size; spgAddSearchItemToQueue(SpGistScanOpaque so,SpGistSearchItem * item)101 } 102 key_num_access_map[access.referenced_key][access.caller]++; 103 num_referenced_key_exist_in_block++; 104 if (referenced_data_size > block_size && block_size != 0) { 105 ParsedInternalKey internal_key; 106 ParseInternalKey(access.referenced_key, &internal_key); spgAllocSearchItem(SpGistScanOpaque so,bool isnull,double * distances)107 } 108 } else { 109 non_exist_key_num_access_map[access.referenced_key][access.caller]++; 110 } 111 } 112 } 113 }; 114 115 // Aggregates stats of a block given a block type. 116 struct BlockTypeAccessInfoAggregate { 117 std::map<std::string, BlockAccessInfo> block_access_info_map; 118 }; 119 120 // Aggregates BlockTypeAggregate given a SST file. 121 struct SSTFileAccessInfoAggregate { 122 uint32_t level; spgAddStartItem(SpGistScanOpaque so,bool isnull)123 std::map<TraceType, BlockTypeAccessInfoAggregate> block_type_aggregates_map; 124 }; 125 126 // Aggregates SSTFileAggregate given a column family. 127 struct ColumnFamilyAccessInfoAggregate { 128 std::map<uint64_t, SSTFileAccessInfoAggregate> fd_aggregates_map; 129 }; 130 131 struct Features { 132 std::vector<uint64_t> elapsed_time_since_last_access; 133 std::vector<uint64_t> num_accesses_since_last_access; 134 std::vector<uint64_t> num_past_accesses; 135 }; 136 137 struct Predictions { 138 std::vector<uint64_t> elapsed_time_till_next_access; 139 std::vector<uint64_t> num_accesses_till_next_access; 140 }; 141 142 class BlockCacheTraceAnalyzer { 143 public: 144 BlockCacheTraceAnalyzer( 145 const std::string& trace_file_path, const std::string& output_dir, resetSpGistScanOpaque(SpGistScanOpaque so)146 const std::string& human_readable_trace_file_path, 147 bool compute_reuse_distance, bool mrc_only, 148 bool is_human_readable_trace_file, 149 std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator); 150 ~BlockCacheTraceAnalyzer() = default; 151 // No copy and move. 152 BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete; 153 BlockCacheTraceAnalyzer& operator=(const BlockCacheTraceAnalyzer&) = delete; 154 BlockCacheTraceAnalyzer(BlockCacheTraceAnalyzer&&) = delete; 155 BlockCacheTraceAnalyzer& operator=(BlockCacheTraceAnalyzer&&) = delete; 156 157 // Read all access records in the given trace_file, maintains the stats of 158 // a block, and aggregates the information by block type, sst file, and column 159 // family. Subsequently, the caller may call Print* functions to print 160 // statistics. 161 Status Analyze(); 162 163 // Print a summary of statistics of the trace, e.g., 164 // Number of files: 2 Number of blocks: 50 Number of accesses: 50 165 // Number of Index blocks: 10 166 // Number of Filter blocks: 10 167 // Number of Data blocks: 10 168 // Number of UncompressionDict blocks: 10 169 // Number of RangeDeletion blocks: 10 170 // *************************************************************** 171 // Caller Get: Number of accesses 10 172 // Caller Get: Number of accesses per level break down 173 // Level 0: Number of accesses: 10 174 // Caller Get: Number of accesses per block type break down 175 // Block Type Index: Number of accesses: 2 176 // Block Type Filter: Number of accesses: 2 177 // Block Type Data: Number of accesses: 2 178 // Block Type UncompressionDict: Number of accesses: 2 179 // Block Type RangeDeletion: Number of accesses: 2 180 void PrintStatsSummary() const; 181 182 // Print block size distribution and the distribution break down by block type 183 // and column family. 184 void PrintBlockSizeStats() const; 185 186 // Print access count distribution and the distribution break down by block 187 // type and column family. 188 void PrintAccessCountStats(bool user_access_only, uint32_t bottom_k, 189 uint32_t top_k) const; 190 191 // Print data block accesses by user Get and Multi-Get. 192 // It prints out 1) A histogram on the percentage of keys accessed in a data 193 // block break down by if a referenced key exists in the data block andthe 194 // histogram break down by column family. 2) A histogram on the percentage of 195 // accesses on keys exist in a data block and its break down by column family. 196 void PrintDataBlockAccessStats() const; 197 198 // Write the percentage of accesses break down by column family into a csv 199 // file saved in 'output_dir'. spgPrepareScanKeys(IndexScanDesc scan)200 // 201 // The file is named "percentage_of_accesses_summary". The file format is 202 // caller,cf_0,cf_1,...,cf_n where the cf_i is the column family name found in 203 // the trace. 204 void WritePercentAccessSummaryStats() const; 205 206 // Write the percentage of accesses for the given caller break down by column 207 // family, level, and block type into a csv file saved in 'output_dir'. 208 // 209 // It generates two files: 1) caller_level_percentage_of_accesses_summary and 210 // 2) caller_bt_percentage_of_accesses_summary which break down by the level 211 // and block type, respectively. The file format is 212 // level/bt,cf_0,cf_1,...,cf_n where cf_i is the column family name found in 213 // the trace. 214 void WriteDetailedPercentAccessSummaryStats(TableReaderCaller caller) const; 215 216 // Write the access count summary into a csv file saved in 'output_dir'. 217 // It groups blocks by their access count. 218 // 219 // It generates two files: 1) cf_access_count_summary and 2) 220 // bt_access_count_summary which break down the access count by column family 221 // and block type, respectively. The file format is 222 // cf/bt,bucket_0,bucket_1,...,bucket_N. 223 void WriteAccessCountSummaryStats( 224 const std::vector<uint64_t>& access_count_buckets, 225 bool user_access_only) const; 226 227 // Write miss ratio curves of simulated cache configurations into a csv file 228 // named "mrc" saved in 'output_dir'. 229 // 230 // The file format is 231 // "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses". 232 void WriteMissRatioCurves() const; 233 234 // Write miss ratio timeline of simulated cache configurations into several 235 // csv files, one per cache capacity saved in 'output_dir'. 236 // 237 // The file format is 238 // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second" 239 // where N is the number of unique cache names 240 // (cache_name+num_shard_bits+ghost_capacity). 241 void WriteMissRatioTimeline(uint64_t time_unit) const; 242 243 // Write misses timeline of simulated cache configurations into several 244 // csv files, one per cache capacity saved in 'output_dir'. 245 // 246 // The file format is 247 // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second" 248 // where N is the number of unique cache names 249 // (cache_name+num_shard_bits+ghost_capacity). 250 void WriteMissTimeline(uint64_t time_unit) const; 251 252 // Write the access timeline into a csv file saved in 'output_dir'. 253 // 254 // The file is named "label_access_timeline".The file format is 255 // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second" 256 // where N is the number of unique labels found in the trace. 257 void WriteAccessTimeline(const std::string& label, uint64_t time_unit, 258 bool user_access_only) const; 259 260 // Write the reuse distance into a csv file saved in 'output_dir'. Reuse 261 // distance is defined as the cumulated size of unique blocks read between two 262 // consective accesses on the same block. 263 // 264 // The file is named "label_reuse_distance". The file format is 265 // bucket,label_1,label_2,...,label_N. 266 void WriteReuseDistance(const std::string& label_str, 267 const std::vector<uint64_t>& distance_buckets) const; 268 269 // Write the reuse interval into a csv file saved in 'output_dir'. Reuse 270 // interval is defined as the time between two consecutive accesses on the 271 // same block. 272 // 273 // The file is named "label_reuse_interval". The file format is 274 // bucket,label_1,label_2,...,label_N. 275 void WriteReuseInterval(const std::string& label_str, 276 const std::vector<uint64_t>& time_buckets) const; 277 278 // Write the reuse lifetime into a csv file saved in 'output_dir'. Reuse 279 // lifetime is defined as the time interval between the first access of a 280 // block and its last access. 281 // 282 // The file is named "label_reuse_lifetime". The file format is 283 // bucket,label_1,label_2,...,label_N. 284 void WriteReuseLifetime(const std::string& label_str, 285 const std::vector<uint64_t>& time_buckets) const; 286 287 // Write the reuse timeline into a csv file saved in 'output_dir'. 288 // 289 // The file is named 290 // "block_type_user_access_only_reuse_window_reuse_timeline". The file format 291 // is start_time,0,1,...,N where N equals trace_duration / reuse_window. 292 void WriteBlockReuseTimeline(const uint64_t reuse_window, bool user_access_only, 293 TraceType block_type) const; 294 295 // Write the Get spatical locality into csv files saved in 'output_dir'. spgbeginscan(Relation rel,int keysz,int orderbysz)296 // 297 // It generates three csv files. label_percent_ref_keys, 298 // label_percent_accesses_on_ref_keys, and 299 // label_percent_data_size_on_ref_keys. 300 void WriteGetSpatialLocality( 301 const std::string& label_str, 302 const std::vector<uint64_t>& percent_buckets) const; 303 304 void WriteCorrelationFeatures(const std::string& label_str, 305 uint32_t max_number_of_values) const; 306 307 void WriteCorrelationFeaturesForGet(uint32_t max_number_of_values) const; 308 309 void WriteSkewness(const std::string& label_str, 310 const std::vector<uint64_t>& percent_buckets, 311 TraceType target_block_type) const; 312 313 const std::map<std::string, ColumnFamilyAccessInfoAggregate>& 314 TEST_cf_aggregates_map() const { 315 return cf_aggregates_map_; 316 } 317 318 private: 319 std::set<std::string> ParseLabelStr(const std::string& label_str) const; 320 321 std::string BuildLabel(const std::set<std::string>& labels, 322 const std::string& cf_name, uint64_t fd, 323 uint32_t level, TraceType type, 324 TableReaderCaller caller, uint64_t block_key, 325 const BlockAccessInfo& block) const; 326 327 void ComputeReuseDistance(BlockAccessInfo* info) const; 328 329 Status RecordAccess(const BlockCacheTraceRecord& access); 330 331 void UpdateReuseIntervalStats( 332 const std::string& label, const std::vector<uint64_t>& time_buckets, 333 const std::map<uint64_t, uint64_t> timeline, 334 std::map<std::string, std::map<uint64_t, uint64_t>>* 335 label_time_num_reuses, 336 uint64_t* total_num_reuses) const; 337 338 std::string OutputPercentAccessStats( 339 uint64_t total_accesses, 340 const std::map<std::string, uint64_t>& cf_access_count) const; 341 342 void WriteStatsToFile( 343 const std::string& label_str, const std::vector<uint64_t>& time_buckets, 344 const std::string& filename_suffix, 345 const std::map<std::string, std::map<uint64_t, uint64_t>>& label_data, 346 uint64_t ntotal) const; 347 348 void TraverseBlocks( 349 std::function<void(const std::string& /*cf_name*/, uint64_t /*fd*/, 350 uint32_t /*level*/, TraceType /*block_type*/, 351 const std::string& /*block_key*/, 352 uint64_t /*block_key_id*/, 353 const BlockAccessInfo& /*block_access_info*/)> 354 block_callback, 355 std::set<std::string>* labels = nullptr) const; 356 357 void UpdateFeatureVectors( 358 const std::vector<uint64_t>& access_sequence_number_timeline, 359 const std::vector<uint64_t>& access_timeline, const std::string& label, 360 std::map<std::string, Features>* label_features, 361 std::map<std::string, Predictions>* label_predictions) const; 362 363 void WriteCorrelationFeaturesToFile( 364 const std::string& label, 365 const std::map<std::string, Features>& label_features, spgrescan(IndexScanDesc scan,ScanKey scankey,int nscankeys,ScanKey orderbys,int norderbys)366 const std::map<std::string, Predictions>& label_predictions, 367 uint32_t max_number_of_values) const; 368 369 ROCKSDB_NAMESPACE::Env* env_; 370 const std::string trace_file_path_; 371 const std::string output_dir_; 372 std::string human_readable_trace_file_path_; 373 const bool compute_reuse_distance_; 374 const bool mrc_only_; 375 const bool is_human_readable_trace_file_; 376 377 BlockCacheTraceHeader header_; 378 std::unique_ptr<BlockCacheTraceSimulator> cache_simulator_; 379 std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_; 380 std::map<std::string, BlockAccessInfo*> block_info_map_; 381 std::unordered_map<std::string, GetKeyInfo> get_key_info_map_; 382 uint64_t access_sequence_number_ = 0; 383 uint64_t trace_start_timestamp_in_seconds_ = 0; 384 uint64_t trace_end_timestamp_in_seconds_ = 0; 385 MissRatioStats miss_ratio_stats_; 386 uint64_t unique_block_id_ = 1; 387 uint64_t unique_get_key_id_ = 1; 388 BlockCacheHumanReadableTraceWriter human_readable_trace_writer_; 389 }; 390 391 int block_cache_trace_analyzer_tool(int argc, char** argv); 392 393 } // namespace ROCKSDB_NAMESPACE 394