1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #ifndef ROCKSDB_LITE
7 #ifndef GFLAGS
8 #include <cstdio>
// Fallback entry point used when the file is compiled without GFLAGS: the
// real tests are preprocessed out, so just say why and exit with a non-zero
// status.
int main() {
  fputs("Please install gflags to run block_cache_trace_analyzer_test\n",
        stderr);
  return 1;
}
14 #else
15 
16 #include <fstream>
17 #include <iostream>
18 #include <map>
19 #include <vector>
20 
21 #include "rocksdb/env.h"
22 #include "rocksdb/status.h"
23 #include "rocksdb/trace_reader_writer.h"
24 #include "test_util/testharness.h"
25 #include "test_util/testutil.h"
26 #include "tools/block_cache_analyzer/block_cache_trace_analyzer.h"
27 #include "trace_replay/block_cache_tracer.h"
28 
29 namespace ROCKSDB_NAMESPACE {
30 
namespace {
// Prefixes that the fixture combines with a numeric key id to build the
// block key and the referenced user key of each trace record.
const std::string kBlockKeyPrefix = "test-block-";
const std::string kRefKeyPrefix = "test-get-";

// Per-record values written by WriteBlockAccess(): the base block size (the
// key id is added on top), the column family id, the level, and the number
// of keys reported for each block.
constexpr uint64_t kBlockSize = 1024;
constexpr uint32_t kCFId = 0;
constexpr uint32_t kLevel = 1;
constexpr uint64_t kNumKeysInBlock = 1024;

// SST file numbers assigned to records with even / odd key ids.
constexpr uint64_t kSSTStoringEvenKeys = 100;
constexpr uint64_t kSSTStoringOddKeys = 101;

// Capacity of the argv array and of the backing character buffer that
// RunBlockCacheTraceAnalyzer() fills before invoking the analyzer tool.
constexpr int kMaxArgCount = 100;
constexpr size_t kArgBufferSize = 100000;
}  // namespace
43 
44 class BlockCacheTracerTest : public testing::Test {
45  public:
BlockCacheTracerTest()46   BlockCacheTracerTest() {
47     test_path_ = test::PerThreadDBPath("block_cache_tracer_test");
48     env_ = ROCKSDB_NAMESPACE::Env::Default();
49     EXPECT_OK(env_->CreateDir(test_path_));
50     trace_file_path_ = test_path_ + "/block_cache_trace";
51     block_cache_sim_config_path_ = test_path_ + "/block_cache_sim_config";
52     timeline_labels_ =
53         "block,all,cf,sst,level,bt,caller,cf_sst,cf_level,cf_bt,cf_caller";
54     reuse_distance_labels_ =
55         "block,all,cf,sst,level,bt,caller,cf_sst,cf_level,cf_bt,cf_caller";
56     reuse_distance_buckets_ = "1,1K,1M,1G";
57     reuse_interval_labels_ = "block,all,cf,sst,level,bt,cf_sst,cf_level,cf_bt";
58     reuse_interval_buckets_ = "1,10,100,1000";
59     reuse_lifetime_labels_ = "block,all,cf,sst,level,bt,cf_sst,cf_level,cf_bt";
60     reuse_lifetime_buckets_ = "1,10,100,1000";
61     analyzing_callers_ = "Get,Iterator";
62     access_count_buckets_ = "2,3,4,5,10";
63     analyze_get_spatial_locality_labels_ = "all";
64     analyze_get_spatial_locality_buckets_ = "10,20,30,40,50,60,70,80,90,100";
65   }
66 
~BlockCacheTracerTest()67   ~BlockCacheTracerTest() override {
68     if (getenv("KEEP_DB")) {
69       printf("The trace file is still at %s\n", trace_file_path_.c_str());
70       return;
71     }
72     EXPECT_OK(env_->DeleteFile(trace_file_path_));
73     EXPECT_OK(env_->DeleteDir(test_path_));
74   }
75 
GetCaller(uint32_t key_id)76   TableReaderCaller GetCaller(uint32_t key_id) {
77     uint32_t n = key_id % 5;
78     switch (n) {
79       case 0:
80         return TableReaderCaller::kPrefetch;
81       case 1:
82         return TableReaderCaller::kCompaction;
83       case 2:
84         return TableReaderCaller::kUserGet;
85       case 3:
86         return TableReaderCaller::kUserMultiGet;
87       case 4:
88         return TableReaderCaller::kUserIterator;
89     }
90     // This cannot happend.
91     assert(false);
92     return TableReaderCaller::kMaxBlockCacheLookupCaller;
93   }
94 
WriteBlockAccess(BlockCacheTraceWriter * writer,uint32_t from_key_id,TraceType block_type,uint32_t nblocks)95   void WriteBlockAccess(BlockCacheTraceWriter* writer, uint32_t from_key_id,
96                         TraceType block_type, uint32_t nblocks) {
97     assert(writer);
98     for (uint32_t i = 0; i < nblocks; i++) {
99       uint32_t key_id = from_key_id + i;
100       uint64_t timestamp = (key_id + 1) * kMicrosInSecond;
101       BlockCacheTraceRecord record;
102       record.block_type = block_type;
103       record.block_size = kBlockSize + key_id;
104       record.block_key = kBlockKeyPrefix + std::to_string(key_id);
105       record.access_timestamp = timestamp;
106       record.cf_id = kCFId;
107       record.cf_name = kDefaultColumnFamilyName;
108       record.caller = GetCaller(key_id);
109       record.level = kLevel;
110       if (key_id % 2 == 0) {
111         record.sst_fd_number = kSSTStoringEvenKeys;
112       } else {
113         record.sst_fd_number = kSSTStoringOddKeys;
114       }
115       record.is_cache_hit = Boolean::kFalse;
116       record.no_insert = Boolean::kFalse;
117       // Provide these fields for all block types.
118       // The writer should only write these fields for data blocks and the
119       // caller is either GET or MGET.
120       record.referenced_key =
121           kRefKeyPrefix + std::to_string(key_id) + std::string(8, 0);
122       record.referenced_key_exist_in_block = Boolean::kTrue;
123       record.num_keys_in_block = kNumKeysInBlock;
124       ASSERT_OK(writer->WriteBlockAccess(
125           record, record.block_key, record.cf_name, record.referenced_key));
126     }
127   }
128 
AssertBlockAccessInfo(uint32_t key_id,TraceType type,const std::map<std::string,BlockAccessInfo> & block_access_info_map)129   void AssertBlockAccessInfo(
130       uint32_t key_id, TraceType type,
131       const std::map<std::string, BlockAccessInfo>& block_access_info_map) {
132     auto key_id_str = kBlockKeyPrefix + std::to_string(key_id);
133     ASSERT_TRUE(block_access_info_map.find(key_id_str) !=
134                 block_access_info_map.end());
135     auto& block_access_info = block_access_info_map.find(key_id_str)->second;
136     ASSERT_EQ(1, block_access_info.num_accesses);
137     ASSERT_EQ(kBlockSize + key_id, block_access_info.block_size);
138     ASSERT_GT(block_access_info.first_access_time, 0);
139     ASSERT_GT(block_access_info.last_access_time, 0);
140     ASSERT_EQ(1, block_access_info.caller_num_access_map.size());
141     TableReaderCaller expected_caller = GetCaller(key_id);
142     ASSERT_TRUE(block_access_info.caller_num_access_map.find(expected_caller) !=
143                 block_access_info.caller_num_access_map.end());
144     ASSERT_EQ(
145         1,
146         block_access_info.caller_num_access_map.find(expected_caller)->second);
147 
148     if ((expected_caller == TableReaderCaller::kUserGet ||
149          expected_caller == TableReaderCaller::kUserMultiGet) &&
150         type == TraceType::kBlockTraceDataBlock) {
151       ASSERT_EQ(kNumKeysInBlock, block_access_info.num_keys);
152       ASSERT_EQ(1, block_access_info.key_num_access_map.size());
153       ASSERT_EQ(0, block_access_info.non_exist_key_num_access_map.size());
154       ASSERT_EQ(1, block_access_info.num_referenced_key_exist_in_block);
155     }
156   }
157 
RunBlockCacheTraceAnalyzer()158   void RunBlockCacheTraceAnalyzer() {
159     std::vector<std::string> params = {
160         "./block_cache_trace_analyzer",
161         "-block_cache_trace_path=" + trace_file_path_,
162         "-block_cache_sim_config_path=" + block_cache_sim_config_path_,
163         "-block_cache_analysis_result_dir=" + test_path_,
164         "-print_block_size_stats",
165         "-print_access_count_stats",
166         "-print_data_block_access_count_stats",
167         "-cache_sim_warmup_seconds=0",
168         "-analyze_bottom_k_access_count_blocks=5",
169         "-analyze_top_k_access_count_blocks=5",
170         "-analyze_blocks_reuse_k_reuse_window=5",
171         "-timeline_labels=" + timeline_labels_,
172         "-reuse_distance_labels=" + reuse_distance_labels_,
173         "-reuse_distance_buckets=" + reuse_distance_buckets_,
174         "-reuse_interval_labels=" + reuse_interval_labels_,
175         "-reuse_interval_buckets=" + reuse_interval_buckets_,
176         "-reuse_lifetime_labels=" + reuse_lifetime_labels_,
177         "-reuse_lifetime_buckets=" + reuse_lifetime_buckets_,
178         "-analyze_callers=" + analyzing_callers_,
179         "-access_count_buckets=" + access_count_buckets_,
180         "-analyze_get_spatial_locality_labels=" +
181             analyze_get_spatial_locality_labels_,
182         "-analyze_get_spatial_locality_buckets=" +
183             analyze_get_spatial_locality_buckets_,
184         "-analyze_correlation_coefficients_labels=all",
185         "-skew_labels=all",
186         "-skew_buckets=10,50,100"};
187     char arg_buffer[kArgBufferSize];
188     char* argv[kMaxArgCount];
189     int argc = 0;
190     int cursor = 0;
191     for (const auto& arg : params) {
192       ASSERT_LE(cursor + arg.size() + 1, kArgBufferSize);
193       ASSERT_LE(argc + 1, kMaxArgCount);
194       snprintf(arg_buffer + cursor, arg.size() + 1, "%s", arg.c_str());
195 
196       argv[argc++] = arg_buffer + cursor;
197       cursor += static_cast<int>(arg.size()) + 1;
198     }
199     ASSERT_EQ(0,
200               ROCKSDB_NAMESPACE::block_cache_trace_analyzer_tool(argc, argv));
201   }
202 
203   Env* env_;
204   EnvOptions env_options_;
205   std::string block_cache_sim_config_path_;
206   std::string trace_file_path_;
207   std::string test_path_;
208   std::string timeline_labels_;
209   std::string reuse_distance_labels_;
210   std::string reuse_distance_buckets_;
211   std::string reuse_interval_labels_;
212   std::string reuse_interval_buckets_;
213   std::string reuse_lifetime_labels_;
214   std::string reuse_lifetime_buckets_;
215   std::string analyzing_callers_;
216   std::string access_count_buckets_;
217   std::string analyze_get_spatial_locality_labels_;
218   std::string analyze_get_spatial_locality_buckets_;
219 };
220 
TEST_F(BlockCacheTracerTest,BlockCacheAnalyzer)221 TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
222   {
223     // Generate a trace file.
224     TraceOptions trace_opt;
225     std::unique_ptr<TraceWriter> trace_writer;
226     ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
227                                  &trace_writer));
228     BlockCacheTraceWriter writer(env_, trace_opt, std::move(trace_writer));
229     ASSERT_OK(writer.WriteHeader());
230     WriteBlockAccess(&writer, 0, TraceType::kBlockTraceDataBlock, 50);
231     ASSERT_OK(env_->FileExists(trace_file_path_));
232   }
233   {
234     // Generate a cache sim config.
235     std::string config = "lru,1,0,1K,1M,1G";
236     std::ofstream out(block_cache_sim_config_path_);
237     ASSERT_TRUE(out.is_open());
238     out << config << std::endl;
239     out.close();
240   }
241   RunBlockCacheTraceAnalyzer();
242   {
243     // Validate the cache miss ratios.
244     std::vector<uint64_t> expected_capacities{1024, 1024 * 1024,
245                                               1024 * 1024 * 1024};
246     const std::string mrc_path = test_path_ + "/49_50_mrc";
247     std::ifstream infile(mrc_path);
248     uint32_t config_index = 0;
249     std::string line;
250     // Read header.
251     ASSERT_TRUE(getline(infile, line));
252     while (getline(infile, line)) {
253       std::stringstream ss(line);
254       std::vector<std::string> result_strs;
255       while (ss.good()) {
256         std::string substr;
257         getline(ss, substr, ',');
258         result_strs.push_back(substr);
259       }
260       ASSERT_EQ(6, result_strs.size());
261       ASSERT_LT(config_index, expected_capacities.size());
262       ASSERT_EQ("lru", result_strs[0]);  // cache_name
263       ASSERT_EQ("1", result_strs[1]);    // num_shard_bits
264       ASSERT_EQ("0", result_strs[2]);    // ghost_cache_capacity
265       ASSERT_EQ(std::to_string(expected_capacities[config_index]),
266                 result_strs[3]);              // cache_capacity
267       ASSERT_EQ("100.0000", result_strs[4]);  // miss_ratio
268       ASSERT_EQ("50", result_strs[5]);        // number of accesses.
269       config_index++;
270     }
271     ASSERT_EQ(expected_capacities.size(), config_index);
272     infile.close();
273     ASSERT_OK(env_->DeleteFile(mrc_path));
274 
275     const std::vector<std::string> time_units{"1", "60", "3600"};
276     expected_capacities.push_back(port::kMaxUint64);
277     for (auto const& expected_capacity : expected_capacities) {
278       for (auto const& time_unit : time_units) {
279         const std::string miss_ratio_timeline_path =
280             test_path_ + "/" + std::to_string(expected_capacity) + "_" +
281             time_unit + "_miss_ratio_timeline";
282         std::ifstream mrt_file(miss_ratio_timeline_path);
283         // Read header.
284         ASSERT_TRUE(getline(mrt_file, line));
285         ASSERT_TRUE(getline(mrt_file, line));
286         std::stringstream ss(line);
287         bool read_header = false;
288         while (ss.good()) {
289           std::string substr;
290           getline(ss, substr, ',');
291           if (!read_header) {
292             if (expected_capacity == port::kMaxUint64) {
293               ASSERT_EQ("trace", substr);
294             } else {
295               ASSERT_EQ("lru-1-0", substr);
296             }
297             read_header = true;
298             continue;
299           }
300           ASSERT_DOUBLE_EQ(100.0, ParseDouble(substr));
301         }
302         ASSERT_FALSE(getline(mrt_file, line));
303         mrt_file.close();
304         ASSERT_OK(env_->DeleteFile(miss_ratio_timeline_path));
305       }
306       for (auto const& time_unit : time_units) {
307         const std::string miss_timeline_path =
308             test_path_ + "/" + std::to_string(expected_capacity) + "_" +
309             time_unit + "_miss_timeline";
310         std::ifstream mt_file(miss_timeline_path);
311         // Read header.
312         ASSERT_TRUE(getline(mt_file, line));
313         ASSERT_TRUE(getline(mt_file, line));
314         std::stringstream ss(line);
315         uint32_t num_misses = 0;
316         while (ss.good()) {
317           std::string substr;
318           getline(ss, substr, ',');
319           if (num_misses == 0) {
320             if (expected_capacity == port::kMaxUint64) {
321               ASSERT_EQ("trace", substr);
322             } else {
323               ASSERT_EQ("lru-1-0", substr);
324             }
325             num_misses++;
326             continue;
327           }
328           num_misses += ParseInt(substr);
329         }
330         ASSERT_EQ(51, num_misses);
331         ASSERT_FALSE(getline(mt_file, line));
332         mt_file.close();
333         ASSERT_OK(env_->DeleteFile(miss_timeline_path));
334       }
335     }
336   }
337   {
338     // Validate the skewness csv file.
339     const std::string skewness_file_path = test_path_ + "/all_skewness";
340     std::ifstream skew_file(skewness_file_path);
341     // Read header.
342     std::string line;
343     ASSERT_TRUE(getline(skew_file, line));
344     std::stringstream ss(line);
345     double sum_percent = 0;
346     while (getline(skew_file, line)) {
347       std::stringstream ss_naccess(line);
348       std::string substr;
349       bool read_label = false;
350       while (ss_naccess.good()) {
351         ASSERT_TRUE(getline(ss_naccess, substr, ','));
352         if (!read_label) {
353           read_label = true;
354           continue;
355         }
356         sum_percent += ParseDouble(substr);
357       }
358     }
359     ASSERT_EQ(100.0, sum_percent);
360     ASSERT_FALSE(getline(skew_file, line));
361     skew_file.close();
362     ASSERT_OK(env_->DeleteFile(skewness_file_path));
363   }
364   {
365     // Validate the timeline csv files.
366     const std::vector<std::string> time_units{"_60", "_3600"};
367     const std::vector<std::string> user_access_only_flags{"user_access_only_",
368                                                           "all_access_"};
369     for (auto const& user_access_only : user_access_only_flags) {
370       for (auto const& unit : time_units) {
371         std::stringstream ss(timeline_labels_);
372         while (ss.good()) {
373           std::string l;
374           ASSERT_TRUE(getline(ss, l, ','));
375           if (l.find("block") == std::string::npos) {
376             if (user_access_only != "all_access_") {
377               continue;
378             }
379           }
380           const std::string timeline_file = test_path_ + "/" +
381                                             user_access_only + l + unit +
382                                             "_access_timeline";
383           std::ifstream infile(timeline_file);
384           std::string line;
385           const uint64_t expected_naccesses = 50;
386           const uint64_t expected_user_accesses = 30;
387           ASSERT_TRUE(getline(infile, line)) << timeline_file;
388           uint32_t naccesses = 0;
389           while (getline(infile, line)) {
390             std::stringstream ss_naccess(line);
391             std::string substr;
392             bool read_label = false;
393             while (ss_naccess.good()) {
394               ASSERT_TRUE(getline(ss_naccess, substr, ','));
395               if (!read_label) {
396                 read_label = true;
397                 continue;
398               }
399               naccesses += ParseUint32(substr);
400             }
401           }
402           if (user_access_only == "user_access_only_") {
403             ASSERT_EQ(expected_user_accesses, naccesses) << timeline_file;
404           } else {
405             ASSERT_EQ(expected_naccesses, naccesses) << timeline_file;
406           }
407           ASSERT_OK(env_->DeleteFile(timeline_file));
408         }
409       }
410     }
411   }
412   {
413     // Validate the reuse_interval and reuse_distance csv files.
414     std::map<std::string, std::string> test_reuse_csv_files;
415     test_reuse_csv_files["_access_reuse_interval"] = reuse_interval_labels_;
416     test_reuse_csv_files["_reuse_distance"] = reuse_distance_labels_;
417     test_reuse_csv_files["_reuse_lifetime"] = reuse_lifetime_labels_;
418     test_reuse_csv_files["_avg_reuse_interval"] = reuse_interval_labels_;
419     test_reuse_csv_files["_avg_reuse_interval_naccesses"] =
420         reuse_interval_labels_;
421     for (auto const& test : test_reuse_csv_files) {
422       const std::string& file_suffix = test.first;
423       const std::string& labels = test.second;
424       const uint32_t expected_num_rows = 5;
425       std::stringstream ss(labels);
426       while (ss.good()) {
427         std::string l;
428         ASSERT_TRUE(getline(ss, l, ','));
429         const std::string reuse_csv_file = test_path_ + "/" + l + file_suffix;
430         std::ifstream infile(reuse_csv_file);
431         std::string line;
432         ASSERT_TRUE(getline(infile, line));
433         double npercentage = 0;
434         uint32_t nrows = 0;
435         while (getline(infile, line)) {
436           std::stringstream ss_naccess(line);
437           bool label_read = false;
438           nrows++;
439           while (ss_naccess.good()) {
440             std::string substr;
441             ASSERT_TRUE(getline(ss_naccess, substr, ','));
442             if (!label_read) {
443               label_read = true;
444               continue;
445             }
446             npercentage += ParseDouble(substr);
447           }
448         }
449         ASSERT_EQ(expected_num_rows, nrows);
450         if ("_reuse_lifetime" == test.first ||
451             "_avg_reuse_interval" == test.first ||
452             "_avg_reuse_interval_naccesses" == test.first) {
453           ASSERT_EQ(100, npercentage) << reuse_csv_file;
454         } else {
455           ASSERT_LT(npercentage, 0);
456         }
457         ASSERT_OK(env_->DeleteFile(reuse_csv_file));
458       }
459     }
460   }
461 
462   {
463     // Validate the percentage of accesses summary.
464     const std::string percent_access_summary_file =
465         test_path_ + "/percentage_of_accesses_summary";
466     std::ifstream infile(percent_access_summary_file);
467     std::string line;
468     ASSERT_TRUE(getline(infile, line));
469     std::set<std::string> callers;
470     std::set<std::string> expected_callers{"Get", "MultiGet", "Iterator",
471                                            "Prefetch", "Compaction"};
472     while (getline(infile, line)) {
473       std::stringstream caller_percent(line);
474       std::string caller;
475       ASSERT_TRUE(getline(caller_percent, caller, ','));
476       std::string percent;
477       ASSERT_TRUE(getline(caller_percent, percent, ','));
478       ASSERT_FALSE(caller_percent.good());
479       callers.insert(caller);
480       ASSERT_EQ(20, ParseDouble(percent));
481     }
482     ASSERT_EQ(expected_callers.size(), callers.size());
483     for (auto caller : callers) {
484       ASSERT_TRUE(expected_callers.find(caller) != expected_callers.end());
485     }
486     ASSERT_OK(env_->DeleteFile(percent_access_summary_file));
487   }
488   {
489     // Validate the percentage of accesses summary by analyzing callers.
490     std::stringstream analyzing_callers(analyzing_callers_);
491     while (analyzing_callers.good()) {
492       std::string caller;
493       ASSERT_TRUE(getline(analyzing_callers, caller, ','));
494       std::vector<std::string> breakdowns{"level", "bt"};
495       for (auto breakdown : breakdowns) {
496         const std::string file_name = test_path_ + "/" + caller + "_" +
497                                       breakdown +
498                                       "_percentage_of_accesses_summary";
499         std::ifstream infile(file_name);
500         std::string line;
501         ASSERT_TRUE(getline(infile, line));
502         double sum = 0;
503         while (getline(infile, line)) {
504           std::stringstream label_percent(line);
505           std::string label;
506           ASSERT_TRUE(getline(label_percent, label, ','));
507           std::string percent;
508           ASSERT_TRUE(getline(label_percent, percent, ','));
509           ASSERT_FALSE(label_percent.good());
510           sum += ParseDouble(percent);
511         }
512         ASSERT_EQ(100, sum);
513         ASSERT_OK(env_->DeleteFile(file_name));
514       }
515     }
516   }
517   const std::vector<std::string> access_types{"user_access_only", "all_access"};
518   const std::vector<std::string> prefix{"bt", "cf"};
519   for (auto const& pre : prefix) {
520     for (auto const& access_type : access_types) {
521       {
522         // Validate the access count summary.
523         const std::string bt_access_count_summary = test_path_ + "/" + pre +
524                                                     "_" + access_type +
525                                                     "_access_count_summary";
526         std::ifstream infile(bt_access_count_summary);
527         std::string line;
528         ASSERT_TRUE(getline(infile, line));
529         double sum_percent = 0;
530         while (getline(infile, line)) {
531           std::stringstream bt_percent(line);
532           std::string bt;
533           ASSERT_TRUE(getline(bt_percent, bt, ','));
534           std::string percent;
535           ASSERT_TRUE(getline(bt_percent, percent, ','));
536           sum_percent += ParseDouble(percent);
537         }
538         ASSERT_EQ(100.0, sum_percent);
539         ASSERT_OK(env_->DeleteFile(bt_access_count_summary));
540       }
541     }
542   }
543   for (auto const& access_type : access_types) {
544     std::vector<std::string> block_types{"Index", "Data", "Filter"};
545     for (auto block_type : block_types) {
546       // Validate reuse block timeline.
547       const std::string reuse_blocks_timeline = test_path_ + "/" + block_type +
548                                                 "_" + access_type +
549                                                 "_5_reuse_blocks_timeline";
550       std::ifstream infile(reuse_blocks_timeline);
551       std::string line;
552       ASSERT_TRUE(getline(infile, line)) << reuse_blocks_timeline;
553       uint32_t index = 0;
554       while (getline(infile, line)) {
555         std::stringstream timeline(line);
556         bool start_time = false;
557         double sum = 0;
558         while (timeline.good()) {
559           std::string value;
560           ASSERT_TRUE(getline(timeline, value, ','));
561           if (!start_time) {
562             start_time = true;
563             continue;
564           }
565           sum += ParseDouble(value);
566         }
567         index++;
568         ASSERT_LT(sum, 100.0 * index + 1) << reuse_blocks_timeline;
569       }
570       ASSERT_OK(env_->DeleteFile(reuse_blocks_timeline));
571     }
572   }
573 
574   std::stringstream ss(analyze_get_spatial_locality_labels_);
575   while (ss.good()) {
576     std::string l;
577     ASSERT_TRUE(getline(ss, l, ','));
578     const std::vector<std::string> spatial_locality_files{
579         "_percent_ref_keys", "_percent_accesses_on_ref_keys",
580         "_percent_data_size_on_ref_keys"};
581     for (auto const& spatial_locality_file : spatial_locality_files) {
582       const std::string filename = test_path_ + "/" + l + spatial_locality_file;
583       std::ifstream infile(filename);
584       std::string line;
585       ASSERT_TRUE(getline(infile, line));
586       double sum_percent = 0;
587       uint32_t nrows = 0;
588       while (getline(infile, line)) {
589         std::stringstream bt_percent(line);
590         std::string bt;
591         ASSERT_TRUE(getline(bt_percent, bt, ','));
592         std::string percent;
593         ASSERT_TRUE(getline(bt_percent, percent, ','));
594         sum_percent += ParseDouble(percent);
595         nrows++;
596       }
597       ASSERT_EQ(11, nrows);
598       ASSERT_EQ(100.0, sum_percent);
599       ASSERT_OK(env_->DeleteFile(filename));
600     }
601   }
602   ASSERT_OK(env_->DeleteFile(block_cache_sim_config_path_));
603 }
604 
TEST_F(BlockCacheTracerTest,MixedBlocks)605 TEST_F(BlockCacheTracerTest, MixedBlocks) {
606   {
607     // Generate a trace file containing a mix of blocks.
608     // It contains two SST files with 25 blocks of odd numbered block_key in
609     // kSSTStoringOddKeys and 25 blocks of even numbered blocks_key in
610     // kSSTStoringEvenKeys.
611     TraceOptions trace_opt;
612     std::unique_ptr<TraceWriter> trace_writer;
613     ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
614                                  &trace_writer));
615     BlockCacheTraceWriter writer(env_, trace_opt, std::move(trace_writer));
616     ASSERT_OK(writer.WriteHeader());
617     // Write blocks of different types.
618     WriteBlockAccess(&writer, 0, TraceType::kBlockTraceUncompressionDictBlock,
619                      10);
620     WriteBlockAccess(&writer, 10, TraceType::kBlockTraceDataBlock, 10);
621     WriteBlockAccess(&writer, 20, TraceType::kBlockTraceFilterBlock, 10);
622     WriteBlockAccess(&writer, 30, TraceType::kBlockTraceIndexBlock, 10);
623     WriteBlockAccess(&writer, 40, TraceType::kBlockTraceRangeDeletionBlock, 10);
624     ASSERT_OK(env_->FileExists(trace_file_path_));
625   }
626 
627   {
628     // Verify trace file is generated correctly.
629     std::unique_ptr<TraceReader> trace_reader;
630     ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_,
631                                  &trace_reader));
632     BlockCacheTraceReader reader(std::move(trace_reader));
633     BlockCacheTraceHeader header;
634     ASSERT_OK(reader.ReadHeader(&header));
635     ASSERT_EQ(kMajorVersion, header.rocksdb_major_version);
636     ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version);
637     // Read blocks.
638     BlockCacheTraceAnalyzer analyzer(
639         trace_file_path_,
640         /*output_miss_ratio_curve_path=*/"",
641         /*human_readable_trace_file_path=*/"",
642         /*compute_reuse_distance=*/true,
643         /*mrc_only=*/false,
644         /*is_block_cache_human_readable_trace=*/false,
645         /*simulator=*/nullptr);
646     // The analyzer ends when it detects an incomplete access record.
647     ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze());
648     const uint64_t expected_num_cfs = 1;
649     std::vector<uint64_t> expected_fds{kSSTStoringOddKeys, kSSTStoringEvenKeys};
650     const std::vector<TraceType> expected_types{
651         TraceType::kBlockTraceUncompressionDictBlock,
652         TraceType::kBlockTraceDataBlock, TraceType::kBlockTraceFilterBlock,
653         TraceType::kBlockTraceIndexBlock,
654         TraceType::kBlockTraceRangeDeletionBlock};
655     const uint64_t expected_num_keys_per_type = 5;
656 
657     auto& stats = analyzer.TEST_cf_aggregates_map();
658     ASSERT_EQ(expected_num_cfs, stats.size());
659     ASSERT_TRUE(stats.find(kDefaultColumnFamilyName) != stats.end());
660     auto& cf_stats = stats.find(kDefaultColumnFamilyName)->second;
661     ASSERT_EQ(expected_fds.size(), cf_stats.fd_aggregates_map.size());
662     for (auto fd_id : expected_fds) {
663       ASSERT_TRUE(cf_stats.fd_aggregates_map.find(fd_id) !=
664                   cf_stats.fd_aggregates_map.end());
665       ASSERT_EQ(kLevel, cf_stats.fd_aggregates_map.find(fd_id)->second.level);
666       auto& block_type_aggregates_map = cf_stats.fd_aggregates_map.find(fd_id)
667                                             ->second.block_type_aggregates_map;
668       ASSERT_EQ(expected_types.size(), block_type_aggregates_map.size());
669       uint32_t key_id = 0;
670       for (auto type : expected_types) {
671         ASSERT_TRUE(block_type_aggregates_map.find(type) !=
672                     block_type_aggregates_map.end());
673         auto& block_access_info_map =
674             block_type_aggregates_map.find(type)->second.block_access_info_map;
675         // Each block type has 5 blocks.
676         ASSERT_EQ(expected_num_keys_per_type, block_access_info_map.size());
677         for (uint32_t i = 0; i < 10; i++) {
678           // Verify that odd numbered blocks are stored in kSSTStoringOddKeys
679           // and even numbered blocks are stored in kSSTStoringEvenKeys.
680           auto key_id_str = kBlockKeyPrefix + std::to_string(key_id);
681           if (fd_id == kSSTStoringOddKeys) {
682             if (key_id % 2 == 1) {
683               AssertBlockAccessInfo(key_id, type, block_access_info_map);
684             } else {
685               ASSERT_TRUE(block_access_info_map.find(key_id_str) ==
686                           block_access_info_map.end());
687             }
688           } else {
689             if (key_id % 2 == 1) {
690               ASSERT_TRUE(block_access_info_map.find(key_id_str) ==
691                           block_access_info_map.end());
692             } else {
693               AssertBlockAccessInfo(key_id, type, block_access_info_map);
694             }
695           }
696           key_id++;
697         }
698       }
699     }
700   }
701 }
702 
703 }  // namespace ROCKSDB_NAMESPACE
704 
main(int argc,char ** argv)705 int main(int argc, char** argv) {
706   ::testing::InitGoogleTest(&argc, argv);
707   return RUN_ALL_TESTS();
708 }
#endif  // GFLAGS
710 #else
711 #include <stdio.h>
// Fallback entry point for ROCKSDB_LITE builds: the tests are preprocessed
// out, so print a notice and exit with status 0.
int main(int /*argc*/, char** /*argv*/) {
  fputs("block_cache_trace_analyzer_test is not supported in ROCKSDB_LITE\n",
        stderr);
  return 0;
}
717 #endif  // ROCKSDB_LITE
718