1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #include "trace_replay/block_cache_tracer.h"
7 #include "rocksdb/env.h"
8 #include "rocksdb/status.h"
9 #include "rocksdb/trace_reader_writer.h"
10 #include "test_util/testharness.h"
11 #include "test_util/testutil.h"
12 
13 namespace ROCKSDB_NAMESPACE {
14 
namespace {
// Shared fixture values used to build trace records and to verify them after
// they are read back.

// Identity of the simulated block. Per-record values are derived by appending
// or adding the record's key id.
const std::string kBlockKeyPrefix = "test-block-";
constexpr uint64_t kBlockSize = 1024;
constexpr uint64_t kSSTFDNumber = 100;

// Column family and LSM level recorded on every access.
constexpr uint32_t kCFId = 0;
constexpr uint32_t kLevel = 1;

// Values describing the key referenced by a lookup.
const std::string kRefKeyPrefix = "test-get-";
constexpr uint64_t kNumKeysInBlock = 1024;
constexpr uint64_t kReferencedDataSize = 10;
}  // namespace
25 
26 class BlockCacheTracerTest : public testing::Test {
27  public:
BlockCacheTracerTest()28   BlockCacheTracerTest() {
29     test_path_ = test::PerThreadDBPath("block_cache_tracer_test");
30     env_ = ROCKSDB_NAMESPACE::Env::Default();
31     EXPECT_OK(env_->CreateDir(test_path_));
32     trace_file_path_ = test_path_ + "/block_cache_trace";
33   }
34 
~BlockCacheTracerTest()35   ~BlockCacheTracerTest() override {
36     EXPECT_OK(env_->DeleteFile(trace_file_path_));
37     EXPECT_OK(env_->DeleteDir(test_path_));
38   }
39 
GetCaller(uint32_t key_id)40   TableReaderCaller GetCaller(uint32_t key_id) {
41     uint32_t n = key_id % 5;
42     switch (n) {
43       case 0:
44         return TableReaderCaller::kPrefetch;
45       case 1:
46         return TableReaderCaller::kCompaction;
47       case 2:
48         return TableReaderCaller::kUserGet;
49       case 3:
50         return TableReaderCaller::kUserMultiGet;
51       case 4:
52         return TableReaderCaller::kUserIterator;
53     }
54     assert(false);
55   }
56 
WriteBlockAccess(BlockCacheTraceWriter * writer,uint32_t from_key_id,TraceType block_type,uint32_t nblocks)57   void WriteBlockAccess(BlockCacheTraceWriter* writer, uint32_t from_key_id,
58                         TraceType block_type, uint32_t nblocks) {
59     assert(writer);
60     for (uint32_t i = 0; i < nblocks; i++) {
61       uint32_t key_id = from_key_id + i;
62       BlockCacheTraceRecord record;
63       record.block_type = block_type;
64       record.block_size = kBlockSize + key_id;
65       record.block_key = (kBlockKeyPrefix + std::to_string(key_id));
66       record.access_timestamp = env_->NowMicros();
67       record.cf_id = kCFId;
68       record.cf_name = kDefaultColumnFamilyName;
69       record.caller = GetCaller(key_id);
70       record.level = kLevel;
71       record.sst_fd_number = kSSTFDNumber + key_id;
72       record.is_cache_hit = Boolean::kFalse;
73       record.no_insert = Boolean::kFalse;
74       // Provide get_id for all callers. The writer should only write get_id
75       // when the caller is either GET or MGET.
76       record.get_id = key_id + 1;
77       record.get_from_user_specified_snapshot = Boolean::kTrue;
78       // Provide these fields for all block types.
79       // The writer should only write these fields for data blocks and the
80       // caller is either GET or MGET.
81       record.referenced_key = (kRefKeyPrefix + std::to_string(key_id));
82       record.referenced_key_exist_in_block = Boolean::kTrue;
83       record.num_keys_in_block = kNumKeysInBlock;
84       record.referenced_data_size = kReferencedDataSize + key_id;
85       ASSERT_OK(writer->WriteBlockAccess(
86           record, record.block_key, record.cf_name, record.referenced_key));
87     }
88   }
89 
GenerateAccessRecord()90   BlockCacheTraceRecord GenerateAccessRecord() {
91     uint32_t key_id = 0;
92     BlockCacheTraceRecord record;
93     record.block_type = TraceType::kBlockTraceDataBlock;
94     record.block_size = kBlockSize;
95     record.block_key = kBlockKeyPrefix + std::to_string(key_id);
96     record.access_timestamp = env_->NowMicros();
97     record.cf_id = kCFId;
98     record.cf_name = kDefaultColumnFamilyName;
99     record.caller = GetCaller(key_id);
100     record.level = kLevel;
101     record.sst_fd_number = kSSTFDNumber + key_id;
102     record.is_cache_hit = Boolean::kFalse;
103     record.no_insert = Boolean::kFalse;
104     record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
105     record.referenced_key_exist_in_block = Boolean::kTrue;
106     record.num_keys_in_block = kNumKeysInBlock;
107     return record;
108   }
109 
VerifyAccess(BlockCacheTraceReader * reader,uint32_t from_key_id,TraceType block_type,uint32_t nblocks)110   void VerifyAccess(BlockCacheTraceReader* reader, uint32_t from_key_id,
111                     TraceType block_type, uint32_t nblocks) {
112     assert(reader);
113     for (uint32_t i = 0; i < nblocks; i++) {
114       uint32_t key_id = from_key_id + i;
115       BlockCacheTraceRecord record;
116       ASSERT_OK(reader->ReadAccess(&record));
117       ASSERT_EQ(block_type, record.block_type);
118       ASSERT_EQ(kBlockSize + key_id, record.block_size);
119       ASSERT_EQ(kBlockKeyPrefix + std::to_string(key_id), record.block_key);
120       ASSERT_EQ(kCFId, record.cf_id);
121       ASSERT_EQ(kDefaultColumnFamilyName, record.cf_name);
122       ASSERT_EQ(GetCaller(key_id), record.caller);
123       ASSERT_EQ(kLevel, record.level);
124       ASSERT_EQ(kSSTFDNumber + key_id, record.sst_fd_number);
125       ASSERT_EQ(Boolean::kFalse, record.is_cache_hit);
126       ASSERT_EQ(Boolean::kFalse, record.no_insert);
127       if (record.caller == TableReaderCaller::kUserGet ||
128           record.caller == TableReaderCaller::kUserMultiGet) {
129         ASSERT_EQ(key_id + 1, record.get_id);
130         ASSERT_EQ(Boolean::kTrue, record.get_from_user_specified_snapshot);
131         ASSERT_EQ(kRefKeyPrefix + std::to_string(key_id),
132                   record.referenced_key);
133       } else {
134         ASSERT_EQ(BlockCacheTraceHelper::kReservedGetId, record.get_id);
135         ASSERT_EQ(Boolean::kFalse, record.get_from_user_specified_snapshot);
136         ASSERT_EQ("", record.referenced_key);
137       }
138       if (block_type == TraceType::kBlockTraceDataBlock &&
139           (record.caller == TableReaderCaller::kUserGet ||
140            record.caller == TableReaderCaller::kUserMultiGet)) {
141         ASSERT_EQ(Boolean::kTrue, record.referenced_key_exist_in_block);
142         ASSERT_EQ(kNumKeysInBlock, record.num_keys_in_block);
143         ASSERT_EQ(kReferencedDataSize + key_id, record.referenced_data_size);
144         continue;
145       }
146       ASSERT_EQ(Boolean::kFalse, record.referenced_key_exist_in_block);
147       ASSERT_EQ(0, record.num_keys_in_block);
148       ASSERT_EQ(0, record.referenced_data_size);
149     }
150   }
151 
152   Env* env_;
153   EnvOptions env_options_;
154   std::string trace_file_path_;
155   std::string test_path_;
156 };
157 
// Writing through a tracer that was never started must succeed without
// putting anything into the trace file.
TEST_F(BlockCacheTracerTest, AtomicWriteBeforeStartTrace) {
  const BlockCacheTraceRecord access = GenerateAccessRecord();
  {
    // The file writer is created but deliberately never handed to the tracer.
    std::unique_ptr<TraceWriter> file_writer;
    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
                                 &file_writer));
    BlockCacheTracer tracer;
    // StartTrace has not been called, so the record should NOT be written to
    // the trace file; the call is still expected to report success.
    Status write_status = tracer.WriteBlockAccess(
        access, access.block_key, access.cf_name, access.referenced_key);
    ASSERT_OK(write_status);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }
  {
    // The trace file must contain nothing, so even reading the header fails.
    std::unique_ptr<TraceReader> file_reader;
    ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_,
                                 &file_reader));
    BlockCacheTraceReader reader(std::move(file_reader));
    BlockCacheTraceHeader header;
    ASSERT_NOK(reader.ReadHeader(&header));
  }
}
181 
// A single access written through a started tracer must be read back as a
// header followed by exactly one record.
TEST_F(BlockCacheTracerTest, AtomicWrite) {
  BlockCacheTraceRecord access = GenerateAccessRecord();
  {
    std::unique_ptr<TraceWriter> file_writer;
    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
                                 &file_writer));
    TraceOptions options;
    BlockCacheTracer tracer;
    ASSERT_OK(tracer.StartTrace(env_, options, std::move(file_writer)));
    Status write_status = tracer.WriteBlockAccess(
        access, access.block_key, access.cf_name, access.referenced_key);
    ASSERT_OK(write_status);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }
  {
    // Expect the header, one record, and then a failing read.
    std::unique_ptr<TraceReader> file_reader;
    ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_,
                                 &file_reader));
    BlockCacheTraceReader reader(std::move(file_reader));
    BlockCacheTraceHeader header;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, header.rocksdb_major_version);
    ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version);
    VerifyAccess(&reader, 0, TraceType::kBlockTraceDataBlock, 1);
    ASSERT_NOK(reader.ReadAccess(&access));
  }
}
209 
// Starting a trace on a tracer that is already tracing must be rejected.
TEST_F(BlockCacheTracerTest, ConsecutiveStartTrace) {
  std::unique_ptr<TraceWriter> file_writer;
  ASSERT_OK(
      NewFileTraceWriter(env_, env_options_, trace_file_path_, &file_writer));
  TraceOptions options;
  BlockCacheTracer tracer;

  Status first_start = tracer.StartTrace(env_, options, std::move(file_writer));
  ASSERT_OK(first_start);

  // The same pointer variable is passed again although it was already handed
  // to the first call; the second start is expected to fail regardless.
  Status second_start =
      tracer.StartTrace(env_, options, std::move(file_writer));
  ASSERT_NOK(second_start);

  ASSERT_OK(env_->FileExists(trace_file_path_));
}
220 
// Accesses reported after EndTrace must be accepted but must not be appended
// to the trace file.
TEST_F(BlockCacheTracerTest, AtomicNoWriteAfterEndTrace) {
  BlockCacheTraceRecord access = GenerateAccessRecord();
  {
    std::unique_ptr<TraceWriter> file_writer;
    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
                                 &file_writer));
    TraceOptions options;
    BlockCacheTracer tracer;
    ASSERT_OK(tracer.StartTrace(env_, options, std::move(file_writer)));

    // First write happens while tracing is active.
    Status traced_write = tracer.WriteBlockAccess(
        access, access.block_key, access.cf_name, access.referenced_key);
    ASSERT_OK(traced_write);

    tracer.EndTrace();

    // Second write happens after EndTrace: it still reports success, but the
    // record should not reach the file.
    Status untraced_write = tracer.WriteBlockAccess(
        access, access.block_key, access.cf_name, access.referenced_key);
    ASSERT_OK(untraced_write);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }
  {
    // Only the first record may be present.
    std::unique_ptr<TraceReader> file_reader;
    ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_,
                                 &file_reader));
    BlockCacheTraceReader reader(std::move(file_reader));
    BlockCacheTraceHeader header;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, header.rocksdb_major_version);
    ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version);
    VerifyAccess(&reader, 0, TraceType::kBlockTraceDataBlock, 1);
    ASSERT_NOK(reader.ReadAccess(&access));
  }
}
253 
// NextGetId() hands out 1, 2, ... only while a trace is active; outside of a
// trace it returns 0, and a new trace restarts the numbering at 1.
TEST_F(BlockCacheTracerTest, NextGetId) {
  BlockCacheTracer tracer;
  {
    std::unique_ptr<TraceWriter> file_writer;
    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
                                 &file_writer));
    TraceOptions options;

    // Before StartTrace every call yields 0.
    ASSERT_EQ(0, tracer.NextGetId());
    ASSERT_EQ(0, tracer.NextGetId());

    // While tracing, ids increase from 1.
    ASSERT_OK(tracer.StartTrace(env_, options, std::move(file_writer)));
    ASSERT_EQ(1, tracer.NextGetId());
    ASSERT_EQ(2, tracer.NextGetId());

    // After EndTrace the id is 0 again.
    tracer.EndTrace();
    ASSERT_EQ(0, tracer.NextGetId());
  }

  // A second trace on the same tracer starts over at 1.
  {
    std::unique_ptr<TraceWriter> file_writer;
    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
                                 &file_writer));
    TraceOptions options;
    ASSERT_OK(tracer.StartTrace(env_, options, std::move(file_writer)));
    ASSERT_EQ(1, tracer.NextGetId());
  }
}
282 
// Round-trips ten accesses for each of five block types through a trace file
// and checks that they are read back in the order they were written.
TEST_F(BlockCacheTracerTest, MixedBlocks) {
  // Block types in write order. Each type uses its own range of ten key ids:
  // 0-9, 10-19, 20-29, 30-39 and 40-49.
  const TraceType kBlockTypes[] = {
      TraceType::kBlockTraceUncompressionDictBlock,
      TraceType::kBlockTraceDataBlock,
      TraceType::kBlockTraceFilterBlock,
      TraceType::kBlockTraceIndexBlock,
      TraceType::kBlockTraceRangeDeletionBlock,
  };
  const uint32_t kBlocksPerType = 10;

  {
    // Produce the trace file: header first, then every group of accesses.
    std::unique_ptr<TraceWriter> file_writer;
    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
                                 &file_writer));
    TraceOptions options;
    BlockCacheTraceWriter writer(env_, options, std::move(file_writer));
    ASSERT_OK(writer.WriteHeader());
    uint32_t first_key_id = 0;
    for (TraceType type : kBlockTypes) {
      WriteBlockAccess(&writer, first_key_id, type, kBlocksPerType);
      first_key_id += kBlocksPerType;
    }
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }

  {
    // Read the file back and compare it group by group.
    std::unique_ptr<TraceReader> file_reader;
    ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_,
                                 &file_reader));
    BlockCacheTraceReader reader(std::move(file_reader));
    BlockCacheTraceHeader header;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, header.rocksdb_major_version);
    ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version);
    uint32_t first_key_id = 0;
    for (TraceType type : kBlockTypes) {
      VerifyAccess(&reader, first_key_id, type, kBlocksPerType);
      first_key_id += kBlocksPerType;
    }
    // The trace is exhausted, so one more read must report an error.
    BlockCacheTraceRecord extra;
    ASSERT_NOK(reader.ReadAccess(&extra));
  }
}
323 
// Writes one record with the human readable trace writer and checks that the
// human readable reader reconstructs an equivalent record.
TEST_F(BlockCacheTracerTest, HumanReadableTrace) {
  BlockCacheTraceRecord record = GenerateAccessRecord();
  record.get_id = 1;
  record.referenced_key = "";
  record.caller = TableReaderCaller::kUserGet;
  record.get_from_user_specified_snapshot = Boolean::kTrue;
  record.referenced_data_size = kReferencedDataSize;
  // Rebuild the referenced key from scratch as: fixed32 111, the
  // length-prefixed user key "get_key", then fixed64 (2 << 8).
  PutFixed32(&record.referenced_key, 111);
  PutLengthPrefixedSlice(&record.referenced_key, "get_key");
  PutFixed64(&record.referenced_key, 2 << 8);
  // Extend the generated block key with a length-prefixed "block_key" and the
  // varint 333, which the offset check below expects to get back.
  PutLengthPrefixedSlice(&record.block_key, "block_key");
  PutVarint64(&record.block_key, 333);
  {
    // Generate a human readable trace file.
    BlockCacheHumanReadableTraceWriter writer;
    ASSERT_OK(writer.NewWritableFile(trace_file_path_, env_));
    ASSERT_OK(writer.WriteHumanReadableTraceRecord(record, 1, 1));
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }
  {
    BlockCacheHumanReadableTraceReader reader(trace_file_path_);
    BlockCacheTraceHeader header;
    BlockCacheTraceRecord read_record;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_OK(reader.ReadAccess(&read_record));
    ASSERT_EQ(TraceType::kBlockTraceDataBlock, read_record.block_type);
    ASSERT_EQ(kBlockSize, read_record.block_size);
    ASSERT_EQ(kCFId, read_record.cf_id);
    ASSERT_EQ(kDefaultColumnFamilyName, read_record.cf_name);
    ASSERT_EQ(TableReaderCaller::kUserGet, read_record.caller);
    ASSERT_EQ(kLevel, read_record.level);
    ASSERT_EQ(kSSTFDNumber, read_record.sst_fd_number);
    ASSERT_EQ(Boolean::kFalse, read_record.is_cache_hit);
    ASSERT_EQ(Boolean::kFalse, read_record.no_insert);
    ASSERT_EQ(1, read_record.get_id);
    ASSERT_EQ(Boolean::kTrue, read_record.get_from_user_specified_snapshot);
    ASSERT_EQ(Boolean::kTrue, read_record.referenced_key_exist_in_block);
    ASSERT_EQ(kNumKeysInBlock, read_record.num_keys_in_block);
    ASSERT_EQ(kReferencedDataSize, read_record.referenced_data_size);
    // Only the key sizes are compared, not the key bytes.
    ASSERT_EQ(record.block_key.size(), read_record.block_key.size());
    // Compare against the record that was read back; comparing the written
    // record with itself would always pass.
    ASSERT_EQ(record.referenced_key.size(), read_record.referenced_key.size());
    // The helpers are expected to report 112 and 3 for the 111 and 2 encoded
    // in the referenced key above.
    ASSERT_EQ(112, BlockCacheTraceHelper::GetTableId(read_record));
    ASSERT_EQ(3, BlockCacheTraceHelper::GetSequenceNumber(read_record));
    ASSERT_EQ(333, BlockCacheTraceHelper::GetBlockOffsetInFile(read_record));
    // Read again should fail.
    ASSERT_NOK(reader.ReadAccess(&read_record));
  }
}
372 
373 }  // namespace ROCKSDB_NAMESPACE
374 
main(int argc,char ** argv)375 int main(int argc, char** argv) {
376   ::testing::InitGoogleTest(&argc, argv);
377   return RUN_ALL_TESTS();
378 }
379