1
2 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
3 // This source code is licensed under both the GPLv2 (found in the
4 // COPYING file in the root directory) and Apache 2.0 License
5 // (found in the LICENSE.Apache file in the root directory).
6 //
7 #ifndef ROCKSDB_LITE
8
9 #include "tools/sst_dump_tool_imp.h"
10
11 #include <cinttypes>
12 #include <iostream>
13 #include <map>
14 #include <memory>
15 #include <sstream>
16 #include <vector>
17
18 #include "db/blob_index.h"
19 #include "db/memtable.h"
20 #include "db/write_batch_internal.h"
21 #include "env/composite_env_wrapper.h"
22 #include "options/cf_options.h"
23 #include "rocksdb/db.h"
24 #include "rocksdb/env.h"
25 #include "rocksdb/iterator.h"
26 #include "rocksdb/slice_transform.h"
27 #include "rocksdb/status.h"
28 #include "rocksdb/table_properties.h"
29 #include "rocksdb/utilities/ldb_cmd.h"
30 #include "table/block_based/block.h"
31 #include "table/block_based/block_based_table_builder.h"
32 #include "table/block_based/block_based_table_factory.h"
33 #include "table/block_based/block_builder.h"
34 #include "table/format.h"
35 #include "table/meta_blocks.h"
36 #include "table/plain/plain_table_factory.h"
37 #include "table/table_reader.h"
38 #include "util/compression.h"
39 #include "util/random.h"
40
41 #include "port/port.h"
42
43 namespace ROCKSDB_NAMESPACE {
44
SstFileDumper(const Options & options,const std::string & file_path,bool verify_checksum,bool output_hex,bool decode_blob_index)45 SstFileDumper::SstFileDumper(const Options& options,
46 const std::string& file_path, bool verify_checksum,
47 bool output_hex, bool decode_blob_index)
48 : file_name_(file_path),
49 read_num_(0),
50 verify_checksum_(verify_checksum),
51 output_hex_(output_hex),
52 decode_blob_index_(decode_blob_index),
53 options_(options),
54 ioptions_(options_),
55 moptions_(ColumnFamilyOptions(options_)),
56 internal_comparator_(BytewiseComparator()) {
57 fprintf(stdout, "Process %s\n", file_path.c_str());
58 init_result_ = GetTableReader(file_name_);
59 }
60
61 extern const uint64_t kBlockBasedTableMagicNumber;
62 extern const uint64_t kLegacyBlockBasedTableMagicNumber;
63 extern const uint64_t kPlainTableMagicNumber;
64 extern const uint64_t kLegacyPlainTableMagicNumber;
65
66 const char* testFileName = "test_file_name";
67
68 static const std::vector<std::pair<CompressionType, const char*>>
69 kCompressions = {
70 {CompressionType::kNoCompression, "kNoCompression"},
71 {CompressionType::kSnappyCompression, "kSnappyCompression"},
72 {CompressionType::kZlibCompression, "kZlibCompression"},
73 {CompressionType::kBZip2Compression, "kBZip2Compression"},
74 {CompressionType::kLZ4Compression, "kLZ4Compression"},
75 {CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
76 {CompressionType::kXpressCompression, "kXpressCompression"},
77 {CompressionType::kZSTD, "kZSTD"}};
78
GetTableReader(const std::string & file_path)79 Status SstFileDumper::GetTableReader(const std::string& file_path) {
80 // Warning about 'magic_number' being uninitialized shows up only in UBsan
81 // builds. Though access is guarded by 's.ok()' checks, fix the issue to
82 // avoid any warnings.
83 uint64_t magic_number = Footer::kInvalidTableMagicNumber;
84
85 // read table magic number
86 Footer footer;
87
88 std::unique_ptr<RandomAccessFile> file;
89 uint64_t file_size = 0;
90 Status s = options_.env->NewRandomAccessFile(file_path, &file, soptions_);
91 if (s.ok()) {
92 s = options_.env->GetFileSize(file_path, &file_size);
93 }
94
95 file_.reset(new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file),
96 file_path));
97
98 if (s.ok()) {
99 s = ReadFooterFromFile(file_.get(), nullptr /* prefetch_buffer */,
100 file_size, &footer);
101 }
102 if (s.ok()) {
103 magic_number = footer.table_magic_number();
104 }
105
106 if (s.ok()) {
107 if (magic_number == kPlainTableMagicNumber ||
108 magic_number == kLegacyPlainTableMagicNumber) {
109 soptions_.use_mmap_reads = true;
110 options_.env->NewRandomAccessFile(file_path, &file, soptions_);
111 file_.reset(new RandomAccessFileReader(
112 NewLegacyRandomAccessFileWrapper(file), file_path));
113 }
114 options_.comparator = &internal_comparator_;
115 // For old sst format, ReadTableProperties might fail but file can be read
116 if (ReadTableProperties(magic_number, file_.get(), file_size).ok()) {
117 SetTableOptionsByMagicNumber(magic_number);
118 } else {
119 SetOldTableOptions();
120 }
121 }
122
123 if (s.ok()) {
124 s = NewTableReader(ioptions_, soptions_, internal_comparator_, file_size,
125 &table_reader_);
126 }
127 return s;
128 }
129
NewTableReader(const ImmutableCFOptions &,const EnvOptions &,const InternalKeyComparator &,uint64_t file_size,std::unique_ptr<TableReader> *)130 Status SstFileDumper::NewTableReader(
131 const ImmutableCFOptions& /*ioptions*/, const EnvOptions& /*soptions*/,
132 const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size,
133 std::unique_ptr<TableReader>* /*table_reader*/) {
134 // We need to turn off pre-fetching of index and filter nodes for
135 // BlockBasedTable
136 if (BlockBasedTableFactory::kName == options_.table_factory->Name()) {
137 return options_.table_factory->NewTableReader(
138 TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(),
139 soptions_, internal_comparator_),
140 std::move(file_), file_size, &table_reader_, /*enable_prefetch=*/false);
141 }
142
143 // For all other factory implementation
144 return options_.table_factory->NewTableReader(
145 TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(), soptions_,
146 internal_comparator_),
147 std::move(file_), file_size, &table_reader_);
148 }
149
VerifyChecksum()150 Status SstFileDumper::VerifyChecksum() {
151 // We could pass specific readahead setting into read options if needed.
152 return table_reader_->VerifyChecksum(ReadOptions(),
153 TableReaderCaller::kSSTDumpTool);
154 }
155
DumpTable(const std::string & out_filename)156 Status SstFileDumper::DumpTable(const std::string& out_filename) {
157 std::unique_ptr<WritableFile> out_file;
158 Env* env = options_.env;
159 env->NewWritableFile(out_filename, &out_file, soptions_);
160 Status s = table_reader_->DumpTable(out_file.get());
161 out_file->Close();
162 return s;
163 }
164
CalculateCompressedTableSize(const TableBuilderOptions & tb_options,size_t block_size,uint64_t * num_data_blocks)165 uint64_t SstFileDumper::CalculateCompressedTableSize(
166 const TableBuilderOptions& tb_options, size_t block_size,
167 uint64_t* num_data_blocks) {
168 std::unique_ptr<WritableFile> out_file;
169 std::unique_ptr<Env> env(NewMemEnv(options_.env));
170 env->NewWritableFile(testFileName, &out_file, soptions_);
171 std::unique_ptr<WritableFileWriter> dest_writer;
172 dest_writer.reset(
173 new WritableFileWriter(NewLegacyWritableFileWrapper(std::move(out_file)),
174 testFileName, soptions_));
175 BlockBasedTableOptions table_options;
176 table_options.block_size = block_size;
177 BlockBasedTableFactory block_based_tf(table_options);
178 std::unique_ptr<TableBuilder> table_builder;
179 table_builder.reset(block_based_tf.NewTableBuilder(
180 tb_options,
181 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
182 dest_writer.get()));
183 std::unique_ptr<InternalIterator> iter(table_reader_->NewIterator(
184 ReadOptions(), moptions_.prefix_extractor.get(), /*arena=*/nullptr,
185 /*skip_filters=*/false, TableReaderCaller::kSSTDumpTool));
186 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
187 if (!iter->status().ok()) {
188 fputs(iter->status().ToString().c_str(), stderr);
189 exit(1);
190 }
191 table_builder->Add(iter->key(), iter->value());
192 }
193 Status s = table_builder->Finish();
194 if (!s.ok()) {
195 fputs(s.ToString().c_str(), stderr);
196 exit(1);
197 }
198 uint64_t size = table_builder->FileSize();
199 assert(num_data_blocks != nullptr);
200 *num_data_blocks = table_builder->GetTableProperties().num_data_blocks;
201 env->DeleteFile(testFileName);
202 return size;
203 }
204
ShowAllCompressionSizes(size_t block_size,const std::vector<std::pair<CompressionType,const char * >> & compression_types)205 int SstFileDumper::ShowAllCompressionSizes(
206 size_t block_size,
207 const std::vector<std::pair<CompressionType, const char*>>&
208 compression_types) {
209 ReadOptions read_options;
210 Options opts;
211 opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
212 opts.statistics->set_stats_level(StatsLevel::kAll);
213 const ImmutableCFOptions imoptions(opts);
214 const ColumnFamilyOptions cfo(opts);
215 const MutableCFOptions moptions(cfo);
216 ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator);
217 std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
218 block_based_table_factories;
219
220 fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
221
222 for (auto& i : compression_types) {
223 if (CompressionTypeSupported(i.first)) {
224 CompressionOptions compress_opt;
225 std::string column_family_name;
226 int unknown_level = -1;
227 TableBuilderOptions tb_opts(
228 imoptions, moptions, ikc, &block_based_table_factories, i.first,
229 0 /* sample_for_compression */, compress_opt,
230 false /* skip_filters */, column_family_name, unknown_level);
231 uint64_t num_data_blocks = 0;
232 uint64_t file_size =
233 CalculateCompressedTableSize(tb_opts, block_size, &num_data_blocks);
234 fprintf(stdout, "Compression: %-24s", i.second);
235 fprintf(stdout, " Size: %10" PRIu64, file_size);
236 fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks);
237 const uint64_t compressed_blocks =
238 opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED);
239 const uint64_t not_compressed_blocks =
240 opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_NOT_COMPRESSED);
241 // When the option enable_index_compression is true,
242 // NUMBER_BLOCK_COMPRESSED is incremented for index block(s).
243 if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) {
244 num_data_blocks = compressed_blocks + not_compressed_blocks;
245 }
246 const uint64_t ratio_not_compressed_blocks =
247 (num_data_blocks - compressed_blocks) - not_compressed_blocks;
248 const double compressed_pcnt =
249 (0 == num_data_blocks) ? 0.0
250 : ((static_cast<double>(compressed_blocks) /
251 static_cast<double>(num_data_blocks)) *
252 100.0);
253 const double ratio_not_compressed_pcnt =
254 (0 == num_data_blocks)
255 ? 0.0
256 : ((static_cast<double>(ratio_not_compressed_blocks) /
257 static_cast<double>(num_data_blocks)) *
258 100.0);
259 const double not_compressed_pcnt =
260 (0 == num_data_blocks)
261 ? 0.0
262 : ((static_cast<double>(not_compressed_blocks) /
263 static_cast<double>(num_data_blocks)) *
264 100.0);
265 fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks,
266 compressed_pcnt);
267 fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)",
268 ratio_not_compressed_blocks, ratio_not_compressed_pcnt);
269 fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n",
270 not_compressed_blocks, not_compressed_pcnt);
271 } else {
272 fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
273 }
274 }
275 return 0;
276 }
ReadTableProperties(uint64_t table_magic_number,RandomAccessFileReader * file,uint64_t file_size)277 Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number,
278 RandomAccessFileReader* file,
279 uint64_t file_size) {
280 TableProperties* table_properties = nullptr;
281 Status s = ROCKSDB_NAMESPACE::ReadTableProperties(
282 file, file_size, table_magic_number, ioptions_, &table_properties);
283 if (s.ok()) {
284 table_properties_.reset(table_properties);
285 } else {
286 fprintf(stdout, "Not able to read table properties\n");
287 }
288 return s;
289 }
290
SetTableOptionsByMagicNumber(uint64_t table_magic_number)291 Status SstFileDumper::SetTableOptionsByMagicNumber(
292 uint64_t table_magic_number) {
293 assert(table_properties_);
294 if (table_magic_number == kBlockBasedTableMagicNumber ||
295 table_magic_number == kLegacyBlockBasedTableMagicNumber) {
296 options_.table_factory = std::make_shared<BlockBasedTableFactory>();
297 fprintf(stdout, "Sst file format: block-based\n");
298 auto& props = table_properties_->user_collected_properties;
299 auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
300 if (pos != props.end()) {
301 auto index_type_on_file = static_cast<BlockBasedTableOptions::IndexType>(
302 DecodeFixed32(pos->second.c_str()));
303 if (index_type_on_file ==
304 BlockBasedTableOptions::IndexType::kHashSearch) {
305 options_.prefix_extractor.reset(NewNoopTransform());
306 }
307 }
308 } else if (table_magic_number == kPlainTableMagicNumber ||
309 table_magic_number == kLegacyPlainTableMagicNumber) {
310 options_.allow_mmap_reads = true;
311
312 PlainTableOptions plain_table_options;
313 plain_table_options.user_key_len = kPlainTableVariableLength;
314 plain_table_options.bloom_bits_per_key = 0;
315 plain_table_options.hash_table_ratio = 0;
316 plain_table_options.index_sparseness = 1;
317 plain_table_options.huge_page_tlb_size = 0;
318 plain_table_options.encoding_type = kPlain;
319 plain_table_options.full_scan_mode = true;
320
321 options_.table_factory.reset(NewPlainTableFactory(plain_table_options));
322 fprintf(stdout, "Sst file format: plain table\n");
323 } else {
324 char error_msg_buffer[80];
325 snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1,
326 "Unsupported table magic number --- %lx",
327 (long)table_magic_number);
328 return Status::InvalidArgument(error_msg_buffer);
329 }
330
331 return Status::OK();
332 }
333
SetOldTableOptions()334 Status SstFileDumper::SetOldTableOptions() {
335 assert(table_properties_ == nullptr);
336 options_.table_factory = std::make_shared<BlockBasedTableFactory>();
337 fprintf(stdout, "Sst file format: block-based(old version)\n");
338
339 return Status::OK();
340 }
341
ReadSequential(bool print_kv,uint64_t read_num,bool has_from,const std::string & from_key,bool has_to,const std::string & to_key,bool use_from_as_prefix)342 Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num,
343 bool has_from, const std::string& from_key,
344 bool has_to, const std::string& to_key,
345 bool use_from_as_prefix) {
346 if (!table_reader_) {
347 return init_result_;
348 }
349
350 InternalIterator* iter = table_reader_->NewIterator(
351 ReadOptions(verify_checksum_, false), moptions_.prefix_extractor.get(),
352 /*arena=*/nullptr, /*skip_filters=*/false,
353 TableReaderCaller::kSSTDumpTool);
354 uint64_t i = 0;
355 if (has_from) {
356 InternalKey ikey;
357 ikey.SetMinPossibleForUserKey(from_key);
358 iter->Seek(ikey.Encode());
359 } else {
360 iter->SeekToFirst();
361 }
362 for (; iter->Valid(); iter->Next()) {
363 Slice key = iter->key();
364 Slice value = iter->value();
365 ++i;
366 if (read_num > 0 && i > read_num)
367 break;
368
369 ParsedInternalKey ikey;
370 if (!ParseInternalKey(key, &ikey)) {
371 std::cerr << "Internal Key ["
372 << key.ToString(true /* in hex*/)
373 << "] parse error!\n";
374 continue;
375 }
376
377 // the key returned is not prefixed with out 'from' key
378 if (use_from_as_prefix && !ikey.user_key.starts_with(from_key)) {
379 break;
380 }
381
382 // If end marker was specified, we stop before it
383 if (has_to && BytewiseComparator()->Compare(ikey.user_key, to_key) >= 0) {
384 break;
385 }
386
387 if (print_kv) {
388 if (!decode_blob_index_ || ikey.type != kTypeBlobIndex) {
389 fprintf(stdout, "%s => %s\n", ikey.DebugString(output_hex_).c_str(),
390 value.ToString(output_hex_).c_str());
391 } else {
392 BlobIndex blob_index;
393
394 const Status s = blob_index.DecodeFrom(value);
395 if (!s.ok()) {
396 fprintf(stderr, "%s => error decoding blob index\n",
397 ikey.DebugString(output_hex_).c_str());
398 continue;
399 }
400
401 fprintf(stdout, "%s => %s\n", ikey.DebugString(output_hex_).c_str(),
402 blob_index.DebugString(output_hex_).c_str());
403 }
404 }
405 }
406
407 read_num_ += i;
408
409 Status ret = iter->status();
410 delete iter;
411 return ret;
412 }
413
ReadTableProperties(std::shared_ptr<const TableProperties> * table_properties)414 Status SstFileDumper::ReadTableProperties(
415 std::shared_ptr<const TableProperties>* table_properties) {
416 if (!table_reader_) {
417 return init_result_;
418 }
419
420 *table_properties = table_reader_->GetTableProperties();
421 return init_result_;
422 }
423
namespace {

// Prints the command line reference of sst_dump to stderr.
//
// The synopsis and the --command entry list the same set of commands, and
// --show_summary (accepted by the argument parser) is documented as well.
void print_help() {
  fprintf(
      stderr,
      R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|verify|recompress]
    --file=<data_dir_OR_sst_file>
      Path to SST file or directory containing SST files

    --env_uri=<uri of underlying Env>
      URI of underlying Env

    --command=check|scan|raw|verify|recompress
        check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
        scan: Iterate over entries in files and print them to screen
        raw: Dump all the table contents to <file_name>_dump.txt
        verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
        recompress: reports the SST file size if recompressed with different
                    compression types

    --output_hex
      Can be combined with scan command to print the keys and values in Hex

    --decode_blob_index
      Decode blob indexes and print them in a human-readable format during scans.

    --from=<user_key>
      Key to start reading from when executing check|scan

    --to=<user_key>
      Key to stop reading at when executing check|scan

    --prefix=<user_key>
      Returns all keys with this prefix when executing check|scan
      Cannot be used in conjunction with --from

    --read_num=<num>
      Maximum number of entries to read when executing check|scan

    --verify_checksum
      Verify file checksum when executing check|scan

    --input_key_hex
      Can be combined with --from and --to to indicate that these values are encoded in Hex

    --show_properties
      Print table properties after iterating over the file when executing
      check|scan|raw

    --show_summary
      Print the total number of files and data blocks and the total data,
      index and filter block sizes after all files were processed when
      executing check|scan

    --set_block_size=<block_size>
      Can be combined with --command=recompress to set the block size that will
      be used when trying different compression algorithms

    --compression_types=<comma-separated list of CompressionType members, e.g.,
      kSnappyCompression>
      Can be combined with --command=recompress to run recompression for this
      list of compression types

    --parse_internal_key=<0xKEY>
      Convenience option to parse an internal key on the command line. Dumps the
      internal key in hex format {'key' @ SN: type}
)");
}

}  // namespace
489
Run(int argc,char ** argv,Options options)490 int SSTDumpTool::Run(int argc, char** argv, Options options) {
491 const char* env_uri = nullptr;
492 const char* dir_or_file = nullptr;
493 uint64_t read_num = std::numeric_limits<uint64_t>::max();
494 std::string command;
495
496 char junk;
497 uint64_t n;
498 bool verify_checksum = false;
499 bool output_hex = false;
500 bool decode_blob_index = false;
501 bool input_key_hex = false;
502 bool has_from = false;
503 bool has_to = false;
504 bool use_from_as_prefix = false;
505 bool show_properties = false;
506 bool show_summary = false;
507 bool set_block_size = false;
508 std::string from_key;
509 std::string to_key;
510 std::string block_size_str;
511 size_t block_size = 0;
512 std::vector<std::pair<CompressionType, const char*>> compression_types;
513 uint64_t total_num_files = 0;
514 uint64_t total_num_data_blocks = 0;
515 uint64_t total_data_block_size = 0;
516 uint64_t total_index_block_size = 0;
517 uint64_t total_filter_block_size = 0;
518 for (int i = 1; i < argc; i++) {
519 if (strncmp(argv[i], "--env_uri=", 10) == 0) {
520 env_uri = argv[i] + 10;
521 } else if (strncmp(argv[i], "--file=", 7) == 0) {
522 dir_or_file = argv[i] + 7;
523 } else if (strcmp(argv[i], "--output_hex") == 0) {
524 output_hex = true;
525 } else if (strcmp(argv[i], "--decode_blob_index") == 0) {
526 decode_blob_index = true;
527 } else if (strcmp(argv[i], "--input_key_hex") == 0) {
528 input_key_hex = true;
529 } else if (sscanf(argv[i], "--read_num=%lu%c", (unsigned long*)&n, &junk) ==
530 1) {
531 read_num = n;
532 } else if (strcmp(argv[i], "--verify_checksum") == 0) {
533 verify_checksum = true;
534 } else if (strncmp(argv[i], "--command=", 10) == 0) {
535 command = argv[i] + 10;
536 } else if (strncmp(argv[i], "--from=", 7) == 0) {
537 from_key = argv[i] + 7;
538 has_from = true;
539 } else if (strncmp(argv[i], "--to=", 5) == 0) {
540 to_key = argv[i] + 5;
541 has_to = true;
542 } else if (strncmp(argv[i], "--prefix=", 9) == 0) {
543 from_key = argv[i] + 9;
544 use_from_as_prefix = true;
545 } else if (strcmp(argv[i], "--show_properties") == 0) {
546 show_properties = true;
547 } else if (strcmp(argv[i], "--show_summary") == 0) {
548 show_summary = true;
549 } else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
550 set_block_size = true;
551 block_size_str = argv[i] + 17;
552 std::istringstream iss(block_size_str);
553 iss >> block_size;
554 if (iss.fail()) {
555 fprintf(stderr, "block size must be numeric\n");
556 exit(1);
557 }
558 } else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
559 std::string compression_types_csv = argv[i] + 20;
560 std::istringstream iss(compression_types_csv);
561 std::string compression_type;
562 while (std::getline(iss, compression_type, ',')) {
563 auto iter = std::find_if(
564 kCompressions.begin(), kCompressions.end(),
565 [&compression_type](std::pair<CompressionType, const char*> curr) {
566 return curr.second == compression_type;
567 });
568 if (iter == kCompressions.end()) {
569 fprintf(stderr, "%s is not a valid CompressionType\n",
570 compression_type.c_str());
571 exit(1);
572 }
573 compression_types.emplace_back(*iter);
574 }
575 } else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
576 std::string in_key(argv[i] + 21);
577 try {
578 in_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(in_key);
579 } catch (...) {
580 std::cerr << "ERROR: Invalid key input '"
581 << in_key
582 << "' Use 0x{hex representation of internal rocksdb key}" << std::endl;
583 return -1;
584 }
585 Slice sl_key = ROCKSDB_NAMESPACE::Slice(in_key);
586 ParsedInternalKey ikey;
587 int retc = 0;
588 if (!ParseInternalKey(sl_key, &ikey)) {
589 std::cerr << "Internal Key [" << sl_key.ToString(true /* in hex*/)
590 << "] parse error!\n";
591 retc = -1;
592 }
593 fprintf(stdout, "key=%s\n", ikey.DebugString(true).c_str());
594 return retc;
595 } else {
596 fprintf(stderr, "Unrecognized argument '%s'\n\n", argv[i]);
597 print_help();
598 exit(1);
599 }
600 }
601
602 if (use_from_as_prefix && has_from) {
603 fprintf(stderr, "Cannot specify --prefix and --from\n\n");
604 exit(1);
605 }
606
607 if (input_key_hex) {
608 if (has_from || use_from_as_prefix) {
609 from_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(from_key);
610 }
611 if (has_to) {
612 to_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(to_key);
613 }
614 }
615
616 if (dir_or_file == nullptr) {
617 fprintf(stderr, "file or directory must be specified.\n\n");
618 print_help();
619 exit(1);
620 }
621
622 std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
623
624 // If caller of SSTDumpTool::Run(...) does not specify a different env other
625 // than Env::Default(), then try to load custom env based on dir_or_file.
626 // Otherwise, the caller is responsible for creating custom env.
627 if (!options.env || options.env == ROCKSDB_NAMESPACE::Env::Default()) {
628 Env* env = Env::Default();
629 Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard);
630 if (!s.ok() && !s.IsNotFound()) {
631 fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
632 exit(1);
633 }
634 options.env = env;
635 } else {
636 fprintf(stdout, "options.env is %p\n", options.env);
637 }
638
639 std::vector<std::string> filenames;
640 ROCKSDB_NAMESPACE::Env* env = options.env;
641 ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames);
642 bool dir = true;
643 if (!st.ok()) {
644 filenames.clear();
645 filenames.push_back(dir_or_file);
646 dir = false;
647 }
648
649 fprintf(stdout, "from [%s] to [%s]\n",
650 ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
651 ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
652
653 uint64_t total_read = 0;
654 for (size_t i = 0; i < filenames.size(); i++) {
655 std::string filename = filenames.at(i);
656 if (filename.length() <= 4 ||
657 filename.rfind(".sst") != filename.length() - 4) {
658 // ignore
659 continue;
660 }
661 if (dir) {
662 filename = std::string(dir_or_file) + "/" + filename;
663 }
664
665 ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, verify_checksum,
666 output_hex, decode_blob_index);
667 if (!dumper.getStatus().ok()) {
668 fprintf(stderr, "%s: %s\n", filename.c_str(),
669 dumper.getStatus().ToString().c_str());
670 continue;
671 }
672
673 if (command == "recompress") {
674 dumper.ShowAllCompressionSizes(
675 set_block_size ? block_size : 16384,
676 compression_types.empty() ? kCompressions : compression_types);
677 return 0;
678 }
679
680 if (command == "raw") {
681 std::string out_filename = filename.substr(0, filename.length() - 4);
682 out_filename.append("_dump.txt");
683
684 st = dumper.DumpTable(out_filename);
685 if (!st.ok()) {
686 fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
687 exit(1);
688 } else {
689 fprintf(stdout, "raw dump written to file %s\n", &out_filename[0]);
690 }
691 continue;
692 }
693
694 // scan all files in give file path.
695 if (command == "" || command == "scan" || command == "check") {
696 st = dumper.ReadSequential(
697 command == "scan", read_num > 0 ? (read_num - total_read) : read_num,
698 has_from || use_from_as_prefix, from_key, has_to, to_key,
699 use_from_as_prefix);
700 if (!st.ok()) {
701 fprintf(stderr, "%s: %s\n", filename.c_str(),
702 st.ToString().c_str());
703 }
704 total_read += dumper.GetReadNumber();
705 if (read_num > 0 && total_read > read_num) {
706 break;
707 }
708 }
709
710 if (command == "verify") {
711 st = dumper.VerifyChecksum();
712 if (!st.ok()) {
713 fprintf(stderr, "%s is corrupted: %s\n", filename.c_str(),
714 st.ToString().c_str());
715 } else {
716 fprintf(stdout, "The file is ok\n");
717 }
718 continue;
719 }
720
721 if (show_properties || show_summary) {
722 const ROCKSDB_NAMESPACE::TableProperties* table_properties;
723
724 std::shared_ptr<const ROCKSDB_NAMESPACE::TableProperties>
725 table_properties_from_reader;
726 st = dumper.ReadTableProperties(&table_properties_from_reader);
727 if (!st.ok()) {
728 fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
729 fprintf(stderr, "Try to use initial table properties\n");
730 table_properties = dumper.GetInitTableProperties();
731 } else {
732 table_properties = table_properties_from_reader.get();
733 }
734 if (table_properties != nullptr) {
735 if (show_properties) {
736 fprintf(stdout,
737 "Table Properties:\n"
738 "------------------------------\n"
739 " %s",
740 table_properties->ToString("\n ", ": ").c_str());
741 }
742 total_num_files += 1;
743 total_num_data_blocks += table_properties->num_data_blocks;
744 total_data_block_size += table_properties->data_size;
745 total_index_block_size += table_properties->index_size;
746 total_filter_block_size += table_properties->filter_size;
747 if (show_properties) {
748 fprintf(stdout,
749 "Raw user collected properties\n"
750 "------------------------------\n");
751 for (const auto& kv : table_properties->user_collected_properties) {
752 std::string prop_name = kv.first;
753 std::string prop_val = Slice(kv.second).ToString(true);
754 fprintf(stdout, " # %s: 0x%s\n", prop_name.c_str(),
755 prop_val.c_str());
756 }
757 }
758 } else {
759 fprintf(stderr, "Reader unexpectedly returned null properties\n");
760 }
761 }
762 }
763 if (show_summary) {
764 fprintf(stdout, "total number of files: %" PRIu64 "\n", total_num_files);
765 fprintf(stdout, "total number of data blocks: %" PRIu64 "\n",
766 total_num_data_blocks);
767 fprintf(stdout, "total data block size: %" PRIu64 "\n",
768 total_data_block_size);
769 fprintf(stdout, "total index block size: %" PRIu64 "\n",
770 total_index_block_size);
771 fprintf(stdout, "total filter block size: %" PRIu64 "\n",
772 total_filter_block_size);
773 }
774 return 0;
775 }
776 } // namespace ROCKSDB_NAMESPACE
777
778 #endif // ROCKSDB_LITE
779