1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #if !defined(ROCKSDB_LITE) && !defined(OS_WIN)
11
12 #include "rocksdb/utilities/backupable_db.h"
13
14 #include <algorithm>
15 #include <array>
16 #include <cstddef>
17 #include <cstdint>
18 #include <limits>
19 #include <random>
20 #include <string>
21 #include <utility>
22
23 #include "db/db_impl/db_impl.h"
24 #include "env/env_chroot.h"
25 #include "file/filename.h"
26 #include "port/port.h"
27 #include "port/stack_trace.h"
28 #include "rocksdb/file_checksum.h"
29 #include "rocksdb/rate_limiter.h"
30 #include "rocksdb/transaction_log.h"
31 #include "rocksdb/types.h"
32 #include "rocksdb/utilities/options_util.h"
33 #include "test_util/sync_point.h"
34 #include "test_util/testharness.h"
35 #include "test_util/testutil.h"
36 #include "util/cast_util.h"
37 #include "util/mutexlock.h"
38 #include "util/random.h"
39 #include "util/stderr_logger.h"
40 #include "util/string_util.h"
41 #include "utilities/backupable/backupable_db_impl.h"
42
43 namespace ROCKSDB_NAMESPACE {
44
45 namespace {
// Shorthands for the file-naming flags used when backups share table files.
// See BackupableDBOptions::ShareFilesNaming for the full semantics.
using ShareFilesNaming = BackupableDBOptions::ShareFilesNaming;
const auto kLegacyCrc32cAndFileSize =
    BackupableDBOptions::kLegacyCrc32cAndFileSize;
const auto kUseDbSessionId = BackupableDBOptions::kUseDbSessionId;
const auto kFlagIncludeFileSize = BackupableDBOptions::kFlagIncludeFileSize;
// Default naming scheme: db session id plus file size in the shared name.
const auto kNamingDefault = kUseDbSessionId | kFlagIncludeFileSize;
52
53 class DummyDB : public StackableDB {
54 public:
55 /* implicit */
DummyDB(const Options & options,const std::string & dbname)56 DummyDB(const Options& options, const std::string& dbname)
57 : StackableDB(nullptr), options_(options), dbname_(dbname),
58 deletions_enabled_(true), sequence_number_(0) {}
59
GetLatestSequenceNumber() const60 SequenceNumber GetLatestSequenceNumber() const override {
61 return ++sequence_number_;
62 }
63
GetName() const64 const std::string& GetName() const override { return dbname_; }
65
GetEnv() const66 Env* GetEnv() const override { return options_.env; }
67
68 using DB::GetOptions;
GetOptions(ColumnFamilyHandle *) const69 Options GetOptions(ColumnFamilyHandle* /*column_family*/) const override {
70 return options_;
71 }
72
GetDBOptions() const73 DBOptions GetDBOptions() const override { return DBOptions(options_); }
74
EnableFileDeletions(bool)75 Status EnableFileDeletions(bool /*force*/) override {
76 EXPECT_TRUE(!deletions_enabled_);
77 deletions_enabled_ = true;
78 return Status::OK();
79 }
80
DisableFileDeletions()81 Status DisableFileDeletions() override {
82 EXPECT_TRUE(deletions_enabled_);
83 deletions_enabled_ = false;
84 return Status::OK();
85 }
86
DefaultColumnFamily() const87 ColumnFamilyHandle* DefaultColumnFamily() const override { return nullptr; }
88
89 class DummyLogFile : public LogFile {
90 public:
91 /* implicit */
DummyLogFile(const std::string & path,bool alive=true)92 DummyLogFile(const std::string& path, bool alive = true)
93 : path_(path), alive_(alive) {}
94
PathName() const95 std::string PathName() const override { return path_; }
96
LogNumber() const97 uint64_t LogNumber() const override {
98 // what business do you have calling this method?
99 ADD_FAILURE();
100 return 0;
101 }
102
Type() const103 WalFileType Type() const override {
104 return alive_ ? kAliveLogFile : kArchivedLogFile;
105 }
106
StartSequence() const107 SequenceNumber StartSequence() const override {
108 // this seqnum guarantees the dummy file will be included in the backup
109 // as long as it is alive.
110 return kMaxSequenceNumber;
111 }
112
SizeFileBytes() const113 uint64_t SizeFileBytes() const override { return 0; }
114
115 private:
116 std::string path_;
117 bool alive_;
118 }; // DummyLogFile
119
GetLiveFilesStorageInfo(const LiveFilesStorageInfoOptions & opts,std::vector<LiveFileStorageInfo> * files)120 Status GetLiveFilesStorageInfo(
121 const LiveFilesStorageInfoOptions& opts,
122 std::vector<LiveFileStorageInfo>* files) override {
123 uint64_t number;
124 FileType type;
125 files->clear();
126 for (auto& f : live_files_) {
127 bool success = ParseFileName(f, &number, &type);
128 if (!success) {
129 return Status::InvalidArgument("Bad file name: " + f);
130 }
131 files->emplace_back();
132 LiveFileStorageInfo& info = files->back();
133 info.relative_filename = f;
134 info.directory = dbname_;
135 info.file_number = number;
136 info.file_type = type;
137 if (type == kDescriptorFile) {
138 info.size = 100; // See TestEnv::GetChildrenFileAttributes below
139 info.trim_to_size = true;
140 } else if (type == kCurrentFile) {
141 info.size = 0;
142 info.trim_to_size = true;
143 } else {
144 info.size = 200; // See TestEnv::GetChildrenFileAttributes below
145 }
146 if (opts.include_checksum_info) {
147 info.file_checksum = kUnknownFileChecksum;
148 info.file_checksum_func_name = kUnknownFileChecksumFuncName;
149 }
150 }
151 return Status::OK();
152 }
153
154 // To avoid FlushWAL called on stacked db which is nullptr
FlushWAL(bool)155 Status FlushWAL(bool /*sync*/) override { return Status::OK(); }
156
157 std::vector<std::string> live_files_;
158
159 private:
160 Options options_;
161 std::string dbname_;
162 bool deletions_enabled_;
163 mutable SequenceNumber sequence_number_;
164 }; // DummyDB
165
166 class TestEnv : public EnvWrapper {
167 public:
TestEnv(Env * t)168 explicit TestEnv(Env* t) : EnvWrapper(t) {}
169
170 class DummySequentialFile : public SequentialFile {
171 public:
DummySequentialFile(bool fail_reads)172 explicit DummySequentialFile(bool fail_reads)
173 : SequentialFile(), rnd_(5), fail_reads_(fail_reads) {}
Read(size_t n,Slice * result,char * scratch)174 Status Read(size_t n, Slice* result, char* scratch) override {
175 if (fail_reads_) {
176 return Status::IOError();
177 }
178 size_t read_size = (n > size_left) ? size_left : n;
179 for (size_t i = 0; i < read_size; ++i) {
180 scratch[i] = rnd_.Next() & 255;
181 }
182 *result = Slice(scratch, read_size);
183 size_left -= read_size;
184 return Status::OK();
185 }
186
Skip(uint64_t n)187 Status Skip(uint64_t n) override {
188 size_left = (n > size_left) ? size_left - n : 0;
189 return Status::OK();
190 }
191
192 private:
193 size_t size_left = 200;
194 Random rnd_;
195 bool fail_reads_;
196 };
197
NewSequentialFile(const std::string & f,std::unique_ptr<SequentialFile> * r,const EnvOptions & options)198 Status NewSequentialFile(const std::string& f,
199 std::unique_ptr<SequentialFile>* r,
200 const EnvOptions& options) override {
201 MutexLock l(&mutex_);
202 if (dummy_sequential_file_) {
203 r->reset(
204 new TestEnv::DummySequentialFile(dummy_sequential_file_fail_reads_));
205 return Status::OK();
206 } else {
207 Status s = EnvWrapper::NewSequentialFile(f, r, options);
208 if (s.ok()) {
209 if ((*r)->use_direct_io()) {
210 ++num_direct_seq_readers_;
211 }
212 ++num_seq_readers_;
213 }
214 return s;
215 }
216 }
217
NewWritableFile(const std::string & f,std::unique_ptr<WritableFile> * r,const EnvOptions & options)218 Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
219 const EnvOptions& options) override {
220 MutexLock l(&mutex_);
221 written_files_.push_back(f);
222 if (limit_written_files_ <= 0) {
223 return Status::NotSupported("Sorry, can't do this");
224 }
225 limit_written_files_--;
226 Status s = EnvWrapper::NewWritableFile(f, r, options);
227 if (s.ok()) {
228 if ((*r)->use_direct_io()) {
229 ++num_direct_writers_;
230 }
231 ++num_writers_;
232 }
233 return s;
234 }
235
NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result,const EnvOptions & options)236 Status NewRandomAccessFile(const std::string& fname,
237 std::unique_ptr<RandomAccessFile>* result,
238 const EnvOptions& options) override {
239 MutexLock l(&mutex_);
240 Status s = EnvWrapper::NewRandomAccessFile(fname, result, options);
241 if (s.ok()) {
242 if ((*result)->use_direct_io()) {
243 ++num_direct_rand_readers_;
244 }
245 ++num_rand_readers_;
246 }
247 return s;
248 }
249
DeleteFile(const std::string & fname)250 Status DeleteFile(const std::string& fname) override {
251 MutexLock l(&mutex_);
252 if (fail_delete_files_) {
253 return Status::IOError();
254 }
255 EXPECT_GT(limit_delete_files_, 0U);
256 limit_delete_files_--;
257 return EnvWrapper::DeleteFile(fname);
258 }
259
DeleteDir(const std::string & dirname)260 Status DeleteDir(const std::string& dirname) override {
261 MutexLock l(&mutex_);
262 if (fail_delete_files_) {
263 return Status::IOError();
264 }
265 return EnvWrapper::DeleteDir(dirname);
266 }
267
AssertWrittenFiles(std::vector<std::string> & should_have_written)268 void AssertWrittenFiles(std::vector<std::string>& should_have_written) {
269 MutexLock l(&mutex_);
270 std::sort(should_have_written.begin(), should_have_written.end());
271 std::sort(written_files_.begin(), written_files_.end());
272
273 ASSERT_EQ(should_have_written, written_files_);
274 }
275
ClearWrittenFiles()276 void ClearWrittenFiles() {
277 MutexLock l(&mutex_);
278 written_files_.clear();
279 }
280
SetLimitWrittenFiles(uint64_t limit)281 void SetLimitWrittenFiles(uint64_t limit) {
282 MutexLock l(&mutex_);
283 limit_written_files_ = limit;
284 }
285
SetLimitDeleteFiles(uint64_t limit)286 void SetLimitDeleteFiles(uint64_t limit) {
287 MutexLock l(&mutex_);
288 limit_delete_files_ = limit;
289 }
290
SetDeleteFileFailure(bool fail)291 void SetDeleteFileFailure(bool fail) {
292 MutexLock l(&mutex_);
293 fail_delete_files_ = fail;
294 }
295
SetDummySequentialFile(bool dummy_sequential_file)296 void SetDummySequentialFile(bool dummy_sequential_file) {
297 MutexLock l(&mutex_);
298 dummy_sequential_file_ = dummy_sequential_file;
299 }
SetDummySequentialFileFailReads(bool dummy_sequential_file_fail_reads)300 void SetDummySequentialFileFailReads(bool dummy_sequential_file_fail_reads) {
301 MutexLock l(&mutex_);
302 dummy_sequential_file_fail_reads_ = dummy_sequential_file_fail_reads;
303 }
304
SetGetChildrenFailure(bool fail)305 void SetGetChildrenFailure(bool fail) { get_children_failure_ = fail; }
GetChildren(const std::string & dir,std::vector<std::string> * r)306 Status GetChildren(const std::string& dir,
307 std::vector<std::string>* r) override {
308 if (get_children_failure_) {
309 return Status::IOError("SimulatedFailure");
310 }
311 return EnvWrapper::GetChildren(dir, r);
312 }
313
314 // Some test cases do not actually create the test files (e.g., see
315 // DummyDB::live_files_) - for those cases, we mock those files' attributes
316 // so CreateNewBackup() can get their attributes.
SetFilenamesForMockedAttrs(const std::vector<std::string> & filenames)317 void SetFilenamesForMockedAttrs(const std::vector<std::string>& filenames) {
318 filenames_for_mocked_attrs_ = filenames;
319 }
GetChildrenFileAttributes(const std::string & dir,std::vector<Env::FileAttributes> * r)320 Status GetChildrenFileAttributes(
321 const std::string& dir, std::vector<Env::FileAttributes>* r) override {
322 if (filenames_for_mocked_attrs_.size() > 0) {
323 for (const auto& filename : filenames_for_mocked_attrs_) {
324 uint64_t size_bytes = 200; // Match TestEnv
325 if (filename.find("MANIFEST") == 0) {
326 size_bytes = 100; // Match DummyDB::GetLiveFiles
327 }
328 r->push_back({dir + "/" + filename, size_bytes});
329 }
330 return Status::OK();
331 }
332 return EnvWrapper::GetChildrenFileAttributes(dir, r);
333 }
GetFileSize(const std::string & path,uint64_t * size_bytes)334 Status GetFileSize(const std::string& path, uint64_t* size_bytes) override {
335 if (filenames_for_mocked_attrs_.size() > 0) {
336 auto fname = path.substr(path.find_last_of('/') + 1);
337 auto filename_iter = std::find(filenames_for_mocked_attrs_.begin(),
338 filenames_for_mocked_attrs_.end(), fname);
339 if (filename_iter != filenames_for_mocked_attrs_.end()) {
340 *size_bytes = 200; // Match TestEnv
341 if (fname.find("MANIFEST") == 0) {
342 *size_bytes = 100; // Match DummyDB::GetLiveFiles
343 }
344 return Status::OK();
345 }
346 return Status::NotFound(fname);
347 }
348 return EnvWrapper::GetFileSize(path, size_bytes);
349 }
350
SetCreateDirIfMissingFailure(bool fail)351 void SetCreateDirIfMissingFailure(bool fail) {
352 create_dir_if_missing_failure_ = fail;
353 }
CreateDirIfMissing(const std::string & d)354 Status CreateDirIfMissing(const std::string& d) override {
355 if (create_dir_if_missing_failure_) {
356 return Status::IOError("SimulatedFailure");
357 }
358 return EnvWrapper::CreateDirIfMissing(d);
359 }
360
SetNewDirectoryFailure(bool fail)361 void SetNewDirectoryFailure(bool fail) { new_directory_failure_ = fail; }
NewDirectory(const std::string & name,std::unique_ptr<Directory> * result)362 Status NewDirectory(const std::string& name,
363 std::unique_ptr<Directory>* result) override {
364 if (new_directory_failure_) {
365 return Status::IOError("SimulatedFailure");
366 }
367 return EnvWrapper::NewDirectory(name, result);
368 }
369
ClearFileOpenCounters()370 void ClearFileOpenCounters() {
371 MutexLock l(&mutex_);
372 num_rand_readers_ = 0;
373 num_direct_rand_readers_ = 0;
374 num_seq_readers_ = 0;
375 num_direct_seq_readers_ = 0;
376 num_writers_ = 0;
377 num_direct_writers_ = 0;
378 }
379
num_rand_readers()380 int num_rand_readers() { return num_rand_readers_; }
num_direct_rand_readers()381 int num_direct_rand_readers() { return num_direct_rand_readers_; }
num_seq_readers()382 int num_seq_readers() { return num_seq_readers_; }
num_direct_seq_readers()383 int num_direct_seq_readers() { return num_direct_seq_readers_; }
num_writers()384 int num_writers() { return num_writers_; }
num_direct_writers()385 int num_direct_writers() { return num_direct_writers_; }
386
387 private:
388 port::Mutex mutex_;
389 bool dummy_sequential_file_ = false;
390 bool dummy_sequential_file_fail_reads_ = false;
391 std::vector<std::string> written_files_;
392 std::vector<std::string> filenames_for_mocked_attrs_;
393 uint64_t limit_written_files_ = 1000000;
394 uint64_t limit_delete_files_ = 1000000;
395 bool fail_delete_files_ = false;
396
397 bool get_children_failure_ = false;
398 bool create_dir_if_missing_failure_ = false;
399 bool new_directory_failure_ = false;
400
401 // Keeps track of how many files of each type were successfully opened, and
402 // out of those, how many were opened with direct I/O.
403 std::atomic<int> num_rand_readers_;
404 std::atomic<int> num_direct_rand_readers_;
405 std::atomic<int> num_seq_readers_;
406 std::atomic<int> num_direct_seq_readers_;
407 std::atomic<int> num_writers_;
408 std::atomic<int> num_direct_writers_;
409 }; // TestEnv
410
411 class FileManager : public EnvWrapper {
412 public:
FileManager(Env * t)413 explicit FileManager(Env* t) : EnvWrapper(t), rnd_(5) {}
414
GetRandomFileInDir(const std::string & dir,std::string * fname,uint64_t * fsize)415 Status GetRandomFileInDir(const std::string& dir, std::string* fname,
416 uint64_t* fsize) {
417 std::vector<FileAttributes> children;
418 auto s = GetChildrenFileAttributes(dir, &children);
419 if (!s.ok()) {
420 return s;
421 } else if (children.size() <= 2) { // . and ..
422 return Status::NotFound("Empty directory: " + dir);
423 }
424 assert(fname != nullptr);
425 while (true) {
426 int i = rnd_.Next() % children.size();
427 fname->assign(dir + "/" + children[i].name);
428 *fsize = children[i].size_bytes;
429 return Status::OK();
430 }
431 // should never get here
432 assert(false);
433 return Status::NotFound("");
434 }
435
DeleteRandomFileInDir(const std::string & dir)436 Status DeleteRandomFileInDir(const std::string& dir) {
437 std::vector<std::string> children;
438 Status s = GetChildren(dir, &children);
439 if (!s.ok()) {
440 return s;
441 }
442 while (true) {
443 int i = rnd_.Next() % children.size();
444 return DeleteFile(dir + "/" + children[i]);
445 }
446 // should never get here
447 assert(false);
448 return Status::NotFound("");
449 }
450
AppendToRandomFileInDir(const std::string & dir,const std::string & data)451 Status AppendToRandomFileInDir(const std::string& dir,
452 const std::string& data) {
453 std::vector<std::string> children;
454 Status s = GetChildren(dir, &children);
455 if (!s.ok()) {
456 return s;
457 }
458 while (true) {
459 int i = rnd_.Next() % children.size();
460 return WriteToFile(dir + "/" + children[i], data);
461 }
462 // should never get here
463 assert(false);
464 return Status::NotFound("");
465 }
466
CorruptFile(const std::string & fname,uint64_t bytes_to_corrupt)467 Status CorruptFile(const std::string& fname, uint64_t bytes_to_corrupt) {
468 std::string file_contents;
469 Status s = ReadFileToString(this, fname, &file_contents);
470 if (!s.ok()) {
471 return s;
472 }
473 s = DeleteFile(fname);
474 if (!s.ok()) {
475 return s;
476 }
477
478 for (uint64_t i = 0; i < bytes_to_corrupt; ++i) {
479 std::string tmp = rnd_.RandomString(1);
480 file_contents[rnd_.Next() % file_contents.size()] = tmp[0];
481 }
482 return WriteToFile(fname, file_contents);
483 }
484
CorruptFileStart(const std::string & fname)485 Status CorruptFileStart(const std::string& fname) {
486 std::string to_xor = "blah";
487 std::string file_contents;
488 Status s = ReadFileToString(this, fname, &file_contents);
489 if (!s.ok()) {
490 return s;
491 }
492 s = DeleteFile(fname);
493 if (!s.ok()) {
494 return s;
495 }
496 for (size_t i = 0; i < to_xor.size(); ++i) {
497 file_contents[i] ^= to_xor[i];
498 }
499 return WriteToFile(fname, file_contents);
500 }
501
CorruptChecksum(const std::string & fname,bool appear_valid)502 Status CorruptChecksum(const std::string& fname, bool appear_valid) {
503 std::string metadata;
504 Status s = ReadFileToString(this, fname, &metadata);
505 if (!s.ok()) {
506 return s;
507 }
508 s = DeleteFile(fname);
509 if (!s.ok()) {
510 return s;
511 }
512
513 auto pos = metadata.find("private");
514 if (pos == std::string::npos) {
515 return Status::Corruption("private file is expected");
516 }
517 pos = metadata.find(" crc32 ", pos + 6);
518 if (pos == std::string::npos) {
519 return Status::Corruption("checksum not found");
520 }
521
522 if (metadata.size() < pos + 7) {
523 return Status::Corruption("bad CRC32 checksum value");
524 }
525
526 if (appear_valid) {
527 if (metadata[pos + 8] == '\n') {
528 // single digit value, safe to insert one more digit
529 metadata.insert(pos + 8, 1, '0');
530 } else {
531 metadata.erase(pos + 8, 1);
532 }
533 } else {
534 metadata[pos + 7] = 'a';
535 }
536
537 return WriteToFile(fname, metadata);
538 }
539
WriteToFile(const std::string & fname,const std::string & data)540 Status WriteToFile(const std::string& fname, const std::string& data) {
541 std::unique_ptr<WritableFile> file;
542 EnvOptions env_options;
543 env_options.use_mmap_writes = false;
544 Status s = EnvWrapper::NewWritableFile(fname, &file, env_options);
545 if (!s.ok()) {
546 return s;
547 }
548 return file->Append(Slice(data));
549 }
550
551 private:
552 Random rnd_;
553 }; // FileManager
554
555 // utility functions
556 namespace {
557
// Controls whether/when FillDB forces a memtable flush.
enum FillDBFlushAction {
  kFlushMost,      // flush before the final Put so one entry stays WAL-only
  kFlushAll,       // flush after all Puts
  kAutoFlushOnly,  // never force a flush; rely on automatic flushes
};
563
564 // Many tests in this file expect FillDB to write at least one sst file,
565 // so the default behavior (if not kAutoFlushOnly) of FillDB is to force
566 // a flush. But to ensure coverage of the WAL file case, we also (by default)
567 // do one Put after the Flush (kFlushMost).
FillDB(DB * db,int from,int to,FillDBFlushAction flush_action=kFlushMost)568 size_t FillDB(DB* db, int from, int to,
569 FillDBFlushAction flush_action = kFlushMost) {
570 size_t bytes_written = 0;
571 for (int i = from; i < to; ++i) {
572 std::string key = "testkey" + ToString(i);
573 std::string value = "testvalue" + ToString(i);
574 bytes_written += key.size() + value.size();
575
576 EXPECT_OK(db->Put(WriteOptions(), Slice(key), Slice(value)));
577
578 if (flush_action == kFlushMost && i == to - 2) {
579 EXPECT_OK(db->Flush(FlushOptions()));
580 }
581 }
582 if (flush_action == kFlushAll) {
583 EXPECT_OK(db->Flush(FlushOptions()));
584 }
585 return bytes_written;
586 }
587
AssertExists(DB * db,int from,int to)588 void AssertExists(DB* db, int from, int to) {
589 for (int i = from; i < to; ++i) {
590 std::string key = "testkey" + ToString(i);
591 std::string value;
592 Status s = db->Get(ReadOptions(), Slice(key), &value);
593 ASSERT_EQ(value, "testvalue" + ToString(i));
594 }
595 }
596
AssertEmpty(DB * db,int from,int to)597 void AssertEmpty(DB* db, int from, int to) {
598 for (int i = from; i < to; ++i) {
599 std::string key = "testkey" + ToString(i);
600 std::string value = "testvalue" + ToString(i);
601
602 Status s = db->Get(ReadOptions(), Slice(key), &value);
603 ASSERT_TRUE(s.IsNotFound());
604 }
605 }
606 } // namespace
607
608 class BackupEngineTest : public testing::Test {
609 public:
610 enum ShareOption {
611 kNoShare,
612 kShareNoChecksum,
613 kShareWithChecksum,
614 };
615
616 const std::vector<ShareOption> kAllShareOptions = {
617 kNoShare, kShareNoChecksum, kShareWithChecksum};
618
BackupEngineTest()619 BackupEngineTest() {
620 // set up files
621 std::string db_chroot = test::PerThreadDBPath("db_for_backup");
622 std::string backup_chroot = test::PerThreadDBPath("db_backups");
623 EXPECT_OK(Env::Default()->CreateDirIfMissing(db_chroot));
624 EXPECT_OK(Env::Default()->CreateDirIfMissing(backup_chroot));
625 dbname_ = "/tempdb";
626 backupdir_ = "/tempbk";
627 latest_backup_ = backupdir_ + "/LATEST_BACKUP";
628
629 // set up envs
630 db_chroot_env_.reset(NewChrootEnv(Env::Default(), db_chroot));
631 backup_chroot_env_.reset(NewChrootEnv(Env::Default(), backup_chroot));
632 test_db_env_.reset(new TestEnv(db_chroot_env_.get()));
633 test_backup_env_.reset(new TestEnv(backup_chroot_env_.get()));
634 file_manager_.reset(new FileManager(backup_chroot_env_.get()));
635 db_file_manager_.reset(new FileManager(db_chroot_env_.get()));
636
637 // set up db options
638 options_.create_if_missing = true;
639 options_.paranoid_checks = true;
640 options_.write_buffer_size = 1 << 17; // 128KB
641 options_.env = test_db_env_.get();
642 options_.wal_dir = dbname_;
643 options_.enable_blob_files = true;
644
645 // Create logger
646 DBOptions logger_options;
647 logger_options.env = db_chroot_env_.get();
648 // TODO: This should really be an EXPECT_OK, but this CreateLogger fails
649 // regularly in some environments with "no such directory"
650 CreateLoggerFromOptions(dbname_, logger_options, &logger_)
651 .PermitUncheckedError();
652
653 // The sync option is not easily testable in unit tests, but should be
654 // smoke tested across all the other backup tests. However, it is
655 // certainly not worth doubling the runtime of backup tests for it.
656 // Thus, we can enable sync for one of our alternate testing
657 // configurations.
658 constexpr bool kUseSync =
659 #ifdef ROCKSDB_MODIFY_NPHASH
660 true;
661 #else
662 false;
663 #endif // ROCKSDB_MODIFY_NPHASH
664
665 // set up backup db options
666 backupable_options_.reset(new BackupableDBOptions(
667 backupdir_, test_backup_env_.get(), /*share_table_files*/ true,
668 logger_.get(), kUseSync));
669
670 // most tests will use multi-threaded backups
671 backupable_options_->max_background_operations = 7;
672
673 // delete old files in db
674 DestroyDB(dbname_, options_);
675
676 // delete old LATEST_BACKUP file, which some tests create for compatibility
677 // testing.
678 backup_chroot_env_->DeleteFile(latest_backup_).PermitUncheckedError();
679 }
680
OpenDB()681 DB* OpenDB() {
682 DB* db;
683 EXPECT_OK(DB::Open(options_, dbname_, &db));
684 return db;
685 }
686
CloseAndReopenDB(bool read_only=false)687 void CloseAndReopenDB(bool read_only = false) {
688 // Close DB
689 db_.reset();
690
691 // Open DB
692 test_db_env_->SetLimitWrittenFiles(1000000);
693 DB* db;
694 if (read_only) {
695 ASSERT_OK(DB::OpenForReadOnly(options_, dbname_, &db));
696 } else {
697 ASSERT_OK(DB::Open(options_, dbname_, &db));
698 }
699 db_.reset(db);
700 }
701
InitializeDBAndBackupEngine(bool dummy=false)702 void InitializeDBAndBackupEngine(bool dummy = false) {
703 // reset all the db env defaults
704 test_db_env_->SetLimitWrittenFiles(1000000);
705 test_db_env_->SetDummySequentialFile(dummy);
706
707 DB* db;
708 if (dummy) {
709 dummy_db_ = new DummyDB(options_, dbname_);
710 db = dummy_db_;
711 } else {
712 ASSERT_OK(DB::Open(options_, dbname_, &db));
713 }
714 db_.reset(db);
715 }
716
OpenDBAndBackupEngine(bool destroy_old_data=false,bool dummy=false,ShareOption shared_option=kShareNoChecksum)717 virtual void OpenDBAndBackupEngine(
718 bool destroy_old_data = false, bool dummy = false,
719 ShareOption shared_option = kShareNoChecksum) {
720 InitializeDBAndBackupEngine(dummy);
721 // reset backup env defaults
722 test_backup_env_->SetLimitWrittenFiles(1000000);
723 backupable_options_->destroy_old_data = destroy_old_data;
724 backupable_options_->share_table_files = shared_option != kNoShare;
725 backupable_options_->share_files_with_checksum =
726 shared_option == kShareWithChecksum;
727 OpenBackupEngine(destroy_old_data);
728 }
729
CloseDBAndBackupEngine()730 void CloseDBAndBackupEngine() {
731 db_.reset();
732 backup_engine_.reset();
733 }
734
OpenBackupEngine(bool destroy_old_data=false)735 void OpenBackupEngine(bool destroy_old_data = false) {
736 backupable_options_->destroy_old_data = destroy_old_data;
737 BackupEngine* backup_engine;
738 ASSERT_OK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
739 &backup_engine));
740 backup_engine_.reset(backup_engine);
741 }
742
CloseBackupEngine()743 void CloseBackupEngine() { backup_engine_.reset(nullptr); }
744
745 // cross-cutting test of GetBackupInfo
AssertBackupInfoConsistency()746 void AssertBackupInfoConsistency() {
747 std::vector<BackupInfo> backup_info;
748 backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true);
749 std::map<std::string, uint64_t> file_sizes;
750
751 // Find the files that are supposed to be there
752 for (auto& backup : backup_info) {
753 uint64_t sum_for_backup = 0;
754 for (auto& file : backup.file_details) {
755 auto e = file_sizes.find(file.relative_filename);
756 if (e == file_sizes.end()) {
757 // fprintf(stderr, "Adding %s -> %u\n",
758 // file.relative_filename.c_str(), (unsigned)file.size);
759 file_sizes[file.relative_filename] = file.size;
760 } else {
761 ASSERT_EQ(file_sizes[file.relative_filename], file.size);
762 }
763 sum_for_backup += file.size;
764 }
765 ASSERT_EQ(backup.size, sum_for_backup);
766 }
767
768 std::vector<BackupID> corrupt_backup_ids;
769 backup_engine_->GetCorruptedBackups(&corrupt_backup_ids);
770 bool has_corrupt = corrupt_backup_ids.size() > 0;
771
772 // Compare with what's in backup dir
773 std::vector<std::string> child_dirs;
774 ASSERT_OK(
775 test_backup_env_->GetChildren(backupdir_ + "/private", &child_dirs));
776 for (auto& dir : child_dirs) {
777 dir = "private/" + dir;
778 }
779 child_dirs.push_back("shared"); // might not exist
780 child_dirs.push_back("shared_checksum"); // might not exist
781 for (auto& dir : child_dirs) {
782 std::vector<std::string> children;
783 test_backup_env_->GetChildren(backupdir_ + "/" + dir, &children)
784 .PermitUncheckedError();
785 // fprintf(stderr, "ls %s\n", (backupdir_ + "/" + dir).c_str());
786 for (auto& file : children) {
787 uint64_t size;
788 size = UINT64_MAX; // appease clang-analyze
789 std::string rel_file = dir + "/" + file;
790 // fprintf(stderr, "stat %s\n", (backupdir_ + "/" + rel_file).c_str());
791 ASSERT_OK(
792 test_backup_env_->GetFileSize(backupdir_ + "/" + rel_file, &size));
793 auto e = file_sizes.find(rel_file);
794 if (e == file_sizes.end()) {
795 // The only case in which we should find files not reported
796 ASSERT_TRUE(has_corrupt);
797 } else {
798 ASSERT_EQ(e->second, size);
799 file_sizes.erase(e);
800 }
801 }
802 }
803
804 // Everything should have been matched
805 ASSERT_EQ(file_sizes.size(), 0);
806 }
807
808 // restores backup backup_id and asserts the existence of
809 // [start_exist, end_exist> and not-existence of
810 // [end_exist, end>
811 //
812 // if backup_id == 0, it means restore from latest
813 // if end == 0, don't check AssertEmpty
AssertBackupConsistency(BackupID backup_id,uint32_t start_exist,uint32_t end_exist,uint32_t end=0,bool keep_log_files=false)814 void AssertBackupConsistency(BackupID backup_id, uint32_t start_exist,
815 uint32_t end_exist, uint32_t end = 0,
816 bool keep_log_files = false) {
817 RestoreOptions restore_options(keep_log_files);
818 bool opened_backup_engine = false;
819 if (backup_engine_.get() == nullptr) {
820 opened_backup_engine = true;
821 OpenBackupEngine();
822 }
823 AssertBackupInfoConsistency();
824
825 // Now perform restore
826 if (backup_id > 0) {
827 ASSERT_OK(backup_engine_->RestoreDBFromBackup(backup_id, dbname_, dbname_,
828 restore_options));
829 } else {
830 ASSERT_OK(backup_engine_->RestoreDBFromLatestBackup(dbname_, dbname_,
831 restore_options));
832 }
833 DB* db = OpenDB();
834 // Check DB contents
835 AssertExists(db, start_exist, end_exist);
836 if (end != 0) {
837 AssertEmpty(db, end_exist, end);
838 }
839 delete db;
840 if (opened_backup_engine) {
841 CloseBackupEngine();
842 }
843 }
844
DeleteLogFiles()845 void DeleteLogFiles() {
846 std::vector<std::string> delete_logs;
847 ASSERT_OK(db_chroot_env_->GetChildren(dbname_, &delete_logs));
848 for (auto f : delete_logs) {
849 uint64_t number;
850 FileType type;
851 bool ok = ParseFileName(f, &number, &type);
852 if (ok && type == kWalFile) {
853 ASSERT_OK(db_chroot_env_->DeleteFile(dbname_ + "/" + f));
854 }
855 }
856 }
857
GetDataFilesInDB(const FileType & file_type,std::vector<FileAttributes> * files)858 Status GetDataFilesInDB(const FileType& file_type,
859 std::vector<FileAttributes>* files) {
860 std::vector<std::string> live;
861 uint64_t ignore_manifest_size;
862 Status s = db_->GetLiveFiles(live, &ignore_manifest_size, /*flush*/ false);
863 if (!s.ok()) {
864 return s;
865 }
866 std::vector<FileAttributes> children;
867 s = test_db_env_->GetChildrenFileAttributes(dbname_, &children);
868 for (const auto& child : children) {
869 FileType type;
870 uint64_t number = 0;
871 if (ParseFileName(child.name, &number, &type) && type == file_type &&
872 std::find(live.begin(), live.end(), "/" + child.name) != live.end()) {
873 files->push_back(child);
874 }
875 }
876 return s;
877 }
878
GetRandomDataFileInDB(const FileType & file_type,std::string * fname_out,uint64_t * fsize_out=nullptr)879 Status GetRandomDataFileInDB(const FileType& file_type,
880 std::string* fname_out,
881 uint64_t* fsize_out = nullptr) {
882 Random rnd(6); // NB: hardly "random"
883 std::vector<FileAttributes> files;
884 Status s = GetDataFilesInDB(file_type, &files);
885 if (!s.ok()) {
886 return s;
887 }
888 if (files.empty()) {
889 return Status::NotFound("");
890 }
891 size_t i = rnd.Uniform(static_cast<int>(files.size()));
892 *fname_out = dbname_ + "/" + files[i].name;
893 if (fsize_out) {
894 *fsize_out = files[i].size_bytes;
895 }
896 return Status::OK();
897 }
898
CorruptRandomDataFileInDB(const FileType & file_type)899 Status CorruptRandomDataFileInDB(const FileType& file_type) {
900 std::string fname;
901 uint64_t fsize = 0;
902 Status s = GetRandomDataFileInDB(file_type, &fname, &fsize);
903 if (!s.ok()) {
904 return s;
905 }
906
907 std::string file_contents;
908 s = ReadFileToString(test_db_env_.get(), fname, &file_contents);
909 if (!s.ok()) {
910 return s;
911 }
912 s = test_db_env_->DeleteFile(fname);
913 if (!s.ok()) {
914 return s;
915 }
916
917 file_contents[0] = (file_contents[0] + 257) % 256;
918 return WriteStringToFile(test_db_env_.get(), file_contents, fname);
919 }
920
AssertDirectoryFilesMatchRegex(const std::string & dir,const TestRegex & pattern,const std::string & file_type,int minimum_count)921 void AssertDirectoryFilesMatchRegex(const std::string& dir,
922 const TestRegex& pattern,
923 const std::string& file_type,
924 int minimum_count) {
925 std::vector<FileAttributes> children;
926 ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children));
927 int found_count = 0;
928 for (const auto& child : children) {
929 if (EndsWith(child.name, file_type)) {
930 ASSERT_MATCHES_REGEX(child.name, pattern);
931 ++found_count;
932 }
933 }
934 ASSERT_GE(found_count, minimum_count);
935 }
936
  // Asserts that every file in `dir` embeds its own size in its name, in the
  // form "<prefix>_<size>.<extension>", and that at least `minimum_count`
  // files were checked.
  void AssertDirectoryFilesSizeIndicators(const std::string& dir,
                                          int minimum_count) {
    std::vector<FileAttributes> children;
    ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children));
    int found_count = 0;
    for (const auto& child : children) {
      auto last_underscore = child.name.find_last_of('_');
      auto last_dot = child.name.find_last_of('.');
      // When '_' or '.' is absent, find_last_of returns npos and
      // substr(0, npos) yields the whole name, so these two assertions also
      // verify that both separators are present.
      ASSERT_NE(child.name, child.name.substr(0, last_underscore));
      ASSERT_NE(child.name, child.name.substr(0, last_dot));
      ASSERT_LT(last_underscore, last_dot);
      // Extract the text between the last '_' and the last '.' -- the size
      // indicator -- and compare it against the actual file size.
      std::string s = child.name.substr(last_underscore + 1,
                                        last_dot - (last_underscore + 1));
      ASSERT_EQ(s, ToString(child.size_bytes));
      ++found_count;
    }
    ASSERT_GE(found_count, minimum_count);
  }
955
  // Directory paths used throughout the tests.
  std::string dbname_;
  std::string backupdir_;
  std::string latest_backup_;

  // logger_ must be above backup_engine_ such that the engine's destructor,
  // which uses a raw pointer to the logger, executes first.
  std::shared_ptr<Logger> logger_;

  // envs: chroot-wrapped base envs plus the failure-injecting test envs and
  // file managers layered on top of them.
  std::unique_ptr<Env> db_chroot_env_;
  std::unique_ptr<Env> backup_chroot_env_;
  std::unique_ptr<TestEnv> test_db_env_;
  std::unique_ptr<TestEnv> test_backup_env_;
  std::unique_ptr<FileManager> file_manager_;
  std::unique_ptr<FileManager> db_file_manager_;

  // all the dbs!
  DummyDB* dummy_db_;  // owned as db_ when present
  std::unique_ptr<DB> db_;
  std::unique_ptr<BackupEngine> backup_engine_;

  // options
  Options options_;

 protected:
  // Options handed to OpenBackupEngine(); subclasses may tweak them first.
  std::unique_ptr<BackupableDBOptions> backupable_options_;
};  // BackupEngineTest
984
// Prepends `path` to every element of `v`, in place.
void AppendPath(const std::string& path, std::vector<std::string>& v) {
  for (std::string& f : v) {
    f.insert(0, path);
  }
}
990
// Parameterized variant of BackupEngineTest: the boolean test parameter
// controls BackupableDBOptions::share_files_with_checksum.
class BackupEngineTestWithParam : public BackupEngineTest,
                                  public testing::WithParamInterface<bool> {
 public:
  BackupEngineTestWithParam() {
    backupable_options_->share_files_with_checksum = GetParam();
  }
  // Same flow as the base-class version, but preserves the
  // share_files_with_checksum value chosen by the test parameter in the
  // constructor instead of letting callers override it.
  void OpenDBAndBackupEngine(
      bool destroy_old_data = false, bool dummy = false,
      ShareOption shared_option = kShareNoChecksum) override {
    BackupEngineTest::InitializeDBAndBackupEngine(dummy);
    // reset backup env defaults
    test_backup_env_->SetLimitWrittenFiles(1000000);
    backupable_options_->destroy_old_data = destroy_old_data;
    backupable_options_->share_table_files = shared_option != kNoShare;
    // NOTE: keep share_files_with_checksum setting from constructor
    OpenBackupEngine(destroy_old_data);
  }
};
1009
// Verifies behavior when a recreated DB reuses table file names: with
// kShareNoChecksum, new files with previously-seen names collide with the
// old backup and CreateNewBackup must fail until old backups are purged;
// other sharing modes tolerate the collision.
TEST_F(BackupEngineTest, FileCollision) {
  const int keys_iteration = 5000;
  for (const auto& sopt : kAllShareOptions) {
    OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
    FillDB(db_.get(), 0, keys_iteration);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    FillDB(db_.get(), 0, keys_iteration);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    CloseDBAndBackupEngine();

    // If the db directory has been cleaned up, it is sensitive to file
    // collision.
    ASSERT_OK(DestroyDB(dbname_, options_));

    // open with old backup
    OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                          sopt);
    FillDB(db_.get(), 0, keys_iteration * 2);
    if (sopt != kShareNoChecksum) {
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    } else {
      // The new table files created in FillDB() will clash with the old
      // backup and sharing tables with no checksum will have the file
      // collision problem.
      ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
      ASSERT_OK(backup_engine_->PurgeOldBackups(0));
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    }
    CloseDBAndBackupEngine();

    // delete old data
    ASSERT_OK(DestroyDB(dbname_, options_));
  }
}
1044
// This test verifies that the VerifyBackup method correctly identifies
// invalid backups.
TEST_P(BackupEngineTestWithParam, VerifyBackup) {
  const int keys_iteration = 5000;
  OpenDBAndBackupEngine(true);
  // create five backups
  for (int i = 0; i < 5; ++i) {
    FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  }
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  // ---------- case 1. - valid backup -----------
  // Use ASSERT_OK (not ASSERT_TRUE(...ok())) for consistency with the rest
  // of this file and so a failure prints the Status message.
  ASSERT_OK(backup_engine_->VerifyBackup(1));

  // ---------- case 2. - delete a file -----------
  ASSERT_OK(file_manager_->DeleteRandomFileInDir(backupdir_ + "/private/1"));
  ASSERT_TRUE(backup_engine_->VerifyBackup(1).IsNotFound());

  // ---------- case 3. - corrupt a file -----------
  std::string append_data = "Corrupting a random file";
  ASSERT_OK(file_manager_->AppendToRandomFileInDir(backupdir_ + "/private/2",
                                                   append_data));
  ASSERT_TRUE(backup_engine_->VerifyBackup(2).IsCorruption());

  // ---------- case 4. - invalid backup -----------
  // Backup 6 was never created.
  ASSERT_TRUE(backup_engine_->VerifyBackup(6).IsNotFound());
  CloseDBAndBackupEngine();
}
1075
1076 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1077 // open DB, write, close DB, backup, restore, repeat
// open DB, write, close DB, backup, restore, repeat.
TEST_P(BackupEngineTestWithParam, OfflineIntegrationTest) {
  // has to be a big number, so that it triggers the memtable flush
  const int keys_iteration = 5000;
  const int max_key = keys_iteration * 4 + 10;
  // first iter -- flush before backup
  // second iter -- don't flush before backup
  for (int iter = 0; iter < 2; ++iter) {
    // delete old data (check the status, consistent with the other tests in
    // this file that wrap DestroyDB in ASSERT_OK)
    ASSERT_OK(DestroyDB(dbname_, options_));
    bool destroy_data = true;

    // every iteration --
    // 1. insert new data in the DB
    // 2. backup the DB
    // 3. destroy the db
    // 4. restore the db, check everything is still there
    for (int i = 0; i < 5; ++i) {
      // in last iteration, put smaller amount of data,
      int fill_up_to = std::min(keys_iteration * (i + 1), max_key);
      // ---- insert new data and back up ----
      OpenDBAndBackupEngine(destroy_data);
      destroy_data = false;
      // kAutoFlushOnly to preserve legacy test behavior (consider updating)
      FillDB(db_.get(), keys_iteration * i, fill_up_to, kAutoFlushOnly);
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), iter == 0));
      CloseDBAndBackupEngine();
      ASSERT_OK(DestroyDB(dbname_, options_));

      // ---- make sure it's empty ----
      DB* db = OpenDB();
      AssertEmpty(db, 0, fill_up_to);
      delete db;

      // ---- restore the DB ----
      OpenBackupEngine();
      if (i >= 3) {  // test purge old backups
        // when i == 4, purge to only 1 backup
        // when i == 3, purge to 2 backups
        ASSERT_OK(backup_engine_->PurgeOldBackups(5 - i));
      }
      // ---- make sure the data is there ---
      AssertBackupConsistency(0, 0, fill_up_to, max_key);
      CloseBackupEngine();
    }
  }
}
1124
1125 // open DB, write, backup, write, backup, close, restore
// open DB, write, backup, write, backup, close, restore.
TEST_P(BackupEngineTestWithParam, OnlineIntegrationTest) {
  // has to be a big number, so that it triggers the memtable flush
  const int keys_iteration = 5000;
  const int max_key = keys_iteration * 4 + 10;
  Random rnd(7);
  // delete old data (check the status, consistent with the other tests in
  // this file that wrap DestroyDB in ASSERT_OK)
  ASSERT_OK(DestroyDB(dbname_, options_));

  // TODO: Implement & test db_paths support in backup (not supported in
  // restore)
  // options_.db_paths.emplace_back(dbname_, 500 * 1024);
  // options_.db_paths.emplace_back(dbname_ + "_2", 1024 * 1024 * 1024);

  OpenDBAndBackupEngine(true);
  // write some data, backup, repeat
  for (int i = 0; i < 5; ++i) {
    if (i == 4) {
      // delete backup number 2, online delete!
      ASSERT_OK(backup_engine_->DeleteBackup(2));
    }
    // in last iteration, put smaller amount of data,
    // so that backups can share sst files
    int fill_up_to = std::min(keys_iteration * (i + 1), max_key);
    // kAutoFlushOnly to preserve legacy test behavior (consider updating)
    FillDB(db_.get(), keys_iteration * i, fill_up_to, kAutoFlushOnly);
    // we should get consistent results with flush_before_backup
    // set to both true and false
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)));
  }
  // close and destroy
  CloseDBAndBackupEngine();
  ASSERT_OK(DestroyDB(dbname_, options_));

  // ---- make sure it's empty ----
  DB* db = OpenDB();
  AssertEmpty(db, 0, max_key);
  delete db;

  // ---- restore every backup and verify all the data is there ----
  OpenBackupEngine();
  for (int i = 1; i <= 5; ++i) {
    if (i == 2) {
      // we deleted backup 2
      Status s = backup_engine_->RestoreDBFromBackup(2, dbname_, dbname_);
      ASSERT_NOK(s);
    } else {
      int fill_up_to = std::min(keys_iteration * i, max_key);
      AssertBackupConsistency(i, 0, fill_up_to, max_key);
    }
  }

  // delete some backups -- this should leave only backups 3 and 5 alive
  ASSERT_OK(backup_engine_->DeleteBackup(4));
  ASSERT_OK(backup_engine_->PurgeOldBackups(2));

  std::vector<BackupInfo> backup_info;
  backup_engine_->GetBackupInfo(&backup_info);
  ASSERT_EQ(2UL, backup_info.size());

  // check backup 3
  AssertBackupConsistency(3, 0, 3 * keys_iteration, max_key);
  // check backup 5
  AssertBackupConsistency(5, 0, max_key);

  CloseBackupEngine();
}
1192 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1193
// Run every BackupEngineTestWithParam test twice: with
// share_files_with_checksum disabled (false) and enabled (true).
INSTANTIATE_TEST_CASE_P(BackupEngineTestWithParam, BackupEngineTestWithParam,
                        ::testing::Bool());
1196
1197 // this will make sure that backup does not copy the same file twice
// this will make sure that backup does not copy the same file twice, and
// that deleting a backup garbage-collects files no other backup references
TEST_F(BackupEngineTest, NoDoubleCopy_And_AutoGC) {
  // dummy == true: use DummyDB so live files can be mocked exactly
  OpenDBAndBackupEngine(true, true);

  // should write 5 DB files + one meta file
  test_backup_env_->SetLimitWrittenFiles(7);
  test_backup_env_->ClearWrittenFiles();
  test_db_env_->SetLimitWrittenFiles(0);
  dummy_db_->live_files_ = {"00010.sst", "00011.sst", "CURRENT", "MANIFEST-01",
                            "00011.log"};
  test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
  // Files are written under dotted ".tmp" names, then renamed on completion.
  std::vector<std::string> should_have_written = {
      "/shared/.00010.sst.tmp", "/shared/.00011.sst.tmp", "/private/1/CURRENT",
      "/private/1/MANIFEST-01", "/private/1/00011.log", "/meta/.1.tmp"};
  AppendPath(backupdir_, should_have_written);
  test_backup_env_->AssertWrittenFiles(should_have_written);

  char db_number = '1';

  for (std::string other_sst : {"00015.sst", "00017.sst", "00019.sst"}) {
    // should write 4 new DB files + one meta file
    // should not write/copy 00010.sst, since it's already there!
    test_backup_env_->SetLimitWrittenFiles(6);
    test_backup_env_->ClearWrittenFiles();

    dummy_db_->live_files_ = {"00010.sst", other_sst, "CURRENT", "MANIFEST-01",
                              "00011.log"};
    test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
    // should not open 00010.sst - it's already there

    ++db_number;
    std::string private_dir = std::string("/private/") + db_number;
    should_have_written = {
        "/shared/." + other_sst + ".tmp", private_dir + "/CURRENT",
        private_dir + "/MANIFEST-01", private_dir + "/00011.log",
        std::string("/meta/.") + db_number + ".tmp"};
    AppendPath(backupdir_, should_have_written);
    test_backup_env_->AssertWrittenFiles(should_have_written);
  }

  ASSERT_OK(backup_engine_->DeleteBackup(1));
  // 00010.sst is shared with later backups, so it must survive
  ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00010.sst"));

  // 00011.sst was only in backup 1, should be deleted
  ASSERT_EQ(Status::NotFound(),
            test_backup_env_->FileExists(backupdir_ + "/shared/00011.sst"));
  ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00015.sst"));

  // MANIFEST file size should be only 100
  uint64_t size = 0;
  ASSERT_OK(test_backup_env_->GetFileSize(backupdir_ + "/private/2/MANIFEST-01",
                                          &size));
  ASSERT_EQ(100UL, size);
  ASSERT_OK(
      test_backup_env_->GetFileSize(backupdir_ + "/shared/00015.sst", &size));
  ASSERT_EQ(200UL, size);

  CloseBackupEngine();

  //
  // Now simulate incomplete delete by removing just meta
  //
  ASSERT_OK(test_backup_env_->DeleteFile(backupdir_ + "/meta/2"));

  OpenBackupEngine();

  // 1 appears to be removed, so
  // 2 non-corrupt and 0 corrupt seen
  std::vector<BackupInfo> backup_info;
  std::vector<BackupID> corrupt_backup_ids;
  backup_engine_->GetBackupInfo(&backup_info);
  backup_engine_->GetCorruptedBackups(&corrupt_backup_ids);
  ASSERT_EQ(2UL, backup_info.size());
  ASSERT_EQ(0UL, corrupt_backup_ids.size());

  // Keep the two we see, but this should suffice to purge unreferenced
  // shared files from incomplete delete.
  ASSERT_OK(backup_engine_->PurgeOldBackups(2));

  // Make sure dangling sst file has been removed (somewhere along this
  // process). GarbageCollect should not be needed.
  ASSERT_EQ(Status::NotFound(),
            test_backup_env_->FileExists(backupdir_ + "/shared/00015.sst"));
  ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst"));
  ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst"));

  // Now actually purge a good one
  ASSERT_OK(backup_engine_->PurgeOldBackups(1));

  ASSERT_EQ(Status::NotFound(),
            test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst"));
  ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst"));

  CloseDBAndBackupEngine();
}
1294
// test various kinds of corruptions that may happen:
// 1. Not able to write a file for backup - that backup should fail,
//    everything else should work
// 2. Corrupted backup meta file or missing backed-up file - we should
//    not be able to open that backup, but all other backups should be
//    fine
// 3. Corrupted checksum value - if the checksum is not a valid uint32_t,
//    db open should fail, otherwise, it aborts during the restore process.
TEST_F(BackupEngineTest, CorruptionsTest) {
  const int keys_iteration = 5000;
  Random rnd(6);
  Status s;

  OpenDBAndBackupEngine(true);
  // create five backups
  for (int i = 0; i < 5; ++i) {
    FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)));
  }

  // ---------- case 1. - fail a write -----------
  // try creating backup 6, but fail a write
  FillDB(db_.get(), keys_iteration * 5, keys_iteration * 6);
  test_backup_env_->SetLimitWrittenFiles(2);
  // should fail
  s = backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2));
  ASSERT_NOK(s);
  test_backup_env_->SetLimitWrittenFiles(1000000);
  // latest backup should have all the keys
  CloseDBAndBackupEngine();
  AssertBackupConsistency(0, 0, keys_iteration * 5, keys_iteration * 6);

  // --------- case 2. corrupted backup meta or missing backed-up file ----
  ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/meta/5", 3));
  // since 5 meta is now corrupted, latest backup should be 4
  AssertBackupConsistency(0, 0, keys_iteration * 4, keys_iteration * 5);
  OpenBackupEngine();
  s = backup_engine_->RestoreDBFromBackup(5, dbname_, dbname_);
  ASSERT_NOK(s);
  CloseBackupEngine();
  ASSERT_OK(file_manager_->DeleteRandomFileInDir(backupdir_ + "/private/4"));
  // 4 is corrupted, 3 is the latest backup now
  AssertBackupConsistency(0, 0, keys_iteration * 3, keys_iteration * 5);
  OpenBackupEngine();
  s = backup_engine_->RestoreDBFromBackup(4, dbname_, dbname_);
  CloseBackupEngine();
  ASSERT_NOK(s);

  // --------- case 3. corrupted checksum value ----
  ASSERT_OK(file_manager_->CorruptChecksum(backupdir_ + "/meta/3", false));
  // checksum of backup 3 is an invalid value, this can be detected at
  // db open time, and it reverts to the previous backup automatically
  AssertBackupConsistency(0, 0, keys_iteration * 2, keys_iteration * 5);
  // checksum of the backup 2 appears to be valid, this can cause checksum
  // mismatch and abort restore process
  ASSERT_OK(file_manager_->CorruptChecksum(backupdir_ + "/meta/2", true));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/2"));
  OpenBackupEngine();
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/2"));
  s = backup_engine_->RestoreDBFromBackup(2, dbname_, dbname_);
  ASSERT_NOK(s);

  // make sure that no corrupt backups have actually been deleted!
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/1"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/2"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/3"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/4"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/5"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/private/1"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/private/2"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/private/3"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/private/4"));
  ASSERT_OK(file_manager_->FileExists(backupdir_ + "/private/5"));

  // delete the corrupt backups and then make sure they're actually deleted
  ASSERT_OK(backup_engine_->DeleteBackup(5));
  ASSERT_OK(backup_engine_->DeleteBackup(4));
  ASSERT_OK(backup_engine_->DeleteBackup(3));
  ASSERT_OK(backup_engine_->DeleteBackup(2));
  // Should not be needed anymore with auto-GC on DeleteBackup
  //(void)backup_engine_->GarbageCollect();
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/meta/5"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/private/5"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/meta/4"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/private/4"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/meta/3"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/private/3"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/meta/2"));
  ASSERT_EQ(Status::NotFound(),
            file_manager_->FileExists(backupdir_ + "/private/2"));
  CloseBackupEngine();
  // only backup 1 is left intact
  AssertBackupConsistency(0, 0, keys_iteration * 1, keys_iteration * 5);

  // new backup should be 2!
  OpenDBAndBackupEngine();
  FillDB(db_.get(), keys_iteration * 1, keys_iteration * 2);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)));
  CloseDBAndBackupEngine();
  AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5);
}
1402
1403 // Corrupt a file but maintain its size
// Corrupt a file but maintain its size, so size-only verification passes
// while checksum verification fails.
TEST_F(BackupEngineTest, CorruptFileMaintainSize) {
  const int keys_iteration = 5000;
  OpenDBAndBackupEngine(true);
  // create a backup
  FillDB(db_.get(), 0, keys_iteration);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  // verify with file size
  ASSERT_OK(backup_engine_->VerifyBackup(1, false));
  // verify with file checksum
  ASSERT_OK(backup_engine_->VerifyBackup(1, true));

  std::string file_to_corrupt;
  uint64_t file_size = 0;
  // under normal circumstance, there should be at least one nonempty file
  while (file_size == 0) {
    // get a random file in /private/1
    // NOTE: ASSERT_OK rather than assert(): assert() compiles away under
    // NDEBUG, which would skip the call entirely and lose the side effect.
    ASSERT_OK(file_manager_->GetRandomFileInDir(backupdir_ + "/private/1",
                                                &file_to_corrupt, &file_size));
    // corrupt the file by replacing its content by file_size random bytes
    ASSERT_OK(file_manager_->CorruptFile(file_to_corrupt, file_size));
  }
  // file sizes match
  ASSERT_OK(backup_engine_->VerifyBackup(1, false));
  // file checksums mismatch
  ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
  // sanity check, use default second argument
  ASSERT_OK(backup_engine_->VerifyBackup(1));
  CloseDBAndBackupEngine();

  // an extra challenge
  // set share_files_with_checksum to true and do two more backups
  // corrupt all the table files in shared_checksum but maintain their sizes
  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kShareWithChecksum);
  // create two backups
  for (int i = 1; i < 3; ++i) {
    FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  }
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  std::vector<FileAttributes> children;
  const std::string dir = backupdir_ + "/shared_checksum";
  ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children));
  for (const auto& child : children) {
    if (child.size_bytes == 0) {
      continue;
    }
    // corrupt the file by replacing its content by file_size random bytes
    ASSERT_OK(
        file_manager_->CorruptFile(dir + "/" + child.name, child.size_bytes));
  }
  // file sizes match
  ASSERT_OK(backup_engine_->VerifyBackup(1, false));
  ASSERT_OK(backup_engine_->VerifyBackup(2, false));
  // file checksums mismatch
  ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
  ASSERT_NOK(backup_engine_->VerifyBackup(2, true));
  CloseDBAndBackupEngine();
}
1470
1471 // Corrupt a blob file but maintain its size
// Corrupt a blob file but maintain its size: size-only verification must
// still pass while checksum verification must fail.
TEST_P(BackupEngineTestWithParam, CorruptBlobFileMaintainSize) {
  const int keys_iteration = 5000;
  OpenDBAndBackupEngine(true);
  // create a backup
  FillDB(db_.get(), 0, keys_iteration);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  // verify with file size
  ASSERT_OK(backup_engine_->VerifyBackup(1, false));
  // verify with file checksum
  ASSERT_OK(backup_engine_->VerifyBackup(1, true));

  // Shared files land in "shared_checksum" or "shared" depending on the
  // share_files_with_checksum test parameter.
  const std::string dir =
      backupdir_ + (backupable_options_->share_files_with_checksum
                        ? "/shared_checksum"
                        : "/shared");

  std::vector<FileAttributes> entries;
  ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &entries));

  for (const auto& entry : entries) {
    if (!EndsWith(entry.name, ".blob") || entry.size_bytes == 0) {
      continue;
    }
    // corrupt the blob file by replacing its content with size_bytes random
    // bytes (the size stays the same)
    ASSERT_OK(
        file_manager_->CorruptFile(dir + "/" + entry.name, entry.size_bytes));
  }

  // file sizes match
  ASSERT_OK(backup_engine_->VerifyBackup(1, false));
  // file checksums mismatch
  ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
  // sanity check, use default second argument
  ASSERT_OK(backup_engine_->VerifyBackup(1));
  CloseDBAndBackupEngine();
}
1515
1516 // Test if BackupEngine will fail to create new backup if some table has been
1517 // corrupted and the table file checksum is stored in the DB manifest
TEST_F(BackupEngineTest, TableFileCorruptedBeforeBackup) {
  const int keys_iteration = 50000;

  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kNoShare);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random table file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kTableFile));
  // file_checksum_gen_factory is null, and thus table checksum is not
  // verified for creating a new backup; no corruption is detected
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();

  // delete old files in db
  ASSERT_OK(DestroyDB(dbname_, options_));

  // Enable table file checksum in DB manifest
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kNoShare);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random table file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kTableFile));
  // table file checksum is enabled so we should be able to detect any
  // corruption
  ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();
}
1548
// Test if BackupEngine will fail to create new backup if some blob files have
// been corrupted and the blob file checksum is stored in the DB manifest
TEST_F(BackupEngineTest, BlobFileCorruptedBeforeBackup) {
  const int keys_iteration = 50000;

  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kNoShare);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random blob file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kBlobFile));
  // file_checksum_gen_factory is null, and thus blob checksum is not
  // verified for creating a new backup; no corruption is detected
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();

  // delete old files in db
  ASSERT_OK(DestroyDB(dbname_, options_));

  // Enable file checksum in DB manifest
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kNoShare);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random blob file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kBlobFile));

  // file checksum is enabled so we should be able to detect any
  // corruption
  ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();
}
1582
1583 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1584 // Test if BackupEngine will fail to create new backup if some table has been
1585 // corrupted and the table file checksum is stored in the DB manifest for the
1586 // case when backup table files will be stored in a shared directory
// Same scenario as the TEST_F variant above, but table files go to a shared
// backup directory (the default ShareOption for the parameterized fixture).
TEST_P(BackupEngineTestWithParam, TableFileCorruptedBeforeBackup) {
  const int keys_iteration = 50000;

  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random table file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kTableFile));
  // cannot detect corruption since DB manifest has no table checksums
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();

  // delete old files in db
  ASSERT_OK(DestroyDB(dbname_, options_));

  // Enable table checksums in DB manifest
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random table file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kTableFile));
  // corruption is detected
  ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();
}
1613
1614 // Test if BackupEngine will fail to create new backup if some blob files have
1615 // been corrupted and the blob file checksum is stored in the DB manifest for
1616 // the case when backup blob files will be stored in a shared directory
// Same scenario as the TEST_F variant above, but blob files go to a shared
// backup directory (the default ShareOption for the parameterized fixture).
TEST_P(BackupEngineTestWithParam, BlobFileCorruptedBeforeBackup) {
  const int keys_iteration = 50000;
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random blob file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kBlobFile));
  // cannot detect corruption since DB manifest has no blob file checksums
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();

  // delete old files in db
  ASSERT_OK(DestroyDB(dbname_, options_));

  // Enable blob file checksums in DB manifest
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, keys_iteration);
  CloseAndReopenDB(/*read_only*/ true);
  // corrupt a random blob file in the DB directory
  ASSERT_OK(CorruptRandomDataFileInDB(kBlobFile));
  // corruption is detected
  ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();
}
1642 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1643
// Corrupts file data in-flight (via sync point) while the DB itself has no
// manifest checksums; the backup engine's own before/after copy checksums
// must catch it.
TEST_F(BackupEngineTest, TableFileWithoutDbChecksumCorruptedDuringBackup) {
  const int keys_iteration = 50000;
  backupable_options_->share_files_with_checksum_naming =
      kLegacyCrc32cAndFileSize;
  // When share_files_with_checksum is on, we calculate checksums of table
  // files before and after copying. So we can test whether a corruption has
  // happened while the file is being copied to the backup directory.
  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kShareWithChecksum);

  FillDB(db_.get(), 0, keys_iteration);
  std::atomic<bool> corrupted{false};
  // corrupt files when copying to the backup directory
  SyncPoint::GetInstance()->SetCallBack(
      "BackupEngineImpl::CopyOrCreateFile:CorruptionDuringBackup",
      [&](void* data) {
        if (data != nullptr) {
          // static_cast is the appropriate named cast for void* -> T*.
          Slice* d = static_cast<Slice*>(data);
          if (!d->empty()) {
            d->remove_suffix(1);
            corrupted = true;
          }
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();
  Status s = backup_engine_->CreateNewBackup(db_.get());
  if (corrupted) {
    ASSERT_NOK(s);
  } else {
    // should not reach this path in normal cases
    ASSERT_OK(s);
  }

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();

  CloseDBAndBackupEngine();
  // delete old files in db
  ASSERT_OK(DestroyDB(dbname_, options_));
}
1684
// Verify that, with DB-level table file checksums enabled, a corruption
// introduced during the copy to the backup directory is detected under every
// sharing option.
TEST_F(BackupEngineTest, TableFileWithDbChecksumCorruptedDuringBackup) {
  const int keys_iteration = 50000;
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  for (auto& sopt : kAllShareOptions) {
    // Since the default DB table file checksum is on, we obtain checksums of
    // table files from the DB manifest before copying and verify it with the
    // one calculated during copying.
    // Therefore, we can test whether a corruption has happened during the file
    // being copied to backup directory.
    OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);

    FillDB(db_.get(), 0, keys_iteration);

    // corrupt files when copying to the backup directory
    SyncPoint::GetInstance()->SetCallBack(
        "BackupEngineImpl::CopyOrCreateFile:CorruptionDuringBackup",
        [&](void* data) {
          if (data != nullptr) {
            // static_cast is the correct named cast to recover a typed
            // pointer from void* (no bit reinterpretation involved).
            Slice* d = static_cast<Slice*>(data);
            if (!d->empty()) {
              d->remove_suffix(1);
            }
          }
        });
    SyncPoint::GetInstance()->EnableProcessing();
    // The only case that we can't detect a corruption is when the file
    // being backed up is empty. But as keys_iteration is large, such
    // a case shouldn't have happened and we should be able to detect
    // the corruption.
    ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));

    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearAllCallBacks();

    CloseDBAndBackupEngine();
    // delete old files in db
    ASSERT_OK(DestroyDB(dbname_, options_));
  }
}
1724
// Interrupt backup creation by failing new writes and failing cleanup of the
// partial state, then verify a subsequent backup can still succeed.
TEST_F(BackupEngineTest, InterruptCreationTest) {
  const int keys_iteration = 5000;
  Random rnd(6);

  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, keys_iteration);
  test_backup_env_->SetLimitWrittenFiles(2);
  test_backup_env_->SetDeleteFileFailure(true);
  // Creation must fail: only two files may be written.
  const bool flush_first = (rnd.Next() % 2) != 0;
  ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get(), flush_first));
  CloseDBAndBackupEngine();
  // Cleanup also fails, so the partial backup's tmp directory stays behind.
  ASSERT_OK(backup_chroot_env_->FileExists(backupdir_ + "/private/1/"));

  OpenDBAndBackupEngine(false /* destroy_old_data */);
  test_backup_env_->SetLimitWrittenFiles(1000000);
  test_backup_env_->SetDeleteFileFailure(false);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), (rnd.Next() % 2) != 0));
  CloseDBAndBackupEngine();
  // The latest backup should hold all the keys.
  AssertBackupConsistency(0, 0, keys_iteration);
}
1749
// Run a flush + compaction concurrently with backup creation, with sync-point
// dependencies holding the checkpoint between its two "AfterGetLive" stages
// while the DB mutates, then verify the resulting backup is consistent.
TEST_F(BackupEngineTest, FlushCompactDuringBackupCheckpoint) {
  const int keys_iteration = 5000;
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  for (const auto& sopt : kAllShareOptions) {
    OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
    FillDB(db_.get(), 0, keys_iteration);
    // That FillDB leaves a mix of flushed and unflushed data
    // Force the flush/compact thread to run entirely between the
    // checkpoint's AfterGetLive1 and AfterGetLive2 sync points.
    SyncPoint::GetInstance()->LoadDependency(
        {{"CheckpointImpl::CreateCustomCheckpoint:AfterGetLive1",
          "BackupEngineTest::FlushCompactDuringBackupCheckpoint:Before"},
         {"BackupEngineTest::FlushCompactDuringBackupCheckpoint:After",
          "CheckpointImpl::CreateCustomCheckpoint:AfterGetLive2"}});
    SyncPoint::GetInstance()->EnableProcessing();
    ROCKSDB_NAMESPACE::port::Thread flush_thread{[this]() {
      TEST_SYNC_POINT(
          "BackupEngineTest::FlushCompactDuringBackupCheckpoint:Before");
      // Write more keys, flush them, and compact everything while the
      // checkpoint is paused.
      FillDB(db_.get(), keys_iteration, 2 * keys_iteration);
      ASSERT_OK(db_->Flush(FlushOptions()));
      DBImpl* dbi = static_cast<DBImpl*>(db_.get());
      ASSERT_OK(dbi->TEST_WaitForFlushMemTable());
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      ASSERT_OK(dbi->TEST_WaitForCompact());
      TEST_SYNC_POINT(
          "BackupEngineTest::FlushCompactDuringBackupCheckpoint:After");
    }};
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    flush_thread.join();
    CloseDBAndBackupEngine();
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearAllCallBacks();
    /* FIXME(peterd): reinstate with option for checksum in file names
    if (sopt == kShareWithChecksum) {
      // Ensure we actually got DB manifest checksums by inspecting
      // shared_checksum file names for hex checksum component
      TestRegex expected("[^_]+_[0-9A-F]{8}_[^_]+.sst");
      std::vector<FileAttributes> children;
      const std::string dir = backupdir_ + "/shared_checksum";
      ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children));
      for (const auto& child : children) {
        if (child.size_bytes == 0) {
          continue;
        }
        EXPECT_MATCHES_REGEX(child.name, expected);
      }
    }
    */
    // Backup 0 (latest) must contain exactly the keys written before the
    // backup started.
    AssertBackupConsistency(0, 0, keys_iteration);
  }
}
1799
// Compose the path of a backup's private directory:
// "<ret>/private/<backupID>/".
inline std::string OptionsPath(std::string ret, int backupID) {
  return ret + "/private/" + std::to_string(backupID) + "/";
}
1806
1807 // Backup the LATEST options file to
1808 // "<backup_dir>/private/<backup_id>/OPTIONS<number>"
1809
TEST_F(BackupEngineTest,BackupOptions)1810 TEST_F(BackupEngineTest, BackupOptions) {
1811 OpenDBAndBackupEngine(true);
1812 for (int i = 1; i < 5; i++) {
1813 std::string name;
1814 std::vector<std::string> filenames;
1815 // Must reset() before reset(OpenDB()) again.
1816 // Calling OpenDB() while *db_ is existing will cause LOCK issue
1817 db_.reset();
1818 db_.reset(OpenDB());
1819 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
1820 ASSERT_OK(ROCKSDB_NAMESPACE::GetLatestOptionsFileName(db_->GetName(),
1821 options_.env, &name));
1822 ASSERT_OK(file_manager_->FileExists(OptionsPath(backupdir_, i) + name));
1823 ASSERT_OK(backup_chroot_env_->GetChildren(OptionsPath(backupdir_, i),
1824 &filenames));
1825 for (auto fn : filenames) {
1826 if (fn.compare(0, 7, "OPTIONS") == 0) {
1827 ASSERT_EQ(name, fn);
1828 }
1829 }
1830 }
1831
1832 CloseDBAndBackupEngine();
1833 }
1834
// Race a backup against concurrent SetOptions calls, which rewrite the
// OPTIONS file. Sync-point dependencies force the SetOptions churn to occur
// while the checkpoint is between its SavedLiveFiles1/2 stages; the backup
// must still succeed.
TEST_F(BackupEngineTest, SetOptionsBackupRaceCondition) {
  OpenDBAndBackupEngine(true);
  SyncPoint::GetInstance()->LoadDependency(
      {{"CheckpointImpl::CreateCheckpoint:SavedLiveFiles1",
        "BackupEngineTest::SetOptionsBackupRaceCondition:BeforeSetOptions"},
       {"BackupEngineTest::SetOptionsBackupRaceCondition:AfterSetOptions",
        "CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"}});
  SyncPoint::GetInstance()->EnableProcessing();
  ROCKSDB_NAMESPACE::port::Thread setoptions_thread{[this]() {
    TEST_SYNC_POINT(
        "BackupEngineTest::SetOptionsBackupRaceCondition:BeforeSetOptions");
    DBImpl* dbi = static_cast<DBImpl*>(db_.get());
    // Change arbitrary option to trigger OPTIONS file deletion
    ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
                              {{"paranoid_file_checks", "false"}}));
    ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
                              {{"paranoid_file_checks", "true"}}));
    ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
                              {{"paranoid_file_checks", "false"}}));
    TEST_SYNC_POINT(
        "BackupEngineTest::SetOptionsBackupRaceCondition:AfterSetOptions");
  }};
  // Backup creation must tolerate the OPTIONS file being replaced mid-flight.
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  setoptions_thread.join();
  CloseDBAndBackupEngine();
}
1861
1862 // This test verifies we don't delete the latest backup when read-only option is
1863 // set
TEST_F(BackupEngineTest,NoDeleteWithReadOnly)1864 TEST_F(BackupEngineTest, NoDeleteWithReadOnly) {
1865 const int keys_iteration = 5000;
1866 Random rnd(6);
1867
1868 OpenDBAndBackupEngine(true);
1869 // create five backups
1870 for (int i = 0; i < 5; ++i) {
1871 FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
1872 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)));
1873 }
1874 CloseDBAndBackupEngine();
1875 ASSERT_OK(file_manager_->WriteToFile(latest_backup_, "4"));
1876
1877 backupable_options_->destroy_old_data = false;
1878 BackupEngineReadOnly* read_only_backup_engine;
1879 ASSERT_OK(BackupEngineReadOnly::Open(backup_chroot_env_.get(),
1880 *backupable_options_,
1881 &read_only_backup_engine));
1882
1883 // assert that data from backup 5 is still here (even though LATEST_BACKUP
1884 // says 4 is latest)
1885 ASSERT_OK(file_manager_->FileExists(backupdir_ + "/meta/5"));
1886 ASSERT_OK(file_manager_->FileExists(backupdir_ + "/private/5"));
1887
1888 // Behavior change: We now ignore LATEST_BACKUP contents. This means that
1889 // we should have 5 backups, even if LATEST_BACKUP says 4.
1890 std::vector<BackupInfo> backup_info;
1891 read_only_backup_engine->GetBackupInfo(&backup_info);
1892 ASSERT_EQ(5UL, backup_info.size());
1893 delete read_only_backup_engine;
1894 }
1895
// A new backup whose table files would overwrite differently-sized files
// already owned by existing backups must fail with Corruption; deleting the
// conflicting backups unblocks it.
TEST_F(BackupEngineTest, FailOverwritingBackups) {
  options_.write_buffer_size = 1024 * 1024 * 1024;  // 1GB
  options_.disable_auto_compactions = true;

  // Create backups 1 through 5, reopening the DB before each so every
  // backup contributes its own flushed SST.
  OpenDBAndBackupEngine(true);
  for (int backup_num = 0; backup_num < 5; ++backup_num) {
    CloseDBAndBackupEngine();
    DeleteLogFiles();
    OpenDBAndBackupEngine(false);
    FillDB(db_.get(), 100 * backup_num, 100 * (backup_num + 1), kFlushAll);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  }
  CloseDBAndBackupEngine();

  // Roll the DB back to the state of backup 3.
  OpenBackupEngine();
  ASSERT_OK(backup_engine_->RestoreDBFromBackup(3, dbname_, dbname_));
  CloseBackupEngine();

  OpenDBAndBackupEngine(false);
  // More data, bigger SST
  FillDB(db_.get(), 1000, 1300, kFlushAll);
  Status status = backup_engine_->CreateNewBackup(db_.get());
  // The new backup fails because its new table files clash with old table
  // files from backups 4 and 5: write_buffer_size is huge, so each backup
  // generates exactly one sst file, and a file generated here has the same
  // name as an sst file generated by backup 4, but is bigger.
  ASSERT_TRUE(status.IsCorruption());
  ASSERT_OK(backup_engine_->DeleteBackup(4));
  ASSERT_OK(backup_engine_->DeleteBackup(5));
  // With the clashing backups removed, the backup can succeed.
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
  CloseDBAndBackupEngine();
}
1933
// Basic backup/restore round trip with table file sharing disabled.
TEST_F(BackupEngineTest, NoShareTableFiles) {
  const int keys_iteration = 5000;
  OpenDBAndBackupEngine(true, false, kNoShare);
  for (int backup_num = 0; backup_num < 5; ++backup_num) {
    FillDB(db_.get(), keys_iteration * backup_num,
           keys_iteration * (backup_num + 1));
    ASSERT_OK(
        backup_engine_->CreateNewBackup(db_.get(), (backup_num % 2) != 0));
  }
  CloseDBAndBackupEngine();

  // Each backup i+1 must contain exactly the keys written up to that point.
  for (int backup_num = 0; backup_num < 5; ++backup_num) {
    AssertBackupConsistency(backup_num + 1, 0,
                            keys_iteration * (backup_num + 1),
                            keys_iteration * 6);
  }
}
1948
1949 // Verify that you can backup and restore with share_files_with_checksum on
TEST_F(BackupEngineTest,ShareTableFilesWithChecksums)1950 TEST_F(BackupEngineTest, ShareTableFilesWithChecksums) {
1951 const int keys_iteration = 5000;
1952 OpenDBAndBackupEngine(true, false, kShareWithChecksum);
1953 for (int i = 0; i < 5; ++i) {
1954 FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
1955 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), !!(i % 2)));
1956 }
1957 CloseDBAndBackupEngine();
1958
1959 for (int i = 0; i < 5; ++i) {
1960 AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
1961 keys_iteration * 6);
1962 }
1963 }
1964
1965 // Verify that you can backup and restore using share_files_with_checksum set to
1966 // false and then transition this option to true
TEST_F(BackupEngineTest,ShareTableFilesWithChecksumsTransition)1967 TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsTransition) {
1968 const int keys_iteration = 5000;
1969 // set share_files_with_checksum to false
1970 OpenDBAndBackupEngine(true, false, kShareNoChecksum);
1971 for (int i = 0; i < 5; ++i) {
1972 FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
1973 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
1974 }
1975 CloseDBAndBackupEngine();
1976
1977 for (int i = 0; i < 5; ++i) {
1978 AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
1979 keys_iteration * 6);
1980 }
1981
1982 // set share_files_with_checksum to true and do some more backups
1983 OpenDBAndBackupEngine(false /* destroy_old_data */, false,
1984 kShareWithChecksum);
1985 for (int i = 5; i < 10; ++i) {
1986 FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
1987 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
1988 }
1989 CloseDBAndBackupEngine();
1990
1991 // Verify first (about to delete)
1992 AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 11);
1993
1994 // For an extra challenge, make sure that GarbageCollect / DeleteBackup
1995 // is OK even if we open without share_table_files
1996 OpenDBAndBackupEngine(false /* destroy_old_data */, false, kNoShare);
1997 ASSERT_OK(backup_engine_->DeleteBackup(1));
1998 ASSERT_OK(backup_engine_->GarbageCollect());
1999 CloseDBAndBackupEngine();
2000
2001 // Verify rest (not deleted)
2002 for (int i = 1; i < 10; ++i) {
2003 AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
2004 keys_iteration * 11);
2005 }
2006 }
2007
2008 // Verify backup and restore with various naming options, check names
TEST_F(BackupEngineTest,ShareTableFilesWithChecksumsNewNaming)2009 TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsNewNaming) {
2010 ASSERT_TRUE(backupable_options_->share_files_with_checksum_naming ==
2011 kNamingDefault);
2012
2013 const int keys_iteration = 5000;
2014
2015 OpenDBAndBackupEngine(true, false, kShareWithChecksum);
2016 FillDB(db_.get(), 0, keys_iteration);
2017 CloseDBAndBackupEngine();
2018
2019 static const std::map<ShareFilesNaming, TestRegex> option_to_expected = {
2020 {kLegacyCrc32cAndFileSize, "[0-9]+_[0-9]+_[0-9]+[.]sst"},
2021 // kFlagIncludeFileSize redundant here
2022 {kLegacyCrc32cAndFileSize | kFlagIncludeFileSize,
2023 "[0-9]+_[0-9]+_[0-9]+[.]sst"},
2024 {kUseDbSessionId, "[0-9]+_s[0-9A-Z]{20}[.]sst"},
2025 {kUseDbSessionId | kFlagIncludeFileSize,
2026 "[0-9]+_s[0-9A-Z]{20}_[0-9]+[.]sst"},
2027 };
2028
2029 const TestRegex blobfile_pattern = "[0-9]+_[0-9]+_[0-9]+[.]blob";
2030
2031 for (const auto& pair : option_to_expected) {
2032 CloseAndReopenDB();
2033 backupable_options_->share_files_with_checksum_naming = pair.first;
2034 OpenBackupEngine(true /*destroy_old_data*/);
2035 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
2036 CloseDBAndBackupEngine();
2037 AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2);
2038 AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", pair.second,
2039 ".sst", 1 /* minimum_count */);
2040 if (std::string::npos != pair.second.GetPattern().find("_[0-9]+[.]sst")) {
2041 AssertDirectoryFilesSizeIndicators(backupdir_ + "/shared_checksum",
2042 1 /* minimum_count */);
2043 }
2044
2045 AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum",
2046 blobfile_pattern, ".blob",
2047 1 /* minimum_count */);
2048 }
2049 }
2050
2051 // Mimic SST file generated by pre-6.12 releases and verify that
2052 // old names are always used regardless of naming option.
TEST_F(BackupEngineTest,ShareTableFilesWithChecksumsOldFileNaming)2053 TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsOldFileNaming) {
2054 const int keys_iteration = 5000;
2055
2056 // Pre-6.12 release did not include db id and db session id properties.
2057 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
2058 "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
2059 auto props = static_cast<TableProperties*>(props_vs);
2060 props->db_id = "";
2061 props->db_session_id = "";
2062 });
2063 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
2064
2065 OpenDBAndBackupEngine(true, false, kShareWithChecksum);
2066 FillDB(db_.get(), 0, keys_iteration);
2067 CloseDBAndBackupEngine();
2068
2069 // Old names should always be used on old files
2070 const TestRegex sstfile_pattern("[0-9]+_[0-9]+_[0-9]+[.]sst");
2071
2072 const TestRegex blobfile_pattern = "[0-9]+_[0-9]+_[0-9]+[.]blob";
2073
2074 for (ShareFilesNaming option : {kNamingDefault, kUseDbSessionId}) {
2075 CloseAndReopenDB();
2076 backupable_options_->share_files_with_checksum_naming = option;
2077 OpenBackupEngine(true /*destroy_old_data*/);
2078 ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
2079 CloseDBAndBackupEngine();
2080 AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2);
2081 AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum",
2082 sstfile_pattern, ".sst",
2083 1 /* minimum_count */);
2084 AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum",
2085 blobfile_pattern, ".blob",
2086 1 /* minimum_count */);
2087 }
2088
2089 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
2090 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
2091 }
2092
// Test how naming options interact with detecting DB corruption
// between incremental backups
TEST_F(BackupEngineTest, TableFileCorruptionBeforeIncremental) {
  // ShareFilesNaming value 0 is used below as a sentinel meaning "use
  // kShareNoChecksum instead of kShareWithChecksum".
  const auto share_no_checksum = static_cast<ShareFilesNaming>(0);

  for (bool corrupt_before_first_backup : {false, true}) {
    for (ShareFilesNaming option :
         {share_no_checksum, kLegacyCrc32cAndFileSize, kNamingDefault}) {
      auto share =
          option == share_no_checksum ? kShareNoChecksum : kShareWithChecksum;
      if (option != share_no_checksum) {
        backupable_options_->share_files_with_checksum_naming = option;
      }
      OpenDBAndBackupEngine(true, false, share);
      DBImpl* dbi = static_cast<DBImpl*>(db_.get());
      // A small SST file
      ASSERT_OK(dbi->Put(WriteOptions(), "x", "y"));
      ASSERT_OK(dbi->Flush(FlushOptions()));
      // And a bigger one
      ASSERT_OK(dbi->Put(WriteOptions(), "y", Random(42).RandomString(500)));
      ASSERT_OK(dbi->Flush(FlushOptions()));
      ASSERT_OK(dbi->TEST_WaitForFlushMemTable());
      // Reopen read-only so the table files can be corrupted underneath it.
      CloseAndReopenDB(/*read_only*/ true);

      // Record paths of the two table files just created.
      std::vector<FileAttributes> table_files;
      ASSERT_OK(GetDataFilesInDB(kTableFile, &table_files));
      ASSERT_EQ(table_files.size(), 2);
      std::string tf0 = dbname_ + "/" + table_files[0].name;
      std::string tf1 = dbname_ + "/" + table_files[1].name;

      CloseDBAndBackupEngine();

      if (corrupt_before_first_backup) {
        // This corrupts a data block, which does not cause DB open
        // failure, only failure on accessing the block.
        ASSERT_OK(db_file_manager_->CorruptFileStart(tf0));
      }

      // First backup.
      OpenDBAndBackupEngine(false, false, share);
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
      CloseDBAndBackupEngine();

      // if corrupt_before_first_backup, this undoes the initial corruption
      ASSERT_OK(db_file_manager_->CorruptFileStart(tf0));

      // Second (incremental) backup, with tf0's contents now differing from
      // what the first backup saw.
      OpenDBAndBackupEngine(false, false, share);
      Status s = backup_engine_->CreateNewBackup(db_.get());

      // Even though none of the naming options catch the inconsistency
      // between the first and second time backing up fname, in the case
      // of kUseDbSessionId (kNamingDefault), this is an intentional
      // trade-off to avoid full scan of files from the DB that are
      // already backed up. If we did the scan, kUseDbSessionId could catch
      // the corruption. kLegacyCrc32cAndFileSize does the scan (to
      // compute checksum for name) without catching the corruption,
      // because the corruption means the names don't merge.
      EXPECT_OK(s);

      // VerifyBackup doesn't check DB integrity or table file internal
      // checksums
      EXPECT_OK(backup_engine_->VerifyBackup(1, true));
      EXPECT_OK(backup_engine_->VerifyBackup(2, true));

      // Restore backup 2 and check whether the restored DB passes full
      // checksum verification.
      db_.reset();
      ASSERT_OK(backup_engine_->RestoreDBFromBackup(2, dbname_, dbname_));
      {
        DB* db = OpenDB();
        s = db->VerifyChecksum();
        delete db;
      }
      if (option != kLegacyCrc32cAndFileSize && !corrupt_before_first_backup) {
        // Second backup is OK because it used (uncorrupt) file from first
        // backup instead of (corrupt) file from DB.
        // This is arguably a good trade-off vs. treating the file as distinct
        // from the old version, because a file should be more likely to be
        // corrupt as it ages. Although the backed-up file might also corrupt
        // with age, the alternative approach (checksum in file name computed
        // from current DB file contents) wouldn't detect that case at backup
        // time either. Although you would have both copies of the file with
        // the alternative approach, that would only last until the older
        // backup is deleted.
        ASSERT_OK(s);
      } else if (option == kLegacyCrc32cAndFileSize &&
                 corrupt_before_first_backup) {
        // Second backup is OK because it saved the updated (uncorrupt)
        // file from DB, instead of the sharing with first backup.
        // Recall: if corrupt_before_first_backup, [second CorruptFileStart]
        // undoes the initial corruption.
        // This is arguably a bad trade-off vs. sharing the old version of the
        // file because a file should be more likely to corrupt as it ages.
        // (Not likely that the previously backed-up version was already
        // corrupt and the new version is non-corrupt. This approach doesn't
        // help if backed-up version is corrupted after taking the backup.)
        ASSERT_OK(s);
      } else {
        // Something is legitimately corrupted, but we can't be sure what
        // with information available (TODO? unless one passes block checksum
        // test and other doesn't. Probably better to use end-to-end full file
        // checksum anyway.)
        ASSERT_TRUE(s.IsCorruption());
      }

      CloseDBAndBackupEngine();
      ASSERT_OK(DestroyDB(dbname_, options_));
    }
  }
}
2200
// Test how naming options interact with detecting file size corruption
// between incremental backups
TEST_F(BackupEngineTest, FileSizeForIncremental) {
  // ShareFilesNaming value 0 is used below as a sentinel meaning "use
  // kShareNoChecksum instead of kShareWithChecksum".
  const auto share_no_checksum = static_cast<ShareFilesNaming>(0);
  // TODO: enable blob files once Integrated BlobDB supports DB session id.
  options_.enable_blob_files = false;

  for (ShareFilesNaming option : {share_no_checksum, kLegacyCrc32cAndFileSize,
                                  kNamingDefault, kUseDbSessionId}) {
    auto share =
        option == share_no_checksum ? kShareNoChecksum : kShareWithChecksum;
    if (option != share_no_checksum) {
      backupable_options_->share_files_with_checksum_naming = option;
    }
    OpenDBAndBackupEngine(true, false, share);

    std::vector<FileAttributes> children;
    const std::string shared_dir =
        backupdir_ +
        (option == share_no_checksum ? "/shared" : "/shared_checksum");

    // A single small SST file
    ASSERT_OK(db_->Put(WriteOptions(), "x", "y"));

    // First, test that we always detect file size corruption on the shared
    // backup side on incremental. (Since sizes aren't really part of backup
    // meta file, this works by querying the filesystem for the sizes.)
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true /*flush*/));
    CloseDBAndBackupEngine();

    // Corrupt backup SST file
    ASSERT_OK(file_manager_->GetChildrenFileAttributes(shared_dir, &children));
    ASSERT_EQ(children.size(), 1U);  // one sst
    for (const auto& child : children) {
      if (child.name.size() > 4 && child.size_bytes > 0) {
        // Overwrite the backed-up SST with short junk, changing its size.
        ASSERT_OK(
            file_manager_->WriteToFile(shared_dir + "/" + child.name, "asdf"));
        break;
      }
    }

    OpenDBAndBackupEngine(false, false, share);
    Status s = backup_engine_->CreateNewBackup(db_.get());
    EXPECT_TRUE(s.IsCorruption());

    ASSERT_OK(backup_engine_->PurgeOldBackups(0));
    CloseDBAndBackupEngine();

    // Second, test that a hypothetical db session id collision would likely
    // not suffice to corrupt a backup, because there's a good chance of
    // file size difference (in this test, guaranteed) so either no name
    // collision or detected collision.

    // Create backup 1
    OpenDBAndBackupEngine(false, false, share);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

    // Even though we have "the same" DB state as backup 1, we need
    // to restore to recreate the same conditions as later restore.
    db_.reset();
    ASSERT_OK(DestroyDB(dbname_, options_));
    ASSERT_OK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
    CloseDBAndBackupEngine();

    // Forge session id
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "DBImpl::SetDbSessionId", [](void* sid_void_star) {
          std::string* sid = static_cast<std::string*>(sid_void_star);
          *sid = "01234567890123456789";
        });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    // Create another SST file
    OpenDBAndBackupEngine(false, false, share);
    ASSERT_OK(db_->Put(WriteOptions(), "y", "x"));

    // Create backup 2
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true /*flush*/));

    // Restore backup 1 (again)
    db_.reset();
    ASSERT_OK(DestroyDB(dbname_, options_));
    ASSERT_OK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
    CloseDBAndBackupEngine();

    // Create another SST file with same number and db session id, only bigger
    OpenDBAndBackupEngine(false, false, share);
    ASSERT_OK(db_->Put(WriteOptions(), "y", Random(42).RandomString(500)));

    // Count backup SSTs files.
    children.clear();
    ASSERT_OK(file_manager_->GetChildrenFileAttributes(shared_dir, &children));
    ASSERT_EQ(children.size(), 2U);  // two sst files

    // Try create backup 3
    s = backup_engine_->CreateNewBackup(db_.get(), true /*flush*/);

    // Re-count backup SSTs
    children.clear();
    ASSERT_OK(file_manager_->GetChildrenFileAttributes(shared_dir, &children));

    if (option == kUseDbSessionId) {
      // Acceptable to call it corruption if size is not in name and
      // db session id collision is practically impossible.
      EXPECT_TRUE(s.IsCorruption());
      EXPECT_EQ(children.size(), 2U);  // no SST file added
    } else if (option == share_no_checksum) {
      // Good to call it corruption if both backups cannot be
      // accommodated.
      EXPECT_TRUE(s.IsCorruption());
      EXPECT_EQ(children.size(), 2U);  // no SST file added
    } else {
      // Since opening a DB seems sufficient for detecting size corruption
      // on the DB side, this should be a good thing, ...
      EXPECT_OK(s);
      // ... as long as we did actually treat it as a distinct SST file.
      EXPECT_EQ(children.size(), 3U);  // Another SST added
    }
    CloseDBAndBackupEngine();
    ASSERT_OK(DestroyDB(dbname_, options_));
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  }
}
2325
// Verify backup and restore with share_files_with_checksum off and then
// transition this option to on and share_files_with_checksum_naming to be
// based on kUseDbSessionId
TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsNewNamingTransition) {
  const int keys_iteration = 5000;
  // We may set share_files_with_checksum_naming to kLegacyCrc32cAndFileSize
  // here but even if we don't, it should have no effect when
  // share_files_with_checksum is false
  ASSERT_TRUE(backupable_options_->share_files_with_checksum_naming ==
              kNamingDefault);
  // set share_files_with_checksum to false
  OpenDBAndBackupEngine(true, false, kShareNoChecksum);
  // j counts the backups taken so far (grows as phases are added below).
  int j = 3;
  for (int i = 0; i < j; ++i) {
    FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  }
  CloseDBAndBackupEngine();

  for (int i = 0; i < j; ++i) {
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (j + 1));
  }

  // set share_files_with_checksum to true and do some more backups
  // and use session id in the name of SST file backup
  ASSERT_TRUE(backupable_options_->share_files_with_checksum_naming ==
              kNamingDefault);
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  FillDB(db_.get(), keys_iteration * j, keys_iteration * (j + 1));
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();
  // Use checksum in the name as well
  ++j;
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  FillDB(db_.get(), keys_iteration * j, keys_iteration * (j + 1));
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  // Verify first (about to delete)
  AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * (j + 1));

  // For an extra challenge, make sure that GarbageCollect / DeleteBackup
  // is OK even if we open without share_table_files but with
  // share_files_with_checksum_naming based on kUseDbSessionId
  ASSERT_TRUE(backupable_options_->share_files_with_checksum_naming ==
              kNamingDefault);
  OpenDBAndBackupEngine(false /* destroy_old_data */, false, kNoShare);
  ASSERT_OK(backup_engine_->DeleteBackup(1));
  ASSERT_OK(backup_engine_->GarbageCollect());
  CloseDBAndBackupEngine();

  // Verify second (about to delete)
  AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * (j + 1));

  // Use checksum and file size for backup table file names and open without
  // share_table_files
  // Again, make sure that GarbageCollect / DeleteBackup is OK
  backupable_options_->share_files_with_checksum_naming =
      kLegacyCrc32cAndFileSize;
  OpenDBAndBackupEngine(false /* destroy_old_data */, false, kNoShare);
  ASSERT_OK(backup_engine_->DeleteBackup(2));
  ASSERT_OK(backup_engine_->GarbageCollect());
  CloseDBAndBackupEngine();

  // Verify rest (not deleted)
  for (int i = 2; i < j; ++i) {
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (j + 1));
  }
}
2400
// Verify backup and restore with share_files_with_checksum on and transition
// from kLegacyCrc32cAndFileSize to kUseDbSessionId
TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsNewNamingUpgrade) {
  backupable_options_->share_files_with_checksum_naming =
      kLegacyCrc32cAndFileSize;
  const int keys_iteration = 5000;
  // set share_files_with_checksum to true
  OpenDBAndBackupEngine(true, false, kShareWithChecksum);
  // j counts the backups taken so far (grows as phases are added below).
  int j = 3;
  for (int i = 0; i < j; ++i) {
    FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  }
  CloseDBAndBackupEngine();

  for (int i = 0; i < j; ++i) {
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (j + 1));
  }

  // Switch naming to session-id-based and take another backup.
  backupable_options_->share_files_with_checksum_naming = kUseDbSessionId;
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  FillDB(db_.get(), keys_iteration * j, keys_iteration * (j + 1));
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  // One more backup with DB file checksums enabled as well.
  ++j;
  options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  FillDB(db_.get(), keys_iteration * j, keys_iteration * (j + 1));
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  // Verify first (about to delete)
  AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * (j + 1));

  // For an extra challenge, make sure that GarbageCollect / DeleteBackup
  // is OK even if we open without share_table_files
  OpenDBAndBackupEngine(false /* destroy_old_data */, false, kNoShare);
  ASSERT_OK(backup_engine_->DeleteBackup(1));
  ASSERT_OK(backup_engine_->GarbageCollect());
  CloseDBAndBackupEngine();

  // Verify second (about to delete)
  AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * (j + 1));

  // Use checksum and file size for backup table file names and open without
  // share_table_files
  // Again, make sure that GarbageCollect / DeleteBackup is OK
  backupable_options_->share_files_with_checksum_naming =
      kLegacyCrc32cAndFileSize;
  OpenDBAndBackupEngine(false /* destroy_old_data */, false, kNoShare);
  ASSERT_OK(backup_engine_->DeleteBackup(2));
  ASSERT_OK(backup_engine_->GarbageCollect());
  CloseDBAndBackupEngine();

  // Verify rest (not deleted)
  for (int i = 2; i < j; ++i) {
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (j + 1));
  }
}
2465
2466 // This test simulates cleaning up after aborted or incomplete creation
2467 // of a new backup.
TEST_F(BackupEngineTest, DeleteTmpFiles) {
  // Each cleanup_fn value selects a different engine operation that is
  // expected to remove leftover temporary files (see the switch below).
  for (int cleanup_fn : {1, 2, 3, 4}) {
    for (ShareOption shared_option : kAllShareOptions) {
      OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                            shared_option);
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
      BackupID next_id = 1;
      BackupID oldest_id = std::numeric_limits<BackupID>::max();
      {
        // Determine the next unused backup id and the oldest existing one.
        std::vector<BackupInfo> backup_info;
        backup_engine_->GetBackupInfo(&backup_info);
        for (const auto& bi : backup_info) {
          next_id = std::max(next_id, bi.backup_id + 1);
          oldest_id = std::min(oldest_id, bi.backup_id);
        }
      }
      CloseDBAndBackupEngine();

      // An aborted or incomplete new backup will always be in the next
      // id (maybe more)
      std::string next_private = "private/" + std::to_string(next_id);

      // NOTE: both shared and shared_checksum should be cleaned up
      // regardless of how the backup engine is opened.
      std::vector<std::string> tmp_files_and_dirs;
      for (const auto& dir_and_file : {
               std::make_pair(std::string("shared"),
                              std::string(".00006.sst.tmp")),
               std::make_pair(std::string("shared_checksum"),
                              std::string(".00007.sst.tmp")),
               std::make_pair(next_private, std::string("00003.sst")),
           }) {
        // Plant a fake leftover file in each location.
        std::string dir = backupdir_ + "/" + dir_and_file.first;
        ASSERT_OK(file_manager_->CreateDirIfMissing(dir));
        ASSERT_OK(file_manager_->FileExists(dir));

        std::string file = dir + "/" + dir_and_file.second;
        ASSERT_OK(file_manager_->WriteToFile(file, "tmp"));
        ASSERT_OK(file_manager_->FileExists(file));

        tmp_files_and_dirs.push_back(file);
      }
      if (cleanup_fn != /*CreateNewBackup*/ 4) {
        // This exists after CreateNewBackup because it's deleted then
        // re-created.
        tmp_files_and_dirs.push_back(backupdir_ + "/" + next_private);
      }

      OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                            shared_option);
      // Need to call one of these explicitly to delete tmp files
      switch (cleanup_fn) {
        case 1:
          ASSERT_OK(backup_engine_->GarbageCollect());
          break;
        case 2:
          ASSERT_OK(backup_engine_->DeleteBackup(oldest_id));
          break;
        case 3:
          ASSERT_OK(backup_engine_->PurgeOldBackups(1));
          break;
        case 4:
          // Does a garbage collect if it sees that next private dir exists
          ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
          break;
        default:
          assert(false);
      }
      CloseDBAndBackupEngine();
      // Every planted file (and, except in case 4, the next private dir)
      // must now be gone.
      for (std::string file_or_dir : tmp_files_and_dirs) {
        if (file_manager_->FileExists(file_or_dir) != Status::NotFound()) {
          FAIL() << file_or_dir << " was expected to be deleted." << cleanup_fn;
        }
      }
    }
  }
}
2545
TEST_F(BackupEngineTest, KeepLogFiles) {
  // Exclude WALs from the backup itself; rely on keep_log_files at restore.
  backupable_options_->backup_log_files = false;
  // Retain WALs in the DB dir essentially forever (24 hours).
  options_.WAL_ttl_seconds = 24 * 60 * 60;
  OpenDBAndBackupEngine(true);
  // Two flushed batches before the backup...
  for (int start = 0; start < 200; start += 100) {
    FillDB(db_.get(), start, start + 100, kFlushAll);
  }
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
  // ...and three more after it, which live only in the DB dir / WALs.
  for (int start = 200; start < 500; start += 100) {
    FillDB(db_.get(), start, start + 100, kFlushAll);
  }
  CloseDBAndBackupEngine();

  // all data should be there if we call with keep_log_files = true
  AssertBackupConsistency(0, 0, 500, 600, true);
}
2562
2563 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
2564 class BackupEngineRateLimitingTestWithParam
2565 : public BackupEngineTest,
2566 public testing::WithParamInterface<
2567 std::tuple<bool /* make throttle */,
2568 int /* 0 = single threaded, 1 = multi threaded*/,
2569 std::pair<uint64_t, uint64_t> /* limits */>> {
2570 public:
BackupEngineRateLimitingTestWithParam()2571 BackupEngineRateLimitingTestWithParam() {}
2572 };
2573
2574 uint64_t const MB = 1024 * 1024;
2575
2576 INSTANTIATE_TEST_CASE_P(
2577 RateLimiting, BackupEngineRateLimitingTestWithParam,
2578 ::testing::Values(std::make_tuple(false, 0, std::make_pair(1 * MB, 5 * MB)),
2579 std::make_tuple(false, 0, std::make_pair(2 * MB, 3 * MB)),
2580 std::make_tuple(false, 1, std::make_pair(1 * MB, 5 * MB)),
2581 std::make_tuple(false, 1, std::make_pair(2 * MB, 3 * MB)),
2582 std::make_tuple(true, 0, std::make_pair(1 * MB, 5 * MB)),
2583 std::make_tuple(true, 0, std::make_pair(2 * MB, 3 * MB)),
2584 std::make_tuple(true, 1, std::make_pair(1 * MB, 5 * MB)),
2585 std::make_tuple(true, 1,
2586 std::make_pair(2 * MB, 3 * MB))));
2587
TEST_P(BackupEngineRateLimitingTestWithParam, RateLimiting) {
  size_t const kMicrosPerSec = 1000 * 1000LL;

  // Throttlers start at 1 byte/sec; real limits are applied further below,
  // and only attached when the "make throttle" parameter is set.
  std::shared_ptr<RateLimiter> backupThrottler(NewGenericRateLimiter(1));
  std::shared_ptr<RateLimiter> restoreThrottler(NewGenericRateLimiter(1));

  bool makeThrottler = std::get<0>(GetParam());
  if (makeThrottler) {
    backupable_options_->backup_rate_limiter = backupThrottler;
    backupable_options_->restore_rate_limiter = restoreThrottler;
  }

  // iter 0 -- single threaded
  // iter 1 -- multi threaded
  int iter = std::get<1>(GetParam());
  const std::pair<uint64_t, uint64_t> limit = std::get<2>(GetParam());

  // destroy old data
  DestroyDB(dbname_, Options());
  if (makeThrottler) {
    // Explicit limiter objects: set their speeds directly.
    backupThrottler->SetBytesPerSecond(limit.first);
    restoreThrottler->SetBytesPerSecond(limit.second);
  } else {
    // Legacy numeric options path.
    backupable_options_->backup_rate_limit = limit.first;
    backupable_options_->restore_rate_limit = limit.second;
  }
  backupable_options_->max_background_operations = (iter == 0) ? 1 : 10;
  options_.compression = kNoCompression;
  OpenDBAndBackupEngine(true);
  size_t bytes_written = FillDB(db_.get(), 0, 100000);

  // Backup must take at least ~80% of the theoretical rate-limited minimum.
  auto start_backup = db_chroot_env_->NowMicros();
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
  auto backup_time = db_chroot_env_->NowMicros() - start_backup;
  auto rate_limited_backup_time = (bytes_written * kMicrosPerSec) / limit.first;
  ASSERT_GT(backup_time, 0.8 * rate_limited_backup_time);

  CloseDBAndBackupEngine();

  // Same timing check for restore with its own limit.
  OpenBackupEngine();
  auto start_restore = db_chroot_env_->NowMicros();
  ASSERT_OK(backup_engine_->RestoreDBFromLatestBackup(dbname_, dbname_));
  auto restore_time = db_chroot_env_->NowMicros() - start_restore;
  CloseBackupEngine();
  auto rate_limited_restore_time =
      (bytes_written * kMicrosPerSec) / limit.second;
  ASSERT_GT(restore_time, 0.8 * rate_limited_restore_time);

  AssertBackupConsistency(0, 0, 100000, 100010);
}
2638
TEST_P(BackupEngineRateLimitingTestWithParam, RateLimitingVerifyBackup) {
  const std::size_t kMicrosPerSec = 1000 * 1000LL;
  // Limiter in kAllIo mode so verification reads are charged as well.
  std::shared_ptr<RateLimiter> backupThrottler(NewGenericRateLimiter(
      1, 100 * 1000 /* refill_period_us */, 10 /* fairness */,
      RateLimiter::Mode::kAllIo /* mode */));

  const bool makeThrottler = std::get<0>(GetParam());
  if (makeThrottler) {
    backupable_options_->backup_rate_limiter = backupThrottler;
  }

  // Simplified from the redundant `cond ? true : false`.
  const bool is_single_threaded = std::get<1>(GetParam()) == 0;
  backupable_options_->max_background_operations = is_single_threaded ? 1 : 10;

  const std::uint64_t backup_rate_limiter_limit = std::get<2>(GetParam()).first;
  if (makeThrottler) {
    backupable_options_->backup_rate_limiter->SetBytesPerSecond(
        backup_rate_limiter_limit);
  } else {
    backupable_options_->backup_rate_limit = backup_rate_limiter_limit;
  }

  DestroyDB(dbname_, Options());
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, 100000);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));

  std::vector<BackupInfo> backup_infos;
  BackupInfo backup_info;
  backup_engine_->GetBackupInfo(&backup_infos);
  ASSERT_EQ(1, backup_infos.size());
  const int backup_id = 1;
  ASSERT_EQ(backup_id, backup_infos[0].backup_id);
  ASSERT_OK(backup_engine_->GetBackupInfo(backup_id, &backup_info,
                                          true /* include_file_details */));

  // Sum backed-up file sizes: how many bytes a checksum verification reads.
  // (const& avoids copying each BackupFileInfo in the loop.)
  std::uint64_t bytes_read_during_verify_backup = 0;
  for (const BackupFileInfo& backup_file_info : backup_info.file_details) {
    bytes_read_during_verify_backup += backup_file_info.size;
  }

  // With a kAllIo throttler attached, verification must take at least
  // ~80% of the theoretical rate-limited minimum.
  auto start_verify_backup = db_chroot_env_->NowMicros();
  ASSERT_OK(
      backup_engine_->VerifyBackup(backup_id, true /* verify_with_checksum */));
  auto verify_backup_time = db_chroot_env_->NowMicros() - start_verify_backup;
  auto rate_limited_verify_backup_time =
      (bytes_read_during_verify_backup * kMicrosPerSec) /
      backup_rate_limiter_limit;

  if (makeThrottler) {
    EXPECT_GE(verify_backup_time, 0.8 * rate_limited_verify_backup_time);
  }
  CloseDBAndBackupEngine();
  AssertBackupConsistency(backup_id, 0, 100000, 100010);
  DestroyDB(dbname_, Options());
}
2696
TEST_P(BackupEngineRateLimitingTestWithParam, RateLimitingChargeReadInBackup) {
  // Simplified from the redundant `cond ? true : false`.
  const bool is_single_threaded = std::get<1>(GetParam()) == 0;
  backupable_options_->max_background_operations = is_single_threaded ? 1 : 10;

  const std::uint64_t backup_rate_limiter_limit = std::get<2>(GetParam()).first;
  // First pass: limiter charges writes only.
  std::shared_ptr<RateLimiter> backup_rate_limiter(NewGenericRateLimiter(
      backup_rate_limiter_limit, 100 * 1000 /* refill_period_us */,
      10 /* fairness */, RateLimiter::Mode::kWritesOnly /* mode */));
  backupable_options_->backup_rate_limiter = backup_rate_limiter;

  DestroyDB(dbname_, Options());
  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kShareWithChecksum /* shared_option */);
  FillDB(db_.get(), 0, 10);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));
  std::int64_t total_bytes_through_with_no_read_charged =
      backup_rate_limiter->GetTotalBytesThrough();
  CloseBackupEngine();

  // Second pass: kAllIo charges reads too, so the same backup must move
  // strictly more bytes through the limiter.
  backup_rate_limiter.reset(NewGenericRateLimiter(
      backup_rate_limiter_limit, 100 * 1000 /* refill_period_us */,
      10 /* fairness */, RateLimiter::Mode::kAllIo /* mode */));
  backupable_options_->backup_rate_limiter = backup_rate_limiter;

  OpenBackupEngine(true);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));
  std::int64_t total_bytes_through_with_read_charged =
      backup_rate_limiter->GetTotalBytesThrough();
  EXPECT_GT(total_bytes_through_with_read_charged,
            total_bytes_through_with_no_read_charged);
  CloseDBAndBackupEngine();
  AssertBackupConsistency(1, 0, 10, 20);
  DestroyDB(dbname_, Options());
}
2733
TEST_P(BackupEngineRateLimitingTestWithParam, RateLimitingChargeReadInRestore) {
  // Simplified from the redundant `cond ? true : false`.
  const bool is_single_threaded = std::get<1>(GetParam()) == 0;
  backupable_options_->max_background_operations = is_single_threaded ? 1 : 10;

  const std::uint64_t restore_rate_limiter_limit =
      std::get<2>(GetParam()).second;
  // First pass: limiter charges writes only.
  std::shared_ptr<RateLimiter> restore_rate_limiter(NewGenericRateLimiter(
      restore_rate_limiter_limit, 100 * 1000 /* refill_period_us */,
      10 /* fairness */, RateLimiter::Mode::kWritesOnly /* mode */));
  backupable_options_->restore_rate_limiter = restore_rate_limiter;

  DestroyDB(dbname_, Options());
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, 10);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, Options());

  OpenBackupEngine(false /* destroy_old_data */);
  ASSERT_OK(backup_engine_->RestoreDBFromLatestBackup(dbname_, dbname_));
  std::int64_t total_bytes_through_with_no_read_charged =
      restore_rate_limiter->GetTotalBytesThrough();
  CloseBackupEngine();
  DestroyDB(dbname_, Options());

  // Second pass: kAllIo charges reads too. Restore reads the same bytes it
  // writes, so the total is expected to be exactly double.
  restore_rate_limiter.reset(NewGenericRateLimiter(
      restore_rate_limiter_limit, 100 * 1000 /* refill_period_us */,
      10 /* fairness */, RateLimiter::Mode::kAllIo /* mode */));
  backupable_options_->restore_rate_limiter = restore_rate_limiter;

  OpenBackupEngine(false /* destroy_old_data */);
  ASSERT_OK(backup_engine_->RestoreDBFromLatestBackup(dbname_, dbname_));
  std::int64_t total_bytes_through_with_read_charged =
      restore_rate_limiter->GetTotalBytesThrough();
  EXPECT_EQ(total_bytes_through_with_read_charged,
            total_bytes_through_with_no_read_charged * 2);
  CloseBackupEngine();
  AssertBackupConsistency(1, 0, 10, 20);
  DestroyDB(dbname_, Options());
}
2775
TEST_P(BackupEngineRateLimitingTestWithParam,
       RateLimitingChargeReadInInitialize) {
  // Simplified from the redundant `cond ? true : false`.
  const bool is_single_threaded = std::get<1>(GetParam()) == 0;
  backupable_options_->max_background_operations = is_single_threaded ? 1 : 10;

  const std::uint64_t backup_rate_limiter_limit = std::get<2>(GetParam()).first;
  std::shared_ptr<RateLimiter> backup_rate_limiter(NewGenericRateLimiter(
      backup_rate_limiter_limit, 100 * 1000 /* refill_period_us */,
      10 /* fairness */, RateLimiter::Mode::kAllIo /* mode */));
  backupable_options_->backup_rate_limiter = backup_rate_limiter;

  DestroyDB(dbname_, Options());
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  FillDB(db_.get(), 0, 10);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));
  CloseDBAndBackupEngine();
  AssertBackupConsistency(1, 0, 10, 20);

  // Re-opening the engine below must charge additional reads while it
  // loads existing backup metadata.
  std::int64_t total_bytes_through_before_initialize =
      backupable_options_->backup_rate_limiter->GetTotalBytesThrough();
  OpenDBAndBackupEngine(false /* destroy_old_data */);
  // We charge read in BackupEngineImpl::BackupMeta::LoadFromFile,
  // which is called in BackupEngineImpl::Initialize() during
  // OpenBackupEngine(false)
  EXPECT_GT(backupable_options_->backup_rate_limiter->GetTotalBytesThrough(),
            total_bytes_through_before_initialize);
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, Options());
}
2806 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
2807
TEST_F(BackupEngineTest, ReadOnlyBackupEngine) {
  // Create two backups to read back later.
  DestroyDB(dbname_, options_);
  OpenDBAndBackupEngine(true);
  FillDB(db_.get(), 0, 100);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  FillDB(db_.get(), 100, 200);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);

  backupable_options_->destroy_old_data = false;
  // Track writes and forbid deletes so we can prove below that the
  // read-only engine never modifies the backup directory.
  test_backup_env_->ClearWrittenFiles();
  test_backup_env_->SetLimitDeleteFiles(0);
  BackupEngineReadOnly* read_only_backup_engine;
  ASSERT_OK(BackupEngineReadOnly::Open(
      db_chroot_env_.get(), *backupable_options_, &read_only_backup_engine));
  std::vector<BackupInfo> backup_info;
  read_only_backup_engine->GetBackupInfo(&backup_info);
  ASSERT_EQ(backup_info.size(), 2U);

  RestoreOptions restore_options(false);
  ASSERT_OK(read_only_backup_engine->RestoreDBFromLatestBackup(
      dbname_, dbname_, restore_options));
  delete read_only_backup_engine;
  // The read-only engine must not have written anything.
  std::vector<std::string> should_have_written;
  test_backup_env_->AssertWrittenFiles(should_have_written);

  // The restored DB must contain both backups' data.
  DB* db = OpenDB();
  AssertExists(db, 0, 200);
  delete db;
}
2839
TEST_F(BackupEngineTest, OpenBackupAsReadOnlyDB) {
  DestroyDB(dbname_, options_);
  options_.write_dbid_to_manifest = false;

  OpenDBAndBackupEngine(true);
  FillDB(db_.get(), 0, 100);
  // Backup 1 contains only the first 100 keys.
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ false));

  options_.write_dbid_to_manifest = true;  // exercises some read-only DB code
  CloseAndReopenDB();

  FillDB(db_.get(), 100, 200);
  // Backup 2 contains all 200 keys.
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ false));
  db_.reset();  // CloseDB
  DestroyDB(dbname_, options_);
  BackupInfo backup_info;
  // First, check that we get empty fields without include_file_details
  ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 1U, &backup_info,
                                          /*with file details*/ false));
  ASSERT_EQ(backup_info.name_for_open, "");
  ASSERT_FALSE(backup_info.env_for_open);

  // Now for the real test
  backup_info = BackupInfo();
  ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 1U, &backup_info,
                                          /*with file details*/ true));

  // Caution: DBOptions only holds a raw pointer to Env, so something else
  // must keep it alive.
  // Case 1: Keeping BackupEngine open suffices to keep Env alive
  DB* db = nullptr;
  Options opts = options_;
  // Ensure some key defaults are set
  opts.wal_dir = "";
  opts.create_if_missing = false;
  opts.info_log.reset();

  opts.env = backup_info.env_for_open.get();
  std::string name = backup_info.name_for_open;
  // Deliberately drop backup_info; the open BackupEngine keeps Env alive.
  backup_info = BackupInfo();
  ASSERT_OK(DB::OpenForReadOnly(opts, name, &db));

  AssertExists(db, 0, 100);
  AssertEmpty(db, 100, 200);

  delete db;
  db = nullptr;

  // Case 2: Keeping BackupInfo alive rather than BackupEngine also suffices
  ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 2U, &backup_info,
                                          /*with file details*/ true));
  CloseBackupEngine();
  opts.create_if_missing = true;  // check also OK (though pointless)
  opts.env = backup_info.env_for_open.get();
  name = backup_info.name_for_open;
  // Note: keeping backup_info alive
  ASSERT_OK(DB::OpenForReadOnly(opts, name, &db));

  AssertExists(db, 0, 200);
  delete db;
  db = nullptr;

  // Now try opening read-write and make sure it fails, for safety.
  ASSERT_TRUE(DB::Open(opts, name, &db).IsIOError());
}
2905
TEST_F(BackupEngineTest, ProgressCallbackDuringBackup) {
  DestroyDB(dbname_, options_);
  // Trigger interval far larger than this tiny DB: callback must not fire.
  backupable_options_->callback_trigger_interval_size = 100000;
  OpenDBAndBackupEngine(true);
  FillDB(db_.get(), 0, 100);
  bool callback_fired = false;
  auto progress = [&callback_fired]() { callback_fired = true; };
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true, progress));
  ASSERT_FALSE(callback_fired);
  CloseBackupEngine();

  // Trigger interval comfortably smaller than the DB: callback must fire.
  backupable_options_->callback_trigger_interval_size = 1000;
  OpenBackupEngine();
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true, progress));
  ASSERT_TRUE(callback_fired);
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);
}
2929
// A stale file planted in the shared backup directory must be overwritten
// (not trusted) when a new backup writes the same file name.
TEST_F(BackupEngineTest, GarbageCollectionBeforeBackup) {
  DestroyDB(dbname_, options_);
  OpenDBAndBackupEngine(true);

  ASSERT_OK(backup_chroot_env_->CreateDirIfMissing(backupdir_ + "/shared"));
  // Locals renamed from the misleading "file_five": the file is 000009.sst.
  std::string stale_sst = backupdir_ + "/shared/000009.sst";
  std::string stale_contents = "I'm not really a sst file";
  // this depends on the fact that 000009.sst is the first file created by
  // the DB
  ASSERT_OK(file_manager_->WriteToFile(stale_sst, stale_contents));

  FillDB(db_.get(), 0, 100);
  // backup overwrites file 000009.sst
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));

  std::string new_contents;
  ASSERT_OK(
      ReadFileToString(backup_chroot_env_.get(), stale_sst, &new_contents));
  // file 000009.sst was overwritten
  ASSERT_TRUE(new_contents != stale_contents);

  CloseDBAndBackupEngine();

  AssertBackupConsistency(0, 0, 100);
}
2954
2955 // Test that we properly propagate Env failures
TEST_F(BackupEngineTest, EnvFailures) {
  BackupEngine* backup_engine;

  // get children failure
  {
    test_backup_env_->SetGetChildrenFailure(true);
    // Engine creation must fail cleanly when listing the backup dir fails.
    ASSERT_NOK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
                                  &backup_engine));
    test_backup_env_->SetGetChildrenFailure(false);
  }

  // created dir failure
  {
    test_backup_env_->SetCreateDirIfMissingFailure(true);
    ASSERT_NOK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
                                  &backup_engine));
    test_backup_env_->SetCreateDirIfMissingFailure(false);
  }

  // new directory failure
  {
    test_backup_env_->SetNewDirectoryFailure(true);
    ASSERT_NOK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
                                  &backup_engine));
    test_backup_env_->SetNewDirectoryFailure(false);
  }

  // Read from meta-file failure
  {
    // First create a real backup so Open has a meta file to read.
    DestroyDB(dbname_, options_);
    OpenDBAndBackupEngine(true);
    FillDB(db_.get(), 0, 100);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
    CloseDBAndBackupEngine();
    test_backup_env_->SetDummySequentialFile(true);
    test_backup_env_->SetDummySequentialFileFailReads(true);
    backupable_options_->destroy_old_data = false;
    ASSERT_NOK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
                                  &backup_engine));
    test_backup_env_->SetDummySequentialFile(false);
    test_backup_env_->SetDummySequentialFileFailReads(false);
  }

  // no failure
  {
    ASSERT_OK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
                                 &backup_engine));
    delete backup_engine;
  }
}
3006
3007 // Verify manifest can roll while a backup is being created with the old
3008 // manifest.
TEST_F(BackupEngineTest, ChangeManifestDuringBackupCreation) {
  DestroyDB(dbname_, options_);
  options_.max_manifest_file_size = 0;  // always rollover manifest for file add
  OpenDBAndBackupEngine(true);
  FillDB(db_.get(), 0, 100, kAutoFlushOnly);

  // Force the concurrent flush's manifest write to land exactly between
  // the checkpoint's two SavedLiveFiles sync points.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"CheckpointImpl::CreateCheckpoint:SavedLiveFiles1",
       "VersionSet::LogAndApply:WriteManifest"},
      {"VersionSet::LogAndApply:WriteManifestDone",
       "CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread flush_thread{
      [this]() { ASSERT_OK(db_->Flush(FlushOptions())); }};

  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));

  flush_thread.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  // The last manifest roll would've already been cleaned up by the full scan
  // that happens when CreateNewBackup invokes EnableFileDeletions. We need to
  // trigger another roll to verify non-full scan purges stale manifests.
  DBImpl* db_impl = static_cast_with_check<DBImpl>(db_.get());
  std::string prev_manifest_path =
      DescriptorFileName(dbname_, db_impl->TEST_Current_Manifest_FileNo());
  FillDB(db_.get(), 0, 100, kAutoFlushOnly);
  ASSERT_OK(db_chroot_env_->FileExists(prev_manifest_path));
  ASSERT_OK(db_->Flush(FlushOptions()));
  // The flush rolled the manifest; the previous one must now be purged.
  ASSERT_TRUE(db_chroot_env_->FileExists(prev_manifest_path).IsNotFound());

  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);
  AssertBackupConsistency(0, 0, 100);
}
3046
3047 // see https://github.com/facebook/rocksdb/issues/921
TEST_F(BackupEngineTest, Issue921Test) {
  backupable_options_->share_table_files = false;
  // Create only the parent directory; backup_dir itself will not exist yet,
  // and Open must still succeed.
  ASSERT_OK(
      backup_chroot_env_->CreateDirIfMissing(backupable_options_->backup_dir));
  backupable_options_->backup_dir += "/new_dir";

  BackupEngine* backup_engine = nullptr;
  ASSERT_OK(BackupEngine::Open(backup_chroot_env_.get(), *backupable_options_,
                               &backup_engine));
  delete backup_engine;
}
3059
TEST_F(BackupEngineTest, BackupWithMetadata) {
  const int keys_iteration = 5000;
  const int kNumBackups = 5;
  OpenDBAndBackupEngine(true);
  // Create several backups, each tagged with its index as app metadata.
  for (int idx = 0; idx < kNumBackups; ++idx) {
    FillDB(db_.get(), keys_iteration * idx, keys_iteration * (idx + 1));
    // Also exercise the CreateBackupOptions overload of
    // CreateNewBackupWithMetadata and the returned BackupID.
    CreateBackupOptions opts;
    opts.flush_before_backup = true;
    BackupID new_id = 0;
    ASSERT_OK(backup_engine_->CreateNewBackupWithMetadata(
        opts, db_.get(), std::to_string(idx), &new_id));
    ASSERT_EQ(new_id, static_cast<BackupID>(idx + 1));
  }
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  {
    // Metadata must round-trip through the bulk GetBackupInfo API...
    std::vector<BackupInfo> backup_infos;
    backup_engine_->GetBackupInfo(&backup_infos);
    ASSERT_EQ(kNumBackups, backup_infos.size());
    for (int idx = 0; idx < kNumBackups; idx++) {
      ASSERT_EQ(std::to_string(idx), backup_infos[idx].app_metadata);
    }
  }
  // ...and through the per-backup API.
  for (int idx = 0; idx < kNumBackups; idx++) {
    BackupInfo backup_info;
    ASSERT_OK(backup_engine_->GetBackupInfo(static_cast<BackupID>(idx + 1),
                                            &backup_info));
    ASSERT_EQ(std::to_string(idx), backup_info.app_metadata);
  }
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);
}
3097
TEST_F(BackupEngineTest, BinaryMetadata) {
  OpenDBAndBackupEngine(true);
  // Metadata containing a newline and an embedded NUL must round-trip
  // intact. Length-taking constructor preserves the NUL byte.
  const std::string binaryMetadata("abc\ndef\0ghi", 11);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), binaryMetadata));
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  std::vector<BackupInfo> backup_infos;
  backup_engine_->GetBackupInfo(&backup_infos);
  ASSERT_EQ(1, backup_infos.size());
  ASSERT_EQ(binaryMetadata, backup_infos[0].app_metadata);
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);
}
3115
TEST_F(BackupEngineTest, MetadataTooLarge) {
  OpenDBAndBackupEngine(true);
  // One byte over 1024 * 1024: the engine must reject this metadata.
  const size_t kOverLimit = 1024 * 1024 + 1;
  std::string largeMetadata(kOverLimit, '\0');
  ASSERT_NOK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), largeMetadata));
  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);
}
3124
TEST_F(BackupEngineTest, FutureMetaSchemaVersion2_SizeCorruption) {
  OpenDBAndBackupEngine(true);

  // Backup 1: no future schema, no sizes, with checksums
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

  // Backup 2: no checksums, no sizes
  TEST_FutureSchemaVersion2Options test_opts;
  test_opts.crc32c_checksums = false;
  test_opts.file_sizes = false;
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

  // Backup 3: no checksums, with sizes
  test_opts.file_sizes = true;
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

  // Backup 4: with checksums and sizes
  test_opts.crc32c_checksums = true;
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

  CloseDBAndBackupEngine();

  // Corrupt all the CURRENT files with the wrong size
  const std::string private_dir = backupdir_ + "/private";

  // Backups 1-3: replace CURRENT with a 1-byte file (size mismatch).
  for (int id = 1; id <= 3; ++id) {
    ASSERT_OK(file_manager_->WriteToFile(
        private_dir + "/" + ToString(id) + "/CURRENT", "x"));
  }
  // Except corrupt Backup 4 with same size CURRENT file
  {
    uint64_t size = 0;
    ASSERT_OK(test_backup_env_->GetFileSize(private_dir + "/4/CURRENT", &size));
    ASSERT_OK(file_manager_->WriteToFile(private_dir + "/4/CURRENT",
                                         std::string(size, 'x')));
  }

  OpenBackupEngine();

  // Only the one with sizes in metadata will be immediately detected
  // as corrupt
  std::vector<BackupID> corrupted;
  backup_engine_->GetCorruptedBackups(&corrupted);
  ASSERT_EQ(corrupted.size(), 1);
  ASSERT_EQ(corrupted[0], 3);

  // Size corruption detected on Restore with checksum
  ASSERT_TRUE(backup_engine_->RestoreDBFromBackup(1 /*id*/, dbname_, dbname_)
                  .IsCorruption());

  // Size corruption not detected without checksums nor sizes
  ASSERT_OK(backup_engine_->RestoreDBFromBackup(2 /*id*/, dbname_, dbname_));

  // Non-size corruption detected on Restore with checksum
  ASSERT_TRUE(backup_engine_->RestoreDBFromBackup(4 /*id*/, dbname_, dbname_)
                  .IsCorruption());

  CloseBackupEngine();
}
3187
TEST_F(BackupEngineTest, FutureMetaSchemaVersion2_NotSupported) {
  TEST_FutureSchemaVersion2Options test_opts;
  std::string app_metadata = "abc\ndef";

  OpenDBAndBackupEngine(true);
  // Start with supported
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), app_metadata));

  // Because we are injecting badness with a TEST API, the badness is only
  // detected on attempt to restore.
  // Not supported versions
  test_opts.version = "3";
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), app_metadata));
  test_opts.version = "23.45.67";
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), app_metadata));
  // Back to the supported version before injecting unsupported fields.
  test_opts.version = "2";

  // Non-ignorable fields
  test_opts.meta_fields["ni::blah"] = "123";
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), app_metadata));
  test_opts.meta_fields.clear();

  test_opts.file_fields["ni::123"] = "xyz";
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), app_metadata));
  test_opts.file_fields.clear();

  test_opts.footer_fields["ni::123"] = "xyz";
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(
      backup_engine_->CreateNewBackupWithMetadata(db_.get(), app_metadata));
  test_opts.footer_fields.clear();
  CloseDBAndBackupEngine();

  OpenBackupEngine();
  // The five backups written with unsupported versions/fields are all
  // reported as corrupted on open.
  std::vector<BackupID> corrupted;
  backup_engine_->GetCorruptedBackups(&corrupted);
  ASSERT_EQ(corrupted.size(), 5);

  // Restoring the latest non-corrupt backup still succeeds.
  ASSERT_OK(backup_engine_->RestoreDBFromLatestBackup(dbname_, dbname_));
  CloseBackupEngine();
}
3239
// Creates a series of backups with progressively richer schema-version-2
// metadata (no checksums/sizes -> sizes -> checksums -> default writer ->
// minor-version bump with arbitrary ignorable fields) and verifies that every
// backup in the series remains restorable after each addition.
TEST_F(BackupEngineTest, FutureMetaSchemaVersion2_Restore) {
  TEST_FutureSchemaVersion2Options test_opts;
  const int keys_iteration = 5000;

  OpenDBAndBackupEngine(true, false, kShareWithChecksum);
  FillDB(db_.get(), 0, keys_iteration);
  // Start with minimum metadata to ensure it works without it being filled
  // based on shared files also in other backups with the metadata.
  test_opts.crc32c_checksums = false;
  test_opts.file_sizes = false;
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  AssertBackupConsistency(1 /* id */, 0, keys_iteration, keys_iteration * 2);

  // Backup 2: add file sizes to the metadata.
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  test_opts.file_sizes = true;
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  for (int id = 1; id <= 2; ++id) {
    AssertBackupConsistency(id, 0, keys_iteration, keys_iteration * 2);
  }

  // Backup 3: add crc32c checksums as well.
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  test_opts.crc32c_checksums = true;
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  for (int id = 1; id <= 3; ++id) {
    AssertBackupConsistency(id, 0, keys_iteration, keys_iteration * 2);
  }

  // Backup 4: written by the regular (non-test) schema writer.
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  // No TEST_EnableWriteFutureSchemaVersion2
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  for (int id = 1; id <= 4; ++id) {
    AssertBackupConsistency(id, 0, keys_iteration, keys_iteration * 2);
  }

  // Backup 5: future minor version with arbitrary ignorable fields.
  OpenDBAndBackupEngine(false /* destroy_old_data */, false,
                        kShareWithChecksum);
  // Minor version updates should be forward-compatible
  test_opts.version = "2.5.70";
  test_opts.meta_fields["asdf.3456"] = "-42";
  test_opts.meta_fields["__QRST"] = " 1 $ %%& ";
  test_opts.file_fields["z94._"] = "^\\";
  test_opts.file_fields["_7yyyyyyyyy"] = "111111111111";
  test_opts.footer_fields["Qwzn.tz89"] = "ASDF!!@# ##=\t ";
  test_opts.footer_fields["yes"] = "no!";
  TEST_EnableWriteFutureSchemaVersion2(backup_engine_.get(), test_opts);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
  CloseDBAndBackupEngine();

  for (int id = 1; id <= 5; ++id) {
    AssertBackupConsistency(id, 0, keys_iteration, keys_iteration * 2);
  }
}
3306
TEST_F(BackupEngineTest, Concurrency) {
  // Check that we can simultaneously:
  // * Run several read operations in different threads on a single
  // BackupEngine object, and
  // * With another BackupEngine object on the same
  // backup_dir, run the same read operations in another thread, and
  // * With yet another BackupEngine object on the same
  // backup_dir, create two new backups in parallel threads.
  //
  // Because of the challenges of integrating this into db_stress,
  // this is a non-deterministic mini-stress test here instead.

  // To check for a race condition in handling buffer size based on byte
  // burst limit, we need a (generous) rate limiter
  std::shared_ptr<RateLimiter> limiter{NewGenericRateLimiter(1000000000)};
  backupable_options_->backup_rate_limiter = limiter;
  backupable_options_->restore_rate_limiter = limiter;

  OpenDBAndBackupEngine(true, false, kShareWithChecksum);

  // Seed two backups before the threads start; the read threads below rely
  // on at least 2 backups existing.
  static constexpr int keys_iteration = 5000;
  FillDB(db_.get(), 0, keys_iteration);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

  FillDB(db_.get(), keys_iteration, 2 * keys_iteration);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));

  static constexpr int max_factor = 3;
  FillDB(db_.get(), 2 * keys_iteration, max_factor * keys_iteration);
  // will create another backup soon...

  Options db_opts = options_;
  db_opts.wal_dir = "";
  db_opts.create_if_missing = false;
  BackupableDBOptions be_opts = *backupable_options_;
  be_opts.destroy_old_data = false;

  // Randomized sleeps below stagger thread start-up to vary interleavings.
  std::mt19937 rng{std::random_device()()};

  std::array<std::thread, 4> read_threads;
  std::array<std::thread, 4> restore_verify_threads;
  for (uint32_t i = 0; i < read_threads.size(); ++i) {
    uint32_t sleep_micros = rng() % 100000;
    read_threads[i] =
        std::thread([this, i, sleep_micros, &db_opts, &be_opts,
                     &restore_verify_threads, &limiter] {
          test_db_env_->SleepForMicroseconds(sleep_micros);

          // Whether to also re-open the BackupEngine, potentially seeing
          // additional backups
          bool reopen = i == 3;
          // Whether we are going to restore "latest"
          bool latest = i > 1;

          BackupEngine* my_be;
          if (reopen) {
            ASSERT_OK(BackupEngine::Open(test_db_env_.get(), be_opts, &my_be));
          } else {
            my_be = backup_engine_.get();
          }

          // Verify metadata (we don't receive updates from concurrently
          // creating a new backup)
          std::vector<BackupInfo> infos;
          my_be->GetBackupInfo(&infos);
          const uint32_t count = static_cast<uint32_t>(infos.size());
          infos.clear();
          if (reopen) {
            // A freshly-opened engine may see 0-2 of the concurrently
            // created backups in addition to the 2 seeded above.
            ASSERT_GE(count, 2U);
            ASSERT_LE(count, 4U);
            fprintf(stderr, "Reopen saw %u backups\n", count);
          } else {
            ASSERT_EQ(count, 2U);
          }
          std::vector<BackupID> ids;
          my_be->GetCorruptedBackups(&ids);
          ASSERT_EQ(ids.size(), 0U);

          // (Eventually, see below) Restore one of the backups, or "latest"
          std::string restore_db_dir = dbname_ + "/restore" + ToString(i);
          DestroyDir(test_db_env_.get(), restore_db_dir).PermitUncheckedError();
          BackupID to_restore;
          if (latest) {
            to_restore = count;
          } else {
            to_restore = i + 1;
          }

          // Open restored DB to verify its contents, but test atomic restore
          // by doing it async and ensuring we either get OK or InvalidArgument
          restore_verify_threads[i] =
              std::thread([this, &db_opts, restore_db_dir, to_restore] {
                DB* restored;
                Status s;
                for (;;) {
                  s = DB::Open(db_opts, restore_db_dir, &restored);
                  if (s.IsInvalidArgument()) {
                    // Restore hasn't finished
                    test_db_env_->SleepForMicroseconds(1000);
                    continue;
                  } else {
                    // We should only get InvalidArgument if restore is
                    // incomplete, or OK if complete
                    ASSERT_OK(s);
                    break;
                  }
                }
                // Key range present depends on which backup was restored;
                // backup N contains keys [0, N * keys_iteration).
                int factor = std::min(static_cast<int>(to_restore), max_factor);
                AssertExists(restored, 0, factor * keys_iteration);
                AssertEmpty(restored, factor * keys_iteration,
                            (factor + 1) * keys_iteration);
                delete restored;
              });

          // (Ok now) Restore one of the backups, or "latest"
          if (latest) {
            ASSERT_OK(my_be->RestoreDBFromLatestBackup(restore_db_dir,
                                                       restore_db_dir));
          } else {
            ASSERT_OK(my_be->VerifyBackup(to_restore, true));
            ASSERT_OK(my_be->RestoreDBFromBackup(to_restore, restore_db_dir,
                                                 restore_db_dir));
          }

          // Test for race condition in reconfiguring limiter
          // FIXME: this could set to a different value in all threads, except
          // GenericRateLimiter::SetBytesPerSecond has a write-write race
          // reported by TSAN
          if (i == 0) {
            limiter->SetBytesPerSecond(2000000000);
          }

          // Re-verify metadata (we don't receive updates from concurrently
          // creating a new backup)
          my_be->GetBackupInfo(&infos);
          ASSERT_EQ(infos.size(), count);
          my_be->GetCorruptedBackups(&ids);
          ASSERT_EQ(ids.size(), 0);
          // fprintf(stderr, "Finished read thread\n");

          if (reopen) {
            delete my_be;
          }
        });
  }

  // A separate engine object on the same backup dir, used by the two
  // append (backup-creating) threads below.
  BackupEngine* alt_be;
  ASSERT_OK(BackupEngine::Open(test_db_env_.get(), be_opts, &alt_be));

  std::array<std::thread, 2> append_threads;
  for (unsigned i = 0; i < append_threads.size(); ++i) {
    uint32_t sleep_micros = rng() % 100000;
    append_threads[i] = std::thread([this, sleep_micros, alt_be] {
      test_db_env_->SleepForMicroseconds(sleep_micros);
      // WART: CreateNewBackup doesn't tell you the BackupID it just created,
      // which is ugly for multithreaded setting.
      // TODO: add delete backup also when that is added
      ASSERT_OK(alt_be->CreateNewBackup(db_.get()));
      // fprintf(stderr, "Finished append thread\n");
    });
  }

  for (auto& t : append_threads) {
    t.join();
  }
  // Verify metadata
  std::vector<BackupInfo> infos;
  alt_be->GetBackupInfo(&infos);
  ASSERT_EQ(infos.size(), 2 + append_threads.size());

  for (auto& t : read_threads) {
    t.join();
  }

  delete alt_be;

  // restore_verify_threads are spawned by read_threads, so they must be
  // joined after read_threads have been joined.
  for (auto& t : restore_verify_threads) {
    t.join();
  }

  CloseDBAndBackupEngine();
}
3489
TEST_F(BackupEngineTest, LimitBackupsOpened) {
  // Verify the specified max backups are opened, including skipping over
  // corrupted backups.
  //
  // Setup:
  // - backups 1, 2, and 4 are valid
  // - backup 3 is corrupt
  // - max_valid_backups_to_open == 2
  //
  // Expectation: the engine opens backups 4 and 2 since those are latest two
  // non-corrupt backups.
  const int kNumKeys = 5000;
  OpenDBAndBackupEngine(true);
  for (int backup_num = 1; backup_num <= 4; ++backup_num) {
    FillDB(db_.get(), kNumKeys * backup_num, kNumKeys * (backup_num + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
    if (backup_num == 3) {
      // Damage backup 3's metadata file so it is skipped on open.
      ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/meta/3", 3));
    }
  }
  CloseDBAndBackupEngine();

  backupable_options_->max_valid_backups_to_open = 2;
  backupable_options_->destroy_old_data = false;
  BackupEngineReadOnly* ro_engine;
  ASSERT_OK(BackupEngineReadOnly::Open(
      backup_chroot_env_.get(), *backupable_options_, &ro_engine));

  std::vector<BackupInfo> infos;
  ro_engine->GetBackupInfo(&infos);
  ASSERT_EQ(2, infos.size());
  ASSERT_EQ(2, infos[0].backup_id);
  ASSERT_EQ(4, infos[1].backup_id);
  delete ro_engine;
}
3526
TEST_F(BackupEngineTest, IgnoreLimitBackupsOpenedWhenNotReadOnly) {
  // Verify the specified max_valid_backups_to_open is ignored if the engine
  // is not read-only.
  //
  // Setup:
  // - backups 1, 2, and 4 are valid
  // - backup 3 is corrupt
  // - max_valid_backups_to_open == 2
  //
  // Expectation: the engine opens backups 4, 2, and 1 since those are latest
  // non-corrupt backups, by ignoring max_valid_backups_to_open == 2.
  const int kNumKeys = 5000;
  OpenDBAndBackupEngine(true);
  for (int i = 1; i <= 4; ++i) {
    FillDB(db_.get(), kNumKeys * i, kNumKeys * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
    if (i == 3) {
      // Damage backup 3's metadata file so it is reported as corrupt.
      ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/meta/3", 3));
    }
  }
  CloseDBAndBackupEngine();

  backupable_options_->max_valid_backups_to_open = 2;
  OpenDBAndBackupEngine();
  std::vector<BackupInfo> backup_infos;
  backup_engine_->GetBackupInfo(&backup_infos);
  ASSERT_EQ(3, backup_infos.size());
  ASSERT_EQ(1, backup_infos[0].backup_id);
  ASSERT_EQ(2, backup_infos[1].backup_id);
  ASSERT_EQ(4, backup_infos[2].backup_id);
  CloseDBAndBackupEngine();
  // Check the Status returned by DestroyDB: silently dropping it hides
  // cleanup failures and trips assert-on-unchecked-status builds.
  ASSERT_OK(DestroyDB(dbname_, options_));
}
3560
TEST_F(BackupEngineTest, CreateWhenLatestBackupCorrupted) {
  // we should pick an ID greater than corrupted backups' IDs so creation can
  // succeed even when latest backup is corrupted.
  const int kNumKeys = 5000;
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  BackupInfo backup_info;
  // No backups yet, so "latest" lookup must report NotFound.
  ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).IsNotFound());
  FillDB(db_.get(), 0 /* from */, kNumKeys);
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            true /* flush_before_backup */));
  // Corrupt the just-created backup 1's metadata file.
  ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/meta/1",
                                       3 /* bytes_to_corrupt */));
  CloseDBAndBackupEngine();

  OpenDBAndBackupEngine();
  // The only backup is corrupt, so there is no valid "latest" backup.
  ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).IsNotFound());

  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            true /* flush_before_backup */));

  // The new backup must take ID 2, skipping past the corrupt backup 1.
  ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).ok());
  ASSERT_EQ(2, backup_info.backup_id);

  std::vector<BackupInfo> backup_infos;
  backup_engine_->GetBackupInfo(&backup_infos);
  ASSERT_EQ(1, backup_infos.size());
  ASSERT_EQ(2, backup_infos[0].backup_id);

  // Verify individual GetBackupInfo by ID
  ASSERT_TRUE(backup_engine_->GetBackupInfo(0U, &backup_info).IsNotFound());
  ASSERT_TRUE(backup_engine_->GetBackupInfo(1U, &backup_info).IsCorruption());
  ASSERT_TRUE(backup_engine_->GetBackupInfo(2U, &backup_info).ok());
  ASSERT_TRUE(backup_engine_->GetBackupInfo(3U, &backup_info).IsNotFound());
  ASSERT_TRUE(
      backup_engine_->GetBackupInfo(999999U, &backup_info).IsNotFound());
}
3597
TEST_F(BackupEngineTest, WriteOnlyEngineNoSharedFileDeletion) {
  // Verifies a write-only BackupEngine (max_valid_backups_to_open == 0) does
  // not delete files belonging to valid backups when GarbageCollect,
  // PurgeOldBackups, or DeleteBackup are called.
  const int kNumKeys = 5000;
  for (int round = 0; round < 3; ++round) {
    OpenDBAndBackupEngine(round == 0 /* destroy_old_data */);
    FillDB(db_.get(), round * kNumKeys, (round + 1) * kNumKeys);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
    CloseDBAndBackupEngine();

    // Re-open as a write-only engine and run one destructive-looking op.
    backupable_options_->max_valid_backups_to_open = 0;
    OpenDBAndBackupEngine();
    if (round == 0) {
      ASSERT_OK(backup_engine_->GarbageCollect());
    } else if (round == 1) {
      ASSERT_OK(backup_engine_->PurgeOldBackups(1 /* num_backups_to_keep */));
    } else {
      ASSERT_OK(backup_engine_->DeleteBackup(2 /* backup_id */));
    }
    CloseDBAndBackupEngine();

    // With the limit lifted, the latest backup must still be fully intact.
    backupable_options_->max_valid_backups_to_open = port::kMaxInt32;
    AssertBackupConsistency(round + 1, 0, (round + 1) * kNumKeys);
  }
}
3629
TEST_P(BackupEngineTestWithParam, BackupUsingDirectIO) {
  // Tests direct I/O on the backup engine's reads and writes on the DB env and
  // backup env
  // We use ChrootEnv underneath so the below line checks for direct I/O support
  // in the chroot directory, not the true filesystem root.
  if (!test::IsDirectIOSupported(test_db_env_.get(), "/")) {
    ROCKSDB_GTEST_SKIP("Test requires Direct I/O Support");
    return;
  }
  const int kNumKeysPerBackup = 100;
  const int kNumBackups = 3;
  options_.use_direct_reads = true;
  OpenDBAndBackupEngine(true /* destroy_old_data */);
  for (int i = 0; i < kNumBackups; ++i) {
    // kFlushAll ensures all data is in SST files (readable via direct I/O).
    FillDB(db_.get(), i * kNumKeysPerBackup /* from */,
           (i + 1) * kNumKeysPerBackup /* to */, kFlushAll);

    // Clear the file open counters and then do a bunch of backup engine ops.
    // For all ops, files should be opened in direct mode.
    test_backup_env_->ClearFileOpenCounters();
    test_db_env_->ClearFileOpenCounters();
    // Re-open the engine so initialization I/O is counted too.
    CloseBackupEngine();
    OpenBackupEngine();
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                              false /* flush_before_backup */));
    ASSERT_OK(backup_engine_->VerifyBackup(i + 1));
    CloseBackupEngine();
    OpenBackupEngine();
    std::vector<BackupInfo> backup_infos;
    backup_engine_->GetBackupInfo(&backup_infos);
    ASSERT_EQ(static_cast<size_t>(i + 1), backup_infos.size());

    // Verify backup engine always opened files with direct I/O
    ASSERT_EQ(0, test_db_env_->num_writers());
    ASSERT_GE(test_db_env_->num_direct_rand_readers(), 0);
    ASSERT_GT(test_db_env_->num_direct_seq_readers(), 0);
    // Currently the DB doesn't support reading WALs or manifest with direct
    // I/O, so subtract two.
    ASSERT_EQ(test_db_env_->num_seq_readers() - 2,
              test_db_env_->num_direct_seq_readers());
    ASSERT_EQ(test_db_env_->num_rand_readers(),
              test_db_env_->num_direct_rand_readers());
  }
  CloseDBAndBackupEngine();

  // Each backup i+1 should contain exactly keys [0, (i+1)*kNumKeysPerBackup).
  for (int i = 0; i < kNumBackups; ++i) {
    AssertBackupConsistency(i + 1 /* backup_id */,
                            i * kNumKeysPerBackup /* start_exist */,
                            (i + 1) * kNumKeysPerBackup /* end_exist */,
                            (i + 2) * kNumKeysPerBackup /* end */);
  }
}
3682
// Verifies CreateBackupOptions' CPU-priority controls: priority can only be
// decreased (normal -> low -> idle), never raised back, and SetCpuPriority is
// not called when the requested priority equals the current one. Observed via
// a sync point inside BackupEngineImpl::Initialize.
TEST_F(BackupEngineTest, BackgroundThreadCpuPriority) {
  std::atomic<CpuPriority> priority(CpuPriority::kNormal);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackupEngineImpl::Initialize:SetCpuPriority", [&](void* new_priority) {
        priority.store(*reinterpret_cast<CpuPriority*>(new_priority));
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // 1 thread is easier to test, otherwise, we may not be sure which thread
  // actually does the work during CreateNewBackup.
  backupable_options_->max_background_operations = 1;
  OpenDBAndBackupEngine(true);

  {
    FillDB(db_.get(), 0, 100);

    // by default, cpu priority is not changed.
    CreateBackupOptions options;
    ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get()));

    ASSERT_EQ(priority, CpuPriority::kNormal);
  }

  {
    FillDB(db_.get(), 101, 200);

    // decrease cpu priority from normal to low.
    CreateBackupOptions options;
    options.decrease_background_thread_cpu_priority = true;
    options.background_thread_cpu_priority = CpuPriority::kLow;
    ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get()));

    ASSERT_EQ(priority, CpuPriority::kLow);
  }

  {
    FillDB(db_.get(), 201, 300);

    // try to upgrade cpu priority back to normal,
    // the priority should still low.
    CreateBackupOptions options;
    options.decrease_background_thread_cpu_priority = true;
    options.background_thread_cpu_priority = CpuPriority::kNormal;
    ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get()));

    ASSERT_EQ(priority, CpuPriority::kLow);
  }

  {
    FillDB(db_.get(), 301, 400);

    // decrease cpu priority from low to idle.
    CreateBackupOptions options;
    options.decrease_background_thread_cpu_priority = true;
    options.background_thread_cpu_priority = CpuPriority::kIdle;
    ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get()));

    ASSERT_EQ(priority, CpuPriority::kIdle);
  }

  {
    FillDB(db_.get(), 301, 400);

    // reset priority to later verify that it's not updated by SetCpuPriority.
    priority = CpuPriority::kNormal;

    // setting the same cpu priority won't call SetCpuPriority.
    CreateBackupOptions options;
    options.decrease_background_thread_cpu_priority = true;
    options.background_thread_cpu_priority = CpuPriority::kIdle;

    // Also check output backup_id with CreateNewBackup
    BackupID new_id = 0;
    ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get(), &new_id));
    ASSERT_EQ(new_id, 5U);

    ASSERT_EQ(priority, CpuPriority::kNormal);
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  CloseDBAndBackupEngine();
  // Check the Status returned by DestroyDB: silently dropping it hides
  // cleanup failures and trips assert-on-unchecked-status builds.
  ASSERT_OK(DestroyDB(dbname_, options_));
}
3767
3768 // Populates `*total_size` with the size of all files under `backup_dir`.
3769 // We don't go through `BackupEngine` currently because it's hard to figure out
3770 // the metadata file size.
GetSizeOfBackupFiles(FileSystem * backup_fs,const std::string & backup_dir,size_t * total_size)3771 Status GetSizeOfBackupFiles(FileSystem* backup_fs,
3772 const std::string& backup_dir, size_t* total_size) {
3773 *total_size = 0;
3774 std::vector<std::string> dir_stack = {backup_dir};
3775 Status s;
3776 while (s.ok() && !dir_stack.empty()) {
3777 std::string dir = std::move(dir_stack.back());
3778 dir_stack.pop_back();
3779 std::vector<std::string> children;
3780 s = backup_fs->GetChildren(dir, IOOptions(), &children, nullptr /* dbg */);
3781 for (size_t i = 0; s.ok() && i < children.size(); ++i) {
3782 std::string path = dir + "/" + children[i];
3783 bool is_dir;
3784 s = backup_fs->IsDirectory(path, IOOptions(), &is_dir, nullptr /* dbg */);
3785 uint64_t file_size = 0;
3786 if (s.ok()) {
3787 if (is_dir) {
3788 dir_stack.emplace_back(std::move(path));
3789 } else {
3790 s = backup_fs->GetFileSize(path, IOOptions(), &file_size,
3791 nullptr /* dbg */);
3792 }
3793 }
3794 if (s.ok()) {
3795 *total_size += file_size;
3796 }
3797 }
3798 }
3799 return s;
3800 }
3801
// Tests the `BACKUP_READ_BYTES` and `BACKUP_WRITE_BYTES` ticker stats have
// the expected values according to the files in the backups.
TEST_F(BackupEngineTest, IOStats) {
  // These ticker stats are expected to be populated regardless of `PerfLevel`
  // in user thread
  SetPerfLevel(kDisable);

  options_.statistics = CreateDBStatistics();
  OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                        kShareWithChecksum);

  FillDB(db_.get(), 0 /* from */, 100 /* to */, kFlushMost);

  // No backup yet, so both tickers start at zero.
  ASSERT_EQ(0, options_.statistics->getTickerCount(BACKUP_READ_BYTES));
  ASSERT_EQ(0, options_.statistics->getTickerCount(BACKUP_WRITE_BYTES));
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));

  size_t orig_backup_files_size;
  ASSERT_OK(GetSizeOfBackupFiles(test_backup_env_->GetFileSystem().get(),
                                 backupdir_, &orig_backup_files_size));
  size_t expected_bytes_written = orig_backup_files_size;
  ASSERT_EQ(expected_bytes_written,
            options_.statistics->getTickerCount(BACKUP_WRITE_BYTES));
  // Bytes read is more difficult to pin down since there are reads for many
  // purposes other than creating file, like `GetSortedWalFiles()` to find first
  // sequence number, or `CreateNewBackup()` thread to find SST file session ID.
  // So we loosely require there are at least as many reads as needed for
  // copying, but not as many as twice that.
  ASSERT_GE(options_.statistics->getTickerCount(BACKUP_READ_BYTES),
            expected_bytes_written);
  // NOTE: previously asserted expected < 2 * read, which is implied by the
  // ASSERT_GE above and therefore vacuous; the intended upper bound is
  // read < 2 * expected, matching the comment.
  ASSERT_LT(options_.statistics->getTickerCount(BACKUP_READ_BYTES),
            2 * expected_bytes_written);

  FillDB(db_.get(), 100 /* from */, 200 /* to */, kFlushMost);

  // Reset tickers so the second backup's I/O is measured in isolation.
  ASSERT_OK(options_.statistics->Reset());
  ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
                                            false /* flush_before_backup */));
  size_t final_backup_files_size;
  ASSERT_OK(GetSizeOfBackupFiles(test_backup_env_->GetFileSystem().get(),
                                 backupdir_, &final_backup_files_size));
  // Only the delta (newly added files) counts as writes for backup 2,
  // since unchanged shared files are not re-copied.
  expected_bytes_written = final_backup_files_size - orig_backup_files_size;
  ASSERT_EQ(expected_bytes_written,
            options_.statistics->getTickerCount(BACKUP_WRITE_BYTES));
  // See above for why these bounds were chosen.
  ASSERT_GE(options_.statistics->getTickerCount(BACKUP_READ_BYTES),
            expected_bytes_written);
  ASSERT_LT(options_.statistics->getTickerCount(BACKUP_READ_BYTES),
            2 * expected_bytes_written);
}
3854
3855 } // anon namespace
3856
3857 } // namespace ROCKSDB_NAMESPACE
3858
// Test driver: install the stack trace handler for better crash diagnostics,
// then run all registered gtest cases.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
3864
3865 #else
3866 #include <stdio.h>
3867
// Stub main for builds where BackupEngine is compiled out (ROCKSDB_LITE or
// Windows, per the #if at the top of the file): report the skip and succeed.
int main(int /*argc*/, char** /*argv*/) {
  fprintf(stderr, "SKIPPED as BackupEngine is not supported in ROCKSDB_LITE\n");
  return 0;
}
3872
3873 #endif // !defined(ROCKSDB_LITE) && !defined(OS_WIN)
3874