1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5 #include "leveldb/db.h"
6
7 #include <atomic>
8 #include <string>
9
10 #include "db/db_impl.h"
11 #include "db/filename.h"
12 #include "db/version_set.h"
13 #include "db/write_batch_internal.h"
14 #include "leveldb/cache.h"
15 #include "leveldb/env.h"
16 #include "leveldb/filter_policy.h"
17 #include "leveldb/table.h"
18 #include "port/port.h"
19 #include "port/thread_annotations.h"
20 #include "util/hash.h"
21 #include "util/logging.h"
22 #include "util/mutexlock.h"
23 #include "util/testharness.h"
24 #include "util/testutil.h"
25
26 namespace leveldb {
27
RandomString(Random * rnd,int len)28 static std::string RandomString(Random* rnd, int len) {
29 std::string r;
30 test::RandomString(rnd, len, &r);
31 return r;
32 }
33
RandomKey(Random * rnd)34 static std::string RandomKey(Random* rnd) {
35 int len =
36 (rnd->OneIn(3) ? 1 // Short sometimes to encourage collisions
37 : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
38 return test::RandomKey(rnd, len);
39 }
40
41 namespace {
42 class AtomicCounter {
43 public:
AtomicCounter()44 AtomicCounter() : count_(0) {}
Increment()45 void Increment() { IncrementBy(1); }
IncrementBy(int count)46 void IncrementBy(int count) LOCKS_EXCLUDED(mu_) {
47 MutexLock l(&mu_);
48 count_ += count;
49 }
Read()50 int Read() LOCKS_EXCLUDED(mu_) {
51 MutexLock l(&mu_);
52 return count_;
53 }
Reset()54 void Reset() LOCKS_EXCLUDED(mu_) {
55 MutexLock l(&mu_);
56 count_ = 0;
57 }
58
59 private:
60 port::Mutex mu_;
61 int count_ GUARDED_BY(mu_);
62 };
63
DelayMilliseconds(int millis)64 void DelayMilliseconds(int millis) {
65 Env::Default()->SleepForMicroseconds(millis * 1000);
66 }
67 } // namespace
68
69 // Test Env to override default Env behavior for testing.
70 class TestEnv : public EnvWrapper {
71 public:
TestEnv(Env * base)72 explicit TestEnv(Env* base) : EnvWrapper(base), ignore_dot_files_(false) {}
73
SetIgnoreDotFiles(bool ignored)74 void SetIgnoreDotFiles(bool ignored) { ignore_dot_files_ = ignored; }
75
GetChildren(const std::string & dir,std::vector<std::string> * result)76 Status GetChildren(const std::string& dir,
77 std::vector<std::string>* result) override {
78 Status s = target()->GetChildren(dir, result);
79 if (!s.ok() || !ignore_dot_files_) {
80 return s;
81 }
82
83 std::vector<std::string>::iterator it = result->begin();
84 while (it != result->end()) {
85 if ((*it == ".") || (*it == "..")) {
86 it = result->erase(it);
87 } else {
88 ++it;
89 }
90 }
91
92 return s;
93 }
94
95 private:
96 bool ignore_dot_files_;
97 };
98
99 // Special Env used to delay background operations.
// Special Env used to delay background operations.
class SpecialEnv : public EnvWrapper {
 public:
  // sstable/log Sync() calls are blocked while this flag is true.
  std::atomic<bool> delay_data_sync_;

  // sstable/log Sync() calls return an error while this flag is true.
  std::atomic<bool> data_sync_error_;

  // Simulate no-space errors (Append() silently drops data) while this
  // flag is true.
  std::atomic<bool> no_space_;

  // Simulate non-writable file system (NewWritableFile() fails) while
  // this flag is true.
  std::atomic<bool> non_writable_;

  // Force sync of manifest files to fail while this flag is true.
  std::atomic<bool> manifest_sync_error_;

  // Force write to manifest files to fail while this flag is true.
  std::atomic<bool> manifest_write_error_;

  bool count_random_reads_;            // When true, wrap random-access files
                                       // so reads are counted below.
  AtomicCounter random_read_counter_;  // Number of RandomAccessFile reads.

  explicit SpecialEnv(Env* base)
      : EnvWrapper(base),
        delay_data_sync_(false),
        data_sync_error_(false),
        no_space_(false),
        non_writable_(false),
        manifest_sync_error_(false),
        manifest_write_error_(false),
        count_random_reads_(false) {}

  // Creates a writable file via the wrapped Env, then wraps table/log
  // files (".ldb"/".log") in DataFile and MANIFEST files in ManifestFile
  // so the error-injection flags above take effect.
  Status NewWritableFile(const std::string& f, WritableFile** r) {
    // Forwards to the wrapped file but honors no_space_, data_sync_error_
    // and delay_data_sync_. Owns (and deletes) the wrapped file.
    class DataFile : public WritableFile {
     private:
      SpecialEnv* const env_;
      WritableFile* const base_;

     public:
      DataFile(SpecialEnv* env, WritableFile* base) : env_(env), base_(base) {}
      ~DataFile() { delete base_; }
      Status Append(const Slice& data) {
        if (env_->no_space_.load(std::memory_order_acquire)) {
          // Drop writes on the floor
          return Status::OK();
        } else {
          return base_->Append(data);
        }
      }
      Status Close() { return base_->Close(); }
      Status Flush() { return base_->Flush(); }
      Status Sync() {
        if (env_->data_sync_error_.load(std::memory_order_acquire)) {
          return Status::IOError("simulated data sync error");
        }
        // Spin (sleeping) until the test releases the delay flag.
        while (env_->delay_data_sync_.load(std::memory_order_acquire)) {
          DelayMilliseconds(100);
        }
        return base_->Sync();
      }
      std::string GetName() const override { return ""; }
    };
    // Forwards to the wrapped file but honors manifest_write_error_ and
    // manifest_sync_error_. Owns (and deletes) the wrapped file.
    class ManifestFile : public WritableFile {
     private:
      SpecialEnv* env_;
      WritableFile* base_;

     public:
      ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) {}
      ~ManifestFile() { delete base_; }
      Status Append(const Slice& data) {
        if (env_->manifest_write_error_.load(std::memory_order_acquire)) {
          return Status::IOError("simulated writer error");
        } else {
          return base_->Append(data);
        }
      }
      Status Close() { return base_->Close(); }
      Status Flush() { return base_->Flush(); }
      Status Sync() {
        if (env_->manifest_sync_error_.load(std::memory_order_acquire)) {
          return Status::IOError("simulated sync error");
        } else {
          return base_->Sync();
        }
      }
      std::string GetName() const override { return ""; }
    };

    if (non_writable_.load(std::memory_order_acquire)) {
      return Status::IOError("simulated write error");
    }

    Status s = target()->NewWritableFile(f, r);
    if (s.ok()) {
      // Classify the file by name: data files get DataFile, the manifest
      // gets ManifestFile, anything else is returned unwrapped.
      if (strstr(f.c_str(), ".ldb") != nullptr ||
          strstr(f.c_str(), ".log") != nullptr) {
        *r = new DataFile(this, *r);
      } else if (strstr(f.c_str(), "MANIFEST") != nullptr) {
        *r = new ManifestFile(this, *r);
      }
    }
    return s;
  }

  // Creates a random-access file via the wrapped Env; when
  // count_random_reads_ is set, wraps it so every Read() bumps
  // random_read_counter_.
  Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
    // Counts Read() calls into |counter_|. Owns (and deletes) the
    // wrapped file.
    class CountingFile : public RandomAccessFile {
     private:
      RandomAccessFile* target_;
      AtomicCounter* counter_;

     public:
      CountingFile(RandomAccessFile* target, AtomicCounter* counter)
          : target_(target), counter_(counter) {}
      ~CountingFile() override { delete target_; }
      Status Read(uint64_t offset, size_t n, Slice* result,
                  char* scratch) const override {
        counter_->Increment();
        return target_->Read(offset, n, result, scratch);
      }
      std::string GetName() const override { return ""; }
    };

    Status s = target()->NewRandomAccessFile(f, r);
    if (s.ok() && count_random_reads_) {
      *r = new CountingFile(*r, &random_read_counter_);
    }
    return s;
  }
};
231
// Test fixture: owns a scratch database directory, a SpecialEnv wrapping
// the default Env, and a cycle of option configurations (default,
// reuse-logs, bloom-filter, uncompressed) that tests iterate over via
// ChangeOptions().
class DBTest {
 public:
  std::string dbname_;  // Path of the database under test.
  SpecialEnv* env_;     // Owned; wraps Env::Default().
  DB* db_;              // Currently open database, or nullptr.

  Options last_options_;  // Options used by the most recent TryReopen().

  DBTest() : env_(new SpecialEnv(Env::Default())), option_config_(kDefault) {
    filter_policy_ = NewBloomFilterPolicy(10);
    dbname_ = test::TmpDir() + "/db_test";
    // Start from a clean slate in case a previous run left files behind.
    DestroyDB(dbname_, Options());
    db_ = nullptr;
    Reopen();
  }

  ~DBTest() {
    delete db_;
    DestroyDB(dbname_, Options());
    delete env_;
    delete filter_policy_;
  }

  // Switch to a fresh database with the next option configuration to
  // test. Return false if there are no more configurations to test.
  bool ChangeOptions() {
    option_config_++;
    if (option_config_ >= kEnd) {
      return false;
    } else {
      DestroyAndReopen();
      return true;
    }
  }

  // Return the current option configuration.
  Options CurrentOptions() {
    Options options;
    options.reuse_logs = false;
    switch (option_config_) {
      case kReuse:
        options.reuse_logs = true;
        break;
      case kFilter:
        options.filter_policy = filter_policy_;
        break;
      case kUncompressed:
        options.compression = kNoCompression;
        break;
      default:
        break;
    }
    return options;
  }

  // Downcast to the implementation type to reach the TEST_* hooks.
  DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); }

  // Reopen the database, asserting success.
  void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); }

  void Close() {
    delete db_;
    db_ = nullptr;
  }

  // Wipe the database files and open a fresh one.
  void DestroyAndReopen(Options* options = nullptr) {
    delete db_;
    db_ = nullptr;
    DestroyDB(dbname_, Options());
    ASSERT_OK(TryReopen(options));
  }

  // Close the current database (if any) and open it again. When |options|
  // is null, CurrentOptions() with create_if_missing is used.
  Status TryReopen(Options* options) {
    delete db_;
    db_ = nullptr;
    Options opts;
    if (options != nullptr) {
      opts = *options;
    } else {
      opts = CurrentOptions();
      opts.create_if_missing = true;
    }
    last_options_ = opts;

    return DB::Open(opts, dbname_, &db_);
  }

  Status Put(const std::string& k, const std::string& v) {
    return db_->Put(WriteOptions(), k, v);
  }

  Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); }

  // Read |k| (optionally through |snapshot|); maps NotFound to
  // "NOT_FOUND" and other errors to the Status string so tests can
  // ASSERT_EQ directly on the result.
  std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
    ReadOptions options;
    options.snapshot = snapshot;
    std::string result;
    Status s = db_->Get(options, k, &result);
    if (s.IsNotFound()) {
      result = "NOT_FOUND";
    } else if (!s.ok()) {
      result = s.ToString();
    }
    return result;
  }

  // Return a string that contains all key,value pairs in order,
  // formatted like "(k1->v1)(k2->v2)".
  std::string Contents() {
    std::vector<std::string> forward;
    std::string result;
    Iterator* iter = db_->NewIterator(ReadOptions());
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      std::string s = IterStatus(iter);
      result.push_back('(');
      result.append(s);
      result.push_back(')');
      forward.push_back(s);
    }

    // Check reverse iteration results are the reverse of forward results
    size_t matched = 0;
    for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
      ASSERT_LT(matched, forward.size());
      ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
      matched++;
    }
    ASSERT_EQ(matched, forward.size());

    delete iter;
    return result;
  }

  // Returns every internal entry for |user_key| across all sequence
  // numbers, e.g. "[ v2, DEL, v1 ]", using the internal-key iterator.
  std::string AllEntriesFor(const Slice& user_key) {
    Iterator* iter = dbfull()->TEST_NewInternalIterator();
    // Seek to the newest possible entry for this user key.
    InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
    iter->Seek(target.Encode());
    std::string result;
    if (!iter->status().ok()) {
      result = iter->status().ToString();
    } else {
      result = "[ ";
      bool first = true;
      while (iter->Valid()) {
        ParsedInternalKey ikey;
        if (!ParseInternalKey(iter->key(), &ikey)) {
          result += "CORRUPTED";
        } else {
          // Stop once we walk past the requested user key.
          if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
            break;
          }
          if (!first) {
            result += ", ";
          }
          first = false;
          switch (ikey.type) {
            case kTypeValue:
              result += iter->value().ToString();
              break;
            case kTypeDeletion:
              result += "DEL";
              break;
          }
        }
        iter->Next();
      }
      if (!first) {
        result += " ";
      }
      result += "]";
    }
    delete iter;
    return result;
  }

  // Number of table files currently at |level|, read via the
  // "leveldb.num-files-at-level<N>" property.
  int NumTableFilesAtLevel(int level) {
    std::string property;
    ASSERT_TRUE(db_->GetProperty(
        "leveldb.num-files-at-level" + NumberToString(level), &property));
    return std::stoi(property);
  }

  // Total number of table files across all levels.
  int TotalTableFiles() {
    int result = 0;
    for (int level = 0; level < config::kNumLevels; level++) {
      result += NumTableFilesAtLevel(level);
    }
    return result;
  }

  // Return spread of files per level
  std::string FilesPerLevel() {
    std::string result;
    int last_non_zero_offset = 0;
    for (int level = 0; level < config::kNumLevels; level++) {
      int f = NumTableFilesAtLevel(level);
      char buf[100];
      snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
      result += buf;
      if (f > 0) {
        last_non_zero_offset = result.size();
      }
    }
    // Trim trailing levels that hold no files.
    result.resize(last_non_zero_offset);
    return result;
  }

  // Number of entries (of any kind) in the database directory.
  int CountFiles() {
    std::vector<std::string> files;
    env_->GetChildren(dbname_, &files);
    return static_cast<int>(files.size());
  }

  // Approximate on-disk size of the key range [start,limit).
  uint64_t Size(const Slice& start, const Slice& limit) {
    Range r(start, limit);
    uint64_t size;
    db_->GetApproximateSizes(&r, 1, &size);
    return size;
  }

  // Manually compact the key range [start,limit].
  void Compact(const Slice& start, const Slice& limit) {
    db_->CompactRange(&start, &limit);
  }

  // Do n memtable compactions, each of which produces an sstable
  // covering the range [small_key,large_key].
  void MakeTables(int n, const std::string& small_key,
                  const std::string& large_key) {
    for (int i = 0; i < n; i++) {
      Put(small_key, "begin");
      Put(large_key, "end");
      dbfull()->TEST_CompactMemTable();
    }
  }

  // Prevent pushing of new sstables into deeper levels by adding
  // tables that cover a specified range to all levels.
  void FillLevels(const std::string& smallest, const std::string& largest) {
    MakeTables(config::kNumLevels, smallest, largest);
  }

  // Debugging helper: print per-level file counts to stderr.
  void DumpFileCounts(const char* label) {
    fprintf(stderr, "---\n%s:\n", label);
    fprintf(
        stderr, "maxoverlap: %lld\n",
        static_cast<long long>(dbfull()->TEST_MaxNextLevelOverlappingBytes()));
    for (int level = 0; level < config::kNumLevels; level++) {
      int num = NumTableFilesAtLevel(level);
      if (num > 0) {
        fprintf(stderr, "  level %3d : %d files\n", level, num);
      }
    }
  }

  // Returns the "leveldb.sstables" property (a textual table listing).
  std::string DumpSSTableList() {
    std::string property;
    db_->GetProperty("leveldb.sstables", &property);
    return property;
  }

  // Renders an iterator position as "key->value", or "(invalid)".
  std::string IterStatus(Iterator* iter) {
    std::string result;
    if (iter->Valid()) {
      result = iter->key().ToString() + "->" + iter->value().ToString();
    } else {
      result = "(invalid)";
    }
    return result;
  }

  // Deletes one table file from the database directory, if any exists.
  // Returns true if a file was deleted.
  bool DeleteAnSSTFile() {
    std::vector<std::string> filenames;
    ASSERT_OK(env_->GetChildren(dbname_, &filenames));
    uint64_t number;
    FileType type;
    for (size_t i = 0; i < filenames.size(); i++) {
      if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
        ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
        return true;
      }
    }
    return false;
  }

  // Returns number of files renamed.
  int RenameLDBToSST() {
    std::vector<std::string> filenames;
    ASSERT_OK(env_->GetChildren(dbname_, &filenames));
    uint64_t number;
    FileType type;
    int files_renamed = 0;
    for (size_t i = 0; i < filenames.size(); i++) {
      if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
        const std::string from = TableFileName(dbname_, number);
        const std::string to = SSTTableFileName(dbname_, number);
        ASSERT_OK(env_->RenameFile(from, to));
        files_renamed++;
      }
    }
    return files_renamed;
  }

 private:
  // Sequence of option configurations to try
  enum OptionConfig { kDefault, kReuse, kFilter, kUncompressed, kEnd };

  const FilterPolicy* filter_policy_;  // Owned; used by the kFilter config.
  int option_config_;                  // Current position in OptionConfig.
};
540
// A freshly created database is open and contains no entries, under
// every option configuration.
TEST(DBTest, Empty) {
  do {
    ASSERT_TRUE(db_ != nullptr);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
  } while (ChangeOptions());
}
547
// The empty string is a legal key: it can be written and overwritten.
TEST(DBTest, EmptyKey) {
  do {
    ASSERT_OK(Put("", "v1"));
    ASSERT_EQ("v1", Get(""));
    ASSERT_OK(Put("", "v2"));
    ASSERT_EQ("v2", Get(""));
  } while (ChangeOptions());
}
556
// The empty string is a legal value, distinct from "key not found".
TEST(DBTest, EmptyValue) {
  do {
    ASSERT_OK(Put("key", "v1"));
    ASSERT_EQ("v1", Get("key"));
    ASSERT_OK(Put("key", ""));
    ASSERT_EQ("", Get("key"));
    ASSERT_OK(Put("key", "v2"));
    ASSERT_EQ("v2", Get("key"));
  } while (ChangeOptions());
}
567
// Basic read/write: later Puts win, unrelated keys are unaffected.
TEST(DBTest, ReadWrite) {
  do {
    ASSERT_OK(Put("foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));
    ASSERT_OK(Put("bar", "v2"));
    ASSERT_OK(Put("foo", "v3"));
    ASSERT_EQ("v3", Get("foo"));
    ASSERT_EQ("v2", Get("bar"));
  } while (ChangeOptions());
}
578
// Put / overwrite / Delete round-trip through the raw DB interface.
TEST(DBTest, PutDeleteGet) {
  do {
    ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));
    ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
    ASSERT_EQ("v2", Get("foo"));
    ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
    ASSERT_EQ("NOT_FOUND", Get("foo"));
  } while (ChangeOptions());
}
589
// Reads must still find keys that live in the immutable memtable while
// its compaction to disk is stalled (simulated by blocking Sync()).
TEST(DBTest, GetFromImmutableLayer) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
    Reopen(&options);

    ASSERT_OK(Put("foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));

    // Block sync calls.
    env_->delay_data_sync_.store(true, std::memory_order_release);
    Put("k1", std::string(100000, 'x'));  // Fill memtable.
    Put("k2", std::string(100000, 'y'));  // Trigger compaction.
    ASSERT_EQ("v1", Get("foo"));
    // Release sync calls.
    env_->delay_data_sync_.store(false, std::memory_order_release);
  } while (ChangeOptions());
}
609
// Reads must find keys after they have been flushed into an sstable.
TEST(DBTest, GetFromVersions) {
  do {
    ASSERT_OK(Put("foo", "v1"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("v1", Get("foo"));
  } while (ChangeOptions());
}
617
// The "approximate-memory-usage" property reports a positive,
// plausibly bounded number after a write.
TEST(DBTest, GetMemUsage) {
  do {
    ASSERT_OK(Put("foo", "v1"));
    std::string val;
    ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
    int mem_usage = std::stoi(val);
    ASSERT_GT(mem_usage, 0);
    ASSERT_LT(mem_usage, 5 * 1024 * 1024);
  } while (ChangeOptions());
}
628
// A snapshot pins the value a key had when the snapshot was taken, both
// before and after the memtable is compacted.
TEST(DBTest, GetSnapshot) {
  do {
    // Try with both a short key and a long key
    for (int i = 0; i < 2; i++) {
      std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
      ASSERT_OK(Put(key, "v1"));
      const Snapshot* s1 = db_->GetSnapshot();
      ASSERT_OK(Put(key, "v2"));
      ASSERT_EQ("v2", Get(key));
      ASSERT_EQ("v1", Get(key, s1));
      dbfull()->TEST_CompactMemTable();
      ASSERT_EQ("v2", Get(key));
      ASSERT_EQ("v1", Get(key, s1));
      db_->ReleaseSnapshot(s1);
    }
  } while (ChangeOptions());
}
646
// Multiple snapshots taken at the same sequence number behave
// independently: releasing one must not disturb the others.
TEST(DBTest, GetIdenticalSnapshots) {
  do {
    // Try with both a short key and a long key
    for (int i = 0; i < 2; i++) {
      std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
      ASSERT_OK(Put(key, "v1"));
      const Snapshot* s1 = db_->GetSnapshot();
      const Snapshot* s2 = db_->GetSnapshot();
      const Snapshot* s3 = db_->GetSnapshot();
      ASSERT_OK(Put(key, "v2"));
      ASSERT_EQ("v2", Get(key));
      ASSERT_EQ("v1", Get(key, s1));
      ASSERT_EQ("v1", Get(key, s2));
      ASSERT_EQ("v1", Get(key, s3));
      db_->ReleaseSnapshot(s1);
      dbfull()->TEST_CompactMemTable();
      ASSERT_EQ("v2", Get(key));
      ASSERT_EQ("v1", Get(key, s2));
      db_->ReleaseSnapshot(s2);
      ASSERT_EQ("v1", Get(key, s3));
      db_->ReleaseSnapshot(s3);
    }
  } while (ChangeOptions());
}
671
// An iterator over a snapshot taken before any writes sees nothing,
// both from the memtable and after compaction to an sstable.
TEST(DBTest, IterateOverEmptySnapshot) {
  do {
    const Snapshot* snapshot = db_->GetSnapshot();
    ReadOptions read_options;
    read_options.snapshot = snapshot;
    ASSERT_OK(Put("foo", "v1"));
    ASSERT_OK(Put("foo", "v2"));

    Iterator* iterator1 = db_->NewIterator(read_options);
    iterator1->SeekToFirst();
    ASSERT_TRUE(!iterator1->Valid());
    delete iterator1;

    dbfull()->TEST_CompactMemTable();

    Iterator* iterator2 = db_->NewIterator(read_options);
    iterator2->SeekToFirst();
    ASSERT_TRUE(!iterator2->Valid());
    delete iterator2;

    db_->ReleaseSnapshot(snapshot);
  } while (ChangeOptions());
}
695
TEST(DBTest, GetLevel0Ordering) {
  do {
    // Check that we process level-0 files in correct order. The code
    // below generates two level-0 files where the earlier one comes
    // before the later one in the level-0 file list since the earlier
    // one has a smaller "smallest" key.
    ASSERT_OK(Put("bar", "b"));
    ASSERT_OK(Put("foo", "v1"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_OK(Put("foo", "v2"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("v2", Get("foo"));  // Newer file must win despite ordering.
  } while (ChangeOptions());
}
710
// A value in a shallower level must shadow an older value that was
// compacted into a deeper level.
TEST(DBTest, GetOrderedByLevels) {
  do {
    ASSERT_OK(Put("foo", "v1"));
    Compact("a", "z");  // Push v1 down out of level 0.
    ASSERT_EQ("v1", Get("foo"));
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_EQ("v2", Get("foo"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("v2", Get("foo"));
  } while (ChangeOptions());
}
722
// Get() must consult the right file when a non-level-0 level holds
// several files with disjoint key ranges.
TEST(DBTest, GetPicksCorrectFile) {
  do {
    // Arrange to have multiple files in a non-level-0 level.
    ASSERT_OK(Put("a", "va"));
    Compact("a", "b");
    ASSERT_OK(Put("x", "vx"));
    Compact("x", "y");
    ASSERT_OK(Put("f", "vf"));
    Compact("f", "g");
    ASSERT_EQ("va", Get("a"));
    ASSERT_EQ("vf", Get("f"));
    ASSERT_EQ("vx", Get("x"));
  } while (ChangeOptions());
}
737
TEST(DBTest, GetEncountersEmptyLevel) {
  do {
    // Arrange for the following to happen:
    //   * sstable A in level 0
    //   * nothing in level 1
    //   * sstable B in level 2
    // Then do enough Get() calls to arrange for an automatic compaction
    // of sstable A.  A bug would cause the compaction to be marked as
    // occurring at level 1 (instead of the correct level 0).

    // Step 1: First place sstables in levels 0 and 2
    int compaction_count = 0;
    while (NumTableFilesAtLevel(0) == 0 || NumTableFilesAtLevel(2) == 0) {
      ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
      compaction_count++;
      Put("a", "begin");
      Put("z", "end");
      dbfull()->TEST_CompactMemTable();
    }

    // Step 2: clear level 1 if necessary.
    dbfull()->TEST_CompactRange(1, nullptr, nullptr);
    ASSERT_EQ(NumTableFilesAtLevel(0), 1);
    ASSERT_EQ(NumTableFilesAtLevel(1), 0);
    ASSERT_EQ(NumTableFilesAtLevel(2), 1);

    // Step 3: read a bunch of times to trigger a seek-based compaction.
    for (int i = 0; i < 1000; i++) {
      ASSERT_EQ("NOT_FOUND", Get("missing"));
    }

    // Step 4: Wait for compaction to finish
    DelayMilliseconds(1000);

    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  } while (ChangeOptions());
}
775
// Iterators over an empty database are invalid after any positioning.
TEST(DBTest, IterEmpty) {
  Iterator* iter = db_->NewIterator(ReadOptions());

  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->Seek("foo");
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  delete iter;
}
790
// Iterator behavior over a single-entry database: every positioning
// operation lands on the one entry or walks off either end.
TEST(DBTest, IterSingle) {
  ASSERT_OK(Put("a", "va"));
  Iterator* iter = db_->NewIterator(ReadOptions());

  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");
  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");
  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  // Seek targets before, at, and after the single key.
  iter->Seek("");
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->Seek("a");
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->Seek("b");
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  delete iter;
}
828
// Iterator behavior over three entries: forward/backward scans, seeks,
// direction changes, and snapshot isolation from later writes.
TEST(DBTest, IterMulti) {
  ASSERT_OK(Put("a", "va"));
  ASSERT_OK(Put("b", "vb"));
  ASSERT_OK(Put("c", "vc"));
  Iterator* iter = db_->NewIterator(ReadOptions());

  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "b->vb");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");
  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "b->vb");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "(invalid)");
  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  // Seek lands on the first key >= target.
  iter->Seek("");
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Seek("a");
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Seek("ax");
  ASSERT_EQ(IterStatus(iter), "b->vb");
  iter->Seek("b");
  ASSERT_EQ(IterStatus(iter), "b->vb");
  iter->Seek("z");
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  // Switch from reverse to forward
  iter->SeekToLast();
  iter->Prev();
  iter->Prev();
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "b->vb");

  // Switch from forward to reverse
  iter->SeekToFirst();
  iter->Next();
  iter->Next();
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "b->vb");

  // Make sure iter stays at snapshot: mutations made after the iterator
  // was created must not be visible through it.
  ASSERT_OK(Put("a", "va2"));
  ASSERT_OK(Put("a2", "va3"));
  ASSERT_OK(Put("b", "vb2"));
  ASSERT_OK(Put("c", "vc2"));
  ASSERT_OK(Delete("b"));
  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "b->vb");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");
  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "b->vb");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  delete iter;
}
911
// Iteration works across a mix of tiny values and ~100KB values in
// both directions.
TEST(DBTest, IterSmallAndLargeMix) {
  ASSERT_OK(Put("a", "va"));
  ASSERT_OK(Put("b", std::string(100000, 'b')));
  ASSERT_OK(Put("c", "vc"));
  ASSERT_OK(Put("d", std::string(100000, 'd')));
  ASSERT_OK(Put("e", std::string(100000, 'e')));

  Iterator* iter = db_->NewIterator(ReadOptions());

  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "c->vc");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "a->va");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "(invalid)");

  delete iter;
}
949
// Reverse iteration skips over a deleted key.
TEST(DBTest, IterMultiWithDelete) {
  do {
    ASSERT_OK(Put("a", "va"));
    ASSERT_OK(Put("b", "vb"));
    ASSERT_OK(Put("c", "vc"));
    ASSERT_OK(Delete("b"));
    ASSERT_EQ("NOT_FOUND", Get("b"));

    Iterator* iter = db_->NewIterator(ReadOptions());
    iter->Seek("c");
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "a->va");  // "b" must be skipped.
    delete iter;
  } while (ChangeOptions());
}
966
// Data written before a close/reopen cycle is recovered, including
// across two successive reopens with interleaved writes.
TEST(DBTest, Recover) {
  do {
    ASSERT_OK(Put("foo", "v1"));
    ASSERT_OK(Put("baz", "v5"));

    Reopen();
    ASSERT_EQ("v1", Get("foo"));

    ASSERT_EQ("v1", Get("foo"));
    ASSERT_EQ("v5", Get("baz"));
    ASSERT_OK(Put("bar", "v2"));
    ASSERT_OK(Put("foo", "v3"));

    Reopen();
    ASSERT_EQ("v3", Get("foo"));
    ASSERT_OK(Put("foo", "v4"));
    ASSERT_EQ("v4", Get("foo"));
    ASSERT_EQ("v2", Get("bar"));
    ASSERT_EQ("v5", Get("baz"));
  } while (ChangeOptions());
}
988
// Recovery succeeds when a reopen leaves behind an empty log file
// (the back-to-back Reopen() calls create one).
TEST(DBTest, RecoveryWithEmptyLog) {
  do {
    ASSERT_OK(Put("foo", "v1"));
    ASSERT_OK(Put("foo", "v2"));
    Reopen();
    Reopen();
    ASSERT_OK(Put("foo", "v3"));
    Reopen();
    ASSERT_EQ("v3", Get("foo"));
  } while (ChangeOptions());
}
1000
1001 // Check that writes done during a memtable compaction are recovered
1002 // if the database is shutdown during the memtable compaction.
// Check that writes done during a memtable compaction are recovered
// if the database is shutdown during the memtable compaction.
TEST(DBTest, RecoverDuringMemtableCompaction) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.write_buffer_size = 1000000;
    Reopen(&options);

    // Trigger a long memtable compaction and reopen the database during it
    ASSERT_OK(Put("foo", "v1"));                         // Goes to 1st log file
    ASSERT_OK(Put("big1", std::string(10000000, 'x')));  // Fills memtable
    ASSERT_OK(Put("big2", std::string(1000, 'y')));      // Triggers compaction
    ASSERT_OK(Put("bar", "v2"));                         // Goes to new log file

    Reopen(&options);
    ASSERT_EQ("v1", Get("foo"));
    ASSERT_EQ("v2", Get("bar"));
    ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
    ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  } while (ChangeOptions());
}
1023
// Formats |i| as a fixed-width key: "key" followed by the value
// zero-padded to at least six decimal digits (e.g. Key(5) == "key000005").
static std::string Key(int i) {
  char buf[100];
  const int written = snprintf(buf, sizeof(buf), "key%06d", i);
  return std::string(buf, written);
}
1029
// With a tiny write buffer, sustained writes must produce new table
// files (minor compactions), and all data must survive a reopen.
TEST(DBTest, MinorCompactionsHappen) {
  Options options = CurrentOptions();
  options.write_buffer_size = 10000;  // Tiny buffer forces frequent flushes.
  Reopen(&options);

  const int N = 500;

  int starting_num_tables = TotalTableFiles();
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
  }
  int ending_num_tables = TotalTableFiles();
  ASSERT_GT(ending_num_tables, starting_num_tables);

  for (int i = 0; i < N; i++) {
    ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  }

  Reopen();

  for (int i = 0; i < N; i++) {
    ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  }
}
1054
// Recovery of a large log with a small write buffer must flush multiple
// level-0 tables mid-log and preserve every record.
TEST(DBTest, RecoverWithLargeLog) {
  {
    Options options = CurrentOptions();
    Reopen(&options);
    ASSERT_OK(Put("big1", std::string(200000, '1')));
    ASSERT_OK(Put("big2", std::string(200000, '2')));
    ASSERT_OK(Put("small3", std::string(10, '3')));
    ASSERT_OK(Put("small4", std::string(10, '4')));
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);  // Everything is still in the log.
  }

  // Make sure that if we re-open with a small write buffer size that
  // we flush table files in the middle of a large log file.
  Options options = CurrentOptions();
  options.write_buffer_size = 100000;
  Reopen(&options);
  ASSERT_EQ(NumTableFilesAtLevel(0), 3);
  ASSERT_EQ(std::string(200000, '1'), Get("big1"));
  ASSERT_EQ(std::string(200000, '2'), Get("big2"));
  ASSERT_EQ(std::string(10, '3'), Get("small3"));
  ASSERT_EQ(std::string(10, '4'), Get("small4"));
  ASSERT_GT(NumTableFilesAtLevel(0), 1);
}
1078
// Compacting 8MB from level-0 must split the output into multiple level-1
// files (rather than one giant file), without losing any value.
TEST(DBTest, CompactionsGenerateMultipleFiles) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;  // Large write buffer
  Reopen(&options);

  Random rnd(301);

  // Write 8MB (80 values, each 100K)
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  std::vector<std::string> values;
  for (int i = 0; i < 80; i++) {
    values.push_back(RandomString(&rnd, 100000));
    ASSERT_OK(Put(Key(i), values[i]));
  }

  // Reopening moves updates to level-0
  Reopen(&options);
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);

  // Level-0 drained; the compaction must have produced several level-1 files.
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_GT(NumTableFilesAtLevel(1), 1);
  for (int i = 0; i < 80; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
}
1104
// Overwriting a single key many times must not grow the file count without
// bound: obsolete versions must be compacted away.
TEST(DBTest, RepeatedWritesToSameKey) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;  // Small write buffer
  Reopen(&options);

  // We must have at most one file per level except for level-0,
  // which may have up to kL0_StopWritesTrigger files.
  const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;

  Random rnd(301);
  // Each value is twice the write buffer, so every Put forces a flush.
  std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
  for (int i = 0; i < 5 * kMaxFiles; i++) {
    Put("key", value);
    ASSERT_LE(TotalTableFiles(), kMaxFiles);
    fprintf(stderr, "after %d: %d files\n", i + 1, TotalTableFiles());
  }
}
1123
// Checks that compactions pick output file boundaries so that no single file
// overlaps too much data (the ~100MB of "B" keys) in the next level.
TEST(DBTest, SparseMerge) {
  Options options = CurrentOptions();
  options.compression = kNoCompression;  // Keep on-disk sizes predictable
  Reopen(&options);

  FillLevels("A", "Z");

  // Suppose there is:
  // small amount of data with prefix A
  // large amount of data with prefix B
  // small amount of data with prefix C
  // and that recent updates have made small changes to all three prefixes.
  // Check that we do not do a compaction that merges all of B in one shot.
  const std::string value(1000, 'x');
  Put("A", "va");
  // Write approximately 100MB of "B" values
  for (int i = 0; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
    Put(key, value);
  }
  Put("C", "vc");
  dbfull()->TEST_CompactMemTable();
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);

  // Make sparse update
  Put("A", "va2");
  Put("B100", "bvalue2");
  Put("C", "vc2");
  dbfull()->TEST_CompactMemTable();

  // Compactions should not cause us to create a situation where
  // a file overlaps too much data at the next level.
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
  dbfull()->TEST_CompactRange(1, nullptr, nullptr);
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
}
1163
// Returns true iff low <= val <= high.  On failure, logs the offending value
// and the expected range to stderr so ASSERT_TRUE call sites are diagnosable.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  if (val < low || val > high) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val), (unsigned long long)(low),
            (unsigned long long)(high));
    return false;
  }
  return true;
}
1173
// Exercises DB::GetApproximateSizes (via Size()): sizes are 0 while data is
// only in the memtable, and after recovery they must track the on-disk bytes
// per key range, surviving repeated reopens and partial compactions.
TEST(DBTest, ApproximateSizes) {
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 100000000;  // Large write buffer
    options.compression = kNoCompression;   // Size estimates assume raw bytes
    DestroyAndReopen();

    ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
    Reopen(&options);
    ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));

    // Write 8MB (80 values, each 100K)
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    const int N = 80;
    static const int S1 = 100000;
    static const int S2 = 105000;  // Allow some expansion from metadata
    Random rnd(301);
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(Key(i), RandomString(&rnd, S1)));
    }

    // 0 because GetApproximateSizes() does not account for memtable space
    ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));

    if (options.reuse_logs) {
      // Recovery will reuse memtable, and GetApproximateSizes() does not
      // account for memtable usage;
      Reopen(&options);
      ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
      continue;
    }

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      Reopen(&options);

      // Compact a sliding 10-key window each pass; estimates must hold no
      // matter which files have been pushed down.
      for (int compact_start = 0; compact_start < N; compact_start += 10) {
        for (int i = 0; i < N; i += 10) {
          ASSERT_TRUE(Between(Size("", Key(i)), S1 * i, S2 * i));
          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix"), S1 * (i + 1),
                              S2 * (i + 1)));
          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10)), S1 * 10, S2 * 10));
        }
        ASSERT_TRUE(Between(Size("", Key(50)), S1 * 50, S2 * 50));
        ASSERT_TRUE(Between(Size("", Key(50) + ".suffix"), S1 * 50, S2 * 50));

        std::string cstart_str = Key(compact_start);
        std::string cend_str = Key(compact_start + 9);
        Slice cstart = cstart_str;
        Slice cend = cend_str;
        dbfull()->TEST_CompactRange(0, &cstart, &cend);
      }

      ASSERT_EQ(NumTableFilesAtLevel(0), 0);
      ASSERT_GT(NumTableFilesAtLevel(1), 0);
    }
  } while (ChangeOptions());
}
1232
// GetApproximateSizes with values of very different sizes: cumulative size
// up to key(i) must match the sum of the preceding value sizes.
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;  // Size estimates assume raw bytes
    Reopen();

    Random rnd(301);
    std::string big1 = RandomString(&rnd, 100000);
    ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(Key(2), big1));
    ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(Key(4), big1));
    ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
    ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));

    if (options.reuse_logs) {
      // Need to force a memtable compaction since recovery does not do so.
      ASSERT_OK(dbfull()->TEST_CompactMemTable());
    }

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      Reopen(&options);

      // Each bound is the running total of value sizes written above,
      // plus slack for per-entry metadata.
      ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
      ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
      ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
      ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
      ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
      ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
      ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
      ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
      ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));

      ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));

      dbfull()->TEST_CompactRange(0, nullptr, nullptr);
    }
  } while (ChangeOptions());
}
1275
// A live iterator must pin the version it was created against: updates and
// compactions performed afterwards must not change what it yields.
TEST(DBTest, IteratorPinsRef) {
  Put("foo", "hello");

  // Get iterator that will yield the current contents of the DB.
  Iterator* iter = db_->NewIterator(ReadOptions());

  // Write to force compactions
  Put("foo", "newvalue1");
  for (int i = 0; i < 100; i++) {
    ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v')));  // 100K values
  }
  Put("foo", "newvalue2");

  // The iterator still sees only the single pre-existing entry.
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key().ToString());
  ASSERT_EQ("hello", iter->value().ToString());
  iter->Next();
  ASSERT_TRUE(!iter->Valid());
  delete iter;
}
1297
// Snapshots must each see the value current at their creation time, and
// releasing one snapshot must not disturb the views of the others.
TEST(DBTest, Snapshot) {
  do {
    Put("foo", "v1");
    const Snapshot* s1 = db_->GetSnapshot();
    Put("foo", "v2");
    const Snapshot* s2 = db_->GetSnapshot();
    Put("foo", "v3");
    const Snapshot* s3 = db_->GetSnapshot();

    Put("foo", "v4");
    ASSERT_EQ("v1", Get("foo", s1));
    ASSERT_EQ("v2", Get("foo", s2));
    ASSERT_EQ("v3", Get("foo", s3));
    ASSERT_EQ("v4", Get("foo"));

    // Release snapshots out of order; remaining ones must be unaffected.
    db_->ReleaseSnapshot(s3);
    ASSERT_EQ("v1", Get("foo", s1));
    ASSERT_EQ("v2", Get("foo", s2));
    ASSERT_EQ("v4", Get("foo"));

    db_->ReleaseSnapshot(s1);
    ASSERT_EQ("v2", Get("foo", s2));
    ASSERT_EQ("v4", Get("foo"));

    db_->ReleaseSnapshot(s2);
    ASSERT_EQ("v4", Get("foo"));
  } while (ChangeOptions());
}
1326
// Once the snapshot protecting an old value is released, compaction must
// drop the hidden (overwritten) value and reclaim its space.
TEST(DBTest, HiddenValuesAreRemoved) {
  do {
    Random rnd(301);
    FillLevels("a", "z");

    std::string big = RandomString(&rnd, 50000);
    Put("foo", big);
    Put("pastfoo", "v");
    const Snapshot* snapshot = db_->GetSnapshot();
    Put("foo", "tiny");
    Put("pastfoo2", "v2");  // Advance sequence number one more

    ASSERT_OK(dbfull()->TEST_CompactMemTable());
    ASSERT_GT(NumTableFilesAtLevel(0), 0);

    // While the snapshot is live, the big value must remain on disk.
    ASSERT_EQ(big, Get("foo", snapshot));
    ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
    db_->ReleaseSnapshot(snapshot);
    ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
    Slice x("x");
    dbfull()->TEST_CompactRange(0, nullptr, &x);
    // After compaction only the newest value survives.
    ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    ASSERT_GE(NumTableFilesAtLevel(1), 1);
    dbfull()->TEST_CompactRange(1, nullptr, &x);
    ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");

    // The 50KB of hidden data has been reclaimed.
    ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
  } while (ChangeOptions());
}
1357
// A deletion marker that is shadowed by a newer Put may be dropped during an
// intermediate-level compaction, but the old value below survives until its
// own level is compacted.
TEST(DBTest, DeletionMarkers1) {
  Put("foo", "v1");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  const int last = config::kMaxMemCompactLevel;
  ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo => v1 is now in last level

  // Place a table at level last-1 to prevent merging with preceding mutation
  Put("a", "begin");
  Put("z", "end");
  dbfull()->TEST_CompactMemTable();
  ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1);

  Delete("foo");
  Put("foo", "v2");
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  Slice z("z");
  dbfull()->TEST_CompactRange(last - 2, nullptr, &z);
  // DEL eliminated, but v1 remains because we aren't compacting that level
  // (DEL can be eliminated because v2 hides v1).
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr);
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed.  (as is v1).
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
}
1386
// A bare deletion marker (no newer Put above it) must be retained as long as
// a lower level may still hold the deleted key, and dropped only when its
// compaction reaches the base level for that key.
TEST(DBTest, DeletionMarkers2) {
  Put("foo", "v1");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  const int last = config::kMaxMemCompactLevel;
  ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo => v1 is now in last level

  // Place a table at level last-1 to prevent merging with preceding mutation
  Put("a", "begin");
  Put("z", "end");
  dbfull()->TEST_CompactMemTable();
  ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1);

  Delete("foo");
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr);
  // DEL kept: "last" file overlaps
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr);
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed.  (as is v1).
  ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
}
1412
// Regression test: a memtable compaction must notice overlap with existing
// level-0 files and keep its output in level-0, rather than pushing a
// deletion to a deeper level where it would be masked.
TEST(DBTest, OverlapInLevel0) {
  do {
    ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";

    // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
    // 0.
    ASSERT_OK(Put("100", "v100"));
    ASSERT_OK(Put("999", "v999"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_OK(Delete("100"));
    ASSERT_OK(Delete("999"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("0,1,1", FilesPerLevel());

    // Make files spanning the following ranges in level-0:
    //  files[0]  200 .. 900
    //  files[1]  300 .. 500
    // Note that files are sorted by smallest key.
    ASSERT_OK(Put("300", "v300"));
    ASSERT_OK(Put("500", "v500"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_OK(Put("200", "v200"));
    ASSERT_OK(Put("600", "v600"));
    ASSERT_OK(Put("900", "v900"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("2,1,1", FilesPerLevel());

    // Compact away the placeholder files we created initially
    dbfull()->TEST_CompactRange(1, nullptr, nullptr);
    dbfull()->TEST_CompactRange(2, nullptr, nullptr);
    ASSERT_EQ("2", FilesPerLevel());

    // Do a memtable compaction.  Before bug-fix, the compaction would
    // not detect the overlap with level-0 files and would incorrectly place
    // the deletion in a deeper level.
    ASSERT_OK(Delete("600"));
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("3", FilesPerLevel());
    ASSERT_EQ("NOT_FOUND", Get("600"));
  } while (ChangeOptions());
}
1454
// Regression test for leveldb issue 44 (part a): a specific sequence of
// puts/deletes interleaved with reopens must not resurrect deleted keys
// after a background compaction runs.
TEST(DBTest, L0_CompactionBug_Issue44_a) {
  Reopen();
  ASSERT_OK(Put("b", "v"));
  Reopen();
  ASSERT_OK(Delete("b"));
  ASSERT_OK(Delete("a"));
  Reopen();
  ASSERT_OK(Delete("a"));
  Reopen();
  ASSERT_OK(Put("a", "v"));
  Reopen();
  Reopen();
  ASSERT_EQ("(a->v)", Contents());
  DelayMilliseconds(1000);  // Wait for compaction to finish
  ASSERT_EQ("(a->v)", Contents());
}
1471
// Regression test for leveldb issue 44 (part b): same theme as part a with
// a longer reopen/compaction interleaving; deleted keys "d" and "b" must
// stay deleted and contents must be stable across the final compaction.
TEST(DBTest, L0_CompactionBug_Issue44_b) {
  Reopen();
  Put("", "");
  Reopen();
  Delete("e");
  Put("", "");
  Reopen();
  Put("c", "cv");
  Reopen();
  Put("", "");
  Reopen();
  Put("", "");
  DelayMilliseconds(1000);  // Wait for compaction to finish
  Reopen();
  Put("d", "dv");
  Reopen();
  Put("", "");
  Reopen();
  Delete("d");
  Delete("b");
  Reopen();
  ASSERT_EQ("(->)(c->cv)", Contents());
  DelayMilliseconds(1000);  // Wait for compaction to finish
  ASSERT_EQ("(->)(c->cv)", Contents());
}
1497
// Regression test for leveldb issue 474: calling fflush(nullptr) (which
// flushes all C stdio streams) between writes must not interfere with the
// DB's own file handling.
TEST(DBTest, Fflush_Issue474) {
  static const int kNum = 100000;
  Random rnd(test::RandomSeed());
  for (int i = 0; i < kNum; i++) {
    fflush(nullptr);  // Flush every open stdio stream
    ASSERT_OK(Put(RandomKey(&rnd), RandomString(&rnd, 100)));
  }
}
1506
// Reopening an existing DB with a comparator whose Name() differs from the
// one the DB was created with must fail, and the error must mention the
// comparator mismatch.
TEST(DBTest, ComparatorCheck) {
  // Same ordering as the default comparator, but a different Name().
  class NewComparator : public Comparator {
   public:
    const char* Name() const override { return "leveldb.NewComparator"; }
    int Compare(const Slice& a, const Slice& b) const override {
      return BytewiseComparator()->Compare(a, b);
    }
    void FindShortestSeparator(std::string* s, const Slice& l) const override {
      BytewiseComparator()->FindShortestSeparator(s, l);
    }
    void FindShortSuccessor(std::string* key) const override {
      BytewiseComparator()->FindShortSuccessor(key);
    }
  };
  NewComparator cmp;
  Options new_options = CurrentOptions();
  new_options.comparator = &cmp;
  Status s = TryReopen(&new_options);
  ASSERT_TRUE(!s.ok());
  ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
      << s.ToString();
}
1529
// Uses a comparator that orders keys by their numeric value (so "[10]" and
// "[0xa]" compare equal) and checks reads, writes and compactions all honor
// that ordering.
TEST(DBTest, CustomComparator) {
  class NumberComparator : public Comparator {
   public:
    const char* Name() const override { return "test.NumberComparator"; }
    int Compare(const Slice& a, const Slice& b) const override {
      return ToNumber(a) - ToNumber(b);
    }
    void FindShortestSeparator(std::string* s, const Slice& l) const override {
      ToNumber(*s);  // Check format
      ToNumber(l);   // Check format
    }
    void FindShortSuccessor(std::string* key) const override {
      ToNumber(*key);  // Check format
    }

   private:
    // Parses "[<number>]"; %i accepts decimal, hex (0x...) and octal forms.
    static int ToNumber(const Slice& x) {
      // Check that there are no extra characters.
      ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
          << EscapeString(x);
      int val;
      char ignored;
      ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
          << EscapeString(x);
      return val;
    }
  };
  NumberComparator cmp;
  Options new_options = CurrentOptions();
  new_options.create_if_missing = true;
  new_options.comparator = &cmp;
  new_options.filter_policy = nullptr;   // Cannot use bloom filters
  new_options.write_buffer_size = 1000;  // Compact more often
  DestroyAndReopen(&new_options);
  ASSERT_OK(Put("[10]", "ten"));
  ASSERT_OK(Put("[0x14]", "twenty"));
  for (int i = 0; i < 2; i++) {
    // Decimal and hex spellings of the same number must alias the same key.
    ASSERT_EQ("ten", Get("[10]"));
    ASSERT_EQ("ten", Get("[0xa]"));
    ASSERT_EQ("twenty", Get("[20]"));
    ASSERT_EQ("twenty", Get("[0x14]"));
    ASSERT_EQ("NOT_FOUND", Get("[15]"));
    ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
    Compact("[0]", "[9999]");
  }

  // Bulk writes to exercise FindShortestSeparator/FindShortSuccessor checks.
  for (int run = 0; run < 2; run++) {
    for (int i = 0; i < 1000; i++) {
      char buf[100];
      snprintf(buf, sizeof(buf), "[%d]", i * 10);
      ASSERT_OK(Put(buf, buf));
    }
    Compact("[0]", "[1000000]");
  }
}
1585
// Exercises manual compaction (CompactRange): ranges that miss all files
// must be no-ops, overlapping ranges must push files down, and a full-range
// compaction must collapse everything to a single bottom-level file.
TEST(DBTest, ManualCompaction) {
  ASSERT_EQ(config::kMaxMemCompactLevel, 2)
      << "Need to update this test to match kMaxMemCompactLevel";

  MakeTables(3, "p", "q");
  ASSERT_EQ("1,1,1", FilesPerLevel());

  // Compaction range falls before files
  Compact("", "c");
  ASSERT_EQ("1,1,1", FilesPerLevel());

  // Compaction range falls after files
  Compact("r", "z");
  ASSERT_EQ("1,1,1", FilesPerLevel());

  // Compaction range overlaps files
  Compact("p1", "p9");
  ASSERT_EQ("0,0,1", FilesPerLevel());

  // Populate a different range
  MakeTables(3, "c", "e");
  ASSERT_EQ("1,1,2", FilesPerLevel());

  // Compact just the new range
  Compact("b", "f");
  ASSERT_EQ("0,0,2", FilesPerLevel());

  // Compact all
  MakeTables(1, "a", "z");
  ASSERT_EQ("0,1,2", FilesPerLevel());
  db_->CompactRange(nullptr, nullptr);
  ASSERT_EQ("0,0,1", FilesPerLevel());
}
1619
// Verifies DB::Open honors create_if_missing and error_if_exists in all
// four combinations of "db exists" x option value.
TEST(DBTest, DBOpen_Options) {
  std::string dbname = test::TmpDir() + "/db_options_test";
  DestroyDB(dbname, Options());

  // Does not exist, and create_if_missing == false: error
  DB* db = nullptr;
  Options opts;
  opts.create_if_missing = false;
  Status s = DB::Open(opts, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
  ASSERT_TRUE(db == nullptr);

  // Does not exist, and create_if_missing == true: OK
  opts.create_if_missing = true;
  s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  delete db;
  db = nullptr;

  // Does exist, and error_if_exists == true: error
  opts.create_if_missing = false;
  opts.error_if_exists = true;
  s = DB::Open(opts, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
  ASSERT_TRUE(db == nullptr);

  // Does exist, and error_if_exists == false: OK
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  delete db;
  db = nullptr;
}
1658
// DestroyDB on an empty directory must remove it, whether or not the Env
// reports the "." and ".." entries from GetChildren.
TEST(DBTest, DestroyEmptyDir) {
  std::string dbname = test::TmpDir() + "/db_empty_dir";
  TestEnv env(Env::Default());
  env.DeleteDir(dbname);
  ASSERT_TRUE(!env.FileExists(dbname));

  Options opts;
  opts.env = &env;

  ASSERT_OK(env.CreateDir(dbname));
  ASSERT_TRUE(env.FileExists(dbname));
  std::vector<std::string> children;
  ASSERT_OK(env.GetChildren(dbname, &children));
  // The stock Env's do not filter out '.' and '..' special files.
  ASSERT_EQ(2, children.size());
  ASSERT_OK(DestroyDB(dbname, opts));
  ASSERT_TRUE(!env.FileExists(dbname));

  // Should also be destroyed if Env is filtering out dot files.
  env.SetIgnoreDotFiles(true);
  ASSERT_OK(env.CreateDir(dbname));
  ASSERT_TRUE(env.FileExists(dbname));
  ASSERT_OK(env.GetChildren(dbname, &children));
  ASSERT_EQ(0, children.size());
  ASSERT_OK(DestroyDB(dbname, opts));
  ASSERT_TRUE(!env.FileExists(dbname));
}
1686
// DestroyDB must refuse to delete a database that is currently open (the
// open handle holds the LOCK file) and succeed once the DB is closed.
TEST(DBTest, DestroyOpenDB) {
  std::string dbname = test::TmpDir() + "/open_db_dir";
  env_->DeleteDir(dbname);
  ASSERT_TRUE(!env_->FileExists(dbname));

  Options opts;
  opts.create_if_missing = true;
  DB* db = nullptr;
  ASSERT_OK(DB::Open(opts, dbname, &db));
  ASSERT_TRUE(db != nullptr);

  // Must fail to destroy an open db.
  ASSERT_TRUE(env_->FileExists(dbname));
  ASSERT_TRUE(!DestroyDB(dbname, Options()).ok());
  ASSERT_TRUE(env_->FileExists(dbname));

  delete db;
  db = nullptr;

  // Should succeed destroying a closed db.
  ASSERT_OK(DestroyDB(dbname, Options()));
  ASSERT_TRUE(!env_->FileExists(dbname));
}
1710
// The LOCK file must prevent a second process/handle from opening the same
// database directory while the fixture's db_ is still open.
TEST(DBTest, Locking) {
  DB* db2 = nullptr;
  Status s = DB::Open(CurrentOptions(), dbname_, &db2);
  ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
}
1716
// Check that number of files does not grow when we are out of space
TEST(DBTest, NoSpace) {
  Options options = CurrentOptions();
  options.env = env_;
  Reopen(&options);

  ASSERT_OK(Put("foo", "v1"));
  ASSERT_EQ("v1", Get("foo"));
  Compact("a", "z");
  const int num_files = CountFiles();
  // Force out-of-space errors.
  env_->no_space_.store(true, std::memory_order_release);
  // Repeated failing compactions must not leak table/temp files.
  for (int i = 0; i < 10; i++) {
    for (int level = 0; level < config::kNumLevels - 1; level++) {
      dbfull()->TEST_CompactRange(level, nullptr, nullptr);
    }
  }
  env_->no_space_.store(false, std::memory_order_release);
  ASSERT_LT(CountFiles(), num_files + 3);
}
1737
// When new files cannot be created, writes that need a memtable flush must
// report errors rather than succeed silently or crash.
TEST(DBTest, NonWritableFileSystem) {
  Options options = CurrentOptions();
  options.write_buffer_size = 1000;  // Small buffer so Puts force new files
  options.env = env_;
  Reopen(&options);
  ASSERT_OK(Put("foo", "v1"));
  // Force errors for new files.
  env_->non_writable_.store(true, std::memory_order_release);
  std::string big(100000, 'x');
  int errors = 0;
  for (int i = 0; i < 20; i++) {
    fprintf(stderr, "iter %d; errors %d\n", i, errors);
    if (!Put("foo", big).ok()) {
      errors++;
      DelayMilliseconds(100);
    }
  }
  ASSERT_GT(errors, 0);
  env_->non_writable_.store(false, std::memory_order_release);
}
1758
// Once a synced write fails, the DB must go into a permanent error state:
// later writes fail too (even non-sync ones, even after sync errors stop).
TEST(DBTest, WriteSyncError) {
  // Check that log sync errors cause the DB to disallow future writes.

  // (a) Cause log sync calls to fail
  Options options = CurrentOptions();
  options.env = env_;
  Reopen(&options);
  env_->data_sync_error_.store(true, std::memory_order_release);

  // (b) Normal write should succeed
  WriteOptions w;
  ASSERT_OK(db_->Put(w, "k1", "v1"));
  ASSERT_EQ("v1", Get("k1"));

  // (c) Do a sync write; should fail
  w.sync = true;
  ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("NOT_FOUND", Get("k2"));

  // (d) make sync behave normally
  env_->data_sync_error_.store(false, std::memory_order_release);

  // (e) Do a non-sync write; should fail
  w.sync = false;
  ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("NOT_FOUND", Get("k2"));
  ASSERT_EQ("NOT_FOUND", Get("k3"));
}
1789
// A MANIFEST write/sync failure during compaction must not lose data: the
// compaction fails, reads keep working, and recovery restores everything.
TEST(DBTest, ManifestWriteError) {
  // Test for the following problem:
  // (a) Compaction produces file F
  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
  // (c) GC deletes F
  // (d) After reopening DB, reads fail since deleted F is named in log record

  // We iterate twice.  In the second iteration, everything is the
  // same except the log record never makes it to the MANIFEST file.
  for (int iter = 0; iter < 2; iter++) {
    std::atomic<bool>* error_type = (iter == 0) ? &env_->manifest_sync_error_
                                                : &env_->manifest_write_error_;

    // Insert foo=>bar mapping
    Options options = CurrentOptions();
    options.env = env_;
    options.create_if_missing = true;
    options.error_if_exists = false;
    DestroyAndReopen(&options);
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_EQ("bar", Get("foo"));

    // Memtable compaction (will succeed)
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("bar", Get("foo"));
    const int last = config::kMaxMemCompactLevel;
    ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo=>bar is now in last level

    // Merging compaction (will fail)
    error_type->store(true, std::memory_order_release);
    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
    ASSERT_EQ("bar", Get("foo"));

    // Recovery: should not lose data
    error_type->store(false, std::memory_order_release);
    Reopen(&options);
    ASSERT_EQ("bar", Get("foo"));
  }
}
1829
// With paranoid_checks enabled, opening a DB whose table file was deleted
// out from under it must fail with a "missing" error.
TEST(DBTest, MissingSSTFile) {
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_EQ("bar", Get("foo"));

  // Dump the memtable to disk.
  dbfull()->TEST_CompactMemTable();
  ASSERT_EQ("bar", Get("foo"));

  Close();
  ASSERT_TRUE(DeleteAnSSTFile());
  Options options = CurrentOptions();
  options.paranoid_checks = true;
  Status s = TryReopen(&options);
  ASSERT_TRUE(!s.ok());
  // "issing" matches both "missing" and "Missing" in the error text.
  ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) << s.ToString();
}
1846
// Tables renamed from the current ".ldb" extension to the legacy ".sst"
// extension must still be found and readable on reopen.
TEST(DBTest, StillReadSST) {
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_EQ("bar", Get("foo"));

  // Dump the memtable to disk.
  dbfull()->TEST_CompactMemTable();
  ASSERT_EQ("bar", Get("foo"));
  Close();
  ASSERT_GT(RenameLDBToSST(), 0);
  Options options = CurrentOptions();
  options.paranoid_checks = true;
  Status s = TryReopen(&options);
  ASSERT_TRUE(s.ok());
  ASSERT_EQ("bar", Get("foo"));
}
1862
// Repeated overwrite-then-compact cycles must delete obsolete files: the
// total file count must end exactly where it started.
TEST(DBTest, FilesDeletedAfterCompaction) {
  ASSERT_OK(Put("foo", "v2"));
  Compact("a", "z");
  const int num_files = CountFiles();
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put("foo", "v2"));
    Compact("a", "z");
  }
  ASSERT_EQ(CountFiles(), num_files);
}
1873
// With a bloom filter policy, lookups of present keys should touch roughly
// one sstable each, and lookups of missing keys should almost never hit disk.
TEST(DBTest, BloomFilter) {
  env_->count_random_reads_ = true;
  Options options = CurrentOptions();
  options.env = env_;
  options.block_cache = NewLRUCache(0);  // Prevent cache hits
  options.filter_policy = NewBloomFilterPolicy(10);
  Reopen(&options);

  // Populate multiple layers
  const int N = 10000;
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), Key(i)));
  }
  Compact("a", "z");
  for (int i = 0; i < N; i += 100) {
    ASSERT_OK(Put(Key(i), Key(i)));
  }
  dbfull()->TEST_CompactMemTable();

  // Prevent auto compactions triggered by seeks
  env_->delay_data_sync_.store(true, std::memory_order_release);

  // Lookup present keys.  Should rarely read from small sstable.
  env_->random_read_counter_.Reset();
  for (int i = 0; i < N; i++) {
    ASSERT_EQ(Key(i), Get(Key(i)));
  }
  int reads = env_->random_read_counter_.Read();
  fprintf(stderr, "%d present => %d reads\n", N, reads);
  ASSERT_GE(reads, N);
  ASSERT_LE(reads, N + 2 * N / 100);

  // Lookup missing keys.  Should rarely read from either sstable.
  env_->random_read_counter_.Reset();
  for (int i = 0; i < N; i++) {
    ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
  }
  reads = env_->random_read_counter_.Read();
  fprintf(stderr, "%d missing => %d reads\n", N, reads);
  // Bloom filters should filter out nearly all reads for absent keys.
  ASSERT_LE(reads, 3 * N / 100);

  env_->delay_data_sync_.store(false, std::memory_order_release);
  Close();
  delete options.block_cache;
  delete options.filter_policy;
}
1920
// Multi-threaded test:
namespace {

static const int kNumThreads = 4;   // Concurrent reader/writer threads
static const int kTestSeconds = 10; // How long MultiThreaded lets them run
static const int kNumKeys = 1000;   // Key space shared by all threads

// Shared state for the MultiThreaded test, accessed by all worker threads.
struct MTState {
  DBTest* test;                             // Owning fixture (provides db_)
  std::atomic<bool> stop;                   // Set by the main thread to stop workers
  std::atomic<int> counter[kNumThreads];    // Ops completed, per thread
  std::atomic<bool> thread_done[kNumThreads];  // Set by each worker on exit
};

// Per-thread argument passed to MTThreadBody.
struct MTThread {
  MTState* state;
  int id;  // Index into MTState's per-thread arrays
};

1939
// Worker loop for the MultiThreaded test: until told to stop, randomly
// either writes <key, id, counter> for a random key or reads a random key
// and verifies the stored writer's counter is not ahead of that writer's
// published counter.
static void MTThreadBody(void* arg) {
  MTThread* t = reinterpret_cast<MTThread*>(arg);
  int id = t->id;
  DB* db = t->state->test->db_;
  int counter = 0;
  fprintf(stderr, "... starting thread %d\n", id);
  Random rnd(1000 + id);  // Distinct seed per thread
  std::string value;
  char valbuf[1500];
  while (!t->state->stop.load(std::memory_order_acquire)) {
    // Publish our progress before each op so readers can bound what they see.
    t->state->counter[id].store(counter, std::memory_order_release);

    int key = rnd.Uniform(kNumKeys);
    char keybuf[20];
    snprintf(keybuf, sizeof(keybuf), "%016d", key);

    if (rnd.OneIn(2)) {
      // Write values of the form <key, my id, counter>.
      // We add some padding to force compactions.
      snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", key, id,
               static_cast<int>(counter));
      ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
    } else {
      // Read a value and verify that it matches the pattern written above.
      Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
      if (s.IsNotFound()) {
        // Key has not yet been written
      } else {
        // Check that the writer thread counter is >= the counter in the value
        ASSERT_OK(s);
        int k, w, c;
        ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
        ASSERT_EQ(k, key);
        ASSERT_GE(w, 0);
        ASSERT_LT(w, kNumThreads);
        ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
      }
    }
    counter++;
  }
  t->state->thread_done[id].store(true, std::memory_order_release);
  fprintf(stderr, "... stopping thread %d after %d ops\n", id, counter);
}
1983
1984 } // namespace
1985
// Runs kNumThreads concurrent readers/writers (MTThreadBody) against one DB
// for kTestSeconds, repeating under each options configuration.  The
// consistency checks themselves live in MTThreadBody.
TEST(DBTest, MultiThreaded) {
  do {
    // Initialize state
    MTState mt;
    mt.test = this;
    mt.stop.store(false, std::memory_order_release);
    for (int id = 0; id < kNumThreads; id++) {
      // counter[] is std::atomic<int>; store 0 rather than relying on an
      // implicit bool->int conversion of `false`.
      mt.counter[id].store(0, std::memory_order_release);
      mt.thread_done[id].store(false, std::memory_order_release);
    }

    // Start threads
    MTThread thread[kNumThreads];
    for (int id = 0; id < kNumThreads; id++) {
      thread[id].state = &mt;
      thread[id].id = id;
      env_->StartThread(MTThreadBody, &thread[id]);
    }

    // Let them run for a while
    DelayMilliseconds(kTestSeconds * 1000);

    // Stop the threads and wait for them to finish
    mt.stop.store(true, std::memory_order_release);
    for (int id = 0; id < kNumThreads; id++) {
      // Env::StartThread provides no join handle, so poll the done flags.
      while (!mt.thread_done[id].load(std::memory_order_acquire)) {
        DelayMilliseconds(100);
      }
    }
  } while (ChangeOptions());
}
2017
namespace {
// In-memory key/value store backing ModelDB below.
// C++11 alias syntax instead of typedef, consistent with the file's use of
// other C++11 features (std::atomic, override, nullptr).
using KVMap = std::map<std::string, std::string>;
}  // namespace
2021
// ModelDB is a trivially-correct in-memory reference implementation of the
// DB interface, backed by a KVMap.  The Randomized test drives it in
// parallel with the real DB and compares iterator output.
class ModelDB : public DB {
 public:
  // A snapshot is simply a full copy of the map at the time it was taken.
  class ModelSnapshot : public Snapshot {
   public:
    KVMap map_;
  };

  explicit ModelDB(const Options& options) : options_(options) {}
  ~ModelDB() override = default;
  // Put/Delete route through the DB base-class helpers, which funnel all
  // mutations into Write() below.
  Status Put(const WriteOptions& o, const Slice& k, const Slice& v) override {
    return DB::Put(o, k, v);
  }
  Status Delete(const WriteOptions& o, const Slice& key) override {
    return DB::Delete(o, key);
  }
  // Point lookups are not modeled; the tests compare via iterators only.
  Status Get(const ReadOptions& options, const Slice& key,
             std::string* value) override {
    assert(false);  // Not implemented
    return Status::NotFound(key);
  }
  Iterator* NewIterator(const ReadOptions& options) override {
    if (options.snapshot == nullptr) {
      // No snapshot: iterate over a private copy of the current state so
      // later writes do not affect this iterator.  The iterator owns and
      // deletes the copy.
      KVMap* saved = new KVMap;
      *saved = map_;
      return new ModelIter(saved, true);
    } else {
      // Snapshot given: iterate over the map captured in the snapshot,
      // which remains owned by the snapshot object.
      const KVMap* snapshot_state =
          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
      return new ModelIter(snapshot_state, false);
    }
  }
  const Snapshot* GetSnapshot() override {
    ModelSnapshot* snapshot = new ModelSnapshot;
    snapshot->map_ = map_;
    return snapshot;
  }

  void ReleaseSnapshot(const Snapshot* snapshot) override {
    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  }
  // Applies the batch directly to map_ via the WriteBatch handler API.
  Status Write(const WriteOptions& options, WriteBatch* batch) override {
    class Handler : public WriteBatch::Handler {
     public:
      KVMap* map_;
      void Put(const Slice& key, const Slice& value) override {
        (*map_)[key.ToString()] = value.ToString();
      }
      void Delete(const Slice& key) override { map_->erase(key.ToString()); }
    };
    Handler handler;
    handler.map_ = &map_;
    return batch->Iterate(&handler);
  }

  // Properties are not modeled.
  bool GetProperty(const Slice& property, std::string* value) override {
    return false;
  }
  // Sizes are not modeled; report zero for every range.
  void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) override {
    for (int i = 0; i < n; i++) {
      sizes[i] = 0;
    }
  }
  void CompactRange(const Slice* start, const Slice* end) override {}

 private:
  // Iterator over a KVMap.  When `owned` is true the iterator owns the map
  // (the copy made in NewIterator) and deletes it on destruction.
  class ModelIter : public Iterator {
   public:
    ModelIter(const KVMap* map, bool owned)
        : map_(map), owned_(owned), iter_(map_->end()) {}
    ~ModelIter() override {
      if (owned_) delete map_;
    }
    bool Valid() const override { return iter_ != map_->end(); }
    void SeekToFirst() override { iter_ = map_->begin(); }
    void SeekToLast() override {
      if (map_->empty()) {
        iter_ = map_->end();
      } else {
        // Position on the last element: look up rbegin's key with a
        // forward iterator so iter_ stays a plain const_iterator.
        iter_ = map_->find(map_->rbegin()->first);
      }
    }
    void Seek(const Slice& k) override {
      iter_ = map_->lower_bound(k.ToString());
    }
    void Next() override { ++iter_; }
    void Prev() override { --iter_; }
    Slice key() const override { return iter_->first; }
    Slice value() const override { return iter_->second; }
    Status status() const override { return Status::OK(); }

   private:
    const KVMap* const map_;
    const bool owned_;  // Do we own map_
    KVMap::const_iterator iter_;
  };
  const Options options_;
  KVMap map_;
};
2120
CompareIterators(int step,DB * model,DB * db,const Snapshot * model_snap,const Snapshot * db_snap)2121 static bool CompareIterators(int step, DB* model, DB* db,
2122 const Snapshot* model_snap,
2123 const Snapshot* db_snap) {
2124 ReadOptions options;
2125 options.snapshot = model_snap;
2126 Iterator* miter = model->NewIterator(options);
2127 options.snapshot = db_snap;
2128 Iterator* dbiter = db->NewIterator(options);
2129 bool ok = true;
2130 int count = 0;
2131 for (miter->SeekToFirst(), dbiter->SeekToFirst();
2132 ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
2133 count++;
2134 if (miter->key().compare(dbiter->key()) != 0) {
2135 fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
2136 EscapeString(miter->key()).c_str(),
2137 EscapeString(dbiter->key()).c_str());
2138 ok = false;
2139 break;
2140 }
2141
2142 if (miter->value().compare(dbiter->value()) != 0) {
2143 fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
2144 step, EscapeString(miter->key()).c_str(),
2145 EscapeString(miter->value()).c_str(),
2146 EscapeString(miter->value()).c_str());
2147 ok = false;
2148 }
2149 }
2150
2151 if (ok) {
2152 if (miter->Valid() != dbiter->Valid()) {
2153 fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
2154 step, miter->Valid(), dbiter->Valid());
2155 ok = false;
2156 }
2157 }
2158 fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
2159 delete miter;
2160 delete dbiter;
2161 return ok;
2162 }
2163
// Randomized end-to-end test: applies the same random sequence of
// Put/Delete/WriteBatch operations to both the real DB and the in-memory
// ModelDB, and every 100 steps verifies via CompareIterators that the live
// view, the previous round's snapshot view, and the state after a Reopen()
// all agree exactly.
TEST(DBTest, Randomized) {
  Random rnd(test::RandomSeed());
  do {
    ModelDB model(CurrentOptions());
    const int N = 10000;
    const Snapshot* model_snap = nullptr;
    const Snapshot* db_snap = nullptr;
    std::string k, v;
    for (int step = 0; step < N; step++) {
      if (step % 100 == 0) {
        fprintf(stderr, "Step %d of %d\n", step, N);
      }
      // TODO(sanjay): Test Get() works
      int p = rnd.Uniform(100);
      if (p < 45) {  // Put
        k = RandomKey(&rnd);
        // Occasionally write a large value to exercise bigger records.
        v = RandomString(
            &rnd, rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8));
        ASSERT_OK(model.Put(WriteOptions(), k, v));
        ASSERT_OK(db_->Put(WriteOptions(), k, v));

      } else if (p < 90) {  // Delete
        k = RandomKey(&rnd);
        ASSERT_OK(model.Delete(WriteOptions(), k));
        ASSERT_OK(db_->Delete(WriteOptions(), k));

      } else {  // Multi-element batch
        WriteBatch b;
        const int num = rnd.Uniform(8);
        for (int i = 0; i < num; i++) {
          if (i == 0 || !rnd.OneIn(10)) {
            k = RandomKey(&rnd);
          } else {
            // Periodically re-use the same key from the previous iter, so
            // we have multiple entries in the write batch for the same key
          }
          if (rnd.OneIn(2)) {
            v = RandomString(&rnd, rnd.Uniform(10));
            b.Put(k, v);
          } else {
            b.Delete(k);
          }
        }
        ASSERT_OK(model.Write(WriteOptions(), &b));
        ASSERT_OK(db_->Write(WriteOptions(), &b));
      }

      if ((step % 100) == 0) {
        // Compare the live views, then the views pinned to the snapshots
        // taken on the previous round (nullptr on the first round).
        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
        // Save a snapshot from each DB this time that we'll use next
        // time we compare things, to make sure the current state is
        // preserved with the snapshot
        if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
        if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);

        // Also verify the state survives closing and reopening the DB.
        Reopen();
        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));

        model_snap = model.GetSnapshot();
        db_snap = db_->GetSnapshot();
      }
    }
    if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
    if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
  } while (ChangeOptions());  // Repeat under each options configuration.
}
2231
// Formats `num` as a fixed-width 16-digit zero-padded decimal key, so that
// lexicographic key order matches numeric order.
std::string MakeKey(unsigned int num) {
  char key[30];
  int written = snprintf(key, sizeof(key), "%016u", num);
  return std::string(key, written);
}
2237
BM_LogAndApply(int iters,int num_base_files)2238 void BM_LogAndApply(int iters, int num_base_files) {
2239 std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
2240 DestroyDB(dbname, Options());
2241
2242 DB* db = nullptr;
2243 Options opts;
2244 opts.create_if_missing = true;
2245 Status s = DB::Open(opts, dbname, &db);
2246 ASSERT_OK(s);
2247 ASSERT_TRUE(db != nullptr);
2248
2249 delete db;
2250 db = nullptr;
2251
2252 Env* env = Env::Default();
2253
2254 port::Mutex mu;
2255 MutexLock l(&mu);
2256
2257 InternalKeyComparator cmp(BytewiseComparator());
2258 Options options;
2259 VersionSet vset(dbname, &options, nullptr, &cmp);
2260 bool save_manifest;
2261 ASSERT_OK(vset.Recover(&save_manifest));
2262 VersionEdit vbase;
2263 uint64_t fnum = 1;
2264 for (int i = 0; i < num_base_files; i++) {
2265 InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
2266 InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
2267 vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
2268 }
2269 ASSERT_OK(vset.LogAndApply(&vbase, &mu));
2270
2271 uint64_t start_micros = env->NowMicros();
2272
2273 for (int i = 0; i < iters; i++) {
2274 VersionEdit vedit;
2275 vedit.DeleteFile(2, fnum);
2276 InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
2277 InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
2278 vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
2279 vset.LogAndApply(&vedit, &mu);
2280 }
2281 uint64_t stop_micros = env->NowMicros();
2282 unsigned int us = stop_micros - start_micros;
2283 char buf[16];
2284 snprintf(buf, sizeof(buf), "%d", num_base_files);
2285 fprintf(stderr,
2286 "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n", buf,
2287 iters, us, ((float)us) / iters);
2288 }
2289
2290 } // namespace leveldb
2291
main(int argc,char ** argv)2292 int main(int argc, char** argv) {
2293 if (argc > 1 && std::string(argv[1]) == "--benchmark") {
2294 leveldb::BM_LogAndApply(1000, 1);
2295 leveldb::BM_LogAndApply(1000, 100);
2296 leveldb::BM_LogAndApply(1000, 10000);
2297 leveldb::BM_LogAndApply(100, 100000);
2298 return 0;
2299 }
2300
2301 return leveldb::test::RunAllTests();
2302 }
2303