1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 
5 #include "leveldb/db.h"
6 
7 #include <atomic>
8 #include <string>
9 
10 #include "db/db_impl.h"
11 #include "db/filename.h"
12 #include "db/version_set.h"
13 #include "db/write_batch_internal.h"
14 #include "leveldb/cache.h"
15 #include "leveldb/env.h"
16 #include "leveldb/filter_policy.h"
17 #include "leveldb/table.h"
18 #include "port/port.h"
19 #include "port/thread_annotations.h"
20 #include "util/hash.h"
21 #include "util/logging.h"
22 #include "util/mutexlock.h"
23 #include "util/testharness.h"
24 #include "util/testutil.h"
25 
26 namespace leveldb {
27 
RandomString(Random * rnd,int len)28 static std::string RandomString(Random* rnd, int len) {
29   std::string r;
30   test::RandomString(rnd, len, &r);
31   return r;
32 }
33 
RandomKey(Random * rnd)34 static std::string RandomKey(Random* rnd) {
35   int len =
36       (rnd->OneIn(3) ? 1  // Short sometimes to encourage collisions
37                      : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
38   return test::RandomKey(rnd, len);
39 }
40 
41 namespace {
42 class AtomicCounter {
43  public:
AtomicCounter()44   AtomicCounter() : count_(0) {}
Increment()45   void Increment() { IncrementBy(1); }
IncrementBy(int count)46   void IncrementBy(int count) LOCKS_EXCLUDED(mu_) {
47     MutexLock l(&mu_);
48     count_ += count;
49   }
Read()50   int Read() LOCKS_EXCLUDED(mu_) {
51     MutexLock l(&mu_);
52     return count_;
53   }
Reset()54   void Reset() LOCKS_EXCLUDED(mu_) {
55     MutexLock l(&mu_);
56     count_ = 0;
57   }
58 
59  private:
60   port::Mutex mu_;
61   int count_ GUARDED_BY(mu_);
62 };
63 
DelayMilliseconds(int millis)64 void DelayMilliseconds(int millis) {
65   Env::Default()->SleepForMicroseconds(millis * 1000);
66 }
67 }  // namespace
68 
69 // Test Env to override default Env behavior for testing.
70 class TestEnv : public EnvWrapper {
71  public:
TestEnv(Env * base)72   explicit TestEnv(Env* base) : EnvWrapper(base), ignore_dot_files_(false) {}
73 
SetIgnoreDotFiles(bool ignored)74   void SetIgnoreDotFiles(bool ignored) { ignore_dot_files_ = ignored; }
75 
GetChildren(const std::string & dir,std::vector<std::string> * result)76   Status GetChildren(const std::string& dir,
77                      std::vector<std::string>* result) override {
78     Status s = target()->GetChildren(dir, result);
79     if (!s.ok() || !ignore_dot_files_) {
80       return s;
81     }
82 
83     std::vector<std::string>::iterator it = result->begin();
84     while (it != result->end()) {
85       if ((*it == ".") || (*it == "..")) {
86         it = result->erase(it);
87       } else {
88         ++it;
89       }
90     }
91 
92     return s;
93   }
94 
95  private:
96   bool ignore_dot_files_;
97 };
98 
99 // Special Env used to delay background operations.
100 class SpecialEnv : public EnvWrapper {
101  public:
102   // sstable/log Sync() calls are blocked while this pointer is non-null.
103   std::atomic<bool> delay_data_sync_;
104 
105   // sstable/log Sync() calls return an error.
106   std::atomic<bool> data_sync_error_;
107 
108   // Simulate no-space errors while this pointer is non-null.
109   std::atomic<bool> no_space_;
110 
111   // Simulate non-writable file system while this pointer is non-null.
112   std::atomic<bool> non_writable_;
113 
114   // Force sync of manifest files to fail while this pointer is non-null.
115   std::atomic<bool> manifest_sync_error_;
116 
117   // Force write to manifest files to fail while this pointer is non-null.
118   std::atomic<bool> manifest_write_error_;
119 
120   bool count_random_reads_;
121   AtomicCounter random_read_counter_;
122 
SpecialEnv(Env * base)123   explicit SpecialEnv(Env* base)
124       : EnvWrapper(base),
125         delay_data_sync_(false),
126         data_sync_error_(false),
127         no_space_(false),
128         non_writable_(false),
129         manifest_sync_error_(false),
130         manifest_write_error_(false),
131         count_random_reads_(false) {}
132 
NewWritableFile(const std::string & f,WritableFile ** r)133   Status NewWritableFile(const std::string& f, WritableFile** r) {
134     class DataFile : public WritableFile {
135      private:
136       SpecialEnv* const env_;
137       WritableFile* const base_;
138 
139      public:
140       DataFile(SpecialEnv* env, WritableFile* base) : env_(env), base_(base) {}
141       ~DataFile() { delete base_; }
142       Status Append(const Slice& data) {
143         if (env_->no_space_.load(std::memory_order_acquire)) {
144           // Drop writes on the floor
145           return Status::OK();
146         } else {
147           return base_->Append(data);
148         }
149       }
150       Status Close() { return base_->Close(); }
151       Status Flush() { return base_->Flush(); }
152       Status Sync() {
153         if (env_->data_sync_error_.load(std::memory_order_acquire)) {
154           return Status::IOError("simulated data sync error");
155         }
156         while (env_->delay_data_sync_.load(std::memory_order_acquire)) {
157           DelayMilliseconds(100);
158         }
159         return base_->Sync();
160       }
161       std::string GetName() const override { return ""; }
162     };
163     class ManifestFile : public WritableFile {
164      private:
165       SpecialEnv* env_;
166       WritableFile* base_;
167 
168      public:
169       ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) {}
170       ~ManifestFile() { delete base_; }
171       Status Append(const Slice& data) {
172         if (env_->manifest_write_error_.load(std::memory_order_acquire)) {
173           return Status::IOError("simulated writer error");
174         } else {
175           return base_->Append(data);
176         }
177       }
178       Status Close() { return base_->Close(); }
179       Status Flush() { return base_->Flush(); }
180       Status Sync() {
181         if (env_->manifest_sync_error_.load(std::memory_order_acquire)) {
182           return Status::IOError("simulated sync error");
183         } else {
184           return base_->Sync();
185         }
186       }
187       std::string GetName() const override { return ""; }
188     };
189 
190     if (non_writable_.load(std::memory_order_acquire)) {
191       return Status::IOError("simulated write error");
192     }
193 
194     Status s = target()->NewWritableFile(f, r);
195     if (s.ok()) {
196       if (strstr(f.c_str(), ".ldb") != nullptr ||
197           strstr(f.c_str(), ".log") != nullptr) {
198         *r = new DataFile(this, *r);
199       } else if (strstr(f.c_str(), "MANIFEST") != nullptr) {
200         *r = new ManifestFile(this, *r);
201       }
202     }
203     return s;
204   }
205 
NewRandomAccessFile(const std::string & f,RandomAccessFile ** r)206   Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
207     class CountingFile : public RandomAccessFile {
208      private:
209       RandomAccessFile* target_;
210       AtomicCounter* counter_;
211 
212      public:
213       CountingFile(RandomAccessFile* target, AtomicCounter* counter)
214           : target_(target), counter_(counter) {}
215       ~CountingFile() override { delete target_; }
216       Status Read(uint64_t offset, size_t n, Slice* result,
217                   char* scratch) const override {
218         counter_->Increment();
219         return target_->Read(offset, n, result, scratch);
220       }
221       std::string GetName() const override { return ""; }
222     };
223 
224     Status s = target()->NewRandomAccessFile(f, r);
225     if (s.ok() && count_random_reads_) {
226       *r = new CountingFile(*r, &random_read_counter_);
227     }
228     return s;
229   }
230 };
231 
232 class DBTest {
233  public:
234   std::string dbname_;
235   SpecialEnv* env_;
236   DB* db_;
237 
238   Options last_options_;
239 
DBTest()240   DBTest() : env_(new SpecialEnv(Env::Default())), option_config_(kDefault) {
241     filter_policy_ = NewBloomFilterPolicy(10);
242     dbname_ = test::TmpDir() + "/db_test";
243     DestroyDB(dbname_, Options());
244     db_ = nullptr;
245     Reopen();
246   }
247 
~DBTest()248   ~DBTest() {
249     delete db_;
250     DestroyDB(dbname_, Options());
251     delete env_;
252     delete filter_policy_;
253   }
254 
255   // Switch to a fresh database with the next option configuration to
256   // test.  Return false if there are no more configurations to test.
ChangeOptions()257   bool ChangeOptions() {
258     option_config_++;
259     if (option_config_ >= kEnd) {
260       return false;
261     } else {
262       DestroyAndReopen();
263       return true;
264     }
265   }
266 
267   // Return the current option configuration.
CurrentOptions()268   Options CurrentOptions() {
269     Options options;
270     options.reuse_logs = false;
271     switch (option_config_) {
272       case kReuse:
273         options.reuse_logs = true;
274         break;
275       case kFilter:
276         options.filter_policy = filter_policy_;
277         break;
278       case kUncompressed:
279         options.compression = kNoCompression;
280         break;
281       default:
282         break;
283     }
284     return options;
285   }
286 
dbfull()287   DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); }
288 
Reopen(Options * options=nullptr)289   void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); }
290 
Close()291   void Close() {
292     delete db_;
293     db_ = nullptr;
294   }
295 
DestroyAndReopen(Options * options=nullptr)296   void DestroyAndReopen(Options* options = nullptr) {
297     delete db_;
298     db_ = nullptr;
299     DestroyDB(dbname_, Options());
300     ASSERT_OK(TryReopen(options));
301   }
302 
TryReopen(Options * options)303   Status TryReopen(Options* options) {
304     delete db_;
305     db_ = nullptr;
306     Options opts;
307     if (options != nullptr) {
308       opts = *options;
309     } else {
310       opts = CurrentOptions();
311       opts.create_if_missing = true;
312     }
313     last_options_ = opts;
314 
315     return DB::Open(opts, dbname_, &db_);
316   }
317 
Put(const std::string & k,const std::string & v)318   Status Put(const std::string& k, const std::string& v) {
319     return db_->Put(WriteOptions(), k, v);
320   }
321 
Delete(const std::string & k)322   Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); }
323 
Get(const std::string & k,const Snapshot * snapshot=nullptr)324   std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
325     ReadOptions options;
326     options.snapshot = snapshot;
327     std::string result;
328     Status s = db_->Get(options, k, &result);
329     if (s.IsNotFound()) {
330       result = "NOT_FOUND";
331     } else if (!s.ok()) {
332       result = s.ToString();
333     }
334     return result;
335   }
336 
337   // Return a string that contains all key,value pairs in order,
338   // formatted like "(k1->v1)(k2->v2)".
Contents()339   std::string Contents() {
340     std::vector<std::string> forward;
341     std::string result;
342     Iterator* iter = db_->NewIterator(ReadOptions());
343     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
344       std::string s = IterStatus(iter);
345       result.push_back('(');
346       result.append(s);
347       result.push_back(')');
348       forward.push_back(s);
349     }
350 
351     // Check reverse iteration results are the reverse of forward results
352     size_t matched = 0;
353     for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
354       ASSERT_LT(matched, forward.size());
355       ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
356       matched++;
357     }
358     ASSERT_EQ(matched, forward.size());
359 
360     delete iter;
361     return result;
362   }
363 
AllEntriesFor(const Slice & user_key)364   std::string AllEntriesFor(const Slice& user_key) {
365     Iterator* iter = dbfull()->TEST_NewInternalIterator();
366     InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
367     iter->Seek(target.Encode());
368     std::string result;
369     if (!iter->status().ok()) {
370       result = iter->status().ToString();
371     } else {
372       result = "[ ";
373       bool first = true;
374       while (iter->Valid()) {
375         ParsedInternalKey ikey;
376         if (!ParseInternalKey(iter->key(), &ikey)) {
377           result += "CORRUPTED";
378         } else {
379           if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
380             break;
381           }
382           if (!first) {
383             result += ", ";
384           }
385           first = false;
386           switch (ikey.type) {
387             case kTypeValue:
388               result += iter->value().ToString();
389               break;
390             case kTypeDeletion:
391               result += "DEL";
392               break;
393           }
394         }
395         iter->Next();
396       }
397       if (!first) {
398         result += " ";
399       }
400       result += "]";
401     }
402     delete iter;
403     return result;
404   }
405 
NumTableFilesAtLevel(int level)406   int NumTableFilesAtLevel(int level) {
407     std::string property;
408     ASSERT_TRUE(db_->GetProperty(
409         "leveldb.num-files-at-level" + NumberToString(level), &property));
410     return std::stoi(property);
411   }
412 
TotalTableFiles()413   int TotalTableFiles() {
414     int result = 0;
415     for (int level = 0; level < config::kNumLevels; level++) {
416       result += NumTableFilesAtLevel(level);
417     }
418     return result;
419   }
420 
421   // Return spread of files per level
FilesPerLevel()422   std::string FilesPerLevel() {
423     std::string result;
424     int last_non_zero_offset = 0;
425     for (int level = 0; level < config::kNumLevels; level++) {
426       int f = NumTableFilesAtLevel(level);
427       char buf[100];
428       snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
429       result += buf;
430       if (f > 0) {
431         last_non_zero_offset = result.size();
432       }
433     }
434     result.resize(last_non_zero_offset);
435     return result;
436   }
437 
CountFiles()438   int CountFiles() {
439     std::vector<std::string> files;
440     env_->GetChildren(dbname_, &files);
441     return static_cast<int>(files.size());
442   }
443 
Size(const Slice & start,const Slice & limit)444   uint64_t Size(const Slice& start, const Slice& limit) {
445     Range r(start, limit);
446     uint64_t size;
447     db_->GetApproximateSizes(&r, 1, &size);
448     return size;
449   }
450 
Compact(const Slice & start,const Slice & limit)451   void Compact(const Slice& start, const Slice& limit) {
452     db_->CompactRange(&start, &limit);
453   }
454 
455   // Do n memtable compactions, each of which produces an sstable
456   // covering the range [small_key,large_key].
MakeTables(int n,const std::string & small_key,const std::string & large_key)457   void MakeTables(int n, const std::string& small_key,
458                   const std::string& large_key) {
459     for (int i = 0; i < n; i++) {
460       Put(small_key, "begin");
461       Put(large_key, "end");
462       dbfull()->TEST_CompactMemTable();
463     }
464   }
465 
466   // Prevent pushing of new sstables into deeper levels by adding
467   // tables that cover a specified range to all levels.
FillLevels(const std::string & smallest,const std::string & largest)468   void FillLevels(const std::string& smallest, const std::string& largest) {
469     MakeTables(config::kNumLevels, smallest, largest);
470   }
471 
DumpFileCounts(const char * label)472   void DumpFileCounts(const char* label) {
473     fprintf(stderr, "---\n%s:\n", label);
474     fprintf(
475         stderr, "maxoverlap: %lld\n",
476         static_cast<long long>(dbfull()->TEST_MaxNextLevelOverlappingBytes()));
477     for (int level = 0; level < config::kNumLevels; level++) {
478       int num = NumTableFilesAtLevel(level);
479       if (num > 0) {
480         fprintf(stderr, "  level %3d : %d files\n", level, num);
481       }
482     }
483   }
484 
DumpSSTableList()485   std::string DumpSSTableList() {
486     std::string property;
487     db_->GetProperty("leveldb.sstables", &property);
488     return property;
489   }
490 
IterStatus(Iterator * iter)491   std::string IterStatus(Iterator* iter) {
492     std::string result;
493     if (iter->Valid()) {
494       result = iter->key().ToString() + "->" + iter->value().ToString();
495     } else {
496       result = "(invalid)";
497     }
498     return result;
499   }
500 
DeleteAnSSTFile()501   bool DeleteAnSSTFile() {
502     std::vector<std::string> filenames;
503     ASSERT_OK(env_->GetChildren(dbname_, &filenames));
504     uint64_t number;
505     FileType type;
506     for (size_t i = 0; i < filenames.size(); i++) {
507       if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
508         ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
509         return true;
510       }
511     }
512     return false;
513   }
514 
515   // Returns number of files renamed.
RenameLDBToSST()516   int RenameLDBToSST() {
517     std::vector<std::string> filenames;
518     ASSERT_OK(env_->GetChildren(dbname_, &filenames));
519     uint64_t number;
520     FileType type;
521     int files_renamed = 0;
522     for (size_t i = 0; i < filenames.size(); i++) {
523       if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
524         const std::string from = TableFileName(dbname_, number);
525         const std::string to = SSTTableFileName(dbname_, number);
526         ASSERT_OK(env_->RenameFile(from, to));
527         files_renamed++;
528       }
529     }
530     return files_renamed;
531   }
532 
533  private:
534   // Sequence of option configurations to try
535   enum OptionConfig { kDefault, kReuse, kFilter, kUncompressed, kEnd };
536 
537   const FilterPolicy* filter_policy_;
538   int option_config_;
539 };
540 
TEST(DBTest,Empty)541 TEST(DBTest, Empty) {
542   do {
543     ASSERT_TRUE(db_ != nullptr);
544     ASSERT_EQ("NOT_FOUND", Get("foo"));
545   } while (ChangeOptions());
546 }
547 
TEST(DBTest,EmptyKey)548 TEST(DBTest, EmptyKey) {
549   do {
550     ASSERT_OK(Put("", "v1"));
551     ASSERT_EQ("v1", Get(""));
552     ASSERT_OK(Put("", "v2"));
553     ASSERT_EQ("v2", Get(""));
554   } while (ChangeOptions());
555 }
556 
TEST(DBTest,EmptyValue)557 TEST(DBTest, EmptyValue) {
558   do {
559     ASSERT_OK(Put("key", "v1"));
560     ASSERT_EQ("v1", Get("key"));
561     ASSERT_OK(Put("key", ""));
562     ASSERT_EQ("", Get("key"));
563     ASSERT_OK(Put("key", "v2"));
564     ASSERT_EQ("v2", Get("key"));
565   } while (ChangeOptions());
566 }
567 
TEST(DBTest,ReadWrite)568 TEST(DBTest, ReadWrite) {
569   do {
570     ASSERT_OK(Put("foo", "v1"));
571     ASSERT_EQ("v1", Get("foo"));
572     ASSERT_OK(Put("bar", "v2"));
573     ASSERT_OK(Put("foo", "v3"));
574     ASSERT_EQ("v3", Get("foo"));
575     ASSERT_EQ("v2", Get("bar"));
576   } while (ChangeOptions());
577 }
578 
TEST(DBTest,PutDeleteGet)579 TEST(DBTest, PutDeleteGet) {
580   do {
581     ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
582     ASSERT_EQ("v1", Get("foo"));
583     ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
584     ASSERT_EQ("v2", Get("foo"));
585     ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
586     ASSERT_EQ("NOT_FOUND", Get("foo"));
587   } while (ChangeOptions());
588 }
589 
TEST(DBTest,GetFromImmutableLayer)590 TEST(DBTest, GetFromImmutableLayer) {
591   do {
592     Options options = CurrentOptions();
593     options.env = env_;
594     options.write_buffer_size = 100000;  // Small write buffer
595     Reopen(&options);
596 
597     ASSERT_OK(Put("foo", "v1"));
598     ASSERT_EQ("v1", Get("foo"));
599 
600     // Block sync calls.
601     env_->delay_data_sync_.store(true, std::memory_order_release);
602     Put("k1", std::string(100000, 'x'));  // Fill memtable.
603     Put("k2", std::string(100000, 'y'));  // Trigger compaction.
604     ASSERT_EQ("v1", Get("foo"));
605     // Release sync calls.
606     env_->delay_data_sync_.store(false, std::memory_order_release);
607   } while (ChangeOptions());
608 }
609 
TEST(DBTest,GetFromVersions)610 TEST(DBTest, GetFromVersions) {
611   do {
612     ASSERT_OK(Put("foo", "v1"));
613     dbfull()->TEST_CompactMemTable();
614     ASSERT_EQ("v1", Get("foo"));
615   } while (ChangeOptions());
616 }
617 
TEST(DBTest,GetMemUsage)618 TEST(DBTest, GetMemUsage) {
619   do {
620     ASSERT_OK(Put("foo", "v1"));
621     std::string val;
622     ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
623     int mem_usage = std::stoi(val);
624     ASSERT_GT(mem_usage, 0);
625     ASSERT_LT(mem_usage, 5 * 1024 * 1024);
626   } while (ChangeOptions());
627 }
628 
TEST(DBTest,GetSnapshot)629 TEST(DBTest, GetSnapshot) {
630   do {
631     // Try with both a short key and a long key
632     for (int i = 0; i < 2; i++) {
633       std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
634       ASSERT_OK(Put(key, "v1"));
635       const Snapshot* s1 = db_->GetSnapshot();
636       ASSERT_OK(Put(key, "v2"));
637       ASSERT_EQ("v2", Get(key));
638       ASSERT_EQ("v1", Get(key, s1));
639       dbfull()->TEST_CompactMemTable();
640       ASSERT_EQ("v2", Get(key));
641       ASSERT_EQ("v1", Get(key, s1));
642       db_->ReleaseSnapshot(s1);
643     }
644   } while (ChangeOptions());
645 }
646 
TEST(DBTest,GetIdenticalSnapshots)647 TEST(DBTest, GetIdenticalSnapshots) {
648   do {
649     // Try with both a short key and a long key
650     for (int i = 0; i < 2; i++) {
651       std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
652       ASSERT_OK(Put(key, "v1"));
653       const Snapshot* s1 = db_->GetSnapshot();
654       const Snapshot* s2 = db_->GetSnapshot();
655       const Snapshot* s3 = db_->GetSnapshot();
656       ASSERT_OK(Put(key, "v2"));
657       ASSERT_EQ("v2", Get(key));
658       ASSERT_EQ("v1", Get(key, s1));
659       ASSERT_EQ("v1", Get(key, s2));
660       ASSERT_EQ("v1", Get(key, s3));
661       db_->ReleaseSnapshot(s1);
662       dbfull()->TEST_CompactMemTable();
663       ASSERT_EQ("v2", Get(key));
664       ASSERT_EQ("v1", Get(key, s2));
665       db_->ReleaseSnapshot(s2);
666       ASSERT_EQ("v1", Get(key, s3));
667       db_->ReleaseSnapshot(s3);
668     }
669   } while (ChangeOptions());
670 }
671 
TEST(DBTest,IterateOverEmptySnapshot)672 TEST(DBTest, IterateOverEmptySnapshot) {
673   do {
674     const Snapshot* snapshot = db_->GetSnapshot();
675     ReadOptions read_options;
676     read_options.snapshot = snapshot;
677     ASSERT_OK(Put("foo", "v1"));
678     ASSERT_OK(Put("foo", "v2"));
679 
680     Iterator* iterator1 = db_->NewIterator(read_options);
681     iterator1->SeekToFirst();
682     ASSERT_TRUE(!iterator1->Valid());
683     delete iterator1;
684 
685     dbfull()->TEST_CompactMemTable();
686 
687     Iterator* iterator2 = db_->NewIterator(read_options);
688     iterator2->SeekToFirst();
689     ASSERT_TRUE(!iterator2->Valid());
690     delete iterator2;
691 
692     db_->ReleaseSnapshot(snapshot);
693   } while (ChangeOptions());
694 }
695 
TEST(DBTest,GetLevel0Ordering)696 TEST(DBTest, GetLevel0Ordering) {
697   do {
698     // Check that we process level-0 files in correct order.  The code
699     // below generates two level-0 files where the earlier one comes
700     // before the later one in the level-0 file list since the earlier
701     // one has a smaller "smallest" key.
702     ASSERT_OK(Put("bar", "b"));
703     ASSERT_OK(Put("foo", "v1"));
704     dbfull()->TEST_CompactMemTable();
705     ASSERT_OK(Put("foo", "v2"));
706     dbfull()->TEST_CompactMemTable();
707     ASSERT_EQ("v2", Get("foo"));
708   } while (ChangeOptions());
709 }
710 
TEST(DBTest,GetOrderedByLevels)711 TEST(DBTest, GetOrderedByLevels) {
712   do {
713     ASSERT_OK(Put("foo", "v1"));
714     Compact("a", "z");
715     ASSERT_EQ("v1", Get("foo"));
716     ASSERT_OK(Put("foo", "v2"));
717     ASSERT_EQ("v2", Get("foo"));
718     dbfull()->TEST_CompactMemTable();
719     ASSERT_EQ("v2", Get("foo"));
720   } while (ChangeOptions());
721 }
722 
TEST(DBTest,GetPicksCorrectFile)723 TEST(DBTest, GetPicksCorrectFile) {
724   do {
725     // Arrange to have multiple files in a non-level-0 level.
726     ASSERT_OK(Put("a", "va"));
727     Compact("a", "b");
728     ASSERT_OK(Put("x", "vx"));
729     Compact("x", "y");
730     ASSERT_OK(Put("f", "vf"));
731     Compact("f", "g");
732     ASSERT_EQ("va", Get("a"));
733     ASSERT_EQ("vf", Get("f"));
734     ASSERT_EQ("vx", Get("x"));
735   } while (ChangeOptions());
736 }
737 
TEST(DBTest,GetEncountersEmptyLevel)738 TEST(DBTest, GetEncountersEmptyLevel) {
739   do {
740     // Arrange for the following to happen:
741     //   * sstable A in level 0
742     //   * nothing in level 1
743     //   * sstable B in level 2
744     // Then do enough Get() calls to arrange for an automatic compaction
745     // of sstable A.  A bug would cause the compaction to be marked as
746     // occurring at level 1 (instead of the correct level 0).
747 
748     // Step 1: First place sstables in levels 0 and 2
749     int compaction_count = 0;
750     while (NumTableFilesAtLevel(0) == 0 || NumTableFilesAtLevel(2) == 0) {
751       ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
752       compaction_count++;
753       Put("a", "begin");
754       Put("z", "end");
755       dbfull()->TEST_CompactMemTable();
756     }
757 
758     // Step 2: clear level 1 if necessary.
759     dbfull()->TEST_CompactRange(1, nullptr, nullptr);
760     ASSERT_EQ(NumTableFilesAtLevel(0), 1);
761     ASSERT_EQ(NumTableFilesAtLevel(1), 0);
762     ASSERT_EQ(NumTableFilesAtLevel(2), 1);
763 
764     // Step 3: read a bunch of times
765     for (int i = 0; i < 1000; i++) {
766       ASSERT_EQ("NOT_FOUND", Get("missing"));
767     }
768 
769     // Step 4: Wait for compaction to finish
770     DelayMilliseconds(1000);
771 
772     ASSERT_EQ(NumTableFilesAtLevel(0), 0);
773   } while (ChangeOptions());
774 }
775 
TEST(DBTest,IterEmpty)776 TEST(DBTest, IterEmpty) {
777   Iterator* iter = db_->NewIterator(ReadOptions());
778 
779   iter->SeekToFirst();
780   ASSERT_EQ(IterStatus(iter), "(invalid)");
781 
782   iter->SeekToLast();
783   ASSERT_EQ(IterStatus(iter), "(invalid)");
784 
785   iter->Seek("foo");
786   ASSERT_EQ(IterStatus(iter), "(invalid)");
787 
788   delete iter;
789 }
790 
TEST(DBTest,IterSingle)791 TEST(DBTest, IterSingle) {
792   ASSERT_OK(Put("a", "va"));
793   Iterator* iter = db_->NewIterator(ReadOptions());
794 
795   iter->SeekToFirst();
796   ASSERT_EQ(IterStatus(iter), "a->va");
797   iter->Next();
798   ASSERT_EQ(IterStatus(iter), "(invalid)");
799   iter->SeekToFirst();
800   ASSERT_EQ(IterStatus(iter), "a->va");
801   iter->Prev();
802   ASSERT_EQ(IterStatus(iter), "(invalid)");
803 
804   iter->SeekToLast();
805   ASSERT_EQ(IterStatus(iter), "a->va");
806   iter->Next();
807   ASSERT_EQ(IterStatus(iter), "(invalid)");
808   iter->SeekToLast();
809   ASSERT_EQ(IterStatus(iter), "a->va");
810   iter->Prev();
811   ASSERT_EQ(IterStatus(iter), "(invalid)");
812 
813   iter->Seek("");
814   ASSERT_EQ(IterStatus(iter), "a->va");
815   iter->Next();
816   ASSERT_EQ(IterStatus(iter), "(invalid)");
817 
818   iter->Seek("a");
819   ASSERT_EQ(IterStatus(iter), "a->va");
820   iter->Next();
821   ASSERT_EQ(IterStatus(iter), "(invalid)");
822 
823   iter->Seek("b");
824   ASSERT_EQ(IterStatus(iter), "(invalid)");
825 
826   delete iter;
827 }
828 
TEST(DBTest,IterMulti)829 TEST(DBTest, IterMulti) {
830   ASSERT_OK(Put("a", "va"));
831   ASSERT_OK(Put("b", "vb"));
832   ASSERT_OK(Put("c", "vc"));
833   Iterator* iter = db_->NewIterator(ReadOptions());
834 
835   iter->SeekToFirst();
836   ASSERT_EQ(IterStatus(iter), "a->va");
837   iter->Next();
838   ASSERT_EQ(IterStatus(iter), "b->vb");
839   iter->Next();
840   ASSERT_EQ(IterStatus(iter), "c->vc");
841   iter->Next();
842   ASSERT_EQ(IterStatus(iter), "(invalid)");
843   iter->SeekToFirst();
844   ASSERT_EQ(IterStatus(iter), "a->va");
845   iter->Prev();
846   ASSERT_EQ(IterStatus(iter), "(invalid)");
847 
848   iter->SeekToLast();
849   ASSERT_EQ(IterStatus(iter), "c->vc");
850   iter->Prev();
851   ASSERT_EQ(IterStatus(iter), "b->vb");
852   iter->Prev();
853   ASSERT_EQ(IterStatus(iter), "a->va");
854   iter->Prev();
855   ASSERT_EQ(IterStatus(iter), "(invalid)");
856   iter->SeekToLast();
857   ASSERT_EQ(IterStatus(iter), "c->vc");
858   iter->Next();
859   ASSERT_EQ(IterStatus(iter), "(invalid)");
860 
861   iter->Seek("");
862   ASSERT_EQ(IterStatus(iter), "a->va");
863   iter->Seek("a");
864   ASSERT_EQ(IterStatus(iter), "a->va");
865   iter->Seek("ax");
866   ASSERT_EQ(IterStatus(iter), "b->vb");
867   iter->Seek("b");
868   ASSERT_EQ(IterStatus(iter), "b->vb");
869   iter->Seek("z");
870   ASSERT_EQ(IterStatus(iter), "(invalid)");
871 
872   // Switch from reverse to forward
873   iter->SeekToLast();
874   iter->Prev();
875   iter->Prev();
876   iter->Next();
877   ASSERT_EQ(IterStatus(iter), "b->vb");
878 
879   // Switch from forward to reverse
880   iter->SeekToFirst();
881   iter->Next();
882   iter->Next();
883   iter->Prev();
884   ASSERT_EQ(IterStatus(iter), "b->vb");
885 
886   // Make sure iter stays at snapshot
887   ASSERT_OK(Put("a", "va2"));
888   ASSERT_OK(Put("a2", "va3"));
889   ASSERT_OK(Put("b", "vb2"));
890   ASSERT_OK(Put("c", "vc2"));
891   ASSERT_OK(Delete("b"));
892   iter->SeekToFirst();
893   ASSERT_EQ(IterStatus(iter), "a->va");
894   iter->Next();
895   ASSERT_EQ(IterStatus(iter), "b->vb");
896   iter->Next();
897   ASSERT_EQ(IterStatus(iter), "c->vc");
898   iter->Next();
899   ASSERT_EQ(IterStatus(iter), "(invalid)");
900   iter->SeekToLast();
901   ASSERT_EQ(IterStatus(iter), "c->vc");
902   iter->Prev();
903   ASSERT_EQ(IterStatus(iter), "b->vb");
904   iter->Prev();
905   ASSERT_EQ(IterStatus(iter), "a->va");
906   iter->Prev();
907   ASSERT_EQ(IterStatus(iter), "(invalid)");
908 
909   delete iter;
910 }
911 
TEST(DBTest,IterSmallAndLargeMix)912 TEST(DBTest, IterSmallAndLargeMix) {
913   ASSERT_OK(Put("a", "va"));
914   ASSERT_OK(Put("b", std::string(100000, 'b')));
915   ASSERT_OK(Put("c", "vc"));
916   ASSERT_OK(Put("d", std::string(100000, 'd')));
917   ASSERT_OK(Put("e", std::string(100000, 'e')));
918 
919   Iterator* iter = db_->NewIterator(ReadOptions());
920 
921   iter->SeekToFirst();
922   ASSERT_EQ(IterStatus(iter), "a->va");
923   iter->Next();
924   ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
925   iter->Next();
926   ASSERT_EQ(IterStatus(iter), "c->vc");
927   iter->Next();
928   ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
929   iter->Next();
930   ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
931   iter->Next();
932   ASSERT_EQ(IterStatus(iter), "(invalid)");
933 
934   iter->SeekToLast();
935   ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
936   iter->Prev();
937   ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
938   iter->Prev();
939   ASSERT_EQ(IterStatus(iter), "c->vc");
940   iter->Prev();
941   ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
942   iter->Prev();
943   ASSERT_EQ(IterStatus(iter), "a->va");
944   iter->Prev();
945   ASSERT_EQ(IterStatus(iter), "(invalid)");
946 
947   delete iter;
948 }
949 
TEST(DBTest,IterMultiWithDelete)950 TEST(DBTest, IterMultiWithDelete) {
951   do {
952     ASSERT_OK(Put("a", "va"));
953     ASSERT_OK(Put("b", "vb"));
954     ASSERT_OK(Put("c", "vc"));
955     ASSERT_OK(Delete("b"));
956     ASSERT_EQ("NOT_FOUND", Get("b"));
957 
958     Iterator* iter = db_->NewIterator(ReadOptions());
959     iter->Seek("c");
960     ASSERT_EQ(IterStatus(iter), "c->vc");
961     iter->Prev();
962     ASSERT_EQ(IterStatus(iter), "a->va");
963     delete iter;
964   } while (ChangeOptions());
965 }
966 
TEST(DBTest,Recover)967 TEST(DBTest, Recover) {
968   do {
969     ASSERT_OK(Put("foo", "v1"));
970     ASSERT_OK(Put("baz", "v5"));
971 
972     Reopen();
973     ASSERT_EQ("v1", Get("foo"));
974 
975     ASSERT_EQ("v1", Get("foo"));
976     ASSERT_EQ("v5", Get("baz"));
977     ASSERT_OK(Put("bar", "v2"));
978     ASSERT_OK(Put("foo", "v3"));
979 
980     Reopen();
981     ASSERT_EQ("v3", Get("foo"));
982     ASSERT_OK(Put("foo", "v4"));
983     ASSERT_EQ("v4", Get("foo"));
984     ASSERT_EQ("v2", Get("bar"));
985     ASSERT_EQ("v5", Get("baz"));
986   } while (ChangeOptions());
987 }
988 
TEST(DBTest,RecoveryWithEmptyLog)989 TEST(DBTest, RecoveryWithEmptyLog) {
990   do {
991     ASSERT_OK(Put("foo", "v1"));
992     ASSERT_OK(Put("foo", "v2"));
993     Reopen();
994     Reopen();
995     ASSERT_OK(Put("foo", "v3"));
996     Reopen();
997     ASSERT_EQ("v3", Get("foo"));
998   } while (ChangeOptions());
999 }
1000 
1001 // Check that writes done during a memtable compaction are recovered
1002 // if the database is shutdown during the memtable compaction.
TEST(DBTest,RecoverDuringMemtableCompaction)1003 TEST(DBTest, RecoverDuringMemtableCompaction) {
1004   do {
1005     Options options = CurrentOptions();
1006     options.env = env_;
1007     options.write_buffer_size = 1000000;
1008     Reopen(&options);
1009 
1010     // Trigger a long memtable compaction and reopen the database during it
1011     ASSERT_OK(Put("foo", "v1"));                         // Goes to 1st log file
1012     ASSERT_OK(Put("big1", std::string(10000000, 'x')));  // Fills memtable
1013     ASSERT_OK(Put("big2", std::string(1000, 'y')));      // Triggers compaction
1014     ASSERT_OK(Put("bar", "v2"));                         // Goes to new log file
1015 
1016     Reopen(&options);
1017     ASSERT_EQ("v1", Get("foo"));
1018     ASSERT_EQ("v2", Get("bar"));
1019     ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
1020     ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
1021   } while (ChangeOptions());
1022 }
1023 
Key(int i)1024 static std::string Key(int i) {
1025   char buf[100];
1026   snprintf(buf, sizeof(buf), "key%06d", i);
1027   return std::string(buf);
1028 }
1029 
TEST(DBTest,MinorCompactionsHappen)1030 TEST(DBTest, MinorCompactionsHappen) {
1031   Options options = CurrentOptions();
1032   options.write_buffer_size = 10000;
1033   Reopen(&options);
1034 
1035   const int N = 500;
1036 
1037   int starting_num_tables = TotalTableFiles();
1038   for (int i = 0; i < N; i++) {
1039     ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
1040   }
1041   int ending_num_tables = TotalTableFiles();
1042   ASSERT_GT(ending_num_tables, starting_num_tables);
1043 
1044   for (int i = 0; i < N; i++) {
1045     ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
1046   }
1047 
1048   Reopen();
1049 
1050   for (int i = 0; i < N; i++) {
1051     ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
1052   }
1053 }
1054 
TEST(DBTest,RecoverWithLargeLog)1055 TEST(DBTest, RecoverWithLargeLog) {
1056   {
1057     Options options = CurrentOptions();
1058     Reopen(&options);
1059     ASSERT_OK(Put("big1", std::string(200000, '1')));
1060     ASSERT_OK(Put("big2", std::string(200000, '2')));
1061     ASSERT_OK(Put("small3", std::string(10, '3')));
1062     ASSERT_OK(Put("small4", std::string(10, '4')));
1063     ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1064   }
1065 
1066   // Make sure that if we re-open with a small write buffer size that
1067   // we flush table files in the middle of a large log file.
1068   Options options = CurrentOptions();
1069   options.write_buffer_size = 100000;
1070   Reopen(&options);
1071   ASSERT_EQ(NumTableFilesAtLevel(0), 3);
1072   ASSERT_EQ(std::string(200000, '1'), Get("big1"));
1073   ASSERT_EQ(std::string(200000, '2'), Get("big2"));
1074   ASSERT_EQ(std::string(10, '3'), Get("small3"));
1075   ASSERT_EQ(std::string(10, '4'), Get("small4"));
1076   ASSERT_GT(NumTableFilesAtLevel(0), 1);
1077 }
1078 
TEST(DBTest,CompactionsGenerateMultipleFiles)1079 TEST(DBTest, CompactionsGenerateMultipleFiles) {
1080   Options options = CurrentOptions();
1081   options.write_buffer_size = 100000000;  // Large write buffer
1082   Reopen(&options);
1083 
1084   Random rnd(301);
1085 
1086   // Write 8MB (80 values, each 100K)
1087   ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1088   std::vector<std::string> values;
1089   for (int i = 0; i < 80; i++) {
1090     values.push_back(RandomString(&rnd, 100000));
1091     ASSERT_OK(Put(Key(i), values[i]));
1092   }
1093 
1094   // Reopening moves updates to level-0
1095   Reopen(&options);
1096   dbfull()->TEST_CompactRange(0, nullptr, nullptr);
1097 
1098   ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1099   ASSERT_GT(NumTableFilesAtLevel(1), 1);
1100   for (int i = 0; i < 80; i++) {
1101     ASSERT_EQ(Get(Key(i)), values[i]);
1102   }
1103 }
1104 
TEST(DBTest,RepeatedWritesToSameKey)1105 TEST(DBTest, RepeatedWritesToSameKey) {
1106   Options options = CurrentOptions();
1107   options.env = env_;
1108   options.write_buffer_size = 100000;  // Small write buffer
1109   Reopen(&options);
1110 
1111   // We must have at most one file per level except for level-0,
1112   // which may have up to kL0_StopWritesTrigger files.
1113   const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
1114 
1115   Random rnd(301);
1116   std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
1117   for (int i = 0; i < 5 * kMaxFiles; i++) {
1118     Put("key", value);
1119     ASSERT_LE(TotalTableFiles(), kMaxFiles);
1120     fprintf(stderr, "after %d: %d files\n", i + 1, TotalTableFiles());
1121   }
1122 }
1123 
TEST(DBTest,SparseMerge)1124 TEST(DBTest, SparseMerge) {
1125   Options options = CurrentOptions();
1126   options.compression = kNoCompression;
1127   Reopen(&options);
1128 
1129   FillLevels("A", "Z");
1130 
1131   // Suppose there is:
1132   //    small amount of data with prefix A
1133   //    large amount of data with prefix B
1134   //    small amount of data with prefix C
1135   // and that recent updates have made small changes to all three prefixes.
1136   // Check that we do not do a compaction that merges all of B in one shot.
1137   const std::string value(1000, 'x');
1138   Put("A", "va");
1139   // Write approximately 100MB of "B" values
1140   for (int i = 0; i < 100000; i++) {
1141     char key[100];
1142     snprintf(key, sizeof(key), "B%010d", i);
1143     Put(key, value);
1144   }
1145   Put("C", "vc");
1146   dbfull()->TEST_CompactMemTable();
1147   dbfull()->TEST_CompactRange(0, nullptr, nullptr);
1148 
1149   // Make sparse update
1150   Put("A", "va2");
1151   Put("B100", "bvalue2");
1152   Put("C", "vc2");
1153   dbfull()->TEST_CompactMemTable();
1154 
1155   // Compactions should not cause us to create a situation where
1156   // a file overlaps too much data at the next level.
1157   ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
1158   dbfull()->TEST_CompactRange(0, nullptr, nullptr);
1159   ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
1160   dbfull()->TEST_CompactRange(1, nullptr, nullptr);
1161   ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
1162 }
1163 
Between(uint64_t val,uint64_t low,uint64_t high)1164 static bool Between(uint64_t val, uint64_t low, uint64_t high) {
1165   bool result = (val >= low) && (val <= high);
1166   if (!result) {
1167     fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
1168             (unsigned long long)(val), (unsigned long long)(low),
1169             (unsigned long long)(high));
1170   }
1171   return result;
1172 }
1173 
TEST(DBTest,ApproximateSizes)1174 TEST(DBTest, ApproximateSizes) {
1175   do {
1176     Options options = CurrentOptions();
1177     options.write_buffer_size = 100000000;  // Large write buffer
1178     options.compression = kNoCompression;
1179     DestroyAndReopen();
1180 
1181     ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
1182     Reopen(&options);
1183     ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
1184 
1185     // Write 8MB (80 values, each 100K)
1186     ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1187     const int N = 80;
1188     static const int S1 = 100000;
1189     static const int S2 = 105000;  // Allow some expansion from metadata
1190     Random rnd(301);
1191     for (int i = 0; i < N; i++) {
1192       ASSERT_OK(Put(Key(i), RandomString(&rnd, S1)));
1193     }
1194 
1195     // 0 because GetApproximateSizes() does not account for memtable space
1196     ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
1197 
1198     if (options.reuse_logs) {
1199       // Recovery will reuse memtable, and GetApproximateSizes() does not
1200       // account for memtable usage;
1201       Reopen(&options);
1202       ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
1203       continue;
1204     }
1205 
1206     // Check sizes across recovery by reopening a few times
1207     for (int run = 0; run < 3; run++) {
1208       Reopen(&options);
1209 
1210       for (int compact_start = 0; compact_start < N; compact_start += 10) {
1211         for (int i = 0; i < N; i += 10) {
1212           ASSERT_TRUE(Between(Size("", Key(i)), S1 * i, S2 * i));
1213           ASSERT_TRUE(Between(Size("", Key(i) + ".suffix"), S1 * (i + 1),
1214                               S2 * (i + 1)));
1215           ASSERT_TRUE(Between(Size(Key(i), Key(i + 10)), S1 * 10, S2 * 10));
1216         }
1217         ASSERT_TRUE(Between(Size("", Key(50)), S1 * 50, S2 * 50));
1218         ASSERT_TRUE(Between(Size("", Key(50) + ".suffix"), S1 * 50, S2 * 50));
1219 
1220         std::string cstart_str = Key(compact_start);
1221         std::string cend_str = Key(compact_start + 9);
1222         Slice cstart = cstart_str;
1223         Slice cend = cend_str;
1224         dbfull()->TEST_CompactRange(0, &cstart, &cend);
1225       }
1226 
1227       ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1228       ASSERT_GT(NumTableFilesAtLevel(1), 0);
1229     }
1230   } while (ChangeOptions());
1231 }
1232 
TEST(DBTest,ApproximateSizes_MixOfSmallAndLarge)1233 TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
1234   do {
1235     Options options = CurrentOptions();
1236     options.compression = kNoCompression;
1237     Reopen();
1238 
1239     Random rnd(301);
1240     std::string big1 = RandomString(&rnd, 100000);
1241     ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
1242     ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
1243     ASSERT_OK(Put(Key(2), big1));
1244     ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
1245     ASSERT_OK(Put(Key(4), big1));
1246     ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
1247     ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
1248     ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
1249 
1250     if (options.reuse_logs) {
1251       // Need to force a memtable compaction since recovery does not do so.
1252       ASSERT_OK(dbfull()->TEST_CompactMemTable());
1253     }
1254 
1255     // Check sizes across recovery by reopening a few times
1256     for (int run = 0; run < 3; run++) {
1257       Reopen(&options);
1258 
1259       ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
1260       ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
1261       ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
1262       ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
1263       ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
1264       ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
1265       ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
1266       ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
1267       ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));
1268 
1269       ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
1270 
1271       dbfull()->TEST_CompactRange(0, nullptr, nullptr);
1272     }
1273   } while (ChangeOptions());
1274 }
1275 
TEST(DBTest,IteratorPinsRef)1276 TEST(DBTest, IteratorPinsRef) {
1277   Put("foo", "hello");
1278 
1279   // Get iterator that will yield the current contents of the DB.
1280   Iterator* iter = db_->NewIterator(ReadOptions());
1281 
1282   // Write to force compactions
1283   Put("foo", "newvalue1");
1284   for (int i = 0; i < 100; i++) {
1285     ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v')));  // 100K values
1286   }
1287   Put("foo", "newvalue2");
1288 
1289   iter->SeekToFirst();
1290   ASSERT_TRUE(iter->Valid());
1291   ASSERT_EQ("foo", iter->key().ToString());
1292   ASSERT_EQ("hello", iter->value().ToString());
1293   iter->Next();
1294   ASSERT_TRUE(!iter->Valid());
1295   delete iter;
1296 }
1297 
TEST(DBTest,Snapshot)1298 TEST(DBTest, Snapshot) {
1299   do {
1300     Put("foo", "v1");
1301     const Snapshot* s1 = db_->GetSnapshot();
1302     Put("foo", "v2");
1303     const Snapshot* s2 = db_->GetSnapshot();
1304     Put("foo", "v3");
1305     const Snapshot* s3 = db_->GetSnapshot();
1306 
1307     Put("foo", "v4");
1308     ASSERT_EQ("v1", Get("foo", s1));
1309     ASSERT_EQ("v2", Get("foo", s2));
1310     ASSERT_EQ("v3", Get("foo", s3));
1311     ASSERT_EQ("v4", Get("foo"));
1312 
1313     db_->ReleaseSnapshot(s3);
1314     ASSERT_EQ("v1", Get("foo", s1));
1315     ASSERT_EQ("v2", Get("foo", s2));
1316     ASSERT_EQ("v4", Get("foo"));
1317 
1318     db_->ReleaseSnapshot(s1);
1319     ASSERT_EQ("v2", Get("foo", s2));
1320     ASSERT_EQ("v4", Get("foo"));
1321 
1322     db_->ReleaseSnapshot(s2);
1323     ASSERT_EQ("v4", Get("foo"));
1324   } while (ChangeOptions());
1325 }
1326 
TEST(DBTest,HiddenValuesAreRemoved)1327 TEST(DBTest, HiddenValuesAreRemoved) {
1328   do {
1329     Random rnd(301);
1330     FillLevels("a", "z");
1331 
1332     std::string big = RandomString(&rnd, 50000);
1333     Put("foo", big);
1334     Put("pastfoo", "v");
1335     const Snapshot* snapshot = db_->GetSnapshot();
1336     Put("foo", "tiny");
1337     Put("pastfoo2", "v2");  // Advance sequence number one more
1338 
1339     ASSERT_OK(dbfull()->TEST_CompactMemTable());
1340     ASSERT_GT(NumTableFilesAtLevel(0), 0);
1341 
1342     ASSERT_EQ(big, Get("foo", snapshot));
1343     ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
1344     db_->ReleaseSnapshot(snapshot);
1345     ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
1346     Slice x("x");
1347     dbfull()->TEST_CompactRange(0, nullptr, &x);
1348     ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
1349     ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1350     ASSERT_GE(NumTableFilesAtLevel(1), 1);
1351     dbfull()->TEST_CompactRange(1, nullptr, &x);
1352     ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
1353 
1354     ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
1355   } while (ChangeOptions());
1356 }
1357 
TEST(DBTest,DeletionMarkers1)1358 TEST(DBTest, DeletionMarkers1) {
1359   Put("foo", "v1");
1360   ASSERT_OK(dbfull()->TEST_CompactMemTable());
1361   const int last = config::kMaxMemCompactLevel;
1362   ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo => v1 is now in last level
1363 
1364   // Place a table at level last-1 to prevent merging with preceding mutation
1365   Put("a", "begin");
1366   Put("z", "end");
1367   dbfull()->TEST_CompactMemTable();
1368   ASSERT_EQ(NumTableFilesAtLevel(last), 1);
1369   ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1);
1370 
1371   Delete("foo");
1372   Put("foo", "v2");
1373   ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
1374   ASSERT_OK(dbfull()->TEST_CompactMemTable());  // Moves to level last-2
1375   ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
1376   Slice z("z");
1377   dbfull()->TEST_CompactRange(last - 2, nullptr, &z);
1378   // DEL eliminated, but v1 remains because we aren't compacting that level
1379   // (DEL can be eliminated because v2 hides v1).
1380   ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
1381   dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr);
1382   // Merging last-1 w/ last, so we are the base level for "foo", so
1383   // DEL is removed.  (as is v1).
1384   ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
1385 }
1386 
TEST(DBTest,DeletionMarkers2)1387 TEST(DBTest, DeletionMarkers2) {
1388   Put("foo", "v1");
1389   ASSERT_OK(dbfull()->TEST_CompactMemTable());
1390   const int last = config::kMaxMemCompactLevel;
1391   ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo => v1 is now in last level
1392 
1393   // Place a table at level last-1 to prevent merging with preceding mutation
1394   Put("a", "begin");
1395   Put("z", "end");
1396   dbfull()->TEST_CompactMemTable();
1397   ASSERT_EQ(NumTableFilesAtLevel(last), 1);
1398   ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1);
1399 
1400   Delete("foo");
1401   ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1402   ASSERT_OK(dbfull()->TEST_CompactMemTable());  // Moves to level last-2
1403   ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1404   dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr);
1405   // DEL kept: "last" file overlaps
1406   ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1407   dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr);
1408   // Merging last-1 w/ last, so we are the base level for "foo", so
1409   // DEL is removed.  (as is v1).
1410   ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
1411 }
1412 
TEST(DBTest,OverlapInLevel0)1413 TEST(DBTest, OverlapInLevel0) {
1414   do {
1415     ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
1416 
1417     // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
1418     // 0.
1419     ASSERT_OK(Put("100", "v100"));
1420     ASSERT_OK(Put("999", "v999"));
1421     dbfull()->TEST_CompactMemTable();
1422     ASSERT_OK(Delete("100"));
1423     ASSERT_OK(Delete("999"));
1424     dbfull()->TEST_CompactMemTable();
1425     ASSERT_EQ("0,1,1", FilesPerLevel());
1426 
1427     // Make files spanning the following ranges in level-0:
1428     //  files[0]  200 .. 900
1429     //  files[1]  300 .. 500
1430     // Note that files are sorted by smallest key.
1431     ASSERT_OK(Put("300", "v300"));
1432     ASSERT_OK(Put("500", "v500"));
1433     dbfull()->TEST_CompactMemTable();
1434     ASSERT_OK(Put("200", "v200"));
1435     ASSERT_OK(Put("600", "v600"));
1436     ASSERT_OK(Put("900", "v900"));
1437     dbfull()->TEST_CompactMemTable();
1438     ASSERT_EQ("2,1,1", FilesPerLevel());
1439 
1440     // Compact away the placeholder files we created initially
1441     dbfull()->TEST_CompactRange(1, nullptr, nullptr);
1442     dbfull()->TEST_CompactRange(2, nullptr, nullptr);
1443     ASSERT_EQ("2", FilesPerLevel());
1444 
1445     // Do a memtable compaction.  Before bug-fix, the compaction would
1446     // not detect the overlap with level-0 files and would incorrectly place
1447     // the deletion in a deeper level.
1448     ASSERT_OK(Delete("600"));
1449     dbfull()->TEST_CompactMemTable();
1450     ASSERT_EQ("3", FilesPerLevel());
1451     ASSERT_EQ("NOT_FOUND", Get("600"));
1452   } while (ChangeOptions());
1453 }
1454 
TEST(DBTest,L0_CompactionBug_Issue44_a)1455 TEST(DBTest, L0_CompactionBug_Issue44_a) {
1456   Reopen();
1457   ASSERT_OK(Put("b", "v"));
1458   Reopen();
1459   ASSERT_OK(Delete("b"));
1460   ASSERT_OK(Delete("a"));
1461   Reopen();
1462   ASSERT_OK(Delete("a"));
1463   Reopen();
1464   ASSERT_OK(Put("a", "v"));
1465   Reopen();
1466   Reopen();
1467   ASSERT_EQ("(a->v)", Contents());
1468   DelayMilliseconds(1000);  // Wait for compaction to finish
1469   ASSERT_EQ("(a->v)", Contents());
1470 }
1471 
TEST(DBTest,L0_CompactionBug_Issue44_b)1472 TEST(DBTest, L0_CompactionBug_Issue44_b) {
1473   Reopen();
1474   Put("", "");
1475   Reopen();
1476   Delete("e");
1477   Put("", "");
1478   Reopen();
1479   Put("c", "cv");
1480   Reopen();
1481   Put("", "");
1482   Reopen();
1483   Put("", "");
1484   DelayMilliseconds(1000);  // Wait for compaction to finish
1485   Reopen();
1486   Put("d", "dv");
1487   Reopen();
1488   Put("", "");
1489   Reopen();
1490   Delete("d");
1491   Delete("b");
1492   Reopen();
1493   ASSERT_EQ("(->)(c->cv)", Contents());
1494   DelayMilliseconds(1000);  // Wait for compaction to finish
1495   ASSERT_EQ("(->)(c->cv)", Contents());
1496 }
1497 
TEST(DBTest,Fflush_Issue474)1498 TEST(DBTest, Fflush_Issue474) {
1499   static const int kNum = 100000;
1500   Random rnd(test::RandomSeed());
1501   for (int i = 0; i < kNum; i++) {
1502     fflush(nullptr);
1503     ASSERT_OK(Put(RandomKey(&rnd), RandomString(&rnd, 100)));
1504   }
1505 }
1506 
TEST(DBTest,ComparatorCheck)1507 TEST(DBTest, ComparatorCheck) {
1508   class NewComparator : public Comparator {
1509    public:
1510     const char* Name() const override { return "leveldb.NewComparator"; }
1511     int Compare(const Slice& a, const Slice& b) const override {
1512       return BytewiseComparator()->Compare(a, b);
1513     }
1514     void FindShortestSeparator(std::string* s, const Slice& l) const override {
1515       BytewiseComparator()->FindShortestSeparator(s, l);
1516     }
1517     void FindShortSuccessor(std::string* key) const override {
1518       BytewiseComparator()->FindShortSuccessor(key);
1519     }
1520   };
1521   NewComparator cmp;
1522   Options new_options = CurrentOptions();
1523   new_options.comparator = &cmp;
1524   Status s = TryReopen(&new_options);
1525   ASSERT_TRUE(!s.ok());
1526   ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
1527       << s.ToString();
1528 }
1529 
TEST(DBTest,CustomComparator)1530 TEST(DBTest, CustomComparator) {
1531   class NumberComparator : public Comparator {
1532    public:
1533     const char* Name() const override { return "test.NumberComparator"; }
1534     int Compare(const Slice& a, const Slice& b) const override {
1535       return ToNumber(a) - ToNumber(b);
1536     }
1537     void FindShortestSeparator(std::string* s, const Slice& l) const override {
1538       ToNumber(*s);  // Check format
1539       ToNumber(l);   // Check format
1540     }
1541     void FindShortSuccessor(std::string* key) const override {
1542       ToNumber(*key);  // Check format
1543     }
1544 
1545    private:
1546     static int ToNumber(const Slice& x) {
1547       // Check that there are no extra characters.
1548       ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
1549           << EscapeString(x);
1550       int val;
1551       char ignored;
1552       ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
1553           << EscapeString(x);
1554       return val;
1555     }
1556   };
1557   NumberComparator cmp;
1558   Options new_options = CurrentOptions();
1559   new_options.create_if_missing = true;
1560   new_options.comparator = &cmp;
1561   new_options.filter_policy = nullptr;   // Cannot use bloom filters
1562   new_options.write_buffer_size = 1000;  // Compact more often
1563   DestroyAndReopen(&new_options);
1564   ASSERT_OK(Put("[10]", "ten"));
1565   ASSERT_OK(Put("[0x14]", "twenty"));
1566   for (int i = 0; i < 2; i++) {
1567     ASSERT_EQ("ten", Get("[10]"));
1568     ASSERT_EQ("ten", Get("[0xa]"));
1569     ASSERT_EQ("twenty", Get("[20]"));
1570     ASSERT_EQ("twenty", Get("[0x14]"));
1571     ASSERT_EQ("NOT_FOUND", Get("[15]"));
1572     ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
1573     Compact("[0]", "[9999]");
1574   }
1575 
1576   for (int run = 0; run < 2; run++) {
1577     for (int i = 0; i < 1000; i++) {
1578       char buf[100];
1579       snprintf(buf, sizeof(buf), "[%d]", i * 10);
1580       ASSERT_OK(Put(buf, buf));
1581     }
1582     Compact("[0]", "[1000000]");
1583   }
1584 }
1585 
TEST(DBTest,ManualCompaction)1586 TEST(DBTest, ManualCompaction) {
1587   ASSERT_EQ(config::kMaxMemCompactLevel, 2)
1588       << "Need to update this test to match kMaxMemCompactLevel";
1589 
1590   MakeTables(3, "p", "q");
1591   ASSERT_EQ("1,1,1", FilesPerLevel());
1592 
1593   // Compaction range falls before files
1594   Compact("", "c");
1595   ASSERT_EQ("1,1,1", FilesPerLevel());
1596 
1597   // Compaction range falls after files
1598   Compact("r", "z");
1599   ASSERT_EQ("1,1,1", FilesPerLevel());
1600 
1601   // Compaction range overlaps files
1602   Compact("p1", "p9");
1603   ASSERT_EQ("0,0,1", FilesPerLevel());
1604 
1605   // Populate a different range
1606   MakeTables(3, "c", "e");
1607   ASSERT_EQ("1,1,2", FilesPerLevel());
1608 
1609   // Compact just the new range
1610   Compact("b", "f");
1611   ASSERT_EQ("0,0,2", FilesPerLevel());
1612 
1613   // Compact all
1614   MakeTables(1, "a", "z");
1615   ASSERT_EQ("0,1,2", FilesPerLevel());
1616   db_->CompactRange(nullptr, nullptr);
1617   ASSERT_EQ("0,0,1", FilesPerLevel());
1618 }
1619 
TEST(DBTest,DBOpen_Options)1620 TEST(DBTest, DBOpen_Options) {
1621   std::string dbname = test::TmpDir() + "/db_options_test";
1622   DestroyDB(dbname, Options());
1623 
1624   // Does not exist, and create_if_missing == false: error
1625   DB* db = nullptr;
1626   Options opts;
1627   opts.create_if_missing = false;
1628   Status s = DB::Open(opts, dbname, &db);
1629   ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
1630   ASSERT_TRUE(db == nullptr);
1631 
1632   // Does not exist, and create_if_missing == true: OK
1633   opts.create_if_missing = true;
1634   s = DB::Open(opts, dbname, &db);
1635   ASSERT_OK(s);
1636   ASSERT_TRUE(db != nullptr);
1637 
1638   delete db;
1639   db = nullptr;
1640 
1641   // Does exist, and error_if_exists == true: error
1642   opts.create_if_missing = false;
1643   opts.error_if_exists = true;
1644   s = DB::Open(opts, dbname, &db);
1645   ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
1646   ASSERT_TRUE(db == nullptr);
1647 
1648   // Does exist, and error_if_exists == false: OK
1649   opts.create_if_missing = true;
1650   opts.error_if_exists = false;
1651   s = DB::Open(opts, dbname, &db);
1652   ASSERT_OK(s);
1653   ASSERT_TRUE(db != nullptr);
1654 
1655   delete db;
1656   db = nullptr;
1657 }
1658 
TEST(DBTest,DestroyEmptyDir)1659 TEST(DBTest, DestroyEmptyDir) {
1660   std::string dbname = test::TmpDir() + "/db_empty_dir";
1661   TestEnv env(Env::Default());
1662   env.DeleteDir(dbname);
1663   ASSERT_TRUE(!env.FileExists(dbname));
1664 
1665   Options opts;
1666   opts.env = &env;
1667 
1668   ASSERT_OK(env.CreateDir(dbname));
1669   ASSERT_TRUE(env.FileExists(dbname));
1670   std::vector<std::string> children;
1671   ASSERT_OK(env.GetChildren(dbname, &children));
1672   // The stock Env's do not filter out '.' and '..' special files.
1673   ASSERT_EQ(2, children.size());
1674   ASSERT_OK(DestroyDB(dbname, opts));
1675   ASSERT_TRUE(!env.FileExists(dbname));
1676 
1677   // Should also be destroyed if Env is filtering out dot files.
1678   env.SetIgnoreDotFiles(true);
1679   ASSERT_OK(env.CreateDir(dbname));
1680   ASSERT_TRUE(env.FileExists(dbname));
1681   ASSERT_OK(env.GetChildren(dbname, &children));
1682   ASSERT_EQ(0, children.size());
1683   ASSERT_OK(DestroyDB(dbname, opts));
1684   ASSERT_TRUE(!env.FileExists(dbname));
1685 }
1686 
TEST(DBTest,DestroyOpenDB)1687 TEST(DBTest, DestroyOpenDB) {
1688   std::string dbname = test::TmpDir() + "/open_db_dir";
1689   env_->DeleteDir(dbname);
1690   ASSERT_TRUE(!env_->FileExists(dbname));
1691 
1692   Options opts;
1693   opts.create_if_missing = true;
1694   DB* db = nullptr;
1695   ASSERT_OK(DB::Open(opts, dbname, &db));
1696   ASSERT_TRUE(db != nullptr);
1697 
1698   // Must fail to destroy an open db.
1699   ASSERT_TRUE(env_->FileExists(dbname));
1700   ASSERT_TRUE(!DestroyDB(dbname, Options()).ok());
1701   ASSERT_TRUE(env_->FileExists(dbname));
1702 
1703   delete db;
1704   db = nullptr;
1705 
1706   // Should succeed destroying a closed db.
1707   ASSERT_OK(DestroyDB(dbname, Options()));
1708   ASSERT_TRUE(!env_->FileExists(dbname));
1709 }
1710 
TEST(DBTest,Locking)1711 TEST(DBTest, Locking) {
1712   DB* db2 = nullptr;
1713   Status s = DB::Open(CurrentOptions(), dbname_, &db2);
1714   ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
1715 }
1716 
1717 // Check that number of files does not grow when we are out of space
TEST(DBTest,NoSpace)1718 TEST(DBTest, NoSpace) {
1719   Options options = CurrentOptions();
1720   options.env = env_;
1721   Reopen(&options);
1722 
1723   ASSERT_OK(Put("foo", "v1"));
1724   ASSERT_EQ("v1", Get("foo"));
1725   Compact("a", "z");
1726   const int num_files = CountFiles();
1727   // Force out-of-space errors.
1728   env_->no_space_.store(true, std::memory_order_release);
1729   for (int i = 0; i < 10; i++) {
1730     for (int level = 0; level < config::kNumLevels - 1; level++) {
1731       dbfull()->TEST_CompactRange(level, nullptr, nullptr);
1732     }
1733   }
1734   env_->no_space_.store(false, std::memory_order_release);
1735   ASSERT_LT(CountFiles(), num_files + 3);
1736 }
1737 
TEST(DBTest,NonWritableFileSystem)1738 TEST(DBTest, NonWritableFileSystem) {
1739   Options options = CurrentOptions();
1740   options.write_buffer_size = 1000;
1741   options.env = env_;
1742   Reopen(&options);
1743   ASSERT_OK(Put("foo", "v1"));
1744   // Force errors for new files.
1745   env_->non_writable_.store(true, std::memory_order_release);
1746   std::string big(100000, 'x');
1747   int errors = 0;
1748   for (int i = 0; i < 20; i++) {
1749     fprintf(stderr, "iter %d; errors %d\n", i, errors);
1750     if (!Put("foo", big).ok()) {
1751       errors++;
1752       DelayMilliseconds(100);
1753     }
1754   }
1755   ASSERT_GT(errors, 0);
1756   env_->non_writable_.store(false, std::memory_order_release);
1757 }
1758 
TEST(DBTest,WriteSyncError)1759 TEST(DBTest, WriteSyncError) {
1760   // Check that log sync errors cause the DB to disallow future writes.
1761 
1762   // (a) Cause log sync calls to fail
1763   Options options = CurrentOptions();
1764   options.env = env_;
1765   Reopen(&options);
1766   env_->data_sync_error_.store(true, std::memory_order_release);
1767 
1768   // (b) Normal write should succeed
1769   WriteOptions w;
1770   ASSERT_OK(db_->Put(w, "k1", "v1"));
1771   ASSERT_EQ("v1", Get("k1"));
1772 
1773   // (c) Do a sync write; should fail
1774   w.sync = true;
1775   ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
1776   ASSERT_EQ("v1", Get("k1"));
1777   ASSERT_EQ("NOT_FOUND", Get("k2"));
1778 
1779   // (d) make sync behave normally
1780   env_->data_sync_error_.store(false, std::memory_order_release);
1781 
1782   // (e) Do a non-sync write; should fail
1783   w.sync = false;
1784   ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
1785   ASSERT_EQ("v1", Get("k1"));
1786   ASSERT_EQ("NOT_FOUND", Get("k2"));
1787   ASSERT_EQ("NOT_FOUND", Get("k3"));
1788 }
1789 
TEST(DBTest,ManifestWriteError)1790 TEST(DBTest, ManifestWriteError) {
1791   // Test for the following problem:
1792   // (a) Compaction produces file F
1793   // (b) Log record containing F is written to MANIFEST file, but Sync() fails
1794   // (c) GC deletes F
1795   // (d) After reopening DB, reads fail since deleted F is named in log record
1796 
1797   // We iterate twice.  In the second iteration, everything is the
1798   // same except the log record never makes it to the MANIFEST file.
1799   for (int iter = 0; iter < 2; iter++) {
1800     std::atomic<bool>* error_type = (iter == 0) ? &env_->manifest_sync_error_
1801                                                 : &env_->manifest_write_error_;
1802 
1803     // Insert foo=>bar mapping
1804     Options options = CurrentOptions();
1805     options.env = env_;
1806     options.create_if_missing = true;
1807     options.error_if_exists = false;
1808     DestroyAndReopen(&options);
1809     ASSERT_OK(Put("foo", "bar"));
1810     ASSERT_EQ("bar", Get("foo"));
1811 
1812     // Memtable compaction (will succeed)
1813     dbfull()->TEST_CompactMemTable();
1814     ASSERT_EQ("bar", Get("foo"));
1815     const int last = config::kMaxMemCompactLevel;
1816     ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo=>bar is now in last level
1817 
1818     // Merging compaction (will fail)
1819     error_type->store(true, std::memory_order_release);
1820     dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
1821     ASSERT_EQ("bar", Get("foo"));
1822 
1823     // Recovery: should not lose data
1824     error_type->store(false, std::memory_order_release);
1825     Reopen(&options);
1826     ASSERT_EQ("bar", Get("foo"));
1827   }
1828 }
1829 
TEST(DBTest,MissingSSTFile)1830 TEST(DBTest, MissingSSTFile) {
1831   ASSERT_OK(Put("foo", "bar"));
1832   ASSERT_EQ("bar", Get("foo"));
1833 
1834   // Dump the memtable to disk.
1835   dbfull()->TEST_CompactMemTable();
1836   ASSERT_EQ("bar", Get("foo"));
1837 
1838   Close();
1839   ASSERT_TRUE(DeleteAnSSTFile());
1840   Options options = CurrentOptions();
1841   options.paranoid_checks = true;
1842   Status s = TryReopen(&options);
1843   ASSERT_TRUE(!s.ok());
1844   ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) << s.ToString();
1845 }
1846 
TEST(DBTest,StillReadSST)1847 TEST(DBTest, StillReadSST) {
1848   ASSERT_OK(Put("foo", "bar"));
1849   ASSERT_EQ("bar", Get("foo"));
1850 
1851   // Dump the memtable to disk.
1852   dbfull()->TEST_CompactMemTable();
1853   ASSERT_EQ("bar", Get("foo"));
1854   Close();
1855   ASSERT_GT(RenameLDBToSST(), 0);
1856   Options options = CurrentOptions();
1857   options.paranoid_checks = true;
1858   Status s = TryReopen(&options);
1859   ASSERT_TRUE(s.ok());
1860   ASSERT_EQ("bar", Get("foo"));
1861 }
1862 
TEST(DBTest,FilesDeletedAfterCompaction)1863 TEST(DBTest, FilesDeletedAfterCompaction) {
1864   ASSERT_OK(Put("foo", "v2"));
1865   Compact("a", "z");
1866   const int num_files = CountFiles();
1867   for (int i = 0; i < 10; i++) {
1868     ASSERT_OK(Put("foo", "v2"));
1869     Compact("a", "z");
1870   }
1871   ASSERT_EQ(CountFiles(), num_files);
1872 }
1873 
TEST(DBTest,BloomFilter)1874 TEST(DBTest, BloomFilter) {
1875   env_->count_random_reads_ = true;
1876   Options options = CurrentOptions();
1877   options.env = env_;
1878   options.block_cache = NewLRUCache(0);  // Prevent cache hits
1879   options.filter_policy = NewBloomFilterPolicy(10);
1880   Reopen(&options);
1881 
1882   // Populate multiple layers
1883   const int N = 10000;
1884   for (int i = 0; i < N; i++) {
1885     ASSERT_OK(Put(Key(i), Key(i)));
1886   }
1887   Compact("a", "z");
1888   for (int i = 0; i < N; i += 100) {
1889     ASSERT_OK(Put(Key(i), Key(i)));
1890   }
1891   dbfull()->TEST_CompactMemTable();
1892 
1893   // Prevent auto compactions triggered by seeks
1894   env_->delay_data_sync_.store(true, std::memory_order_release);
1895 
1896   // Lookup present keys.  Should rarely read from small sstable.
1897   env_->random_read_counter_.Reset();
1898   for (int i = 0; i < N; i++) {
1899     ASSERT_EQ(Key(i), Get(Key(i)));
1900   }
1901   int reads = env_->random_read_counter_.Read();
1902   fprintf(stderr, "%d present => %d reads\n", N, reads);
1903   ASSERT_GE(reads, N);
1904   ASSERT_LE(reads, N + 2 * N / 100);
1905 
1906   // Lookup present keys.  Should rarely read from either sstable.
1907   env_->random_read_counter_.Reset();
1908   for (int i = 0; i < N; i++) {
1909     ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
1910   }
1911   reads = env_->random_read_counter_.Read();
1912   fprintf(stderr, "%d missing => %d reads\n", N, reads);
1913   ASSERT_LE(reads, 3 * N / 100);
1914 
1915   env_->delay_data_sync_.store(false, std::memory_order_release);
1916   Close();
1917   delete options.block_cache;
1918   delete options.filter_policy;
1919 }
1920 
1921 // Multi-threaded test:
1922 namespace {
1923 
1924 static const int kNumThreads = 4;
1925 static const int kTestSeconds = 10;
1926 static const int kNumKeys = 1000;
1927 
1928 struct MTState {
1929   DBTest* test;
1930   std::atomic<bool> stop;
1931   std::atomic<int> counter[kNumThreads];
1932   std::atomic<bool> thread_done[kNumThreads];
1933 };
1934 
1935 struct MTThread {
1936   MTState* state;
1937   int id;
1938 };
1939 
MTThreadBody(void * arg)1940 static void MTThreadBody(void* arg) {
1941   MTThread* t = reinterpret_cast<MTThread*>(arg);
1942   int id = t->id;
1943   DB* db = t->state->test->db_;
1944   int counter = 0;
1945   fprintf(stderr, "... starting thread %d\n", id);
1946   Random rnd(1000 + id);
1947   std::string value;
1948   char valbuf[1500];
1949   while (!t->state->stop.load(std::memory_order_acquire)) {
1950     t->state->counter[id].store(counter, std::memory_order_release);
1951 
1952     int key = rnd.Uniform(kNumKeys);
1953     char keybuf[20];
1954     snprintf(keybuf, sizeof(keybuf), "%016d", key);
1955 
1956     if (rnd.OneIn(2)) {
1957       // Write values of the form <key, my id, counter>.
1958       // We add some padding for force compactions.
1959       snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", key, id,
1960                static_cast<int>(counter));
1961       ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
1962     } else {
1963       // Read a value and verify that it matches the pattern written above.
1964       Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
1965       if (s.IsNotFound()) {
1966         // Key has not yet been written
1967       } else {
1968         // Check that the writer thread counter is >= the counter in the value
1969         ASSERT_OK(s);
1970         int k, w, c;
1971         ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
1972         ASSERT_EQ(k, key);
1973         ASSERT_GE(w, 0);
1974         ASSERT_LT(w, kNumThreads);
1975         ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
1976       }
1977     }
1978     counter++;
1979   }
1980   t->state->thread_done[id].store(true, std::memory_order_release);
1981   fprintf(stderr, "... stopping thread %d after %d ops\n", id, counter);
1982 }
1983 
1984 }  // namespace
1985 
TEST(DBTest,MultiThreaded)1986 TEST(DBTest, MultiThreaded) {
1987   do {
1988     // Initialize state
1989     MTState mt;
1990     mt.test = this;
1991     mt.stop.store(false, std::memory_order_release);
1992     for (int id = 0; id < kNumThreads; id++) {
1993       mt.counter[id].store(false, std::memory_order_release);
1994       mt.thread_done[id].store(false, std::memory_order_release);
1995     }
1996 
1997     // Start threads
1998     MTThread thread[kNumThreads];
1999     for (int id = 0; id < kNumThreads; id++) {
2000       thread[id].state = &mt;
2001       thread[id].id = id;
2002       env_->StartThread(MTThreadBody, &thread[id]);
2003     }
2004 
2005     // Let them run for a while
2006     DelayMilliseconds(kTestSeconds * 1000);
2007 
2008     // Stop the threads and wait for them to finish
2009     mt.stop.store(true, std::memory_order_release);
2010     for (int id = 0; id < kNumThreads; id++) {
2011       while (!mt.thread_done[id].load(std::memory_order_acquire)) {
2012         DelayMilliseconds(100);
2013       }
2014     }
2015   } while (ChangeOptions());
2016 }
2017 
2018 namespace {
2019 typedef std::map<std::string, std::string> KVMap;
2020 }
2021 
2022 class ModelDB : public DB {
2023  public:
2024   class ModelSnapshot : public Snapshot {
2025    public:
2026     KVMap map_;
2027   };
2028 
ModelDB(const Options & options)2029   explicit ModelDB(const Options& options) : options_(options) {}
2030   ~ModelDB() override = default;
Put(const WriteOptions & o,const Slice & k,const Slice & v)2031   Status Put(const WriteOptions& o, const Slice& k, const Slice& v) override {
2032     return DB::Put(o, k, v);
2033   }
Delete(const WriteOptions & o,const Slice & key)2034   Status Delete(const WriteOptions& o, const Slice& key) override {
2035     return DB::Delete(o, key);
2036   }
Get(const ReadOptions & options,const Slice & key,std::string * value)2037   Status Get(const ReadOptions& options, const Slice& key,
2038              std::string* value) override {
2039     assert(false);  // Not implemented
2040     return Status::NotFound(key);
2041   }
NewIterator(const ReadOptions & options)2042   Iterator* NewIterator(const ReadOptions& options) override {
2043     if (options.snapshot == nullptr) {
2044       KVMap* saved = new KVMap;
2045       *saved = map_;
2046       return new ModelIter(saved, true);
2047     } else {
2048       const KVMap* snapshot_state =
2049           &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
2050       return new ModelIter(snapshot_state, false);
2051     }
2052   }
GetSnapshot()2053   const Snapshot* GetSnapshot() override {
2054     ModelSnapshot* snapshot = new ModelSnapshot;
2055     snapshot->map_ = map_;
2056     return snapshot;
2057   }
2058 
ReleaseSnapshot(const Snapshot * snapshot)2059   void ReleaseSnapshot(const Snapshot* snapshot) override {
2060     delete reinterpret_cast<const ModelSnapshot*>(snapshot);
2061   }
Write(const WriteOptions & options,WriteBatch * batch)2062   Status Write(const WriteOptions& options, WriteBatch* batch) override {
2063     class Handler : public WriteBatch::Handler {
2064      public:
2065       KVMap* map_;
2066       void Put(const Slice& key, const Slice& value) override {
2067         (*map_)[key.ToString()] = value.ToString();
2068       }
2069       void Delete(const Slice& key) override { map_->erase(key.ToString()); }
2070     };
2071     Handler handler;
2072     handler.map_ = &map_;
2073     return batch->Iterate(&handler);
2074   }
2075 
GetProperty(const Slice & property,std::string * value)2076   bool GetProperty(const Slice& property, std::string* value) override {
2077     return false;
2078   }
GetApproximateSizes(const Range * r,int n,uint64_t * sizes)2079   void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) override {
2080     for (int i = 0; i < n; i++) {
2081       sizes[i] = 0;
2082     }
2083   }
CompactRange(const Slice * start,const Slice * end)2084   void CompactRange(const Slice* start, const Slice* end) override {}
2085 
2086  private:
2087   class ModelIter : public Iterator {
2088    public:
ModelIter(const KVMap * map,bool owned)2089     ModelIter(const KVMap* map, bool owned)
2090         : map_(map), owned_(owned), iter_(map_->end()) {}
~ModelIter()2091     ~ModelIter() override {
2092       if (owned_) delete map_;
2093     }
Valid() const2094     bool Valid() const override { return iter_ != map_->end(); }
SeekToFirst()2095     void SeekToFirst() override { iter_ = map_->begin(); }
SeekToLast()2096     void SeekToLast() override {
2097       if (map_->empty()) {
2098         iter_ = map_->end();
2099       } else {
2100         iter_ = map_->find(map_->rbegin()->first);
2101       }
2102     }
Seek(const Slice & k)2103     void Seek(const Slice& k) override {
2104       iter_ = map_->lower_bound(k.ToString());
2105     }
Next()2106     void Next() override { ++iter_; }
Prev()2107     void Prev() override { --iter_; }
key() const2108     Slice key() const override { return iter_->first; }
value() const2109     Slice value() const override { return iter_->second; }
status() const2110     Status status() const override { return Status::OK(); }
2111 
2112    private:
2113     const KVMap* const map_;
2114     const bool owned_;  // Do we own map_
2115     KVMap::const_iterator iter_;
2116   };
2117   const Options options_;
2118   KVMap map_;
2119 };
2120 
CompareIterators(int step,DB * model,DB * db,const Snapshot * model_snap,const Snapshot * db_snap)2121 static bool CompareIterators(int step, DB* model, DB* db,
2122                              const Snapshot* model_snap,
2123                              const Snapshot* db_snap) {
2124   ReadOptions options;
2125   options.snapshot = model_snap;
2126   Iterator* miter = model->NewIterator(options);
2127   options.snapshot = db_snap;
2128   Iterator* dbiter = db->NewIterator(options);
2129   bool ok = true;
2130   int count = 0;
2131   for (miter->SeekToFirst(), dbiter->SeekToFirst();
2132        ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
2133     count++;
2134     if (miter->key().compare(dbiter->key()) != 0) {
2135       fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
2136               EscapeString(miter->key()).c_str(),
2137               EscapeString(dbiter->key()).c_str());
2138       ok = false;
2139       break;
2140     }
2141 
2142     if (miter->value().compare(dbiter->value()) != 0) {
2143       fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
2144               step, EscapeString(miter->key()).c_str(),
2145               EscapeString(miter->value()).c_str(),
2146               EscapeString(miter->value()).c_str());
2147       ok = false;
2148     }
2149   }
2150 
2151   if (ok) {
2152     if (miter->Valid() != dbiter->Valid()) {
2153       fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
2154               step, miter->Valid(), dbiter->Valid());
2155       ok = false;
2156     }
2157   }
2158   fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
2159   delete miter;
2160   delete dbiter;
2161   return ok;
2162 }
2163 
TEST(DBTest,Randomized)2164 TEST(DBTest, Randomized) {
2165   Random rnd(test::RandomSeed());
2166   do {
2167     ModelDB model(CurrentOptions());
2168     const int N = 10000;
2169     const Snapshot* model_snap = nullptr;
2170     const Snapshot* db_snap = nullptr;
2171     std::string k, v;
2172     for (int step = 0; step < N; step++) {
2173       if (step % 100 == 0) {
2174         fprintf(stderr, "Step %d of %d\n", step, N);
2175       }
2176       // TODO(sanjay): Test Get() works
2177       int p = rnd.Uniform(100);
2178       if (p < 45) {  // Put
2179         k = RandomKey(&rnd);
2180         v = RandomString(
2181             &rnd, rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8));
2182         ASSERT_OK(model.Put(WriteOptions(), k, v));
2183         ASSERT_OK(db_->Put(WriteOptions(), k, v));
2184 
2185       } else if (p < 90) {  // Delete
2186         k = RandomKey(&rnd);
2187         ASSERT_OK(model.Delete(WriteOptions(), k));
2188         ASSERT_OK(db_->Delete(WriteOptions(), k));
2189 
2190       } else {  // Multi-element batch
2191         WriteBatch b;
2192         const int num = rnd.Uniform(8);
2193         for (int i = 0; i < num; i++) {
2194           if (i == 0 || !rnd.OneIn(10)) {
2195             k = RandomKey(&rnd);
2196           } else {
2197             // Periodically re-use the same key from the previous iter, so
2198             // we have multiple entries in the write batch for the same key
2199           }
2200           if (rnd.OneIn(2)) {
2201             v = RandomString(&rnd, rnd.Uniform(10));
2202             b.Put(k, v);
2203           } else {
2204             b.Delete(k);
2205           }
2206         }
2207         ASSERT_OK(model.Write(WriteOptions(), &b));
2208         ASSERT_OK(db_->Write(WriteOptions(), &b));
2209       }
2210 
2211       if ((step % 100) == 0) {
2212         ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
2213         ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
2214         // Save a snapshot from each DB this time that we'll use next
2215         // time we compare things, to make sure the current state is
2216         // preserved with the snapshot
2217         if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
2218         if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
2219 
2220         Reopen();
2221         ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
2222 
2223         model_snap = model.GetSnapshot();
2224         db_snap = db_->GetSnapshot();
2225       }
2226     }
2227     if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
2228     if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
2229   } while (ChangeOptions());
2230 }
2231 
MakeKey(unsigned int num)2232 std::string MakeKey(unsigned int num) {
2233   char buf[30];
2234   snprintf(buf, sizeof(buf), "%016u", num);
2235   return std::string(buf);
2236 }
2237 
BM_LogAndApply(int iters,int num_base_files)2238 void BM_LogAndApply(int iters, int num_base_files) {
2239   std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
2240   DestroyDB(dbname, Options());
2241 
2242   DB* db = nullptr;
2243   Options opts;
2244   opts.create_if_missing = true;
2245   Status s = DB::Open(opts, dbname, &db);
2246   ASSERT_OK(s);
2247   ASSERT_TRUE(db != nullptr);
2248 
2249   delete db;
2250   db = nullptr;
2251 
2252   Env* env = Env::Default();
2253 
2254   port::Mutex mu;
2255   MutexLock l(&mu);
2256 
2257   InternalKeyComparator cmp(BytewiseComparator());
2258   Options options;
2259   VersionSet vset(dbname, &options, nullptr, &cmp);
2260   bool save_manifest;
2261   ASSERT_OK(vset.Recover(&save_manifest));
2262   VersionEdit vbase;
2263   uint64_t fnum = 1;
2264   for (int i = 0; i < num_base_files; i++) {
2265     InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
2266     InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
2267     vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
2268   }
2269   ASSERT_OK(vset.LogAndApply(&vbase, &mu));
2270 
2271   uint64_t start_micros = env->NowMicros();
2272 
2273   for (int i = 0; i < iters; i++) {
2274     VersionEdit vedit;
2275     vedit.DeleteFile(2, fnum);
2276     InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
2277     InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
2278     vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
2279     vset.LogAndApply(&vedit, &mu);
2280   }
2281   uint64_t stop_micros = env->NowMicros();
2282   unsigned int us = stop_micros - start_micros;
2283   char buf[16];
2284   snprintf(buf, sizeof(buf), "%d", num_base_files);
2285   fprintf(stderr,
2286           "BM_LogAndApply/%-6s   %8d iters : %9u us (%7.0f us / iter)\n", buf,
2287           iters, us, ((float)us) / iters);
2288 }
2289 
2290 }  // namespace leveldb
2291 
main(int argc,char ** argv)2292 int main(int argc, char** argv) {
2293   if (argc > 1 && std::string(argv[1]) == "--benchmark") {
2294     leveldb::BM_LogAndApply(1000, 1);
2295     leveldb::BM_LogAndApply(1000, 100);
2296     leveldb::BM_LogAndApply(1000, 10000);
2297     leveldb::BM_LogAndApply(100, 100000);
2298     return 0;
2299   }
2300 
2301   return leveldb::test::RunAllTests();
2302 }
2303