1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5 #include "leveldb/db.h"
6 #include "leveldb/filter_policy.h"
7 #include "db/db_impl.h"
8 #include "db/filename.h"
9 #include "db/version_set.h"
10 #include "db/write_batch_internal.h"
11 #include "leveldb/cache.h"
12 #include "leveldb/env.h"
13 #include "leveldb/table.h"
14 #include "util/hash.h"
15 #include "util/logging.h"
16 #include "util/mutexlock.h"
17 #include "util/testharness.h"
18 #include "util/testutil.h"
19
20 namespace leveldb {
21
RandomString(Random * rnd,int len)22 static std::string RandomString(Random* rnd, int len) {
23 std::string r;
24 test::RandomString(rnd, len, &r);
25 return r;
26 }
27
28 namespace {
29 class AtomicCounter {
30 private:
31 port::Mutex mu_;
32 int count_;
33 public:
AtomicCounter()34 AtomicCounter() : count_(0) { }
Increment()35 void Increment() {
36 IncrementBy(1);
37 }
IncrementBy(int count)38 void IncrementBy(int count) {
39 MutexLock l(&mu_);
40 count_ += count;
41 }
Read()42 int Read() {
43 MutexLock l(&mu_);
44 return count_;
45 }
Reset()46 void Reset() {
47 MutexLock l(&mu_);
48 count_ = 0;
49 }
50 };
51
DelayMilliseconds(int millis)52 void DelayMilliseconds(int millis) {
53 Env::Default()->SleepForMicroseconds(millis * 1000);
54 }
55 }
56
57 // Special Env used to delay background operations
58 class SpecialEnv : public EnvWrapper {
59 public:
60 // sstable/log Sync() calls are blocked while this pointer is non-NULL.
61 port::AtomicPointer delay_data_sync_;
62
63 // sstable/log Sync() calls return an error.
64 port::AtomicPointer data_sync_error_;
65
66 // Simulate no-space errors while this pointer is non-NULL.
67 port::AtomicPointer no_space_;
68
69 // Simulate non-writable file system while this pointer is non-NULL
70 port::AtomicPointer non_writable_;
71
72 // Force sync of manifest files to fail while this pointer is non-NULL
73 port::AtomicPointer manifest_sync_error_;
74
75 // Force write to manifest files to fail while this pointer is non-NULL
76 port::AtomicPointer manifest_write_error_;
77
78 bool count_random_reads_;
79 AtomicCounter random_read_counter_;
80
SpecialEnv(Env * base)81 explicit SpecialEnv(Env* base) : EnvWrapper(base) {
82 delay_data_sync_.Release_Store(NULL);
83 data_sync_error_.Release_Store(NULL);
84 no_space_.Release_Store(NULL);
85 non_writable_.Release_Store(NULL);
86 count_random_reads_ = false;
87 manifest_sync_error_.Release_Store(NULL);
88 manifest_write_error_.Release_Store(NULL);
89 }
90
NewWritableFile(const std::string & f,WritableFile ** r)91 Status NewWritableFile(const std::string& f, WritableFile** r) {
92 class DataFile : public WritableFile {
93 private:
94 SpecialEnv* env_;
95 WritableFile* base_;
96
97 public:
98 DataFile(SpecialEnv* env, WritableFile* base)
99 : env_(env),
100 base_(base) {
101 }
102 ~DataFile() { delete base_; }
103 Status Append(const Slice& data) {
104 if (env_->no_space_.Acquire_Load() != NULL) {
105 // Drop writes on the floor
106 return Status::OK();
107 } else {
108 return base_->Append(data);
109 }
110 }
111 Status Close() { return base_->Close(); }
112 Status Flush() { return base_->Flush(); }
113 Status Sync() {
114 if (env_->data_sync_error_.Acquire_Load() != NULL) {
115 return Status::IOError("simulated data sync error");
116 }
117 while (env_->delay_data_sync_.Acquire_Load() != NULL) {
118 DelayMilliseconds(100);
119 }
120 return base_->Sync();
121 }
122 };
123 class ManifestFile : public WritableFile {
124 private:
125 SpecialEnv* env_;
126 WritableFile* base_;
127 public:
128 ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) { }
129 ~ManifestFile() { delete base_; }
130 Status Append(const Slice& data) {
131 if (env_->manifest_write_error_.Acquire_Load() != NULL) {
132 return Status::IOError("simulated writer error");
133 } else {
134 return base_->Append(data);
135 }
136 }
137 Status Close() { return base_->Close(); }
138 Status Flush() { return base_->Flush(); }
139 Status Sync() {
140 if (env_->manifest_sync_error_.Acquire_Load() != NULL) {
141 return Status::IOError("simulated sync error");
142 } else {
143 return base_->Sync();
144 }
145 }
146 };
147
148 if (non_writable_.Acquire_Load() != NULL) {
149 return Status::IOError("simulated write error");
150 }
151
152 Status s = target()->NewWritableFile(f, r);
153 if (s.ok()) {
154 if (strstr(f.c_str(), ".ldb") != NULL ||
155 strstr(f.c_str(), ".log") != NULL) {
156 *r = new DataFile(this, *r);
157 } else if (strstr(f.c_str(), "MANIFEST") != NULL) {
158 *r = new ManifestFile(this, *r);
159 }
160 }
161 return s;
162 }
163
NewRandomAccessFile(const std::string & f,RandomAccessFile ** r)164 Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
165 class CountingFile : public RandomAccessFile {
166 private:
167 RandomAccessFile* target_;
168 AtomicCounter* counter_;
169 public:
170 CountingFile(RandomAccessFile* target, AtomicCounter* counter)
171 : target_(target), counter_(counter) {
172 }
173 virtual ~CountingFile() { delete target_; }
174 virtual Status Read(uint64_t offset, size_t n, Slice* result,
175 char* scratch) const {
176 counter_->Increment();
177 return target_->Read(offset, n, result, scratch);
178 }
179 };
180
181 Status s = target()->NewRandomAccessFile(f, r);
182 if (s.ok() && count_random_reads_) {
183 *r = new CountingFile(*r, &random_read_counter_);
184 }
185 return s;
186 }
187 };
188
189 class DBTest {
190 private:
191 const FilterPolicy* filter_policy_;
192
193 // Sequence of option configurations to try
194 enum OptionConfig {
195 kDefault,
196 kFilter,
197 kUncompressed,
198 kEnd
199 };
200 int option_config_;
201
202 public:
203 std::string dbname_;
204 SpecialEnv* env_;
205 DB* db_;
206
207 Options last_options_;
208
DBTest()209 DBTest() : option_config_(kDefault),
210 env_(new SpecialEnv(Env::Default())) {
211 filter_policy_ = NewBloomFilterPolicy(10);
212 dbname_ = test::TmpDir() + "/db_test";
213 DestroyDB(dbname_, Options());
214 db_ = NULL;
215 Reopen();
216 }
217
~DBTest()218 ~DBTest() {
219 delete db_;
220 DestroyDB(dbname_, Options());
221 delete env_;
222 delete filter_policy_;
223 }
224
225 // Switch to a fresh database with the next option configuration to
226 // test. Return false if there are no more configurations to test.
ChangeOptions()227 bool ChangeOptions() {
228 option_config_++;
229 if (option_config_ >= kEnd) {
230 return false;
231 } else {
232 DestroyAndReopen();
233 return true;
234 }
235 }
236
237 // Return the current option configuration.
CurrentOptions()238 Options CurrentOptions() {
239 Options options;
240 switch (option_config_) {
241 case kFilter:
242 options.filter_policy = filter_policy_;
243 break;
244 case kUncompressed:
245 options.compression = kNoCompression;
246 break;
247 default:
248 break;
249 }
250 return options;
251 }
252
dbfull()253 DBImpl* dbfull() {
254 return reinterpret_cast<DBImpl*>(db_);
255 }
256
Reopen(Options * options=NULL)257 void Reopen(Options* options = NULL) {
258 ASSERT_OK(TryReopen(options));
259 }
260
Close()261 void Close() {
262 delete db_;
263 db_ = NULL;
264 }
265
DestroyAndReopen(Options * options=NULL)266 void DestroyAndReopen(Options* options = NULL) {
267 delete db_;
268 db_ = NULL;
269 DestroyDB(dbname_, Options());
270 ASSERT_OK(TryReopen(options));
271 }
272
TryReopen(Options * options)273 Status TryReopen(Options* options) {
274 delete db_;
275 db_ = NULL;
276 Options opts;
277 if (options != NULL) {
278 opts = *options;
279 } else {
280 opts = CurrentOptions();
281 opts.create_if_missing = true;
282 }
283 last_options_ = opts;
284
285 return DB::Open(opts, dbname_, &db_);
286 }
287
Put(const std::string & k,const std::string & v)288 Status Put(const std::string& k, const std::string& v) {
289 return db_->Put(WriteOptions(), k, v);
290 }
291
Delete(const std::string & k)292 Status Delete(const std::string& k) {
293 return db_->Delete(WriteOptions(), k);
294 }
295
Get(const std::string & k,const Snapshot * snapshot=NULL)296 std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
297 ReadOptions options;
298 options.snapshot = snapshot;
299 std::string result;
300 Status s = db_->Get(options, k, &result);
301 if (s.IsNotFound()) {
302 result = "NOT_FOUND";
303 } else if (!s.ok()) {
304 result = s.ToString();
305 }
306 return result;
307 }
308
309 // Return a string that contains all key,value pairs in order,
310 // formatted like "(k1->v1)(k2->v2)".
Contents()311 std::string Contents() {
312 std::vector<std::string> forward;
313 std::string result;
314 Iterator* iter = db_->NewIterator(ReadOptions());
315 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
316 std::string s = IterStatus(iter);
317 result.push_back('(');
318 result.append(s);
319 result.push_back(')');
320 forward.push_back(s);
321 }
322
323 // Check reverse iteration results are the reverse of forward results
324 size_t matched = 0;
325 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
326 ASSERT_LT(matched, forward.size());
327 ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
328 matched++;
329 }
330 ASSERT_EQ(matched, forward.size());
331
332 delete iter;
333 return result;
334 }
335
AllEntriesFor(const Slice & user_key)336 std::string AllEntriesFor(const Slice& user_key) {
337 Iterator* iter = dbfull()->TEST_NewInternalIterator();
338 InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
339 iter->Seek(target.Encode());
340 std::string result;
341 if (!iter->status().ok()) {
342 result = iter->status().ToString();
343 } else {
344 result = "[ ";
345 bool first = true;
346 while (iter->Valid()) {
347 ParsedInternalKey ikey;
348 if (!ParseInternalKey(iter->key(), &ikey)) {
349 result += "CORRUPTED";
350 } else {
351 if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
352 break;
353 }
354 if (!first) {
355 result += ", ";
356 }
357 first = false;
358 switch (ikey.type) {
359 case kTypeValue:
360 result += iter->value().ToString();
361 break;
362 case kTypeDeletion:
363 result += "DEL";
364 break;
365 }
366 }
367 iter->Next();
368 }
369 if (!first) {
370 result += " ";
371 }
372 result += "]";
373 }
374 delete iter;
375 return result;
376 }
377
NumTableFilesAtLevel(int level)378 int NumTableFilesAtLevel(int level) {
379 std::string property;
380 ASSERT_TRUE(
381 db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
382 &property));
383 return atoi(property.c_str());
384 }
385
TotalTableFiles()386 int TotalTableFiles() {
387 int result = 0;
388 for (int level = 0; level < config::kNumLevels; level++) {
389 result += NumTableFilesAtLevel(level);
390 }
391 return result;
392 }
393
394 // Return spread of files per level
FilesPerLevel()395 std::string FilesPerLevel() {
396 std::string result;
397 int last_non_zero_offset = 0;
398 for (int level = 0; level < config::kNumLevels; level++) {
399 int f = NumTableFilesAtLevel(level);
400 char buf[100];
401 snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
402 result += buf;
403 if (f > 0) {
404 last_non_zero_offset = result.size();
405 }
406 }
407 result.resize(last_non_zero_offset);
408 return result;
409 }
410
CountFiles()411 int CountFiles() {
412 std::vector<std::string> files;
413 env_->GetChildren(dbname_, &files);
414 return static_cast<int>(files.size());
415 }
416
Size(const Slice & start,const Slice & limit)417 uint64_t Size(const Slice& start, const Slice& limit) {
418 Range r(start, limit);
419 uint64_t size;
420 db_->GetApproximateSizes(&r, 1, &size);
421 return size;
422 }
423
Compact(const Slice & start,const Slice & limit)424 void Compact(const Slice& start, const Slice& limit) {
425 db_->CompactRange(&start, &limit);
426 }
427
428 // Do n memtable compactions, each of which produces an sstable
429 // covering the range [small,large].
MakeTables(int n,const std::string & small,const std::string & large)430 void MakeTables(int n, const std::string& small, const std::string& large) {
431 for (int i = 0; i < n; i++) {
432 Put(small, "begin");
433 Put(large, "end");
434 dbfull()->TEST_CompactMemTable();
435 }
436 }
437
438 // Prevent pushing of new sstables into deeper levels by adding
439 // tables that cover a specified range to all levels.
FillLevels(const std::string & smallest,const std::string & largest)440 void FillLevels(const std::string& smallest, const std::string& largest) {
441 MakeTables(config::kNumLevels, smallest, largest);
442 }
443
DumpFileCounts(const char * label)444 void DumpFileCounts(const char* label) {
445 fprintf(stderr, "---\n%s:\n", label);
446 fprintf(stderr, "maxoverlap: %lld\n",
447 static_cast<long long>(
448 dbfull()->TEST_MaxNextLevelOverlappingBytes()));
449 for (int level = 0; level < config::kNumLevels; level++) {
450 int num = NumTableFilesAtLevel(level);
451 if (num > 0) {
452 fprintf(stderr, " level %3d : %d files\n", level, num);
453 }
454 }
455 }
456
DumpSSTableList()457 std::string DumpSSTableList() {
458 std::string property;
459 db_->GetProperty("leveldb.sstables", &property);
460 return property;
461 }
462
IterStatus(Iterator * iter)463 std::string IterStatus(Iterator* iter) {
464 std::string result;
465 if (iter->Valid()) {
466 result = iter->key().ToString() + "->" + iter->value().ToString();
467 } else {
468 result = "(invalid)";
469 }
470 return result;
471 }
472
DeleteAnSSTFile()473 bool DeleteAnSSTFile() {
474 std::vector<std::string> filenames;
475 ASSERT_OK(env_->GetChildren(dbname_, &filenames));
476 uint64_t number;
477 FileType type;
478 for (size_t i = 0; i < filenames.size(); i++) {
479 if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
480 ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
481 return true;
482 }
483 }
484 return false;
485 }
486
487 // Returns number of files renamed.
RenameLDBToSST()488 int RenameLDBToSST() {
489 std::vector<std::string> filenames;
490 ASSERT_OK(env_->GetChildren(dbname_, &filenames));
491 uint64_t number;
492 FileType type;
493 int files_renamed = 0;
494 for (size_t i = 0; i < filenames.size(); i++) {
495 if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
496 const std::string from = TableFileName(dbname_, number);
497 const std::string to = SSTTableFileName(dbname_, number);
498 ASSERT_OK(env_->RenameFile(from, to));
499 files_renamed++;
500 }
501 }
502 return files_renamed;
503 }
504 };
505
TEST(DBTest,Empty)506 TEST(DBTest, Empty) {
507 do {
508 ASSERT_TRUE(db_ != NULL);
509 ASSERT_EQ("NOT_FOUND", Get("foo"));
510 } while (ChangeOptions());
511 }
512
TEST(DBTest,ReadWrite)513 TEST(DBTest, ReadWrite) {
514 do {
515 ASSERT_OK(Put("foo", "v1"));
516 ASSERT_EQ("v1", Get("foo"));
517 ASSERT_OK(Put("bar", "v2"));
518 ASSERT_OK(Put("foo", "v3"));
519 ASSERT_EQ("v3", Get("foo"));
520 ASSERT_EQ("v2", Get("bar"));
521 } while (ChangeOptions());
522 }
523
TEST(DBTest,PutDeleteGet)524 TEST(DBTest, PutDeleteGet) {
525 do {
526 ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
527 ASSERT_EQ("v1", Get("foo"));
528 ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
529 ASSERT_EQ("v2", Get("foo"));
530 ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
531 ASSERT_EQ("NOT_FOUND", Get("foo"));
532 } while (ChangeOptions());
533 }
534
TEST(DBTest,GetFromImmutableLayer)535 TEST(DBTest, GetFromImmutableLayer) {
536 do {
537 Options options = CurrentOptions();
538 options.env = env_;
539 options.write_buffer_size = 100000; // Small write buffer
540 Reopen(&options);
541
542 ASSERT_OK(Put("foo", "v1"));
543 ASSERT_EQ("v1", Get("foo"));
544
545 env_->delay_data_sync_.Release_Store(env_); // Block sync calls
546 Put("k1", std::string(100000, 'x')); // Fill memtable
547 Put("k2", std::string(100000, 'y')); // Trigger compaction
548 ASSERT_EQ("v1", Get("foo"));
549 env_->delay_data_sync_.Release_Store(NULL); // Release sync calls
550 } while (ChangeOptions());
551 }
552
TEST(DBTest,GetFromVersions)553 TEST(DBTest, GetFromVersions) {
554 do {
555 ASSERT_OK(Put("foo", "v1"));
556 dbfull()->TEST_CompactMemTable();
557 ASSERT_EQ("v1", Get("foo"));
558 } while (ChangeOptions());
559 }
560
TEST(DBTest,GetSnapshot)561 TEST(DBTest, GetSnapshot) {
562 do {
563 // Try with both a short key and a long key
564 for (int i = 0; i < 2; i++) {
565 std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
566 ASSERT_OK(Put(key, "v1"));
567 const Snapshot* s1 = db_->GetSnapshot();
568 ASSERT_OK(Put(key, "v2"));
569 ASSERT_EQ("v2", Get(key));
570 ASSERT_EQ("v1", Get(key, s1));
571 dbfull()->TEST_CompactMemTable();
572 ASSERT_EQ("v2", Get(key));
573 ASSERT_EQ("v1", Get(key, s1));
574 db_->ReleaseSnapshot(s1);
575 }
576 } while (ChangeOptions());
577 }
578
TEST(DBTest,GetLevel0Ordering)579 TEST(DBTest, GetLevel0Ordering) {
580 do {
581 // Check that we process level-0 files in correct order. The code
582 // below generates two level-0 files where the earlier one comes
583 // before the later one in the level-0 file list since the earlier
584 // one has a smaller "smallest" key.
585 ASSERT_OK(Put("bar", "b"));
586 ASSERT_OK(Put("foo", "v1"));
587 dbfull()->TEST_CompactMemTable();
588 ASSERT_OK(Put("foo", "v2"));
589 dbfull()->TEST_CompactMemTable();
590 ASSERT_EQ("v2", Get("foo"));
591 } while (ChangeOptions());
592 }
593
TEST(DBTest,GetOrderedByLevels)594 TEST(DBTest, GetOrderedByLevels) {
595 do {
596 ASSERT_OK(Put("foo", "v1"));
597 Compact("a", "z");
598 ASSERT_EQ("v1", Get("foo"));
599 ASSERT_OK(Put("foo", "v2"));
600 ASSERT_EQ("v2", Get("foo"));
601 dbfull()->TEST_CompactMemTable();
602 ASSERT_EQ("v2", Get("foo"));
603 } while (ChangeOptions());
604 }
605
TEST(DBTest,GetPicksCorrectFile)606 TEST(DBTest, GetPicksCorrectFile) {
607 do {
608 // Arrange to have multiple files in a non-level-0 level.
609 ASSERT_OK(Put("a", "va"));
610 Compact("a", "b");
611 ASSERT_OK(Put("x", "vx"));
612 Compact("x", "y");
613 ASSERT_OK(Put("f", "vf"));
614 Compact("f", "g");
615 ASSERT_EQ("va", Get("a"));
616 ASSERT_EQ("vf", Get("f"));
617 ASSERT_EQ("vx", Get("x"));
618 } while (ChangeOptions());
619 }
620
TEST(DBTest,GetEncountersEmptyLevel)621 TEST(DBTest, GetEncountersEmptyLevel) {
622 do {
623 // Arrange for the following to happen:
624 // * sstable A in level 0
625 // * nothing in level 1
626 // * sstable B in level 2
627 // Then do enough Get() calls to arrange for an automatic compaction
628 // of sstable A. A bug would cause the compaction to be marked as
629 // occurring at level 1 (instead of the correct level 0).
630
631 // Step 1: First place sstables in levels 0 and 2
632 int compaction_count = 0;
633 while (NumTableFilesAtLevel(0) == 0 ||
634 NumTableFilesAtLevel(2) == 0) {
635 ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
636 compaction_count++;
637 Put("a", "begin");
638 Put("z", "end");
639 dbfull()->TEST_CompactMemTable();
640 }
641
642 // Step 2: clear level 1 if necessary.
643 dbfull()->TEST_CompactRange(1, NULL, NULL);
644 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
645 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
646 ASSERT_EQ(NumTableFilesAtLevel(2), 1);
647
648 // Step 3: read a bunch of times
649 for (int i = 0; i < 1000; i++) {
650 ASSERT_EQ("NOT_FOUND", Get("missing"));
651 }
652
653 // Step 4: Wait for compaction to finish
654 DelayMilliseconds(1000);
655
656 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
657 } while (ChangeOptions());
658 }
659
TEST(DBTest,IterEmpty)660 TEST(DBTest, IterEmpty) {
661 Iterator* iter = db_->NewIterator(ReadOptions());
662
663 iter->SeekToFirst();
664 ASSERT_EQ(IterStatus(iter), "(invalid)");
665
666 iter->SeekToLast();
667 ASSERT_EQ(IterStatus(iter), "(invalid)");
668
669 iter->Seek("foo");
670 ASSERT_EQ(IterStatus(iter), "(invalid)");
671
672 delete iter;
673 }
674
TEST(DBTest,IterSingle)675 TEST(DBTest, IterSingle) {
676 ASSERT_OK(Put("a", "va"));
677 Iterator* iter = db_->NewIterator(ReadOptions());
678
679 iter->SeekToFirst();
680 ASSERT_EQ(IterStatus(iter), "a->va");
681 iter->Next();
682 ASSERT_EQ(IterStatus(iter), "(invalid)");
683 iter->SeekToFirst();
684 ASSERT_EQ(IterStatus(iter), "a->va");
685 iter->Prev();
686 ASSERT_EQ(IterStatus(iter), "(invalid)");
687
688 iter->SeekToLast();
689 ASSERT_EQ(IterStatus(iter), "a->va");
690 iter->Next();
691 ASSERT_EQ(IterStatus(iter), "(invalid)");
692 iter->SeekToLast();
693 ASSERT_EQ(IterStatus(iter), "a->va");
694 iter->Prev();
695 ASSERT_EQ(IterStatus(iter), "(invalid)");
696
697 iter->Seek("");
698 ASSERT_EQ(IterStatus(iter), "a->va");
699 iter->Next();
700 ASSERT_EQ(IterStatus(iter), "(invalid)");
701
702 iter->Seek("a");
703 ASSERT_EQ(IterStatus(iter), "a->va");
704 iter->Next();
705 ASSERT_EQ(IterStatus(iter), "(invalid)");
706
707 iter->Seek("b");
708 ASSERT_EQ(IterStatus(iter), "(invalid)");
709
710 delete iter;
711 }
712
TEST(DBTest,IterMulti)713 TEST(DBTest, IterMulti) {
714 ASSERT_OK(Put("a", "va"));
715 ASSERT_OK(Put("b", "vb"));
716 ASSERT_OK(Put("c", "vc"));
717 Iterator* iter = db_->NewIterator(ReadOptions());
718
719 iter->SeekToFirst();
720 ASSERT_EQ(IterStatus(iter), "a->va");
721 iter->Next();
722 ASSERT_EQ(IterStatus(iter), "b->vb");
723 iter->Next();
724 ASSERT_EQ(IterStatus(iter), "c->vc");
725 iter->Next();
726 ASSERT_EQ(IterStatus(iter), "(invalid)");
727 iter->SeekToFirst();
728 ASSERT_EQ(IterStatus(iter), "a->va");
729 iter->Prev();
730 ASSERT_EQ(IterStatus(iter), "(invalid)");
731
732 iter->SeekToLast();
733 ASSERT_EQ(IterStatus(iter), "c->vc");
734 iter->Prev();
735 ASSERT_EQ(IterStatus(iter), "b->vb");
736 iter->Prev();
737 ASSERT_EQ(IterStatus(iter), "a->va");
738 iter->Prev();
739 ASSERT_EQ(IterStatus(iter), "(invalid)");
740 iter->SeekToLast();
741 ASSERT_EQ(IterStatus(iter), "c->vc");
742 iter->Next();
743 ASSERT_EQ(IterStatus(iter), "(invalid)");
744
745 iter->Seek("");
746 ASSERT_EQ(IterStatus(iter), "a->va");
747 iter->Seek("a");
748 ASSERT_EQ(IterStatus(iter), "a->va");
749 iter->Seek("ax");
750 ASSERT_EQ(IterStatus(iter), "b->vb");
751 iter->Seek("b");
752 ASSERT_EQ(IterStatus(iter), "b->vb");
753 iter->Seek("z");
754 ASSERT_EQ(IterStatus(iter), "(invalid)");
755
756 // Switch from reverse to forward
757 iter->SeekToLast();
758 iter->Prev();
759 iter->Prev();
760 iter->Next();
761 ASSERT_EQ(IterStatus(iter), "b->vb");
762
763 // Switch from forward to reverse
764 iter->SeekToFirst();
765 iter->Next();
766 iter->Next();
767 iter->Prev();
768 ASSERT_EQ(IterStatus(iter), "b->vb");
769
770 // Make sure iter stays at snapshot
771 ASSERT_OK(Put("a", "va2"));
772 ASSERT_OK(Put("a2", "va3"));
773 ASSERT_OK(Put("b", "vb2"));
774 ASSERT_OK(Put("c", "vc2"));
775 ASSERT_OK(Delete("b"));
776 iter->SeekToFirst();
777 ASSERT_EQ(IterStatus(iter), "a->va");
778 iter->Next();
779 ASSERT_EQ(IterStatus(iter), "b->vb");
780 iter->Next();
781 ASSERT_EQ(IterStatus(iter), "c->vc");
782 iter->Next();
783 ASSERT_EQ(IterStatus(iter), "(invalid)");
784 iter->SeekToLast();
785 ASSERT_EQ(IterStatus(iter), "c->vc");
786 iter->Prev();
787 ASSERT_EQ(IterStatus(iter), "b->vb");
788 iter->Prev();
789 ASSERT_EQ(IterStatus(iter), "a->va");
790 iter->Prev();
791 ASSERT_EQ(IterStatus(iter), "(invalid)");
792
793 delete iter;
794 }
795
TEST(DBTest,IterSmallAndLargeMix)796 TEST(DBTest, IterSmallAndLargeMix) {
797 ASSERT_OK(Put("a", "va"));
798 ASSERT_OK(Put("b", std::string(100000, 'b')));
799 ASSERT_OK(Put("c", "vc"));
800 ASSERT_OK(Put("d", std::string(100000, 'd')));
801 ASSERT_OK(Put("e", std::string(100000, 'e')));
802
803 Iterator* iter = db_->NewIterator(ReadOptions());
804
805 iter->SeekToFirst();
806 ASSERT_EQ(IterStatus(iter), "a->va");
807 iter->Next();
808 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
809 iter->Next();
810 ASSERT_EQ(IterStatus(iter), "c->vc");
811 iter->Next();
812 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
813 iter->Next();
814 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
815 iter->Next();
816 ASSERT_EQ(IterStatus(iter), "(invalid)");
817
818 iter->SeekToLast();
819 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
820 iter->Prev();
821 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
822 iter->Prev();
823 ASSERT_EQ(IterStatus(iter), "c->vc");
824 iter->Prev();
825 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
826 iter->Prev();
827 ASSERT_EQ(IterStatus(iter), "a->va");
828 iter->Prev();
829 ASSERT_EQ(IterStatus(iter), "(invalid)");
830
831 delete iter;
832 }
833
TEST(DBTest,IterMultiWithDelete)834 TEST(DBTest, IterMultiWithDelete) {
835 do {
836 ASSERT_OK(Put("a", "va"));
837 ASSERT_OK(Put("b", "vb"));
838 ASSERT_OK(Put("c", "vc"));
839 ASSERT_OK(Delete("b"));
840 ASSERT_EQ("NOT_FOUND", Get("b"));
841
842 Iterator* iter = db_->NewIterator(ReadOptions());
843 iter->Seek("c");
844 ASSERT_EQ(IterStatus(iter), "c->vc");
845 iter->Prev();
846 ASSERT_EQ(IterStatus(iter), "a->va");
847 delete iter;
848 } while (ChangeOptions());
849 }
850
TEST(DBTest,Recover)851 TEST(DBTest, Recover) {
852 do {
853 ASSERT_OK(Put("foo", "v1"));
854 ASSERT_OK(Put("baz", "v5"));
855
856 Reopen();
857 ASSERT_EQ("v1", Get("foo"));
858
859 ASSERT_EQ("v1", Get("foo"));
860 ASSERT_EQ("v5", Get("baz"));
861 ASSERT_OK(Put("bar", "v2"));
862 ASSERT_OK(Put("foo", "v3"));
863
864 Reopen();
865 ASSERT_EQ("v3", Get("foo"));
866 ASSERT_OK(Put("foo", "v4"));
867 ASSERT_EQ("v4", Get("foo"));
868 ASSERT_EQ("v2", Get("bar"));
869 ASSERT_EQ("v5", Get("baz"));
870 } while (ChangeOptions());
871 }
872
TEST(DBTest,RecoveryWithEmptyLog)873 TEST(DBTest, RecoveryWithEmptyLog) {
874 do {
875 ASSERT_OK(Put("foo", "v1"));
876 ASSERT_OK(Put("foo", "v2"));
877 Reopen();
878 Reopen();
879 ASSERT_OK(Put("foo", "v3"));
880 Reopen();
881 ASSERT_EQ("v3", Get("foo"));
882 } while (ChangeOptions());
883 }
884
885 // Check that writes done during a memtable compaction are recovered
886 // if the database is shutdown during the memtable compaction.
TEST(DBTest,RecoverDuringMemtableCompaction)887 TEST(DBTest, RecoverDuringMemtableCompaction) {
888 do {
889 Options options = CurrentOptions();
890 options.env = env_;
891 options.write_buffer_size = 1000000;
892 Reopen(&options);
893
894 // Trigger a long memtable compaction and reopen the database during it
895 ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
896 ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
897 ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
898 ASSERT_OK(Put("bar", "v2")); // Goes to new log file
899
900 Reopen(&options);
901 ASSERT_EQ("v1", Get("foo"));
902 ASSERT_EQ("v2", Get("bar"));
903 ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
904 ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
905 } while (ChangeOptions());
906 }
907
Key(int i)908 static std::string Key(int i) {
909 char buf[100];
910 snprintf(buf, sizeof(buf), "key%06d", i);
911 return std::string(buf);
912 }
913
TEST(DBTest,MinorCompactionsHappen)914 TEST(DBTest, MinorCompactionsHappen) {
915 Options options = CurrentOptions();
916 options.write_buffer_size = 10000;
917 Reopen(&options);
918
919 const int N = 500;
920
921 int starting_num_tables = TotalTableFiles();
922 for (int i = 0; i < N; i++) {
923 ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
924 }
925 int ending_num_tables = TotalTableFiles();
926 ASSERT_GT(ending_num_tables, starting_num_tables);
927
928 for (int i = 0; i < N; i++) {
929 ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
930 }
931
932 Reopen();
933
934 for (int i = 0; i < N; i++) {
935 ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
936 }
937 }
938
TEST(DBTest,RecoverWithLargeLog)939 TEST(DBTest, RecoverWithLargeLog) {
940 {
941 Options options = CurrentOptions();
942 Reopen(&options);
943 ASSERT_OK(Put("big1", std::string(200000, '1')));
944 ASSERT_OK(Put("big2", std::string(200000, '2')));
945 ASSERT_OK(Put("small3", std::string(10, '3')));
946 ASSERT_OK(Put("small4", std::string(10, '4')));
947 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
948 }
949
950 // Make sure that if we re-open with a small write buffer size that
951 // we flush table files in the middle of a large log file.
952 Options options = CurrentOptions();
953 options.write_buffer_size = 100000;
954 Reopen(&options);
955 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
956 ASSERT_EQ(std::string(200000, '1'), Get("big1"));
957 ASSERT_EQ(std::string(200000, '2'), Get("big2"));
958 ASSERT_EQ(std::string(10, '3'), Get("small3"));
959 ASSERT_EQ(std::string(10, '4'), Get("small4"));
960 ASSERT_GT(NumTableFilesAtLevel(0), 1);
961 }
962
TEST(DBTest,CompactionsGenerateMultipleFiles)963 TEST(DBTest, CompactionsGenerateMultipleFiles) {
964 Options options = CurrentOptions();
965 options.write_buffer_size = 100000000; // Large write buffer
966 Reopen(&options);
967
968 Random rnd(301);
969
970 // Write 8MB (80 values, each 100K)
971 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
972 std::vector<std::string> values;
973 for (int i = 0; i < 80; i++) {
974 values.push_back(RandomString(&rnd, 100000));
975 ASSERT_OK(Put(Key(i), values[i]));
976 }
977
978 // Reopening moves updates to level-0
979 Reopen(&options);
980 dbfull()->TEST_CompactRange(0, NULL, NULL);
981
982 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
983 ASSERT_GT(NumTableFilesAtLevel(1), 1);
984 for (int i = 0; i < 80; i++) {
985 ASSERT_EQ(Get(Key(i)), values[i]);
986 }
987 }
988
TEST(DBTest,RepeatedWritesToSameKey)989 TEST(DBTest, RepeatedWritesToSameKey) {
990 Options options = CurrentOptions();
991 options.env = env_;
992 options.write_buffer_size = 100000; // Small write buffer
993 Reopen(&options);
994
995 // We must have at most one file per level except for level-0,
996 // which may have up to kL0_StopWritesTrigger files.
997 const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
998
999 Random rnd(301);
1000 std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
1001 for (int i = 0; i < 5 * kMaxFiles; i++) {
1002 Put("key", value);
1003 ASSERT_LE(TotalTableFiles(), kMaxFiles);
1004 fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
1005 }
1006 }
1007
TEST(DBTest,SparseMerge)1008 TEST(DBTest, SparseMerge) {
1009 Options options = CurrentOptions();
1010 options.compression = kNoCompression;
1011 Reopen(&options);
1012
1013 FillLevels("A", "Z");
1014
1015 // Suppose there is:
1016 // small amount of data with prefix A
1017 // large amount of data with prefix B
1018 // small amount of data with prefix C
1019 // and that recent updates have made small changes to all three prefixes.
1020 // Check that we do not do a compaction that merges all of B in one shot.
1021 const std::string value(1000, 'x');
1022 Put("A", "va");
1023 // Write approximately 100MB of "B" values
1024 for (int i = 0; i < 100000; i++) {
1025 char key[100];
1026 snprintf(key, sizeof(key), "B%010d", i);
1027 Put(key, value);
1028 }
1029 Put("C", "vc");
1030 dbfull()->TEST_CompactMemTable();
1031 dbfull()->TEST_CompactRange(0, NULL, NULL);
1032
1033 // Make sparse update
1034 Put("A", "va2");
1035 Put("B100", "bvalue2");
1036 Put("C", "vc2");
1037 dbfull()->TEST_CompactMemTable();
1038
1039 // Compactions should not cause us to create a situation where
1040 // a file overlaps too much data at the next level.
1041 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
1042 dbfull()->TEST_CompactRange(0, NULL, NULL);
1043 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
1044 dbfull()->TEST_CompactRange(1, NULL, NULL);
1045 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
1046 }
1047
Between(uint64_t val,uint64_t low,uint64_t high)1048 static bool Between(uint64_t val, uint64_t low, uint64_t high) {
1049 bool result = (val >= low) && (val <= high);
1050 if (!result) {
1051 fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
1052 (unsigned long long)(val),
1053 (unsigned long long)(low),
1054 (unsigned long long)(high));
1055 }
1056 return result;
1057 }
1058
TEST(DBTest,ApproximateSizes)1059 TEST(DBTest, ApproximateSizes) {
1060 do {
1061 Options options = CurrentOptions();
1062 options.write_buffer_size = 100000000; // Large write buffer
1063 options.compression = kNoCompression;
1064 DestroyAndReopen();
1065
1066 ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
1067 Reopen(&options);
1068 ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
1069
1070 // Write 8MB (80 values, each 100K)
1071 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1072 const int N = 80;
1073 static const int S1 = 100000;
1074 static const int S2 = 105000; // Allow some expansion from metadata
1075 Random rnd(301);
1076 for (int i = 0; i < N; i++) {
1077 ASSERT_OK(Put(Key(i), RandomString(&rnd, S1)));
1078 }
1079
1080 // 0 because GetApproximateSizes() does not account for memtable space
1081 ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
1082
1083 // Check sizes across recovery by reopening a few times
1084 for (int run = 0; run < 3; run++) {
1085 Reopen(&options);
1086
1087 for (int compact_start = 0; compact_start < N; compact_start += 10) {
1088 for (int i = 0; i < N; i += 10) {
1089 ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i));
1090 ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1)));
1091 ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10));
1092 }
1093 ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50));
1094 ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50));
1095
1096 std::string cstart_str = Key(compact_start);
1097 std::string cend_str = Key(compact_start + 9);
1098 Slice cstart = cstart_str;
1099 Slice cend = cend_str;
1100 dbfull()->TEST_CompactRange(0, &cstart, &cend);
1101 }
1102
1103 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1104 ASSERT_GT(NumTableFilesAtLevel(1), 0);
1105 }
1106 } while (ChangeOptions());
1107 }
1108
TEST(DBTest,ApproximateSizes_MixOfSmallAndLarge)1109 TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
1110 do {
1111 Options options = CurrentOptions();
1112 options.compression = kNoCompression;
1113 Reopen();
1114
1115 Random rnd(301);
1116 std::string big1 = RandomString(&rnd, 100000);
1117 ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
1118 ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
1119 ASSERT_OK(Put(Key(2), big1));
1120 ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
1121 ASSERT_OK(Put(Key(4), big1));
1122 ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
1123 ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
1124 ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
1125
1126 // Check sizes across recovery by reopening a few times
1127 for (int run = 0; run < 3; run++) {
1128 Reopen(&options);
1129
1130 ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
1131 ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
1132 ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
1133 ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
1134 ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
1135 ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
1136 ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
1137 ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
1138 ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));
1139
1140 ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
1141
1142 dbfull()->TEST_CompactRange(0, NULL, NULL);
1143 }
1144 } while (ChangeOptions());
1145 }
1146
TEST(DBTest,IteratorPinsRef)1147 TEST(DBTest, IteratorPinsRef) {
1148 Put("foo", "hello");
1149
1150 // Get iterator that will yield the current contents of the DB.
1151 Iterator* iter = db_->NewIterator(ReadOptions());
1152
1153 // Write to force compactions
1154 Put("foo", "newvalue1");
1155 for (int i = 0; i < 100; i++) {
1156 ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
1157 }
1158 Put("foo", "newvalue2");
1159
1160 iter->SeekToFirst();
1161 ASSERT_TRUE(iter->Valid());
1162 ASSERT_EQ("foo", iter->key().ToString());
1163 ASSERT_EQ("hello", iter->value().ToString());
1164 iter->Next();
1165 ASSERT_TRUE(!iter->Valid());
1166 delete iter;
1167 }
1168
TEST(DBTest,Snapshot)1169 TEST(DBTest, Snapshot) {
1170 do {
1171 Put("foo", "v1");
1172 const Snapshot* s1 = db_->GetSnapshot();
1173 Put("foo", "v2");
1174 const Snapshot* s2 = db_->GetSnapshot();
1175 Put("foo", "v3");
1176 const Snapshot* s3 = db_->GetSnapshot();
1177
1178 Put("foo", "v4");
1179 ASSERT_EQ("v1", Get("foo", s1));
1180 ASSERT_EQ("v2", Get("foo", s2));
1181 ASSERT_EQ("v3", Get("foo", s3));
1182 ASSERT_EQ("v4", Get("foo"));
1183
1184 db_->ReleaseSnapshot(s3);
1185 ASSERT_EQ("v1", Get("foo", s1));
1186 ASSERT_EQ("v2", Get("foo", s2));
1187 ASSERT_EQ("v4", Get("foo"));
1188
1189 db_->ReleaseSnapshot(s1);
1190 ASSERT_EQ("v2", Get("foo", s2));
1191 ASSERT_EQ("v4", Get("foo"));
1192
1193 db_->ReleaseSnapshot(s2);
1194 ASSERT_EQ("v4", Get("foo"));
1195 } while (ChangeOptions());
1196 }
1197
TEST(DBTest,HiddenValuesAreRemoved)1198 TEST(DBTest, HiddenValuesAreRemoved) {
1199 do {
1200 Random rnd(301);
1201 FillLevels("a", "z");
1202
1203 std::string big = RandomString(&rnd, 50000);
1204 Put("foo", big);
1205 Put("pastfoo", "v");
1206 const Snapshot* snapshot = db_->GetSnapshot();
1207 Put("foo", "tiny");
1208 Put("pastfoo2", "v2"); // Advance sequence number one more
1209
1210 ASSERT_OK(dbfull()->TEST_CompactMemTable());
1211 ASSERT_GT(NumTableFilesAtLevel(0), 0);
1212
1213 ASSERT_EQ(big, Get("foo", snapshot));
1214 ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
1215 db_->ReleaseSnapshot(snapshot);
1216 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
1217 Slice x("x");
1218 dbfull()->TEST_CompactRange(0, NULL, &x);
1219 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
1220 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1221 ASSERT_GE(NumTableFilesAtLevel(1), 1);
1222 dbfull()->TEST_CompactRange(1, NULL, &x);
1223 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
1224
1225 ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
1226 } while (ChangeOptions());
1227 }
1228
TEST(DBTest,DeletionMarkers1)1229 TEST(DBTest, DeletionMarkers1) {
1230 Put("foo", "v1");
1231 ASSERT_OK(dbfull()->TEST_CompactMemTable());
1232 const int last = config::kMaxMemCompactLevel;
1233 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
1234
1235 // Place a table at level last-1 to prevent merging with preceding mutation
1236 Put("a", "begin");
1237 Put("z", "end");
1238 dbfull()->TEST_CompactMemTable();
1239 ASSERT_EQ(NumTableFilesAtLevel(last), 1);
1240 ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
1241
1242 Delete("foo");
1243 Put("foo", "v2");
1244 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
1245 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
1246 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
1247 Slice z("z");
1248 dbfull()->TEST_CompactRange(last-2, NULL, &z);
1249 // DEL eliminated, but v1 remains because we aren't compacting that level
1250 // (DEL can be eliminated because v2 hides v1).
1251 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
1252 dbfull()->TEST_CompactRange(last-1, NULL, NULL);
1253 // Merging last-1 w/ last, so we are the base level for "foo", so
1254 // DEL is removed. (as is v1).
1255 ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
1256 }
1257
TEST(DBTest,DeletionMarkers2)1258 TEST(DBTest, DeletionMarkers2) {
1259 Put("foo", "v1");
1260 ASSERT_OK(dbfull()->TEST_CompactMemTable());
1261 const int last = config::kMaxMemCompactLevel;
1262 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
1263
1264 // Place a table at level last-1 to prevent merging with preceding mutation
1265 Put("a", "begin");
1266 Put("z", "end");
1267 dbfull()->TEST_CompactMemTable();
1268 ASSERT_EQ(NumTableFilesAtLevel(last), 1);
1269 ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
1270
1271 Delete("foo");
1272 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1273 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
1274 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1275 dbfull()->TEST_CompactRange(last-2, NULL, NULL);
1276 // DEL kept: "last" file overlaps
1277 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1278 dbfull()->TEST_CompactRange(last-1, NULL, NULL);
1279 // Merging last-1 w/ last, so we are the base level for "foo", so
1280 // DEL is removed. (as is v1).
1281 ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
1282 }
1283
TEST(DBTest,OverlapInLevel0)1284 TEST(DBTest, OverlapInLevel0) {
1285 do {
1286 ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
1287
1288 // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
1289 ASSERT_OK(Put("100", "v100"));
1290 ASSERT_OK(Put("999", "v999"));
1291 dbfull()->TEST_CompactMemTable();
1292 ASSERT_OK(Delete("100"));
1293 ASSERT_OK(Delete("999"));
1294 dbfull()->TEST_CompactMemTable();
1295 ASSERT_EQ("0,1,1", FilesPerLevel());
1296
1297 // Make files spanning the following ranges in level-0:
1298 // files[0] 200 .. 900
1299 // files[1] 300 .. 500
1300 // Note that files are sorted by smallest key.
1301 ASSERT_OK(Put("300", "v300"));
1302 ASSERT_OK(Put("500", "v500"));
1303 dbfull()->TEST_CompactMemTable();
1304 ASSERT_OK(Put("200", "v200"));
1305 ASSERT_OK(Put("600", "v600"));
1306 ASSERT_OK(Put("900", "v900"));
1307 dbfull()->TEST_CompactMemTable();
1308 ASSERT_EQ("2,1,1", FilesPerLevel());
1309
1310 // Compact away the placeholder files we created initially
1311 dbfull()->TEST_CompactRange(1, NULL, NULL);
1312 dbfull()->TEST_CompactRange(2, NULL, NULL);
1313 ASSERT_EQ("2", FilesPerLevel());
1314
1315 // Do a memtable compaction. Before bug-fix, the compaction would
1316 // not detect the overlap with level-0 files and would incorrectly place
1317 // the deletion in a deeper level.
1318 ASSERT_OK(Delete("600"));
1319 dbfull()->TEST_CompactMemTable();
1320 ASSERT_EQ("3", FilesPerLevel());
1321 ASSERT_EQ("NOT_FOUND", Get("600"));
1322 } while (ChangeOptions());
1323 }
1324
TEST(DBTest,L0_CompactionBug_Issue44_a)1325 TEST(DBTest, L0_CompactionBug_Issue44_a) {
1326 Reopen();
1327 ASSERT_OK(Put("b", "v"));
1328 Reopen();
1329 ASSERT_OK(Delete("b"));
1330 ASSERT_OK(Delete("a"));
1331 Reopen();
1332 ASSERT_OK(Delete("a"));
1333 Reopen();
1334 ASSERT_OK(Put("a", "v"));
1335 Reopen();
1336 Reopen();
1337 ASSERT_EQ("(a->v)", Contents());
1338 DelayMilliseconds(1000); // Wait for compaction to finish
1339 ASSERT_EQ("(a->v)", Contents());
1340 }
1341
TEST(DBTest,L0_CompactionBug_Issue44_b)1342 TEST(DBTest, L0_CompactionBug_Issue44_b) {
1343 Reopen();
1344 Put("","");
1345 Reopen();
1346 Delete("e");
1347 Put("","");
1348 Reopen();
1349 Put("c", "cv");
1350 Reopen();
1351 Put("","");
1352 Reopen();
1353 Put("","");
1354 DelayMilliseconds(1000); // Wait for compaction to finish
1355 Reopen();
1356 Put("d","dv");
1357 Reopen();
1358 Put("","");
1359 Reopen();
1360 Delete("d");
1361 Delete("b");
1362 Reopen();
1363 ASSERT_EQ("(->)(c->cv)", Contents());
1364 DelayMilliseconds(1000); // Wait for compaction to finish
1365 ASSERT_EQ("(->)(c->cv)", Contents());
1366 }
1367
TEST(DBTest,ComparatorCheck)1368 TEST(DBTest, ComparatorCheck) {
1369 class NewComparator : public Comparator {
1370 public:
1371 virtual const char* Name() const { return "leveldb.NewComparator"; }
1372 virtual int Compare(const Slice& a, const Slice& b) const {
1373 return BytewiseComparator()->Compare(a, b);
1374 }
1375 virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
1376 BytewiseComparator()->FindShortestSeparator(s, l);
1377 }
1378 virtual void FindShortSuccessor(std::string* key) const {
1379 BytewiseComparator()->FindShortSuccessor(key);
1380 }
1381 };
1382 NewComparator cmp;
1383 Options new_options = CurrentOptions();
1384 new_options.comparator = &cmp;
1385 Status s = TryReopen(&new_options);
1386 ASSERT_TRUE(!s.ok());
1387 ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
1388 << s.ToString();
1389 }
1390
TEST(DBTest,CustomComparator)1391 TEST(DBTest, CustomComparator) {
1392 class NumberComparator : public Comparator {
1393 public:
1394 virtual const char* Name() const { return "test.NumberComparator"; }
1395 virtual int Compare(const Slice& a, const Slice& b) const {
1396 return ToNumber(a) - ToNumber(b);
1397 }
1398 virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
1399 ToNumber(*s); // Check format
1400 ToNumber(l); // Check format
1401 }
1402 virtual void FindShortSuccessor(std::string* key) const {
1403 ToNumber(*key); // Check format
1404 }
1405 private:
1406 static int ToNumber(const Slice& x) {
1407 // Check that there are no extra characters.
1408 ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']')
1409 << EscapeString(x);
1410 int val;
1411 char ignored;
1412 ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
1413 << EscapeString(x);
1414 return val;
1415 }
1416 };
1417 NumberComparator cmp;
1418 Options new_options = CurrentOptions();
1419 new_options.create_if_missing = true;
1420 new_options.comparator = &cmp;
1421 new_options.filter_policy = NULL; // Cannot use bloom filters
1422 new_options.write_buffer_size = 1000; // Compact more often
1423 DestroyAndReopen(&new_options);
1424 ASSERT_OK(Put("[10]", "ten"));
1425 ASSERT_OK(Put("[0x14]", "twenty"));
1426 for (int i = 0; i < 2; i++) {
1427 ASSERT_EQ("ten", Get("[10]"));
1428 ASSERT_EQ("ten", Get("[0xa]"));
1429 ASSERT_EQ("twenty", Get("[20]"));
1430 ASSERT_EQ("twenty", Get("[0x14]"));
1431 ASSERT_EQ("NOT_FOUND", Get("[15]"));
1432 ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
1433 Compact("[0]", "[9999]");
1434 }
1435
1436 for (int run = 0; run < 2; run++) {
1437 for (int i = 0; i < 1000; i++) {
1438 char buf[100];
1439 snprintf(buf, sizeof(buf), "[%d]", i*10);
1440 ASSERT_OK(Put(buf, buf));
1441 }
1442 Compact("[0]", "[1000000]");
1443 }
1444 }
1445
TEST(DBTest,ManualCompaction)1446 TEST(DBTest, ManualCompaction) {
1447 ASSERT_EQ(config::kMaxMemCompactLevel, 2)
1448 << "Need to update this test to match kMaxMemCompactLevel";
1449
1450 MakeTables(3, "p", "q");
1451 ASSERT_EQ("1,1,1", FilesPerLevel());
1452
1453 // Compaction range falls before files
1454 Compact("", "c");
1455 ASSERT_EQ("1,1,1", FilesPerLevel());
1456
1457 // Compaction range falls after files
1458 Compact("r", "z");
1459 ASSERT_EQ("1,1,1", FilesPerLevel());
1460
1461 // Compaction range overlaps files
1462 Compact("p1", "p9");
1463 ASSERT_EQ("0,0,1", FilesPerLevel());
1464
1465 // Populate a different range
1466 MakeTables(3, "c", "e");
1467 ASSERT_EQ("1,1,2", FilesPerLevel());
1468
1469 // Compact just the new range
1470 Compact("b", "f");
1471 ASSERT_EQ("0,0,2", FilesPerLevel());
1472
1473 // Compact all
1474 MakeTables(1, "a", "z");
1475 ASSERT_EQ("0,1,2", FilesPerLevel());
1476 db_->CompactRange(NULL, NULL);
1477 ASSERT_EQ("0,0,1", FilesPerLevel());
1478 }
1479
TEST(DBTest,DBOpen_Options)1480 TEST(DBTest, DBOpen_Options) {
1481 std::string dbname = test::TmpDir() + "/db_options_test";
1482 DestroyDB(dbname, Options());
1483
1484 // Does not exist, and create_if_missing == false: error
1485 DB* db = NULL;
1486 Options opts;
1487 opts.create_if_missing = false;
1488 Status s = DB::Open(opts, dbname, &db);
1489 ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
1490 ASSERT_TRUE(db == NULL);
1491
1492 // Does not exist, and create_if_missing == true: OK
1493 opts.create_if_missing = true;
1494 s = DB::Open(opts, dbname, &db);
1495 ASSERT_OK(s);
1496 ASSERT_TRUE(db != NULL);
1497
1498 delete db;
1499 db = NULL;
1500
1501 // Does exist, and error_if_exists == true: error
1502 opts.create_if_missing = false;
1503 opts.error_if_exists = true;
1504 s = DB::Open(opts, dbname, &db);
1505 ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
1506 ASSERT_TRUE(db == NULL);
1507
1508 // Does exist, and error_if_exists == false: OK
1509 opts.create_if_missing = true;
1510 opts.error_if_exists = false;
1511 s = DB::Open(opts, dbname, &db);
1512 ASSERT_OK(s);
1513 ASSERT_TRUE(db != NULL);
1514
1515 delete db;
1516 db = NULL;
1517 }
1518
TEST(DBTest,Locking)1519 TEST(DBTest, Locking) {
1520 DB* db2 = NULL;
1521 Status s = DB::Open(CurrentOptions(), dbname_, &db2);
1522 ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
1523 }
1524
1525 // Check that number of files does not grow when we are out of space
TEST(DBTest,NoSpace)1526 TEST(DBTest, NoSpace) {
1527 Options options = CurrentOptions();
1528 options.env = env_;
1529 Reopen(&options);
1530
1531 ASSERT_OK(Put("foo", "v1"));
1532 ASSERT_EQ("v1", Get("foo"));
1533 Compact("a", "z");
1534 const int num_files = CountFiles();
1535 env_->no_space_.Release_Store(env_); // Force out-of-space errors
1536 for (int i = 0; i < 10; i++) {
1537 for (int level = 0; level < config::kNumLevels-1; level++) {
1538 dbfull()->TEST_CompactRange(level, NULL, NULL);
1539 }
1540 }
1541 env_->no_space_.Release_Store(NULL);
1542 ASSERT_LT(CountFiles(), num_files + 3);
1543 }
1544
TEST(DBTest,NonWritableFileSystem)1545 TEST(DBTest, NonWritableFileSystem) {
1546 Options options = CurrentOptions();
1547 options.write_buffer_size = 1000;
1548 options.env = env_;
1549 Reopen(&options);
1550 ASSERT_OK(Put("foo", "v1"));
1551 env_->non_writable_.Release_Store(env_); // Force errors for new files
1552 std::string big(100000, 'x');
1553 int errors = 0;
1554 for (int i = 0; i < 20; i++) {
1555 fprintf(stderr, "iter %d; errors %d\n", i, errors);
1556 if (!Put("foo", big).ok()) {
1557 errors++;
1558 DelayMilliseconds(100);
1559 }
1560 }
1561 ASSERT_GT(errors, 0);
1562 env_->non_writable_.Release_Store(NULL);
1563 }
1564
TEST(DBTest,WriteSyncError)1565 TEST(DBTest, WriteSyncError) {
1566 // Check that log sync errors cause the DB to disallow future writes.
1567
1568 // (a) Cause log sync calls to fail
1569 Options options = CurrentOptions();
1570 options.env = env_;
1571 Reopen(&options);
1572 env_->data_sync_error_.Release_Store(env_);
1573
1574 // (b) Normal write should succeed
1575 WriteOptions w;
1576 ASSERT_OK(db_->Put(w, "k1", "v1"));
1577 ASSERT_EQ("v1", Get("k1"));
1578
1579 // (c) Do a sync write; should fail
1580 w.sync = true;
1581 ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
1582 ASSERT_EQ("v1", Get("k1"));
1583 ASSERT_EQ("NOT_FOUND", Get("k2"));
1584
1585 // (d) make sync behave normally
1586 env_->data_sync_error_.Release_Store(NULL);
1587
1588 // (e) Do a non-sync write; should fail
1589 w.sync = false;
1590 ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
1591 ASSERT_EQ("v1", Get("k1"));
1592 ASSERT_EQ("NOT_FOUND", Get("k2"));
1593 ASSERT_EQ("NOT_FOUND", Get("k3"));
1594 }
1595
TEST(DBTest,ManifestWriteError)1596 TEST(DBTest, ManifestWriteError) {
1597 // Test for the following problem:
1598 // (a) Compaction produces file F
1599 // (b) Log record containing F is written to MANIFEST file, but Sync() fails
1600 // (c) GC deletes F
1601 // (d) After reopening DB, reads fail since deleted F is named in log record
1602
1603 // We iterate twice. In the second iteration, everything is the
1604 // same except the log record never makes it to the MANIFEST file.
1605 for (int iter = 0; iter < 2; iter++) {
1606 port::AtomicPointer* error_type = (iter == 0)
1607 ? &env_->manifest_sync_error_
1608 : &env_->manifest_write_error_;
1609
1610 // Insert foo=>bar mapping
1611 Options options = CurrentOptions();
1612 options.env = env_;
1613 options.create_if_missing = true;
1614 options.error_if_exists = false;
1615 DestroyAndReopen(&options);
1616 ASSERT_OK(Put("foo", "bar"));
1617 ASSERT_EQ("bar", Get("foo"));
1618
1619 // Memtable compaction (will succeed)
1620 dbfull()->TEST_CompactMemTable();
1621 ASSERT_EQ("bar", Get("foo"));
1622 const int last = config::kMaxMemCompactLevel;
1623 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level
1624
1625 // Merging compaction (will fail)
1626 error_type->Release_Store(env_);
1627 dbfull()->TEST_CompactRange(last, NULL, NULL); // Should fail
1628 ASSERT_EQ("bar", Get("foo"));
1629
1630 // Recovery: should not lose data
1631 error_type->Release_Store(NULL);
1632 Reopen(&options);
1633 ASSERT_EQ("bar", Get("foo"));
1634 }
1635 }
1636
TEST(DBTest,MissingSSTFile)1637 TEST(DBTest, MissingSSTFile) {
1638 ASSERT_OK(Put("foo", "bar"));
1639 ASSERT_EQ("bar", Get("foo"));
1640
1641 // Dump the memtable to disk.
1642 dbfull()->TEST_CompactMemTable();
1643 ASSERT_EQ("bar", Get("foo"));
1644
1645 Close();
1646 ASSERT_TRUE(DeleteAnSSTFile());
1647 Options options = CurrentOptions();
1648 options.paranoid_checks = true;
1649 Status s = TryReopen(&options);
1650 ASSERT_TRUE(!s.ok());
1651 ASSERT_TRUE(s.ToString().find("issing") != std::string::npos)
1652 << s.ToString();
1653 }
1654
TEST(DBTest,StillReadSST)1655 TEST(DBTest, StillReadSST) {
1656 ASSERT_OK(Put("foo", "bar"));
1657 ASSERT_EQ("bar", Get("foo"));
1658
1659 // Dump the memtable to disk.
1660 dbfull()->TEST_CompactMemTable();
1661 ASSERT_EQ("bar", Get("foo"));
1662 Close();
1663 ASSERT_GT(RenameLDBToSST(), 0);
1664 Options options = CurrentOptions();
1665 options.paranoid_checks = true;
1666 Status s = TryReopen(&options);
1667 ASSERT_TRUE(s.ok());
1668 ASSERT_EQ("bar", Get("foo"));
1669 }
1670
TEST(DBTest,FilesDeletedAfterCompaction)1671 TEST(DBTest, FilesDeletedAfterCompaction) {
1672 ASSERT_OK(Put("foo", "v2"));
1673 Compact("a", "z");
1674 const int num_files = CountFiles();
1675 for (int i = 0; i < 10; i++) {
1676 ASSERT_OK(Put("foo", "v2"));
1677 Compact("a", "z");
1678 }
1679 ASSERT_EQ(CountFiles(), num_files);
1680 }
1681
TEST(DBTest,BloomFilter)1682 TEST(DBTest, BloomFilter) {
1683 env_->count_random_reads_ = true;
1684 Options options = CurrentOptions();
1685 options.env = env_;
1686 options.block_cache = NewLRUCache(0); // Prevent cache hits
1687 options.filter_policy = NewBloomFilterPolicy(10);
1688 Reopen(&options);
1689
1690 // Populate multiple layers
1691 const int N = 10000;
1692 for (int i = 0; i < N; i++) {
1693 ASSERT_OK(Put(Key(i), Key(i)));
1694 }
1695 Compact("a", "z");
1696 for (int i = 0; i < N; i += 100) {
1697 ASSERT_OK(Put(Key(i), Key(i)));
1698 }
1699 dbfull()->TEST_CompactMemTable();
1700
1701 // Prevent auto compactions triggered by seeks
1702 env_->delay_data_sync_.Release_Store(env_);
1703
1704 // Lookup present keys. Should rarely read from small sstable.
1705 env_->random_read_counter_.Reset();
1706 for (int i = 0; i < N; i++) {
1707 ASSERT_EQ(Key(i), Get(Key(i)));
1708 }
1709 int reads = env_->random_read_counter_.Read();
1710 fprintf(stderr, "%d present => %d reads\n", N, reads);
1711 ASSERT_GE(reads, N);
1712 ASSERT_LE(reads, N + 2*N/100);
1713
1714 // Lookup present keys. Should rarely read from either sstable.
1715 env_->random_read_counter_.Reset();
1716 for (int i = 0; i < N; i++) {
1717 ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
1718 }
1719 reads = env_->random_read_counter_.Read();
1720 fprintf(stderr, "%d missing => %d reads\n", N, reads);
1721 ASSERT_LE(reads, 3*N/100);
1722
1723 env_->delay_data_sync_.Release_Store(NULL);
1724 Close();
1725 delete options.block_cache;
1726 delete options.filter_policy;
1727 }
1728
1729 // Multi-threaded test:
1730 namespace {
1731
1732 static const int kNumThreads = 4;
1733 static const int kTestSeconds = 10;
1734 static const int kNumKeys = 1000;
1735
1736 struct MTState {
1737 DBTest* test;
1738 port::AtomicPointer stop;
1739 port::AtomicPointer counter[kNumThreads];
1740 port::AtomicPointer thread_done[kNumThreads];
1741 };
1742
1743 struct MTThread {
1744 MTState* state;
1745 int id;
1746 };
1747
MTThreadBody(void * arg)1748 static void MTThreadBody(void* arg) {
1749 MTThread* t = reinterpret_cast<MTThread*>(arg);
1750 int id = t->id;
1751 DB* db = t->state->test->db_;
1752 uintptr_t counter = 0;
1753 fprintf(stderr, "... starting thread %d\n", id);
1754 Random rnd(1000 + id);
1755 std::string value;
1756 char valbuf[1500];
1757 while (t->state->stop.Acquire_Load() == NULL) {
1758 t->state->counter[id].Release_Store(reinterpret_cast<void*>(counter));
1759
1760 int key = rnd.Uniform(kNumKeys);
1761 char keybuf[20];
1762 snprintf(keybuf, sizeof(keybuf), "%016d", key);
1763
1764 if (rnd.OneIn(2)) {
1765 // Write values of the form <key, my id, counter>.
1766 // We add some padding for force compactions.
1767 snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d",
1768 key, id, static_cast<int>(counter));
1769 ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
1770 } else {
1771 // Read a value and verify that it matches the pattern written above.
1772 Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
1773 if (s.IsNotFound()) {
1774 // Key has not yet been written
1775 } else {
1776 // Check that the writer thread counter is >= the counter in the value
1777 ASSERT_OK(s);
1778 int k, w, c;
1779 ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
1780 ASSERT_EQ(k, key);
1781 ASSERT_GE(w, 0);
1782 ASSERT_LT(w, kNumThreads);
1783 ASSERT_LE(static_cast<uintptr_t>(c), reinterpret_cast<uintptr_t>(
1784 t->state->counter[w].Acquire_Load()));
1785 }
1786 }
1787 counter++;
1788 }
1789 t->state->thread_done[id].Release_Store(t);
1790 fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
1791 }
1792
1793 } // namespace
1794
TEST(DBTest,MultiThreaded)1795 TEST(DBTest, MultiThreaded) {
1796 do {
1797 // Initialize state
1798 MTState mt;
1799 mt.test = this;
1800 mt.stop.Release_Store(0);
1801 for (int id = 0; id < kNumThreads; id++) {
1802 mt.counter[id].Release_Store(0);
1803 mt.thread_done[id].Release_Store(0);
1804 }
1805
1806 // Start threads
1807 MTThread thread[kNumThreads];
1808 for (int id = 0; id < kNumThreads; id++) {
1809 thread[id].state = &mt;
1810 thread[id].id = id;
1811 env_->StartThread(MTThreadBody, &thread[id]);
1812 }
1813
1814 // Let them run for a while
1815 DelayMilliseconds(kTestSeconds * 1000);
1816
1817 // Stop the threads and wait for them to finish
1818 mt.stop.Release_Store(&mt);
1819 for (int id = 0; id < kNumThreads; id++) {
1820 while (mt.thread_done[id].Acquire_Load() == NULL) {
1821 DelayMilliseconds(100);
1822 }
1823 }
1824 } while (ChangeOptions());
1825 }
1826
1827 namespace {
1828 typedef std::map<std::string, std::string> KVMap;
1829 }
1830
1831 class ModelDB: public DB {
1832 public:
1833 class ModelSnapshot : public Snapshot {
1834 public:
1835 KVMap map_;
1836 };
1837
ModelDB(const Options & options)1838 explicit ModelDB(const Options& options): options_(options) { }
~ModelDB()1839 ~ModelDB() { }
Put(const WriteOptions & o,const Slice & k,const Slice & v)1840 virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
1841 return DB::Put(o, k, v);
1842 }
Delete(const WriteOptions & o,const Slice & key)1843 virtual Status Delete(const WriteOptions& o, const Slice& key) {
1844 return DB::Delete(o, key);
1845 }
Get(const ReadOptions & options,const Slice & key,std::string * value)1846 virtual Status Get(const ReadOptions& options,
1847 const Slice& key, std::string* value) {
1848 assert(false); // Not implemented
1849 return Status::NotFound(key);
1850 }
NewIterator(const ReadOptions & options)1851 virtual Iterator* NewIterator(const ReadOptions& options) {
1852 if (options.snapshot == NULL) {
1853 KVMap* saved = new KVMap;
1854 *saved = map_;
1855 return new ModelIter(saved, true);
1856 } else {
1857 const KVMap* snapshot_state =
1858 &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
1859 return new ModelIter(snapshot_state, false);
1860 }
1861 }
GetSnapshot()1862 virtual const Snapshot* GetSnapshot() {
1863 ModelSnapshot* snapshot = new ModelSnapshot;
1864 snapshot->map_ = map_;
1865 return snapshot;
1866 }
1867
ReleaseSnapshot(const Snapshot * snapshot)1868 virtual void ReleaseSnapshot(const Snapshot* snapshot) {
1869 delete reinterpret_cast<const ModelSnapshot*>(snapshot);
1870 }
Write(const WriteOptions & options,WriteBatch * batch)1871 virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
1872 class Handler : public WriteBatch::Handler {
1873 public:
1874 KVMap* map_;
1875 virtual void Put(const Slice& key, const Slice& value) {
1876 (*map_)[key.ToString()] = value.ToString();
1877 }
1878 virtual void Delete(const Slice& key) {
1879 map_->erase(key.ToString());
1880 }
1881 };
1882 Handler handler;
1883 handler.map_ = &map_;
1884 return batch->Iterate(&handler);
1885 }
1886
GetProperty(const Slice & property,std::string * value)1887 virtual bool GetProperty(const Slice& property, std::string* value) {
1888 return false;
1889 }
GetApproximateSizes(const Range * r,int n,uint64_t * sizes)1890 virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
1891 for (int i = 0; i < n; i++) {
1892 sizes[i] = 0;
1893 }
1894 }
CompactRange(const Slice * start,const Slice * end)1895 virtual void CompactRange(const Slice* start, const Slice* end) {
1896 }
1897
1898 private:
1899 class ModelIter: public Iterator {
1900 public:
ModelIter(const KVMap * map,bool owned)1901 ModelIter(const KVMap* map, bool owned)
1902 : map_(map), owned_(owned), iter_(map_->end()) {
1903 }
~ModelIter()1904 ~ModelIter() {
1905 if (owned_) delete map_;
1906 }
Valid() const1907 virtual bool Valid() const { return iter_ != map_->end(); }
SeekToFirst()1908 virtual void SeekToFirst() { iter_ = map_->begin(); }
SeekToLast()1909 virtual void SeekToLast() {
1910 if (map_->empty()) {
1911 iter_ = map_->end();
1912 } else {
1913 iter_ = map_->find(map_->rbegin()->first);
1914 }
1915 }
Seek(const Slice & k)1916 virtual void Seek(const Slice& k) {
1917 iter_ = map_->lower_bound(k.ToString());
1918 }
Next()1919 virtual void Next() { ++iter_; }
Prev()1920 virtual void Prev() { --iter_; }
key() const1921 virtual Slice key() const { return iter_->first; }
value() const1922 virtual Slice value() const { return iter_->second; }
status() const1923 virtual Status status() const { return Status::OK(); }
1924 private:
1925 const KVMap* const map_;
1926 const bool owned_; // Do we own map_
1927 KVMap::const_iterator iter_;
1928 };
1929 const Options options_;
1930 KVMap map_;
1931 };
1932
RandomKey(Random * rnd)1933 static std::string RandomKey(Random* rnd) {
1934 int len = (rnd->OneIn(3)
1935 ? 1 // Short sometimes to encourage collisions
1936 : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
1937 return test::RandomKey(rnd, len);
1938 }
1939
CompareIterators(int step,DB * model,DB * db,const Snapshot * model_snap,const Snapshot * db_snap)1940 static bool CompareIterators(int step,
1941 DB* model,
1942 DB* db,
1943 const Snapshot* model_snap,
1944 const Snapshot* db_snap) {
1945 ReadOptions options;
1946 options.snapshot = model_snap;
1947 Iterator* miter = model->NewIterator(options);
1948 options.snapshot = db_snap;
1949 Iterator* dbiter = db->NewIterator(options);
1950 bool ok = true;
1951 int count = 0;
1952 for (miter->SeekToFirst(), dbiter->SeekToFirst();
1953 ok && miter->Valid() && dbiter->Valid();
1954 miter->Next(), dbiter->Next()) {
1955 count++;
1956 if (miter->key().compare(dbiter->key()) != 0) {
1957 fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
1958 step,
1959 EscapeString(miter->key()).c_str(),
1960 EscapeString(dbiter->key()).c_str());
1961 ok = false;
1962 break;
1963 }
1964
1965 if (miter->value().compare(dbiter->value()) != 0) {
1966 fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
1967 step,
1968 EscapeString(miter->key()).c_str(),
1969 EscapeString(miter->value()).c_str(),
1970 EscapeString(miter->value()).c_str());
1971 ok = false;
1972 }
1973 }
1974
1975 if (ok) {
1976 if (miter->Valid() != dbiter->Valid()) {
1977 fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
1978 step, miter->Valid(), dbiter->Valid());
1979 ok = false;
1980 }
1981 }
1982 fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
1983 delete miter;
1984 delete dbiter;
1985 return ok;
1986 }
1987
TEST(DBTest,Randomized)1988 TEST(DBTest, Randomized) {
1989 Random rnd(test::RandomSeed());
1990 do {
1991 ModelDB model(CurrentOptions());
1992 const int N = 10000;
1993 const Snapshot* model_snap = NULL;
1994 const Snapshot* db_snap = NULL;
1995 std::string k, v;
1996 for (int step = 0; step < N; step++) {
1997 if (step % 100 == 0) {
1998 fprintf(stderr, "Step %d of %d\n", step, N);
1999 }
2000 // TODO(sanjay): Test Get() works
2001 int p = rnd.Uniform(100);
2002 if (p < 45) { // Put
2003 k = RandomKey(&rnd);
2004 v = RandomString(&rnd,
2005 rnd.OneIn(20)
2006 ? 100 + rnd.Uniform(100)
2007 : rnd.Uniform(8));
2008 ASSERT_OK(model.Put(WriteOptions(), k, v));
2009 ASSERT_OK(db_->Put(WriteOptions(), k, v));
2010
2011 } else if (p < 90) { // Delete
2012 k = RandomKey(&rnd);
2013 ASSERT_OK(model.Delete(WriteOptions(), k));
2014 ASSERT_OK(db_->Delete(WriteOptions(), k));
2015
2016
2017 } else { // Multi-element batch
2018 WriteBatch b;
2019 const int num = rnd.Uniform(8);
2020 for (int i = 0; i < num; i++) {
2021 if (i == 0 || !rnd.OneIn(10)) {
2022 k = RandomKey(&rnd);
2023 } else {
2024 // Periodically re-use the same key from the previous iter, so
2025 // we have multiple entries in the write batch for the same key
2026 }
2027 if (rnd.OneIn(2)) {
2028 v = RandomString(&rnd, rnd.Uniform(10));
2029 b.Put(k, v);
2030 } else {
2031 b.Delete(k);
2032 }
2033 }
2034 ASSERT_OK(model.Write(WriteOptions(), &b));
2035 ASSERT_OK(db_->Write(WriteOptions(), &b));
2036 }
2037
2038 if ((step % 100) == 0) {
2039 ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
2040 ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
2041 // Save a snapshot from each DB this time that we'll use next
2042 // time we compare things, to make sure the current state is
2043 // preserved with the snapshot
2044 if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
2045 if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
2046
2047 Reopen();
2048 ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
2049
2050 model_snap = model.GetSnapshot();
2051 db_snap = db_->GetSnapshot();
2052 }
2053 }
2054 if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
2055 if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
2056 } while (ChangeOptions());
2057 }
2058
MakeKey(unsigned int num)2059 std::string MakeKey(unsigned int num) {
2060 char buf[30];
2061 snprintf(buf, sizeof(buf), "%016u", num);
2062 return std::string(buf);
2063 }
2064
BM_LogAndApply(int iters,int num_base_files)2065 void BM_LogAndApply(int iters, int num_base_files) {
2066 std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
2067 DestroyDB(dbname, Options());
2068
2069 DB* db = NULL;
2070 Options opts;
2071 opts.create_if_missing = true;
2072 Status s = DB::Open(opts, dbname, &db);
2073 ASSERT_OK(s);
2074 ASSERT_TRUE(db != NULL);
2075
2076 delete db;
2077 db = NULL;
2078
2079 Env* env = Env::Default();
2080
2081 port::Mutex mu;
2082 MutexLock l(&mu);
2083
2084 InternalKeyComparator cmp(BytewiseComparator());
2085 Options options;
2086 VersionSet vset(dbname, &options, NULL, &cmp);
2087 ASSERT_OK(vset.Recover());
2088 VersionEdit vbase;
2089 uint64_t fnum = 1;
2090 for (int i = 0; i < num_base_files; i++) {
2091 InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
2092 InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
2093 vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
2094 }
2095 ASSERT_OK(vset.LogAndApply(&vbase, &mu));
2096
2097 uint64_t start_micros = env->NowMicros();
2098
2099 for (int i = 0; i < iters; i++) {
2100 VersionEdit vedit;
2101 vedit.DeleteFile(2, fnum);
2102 InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
2103 InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
2104 vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
2105 vset.LogAndApply(&vedit, &mu);
2106 }
2107 uint64_t stop_micros = env->NowMicros();
2108 unsigned int us = stop_micros - start_micros;
2109 char buf[16];
2110 snprintf(buf, sizeof(buf), "%d", num_base_files);
2111 fprintf(stderr,
2112 "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n",
2113 buf, iters, us, ((float)us) / iters);
2114 }
2115
2116 } // namespace leveldb
2117
main(int argc,char ** argv)2118 int main(int argc, char** argv) {
2119 if (argc > 1 && std::string(argv[1]) == "--benchmark") {
2120 leveldb::BM_LogAndApply(1000, 1);
2121 leveldb::BM_LogAndApply(1000, 100);
2122 leveldb::BM_LogAndApply(1000, 10000);
2123 leveldb::BM_LogAndApply(100, 100000);
2124 return 0;
2125 }
2126
2127 return leveldb::test::RunAllTests();
2128 }
2129