1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 
5 #ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
6 #define STORAGE_LEVELDB_DB_DB_IMPL_H_
7 
8 #include <atomic>
9 #include <deque>
10 #include <set>
11 #include <string>
12 
13 #include "db/dbformat.h"
14 #include "db/log_writer.h"
15 #include "db/snapshot.h"
16 #include "leveldb/db.h"
17 #include "leveldb/env.h"
18 #include "port/port.h"
19 #include "port/thread_annotations.h"
20 
21 namespace leveldb {
22 
23 class MemTable;
24 class TableCache;
25 class Version;
26 class VersionEdit;
27 class VersionSet;
28 
29 class DBImpl : public DB {
30  public:
31   DBImpl(const Options& options, const std::string& dbname);
32 
33   DBImpl(const DBImpl&) = delete;
34   DBImpl& operator=(const DBImpl&) = delete;
35 
36   virtual ~DBImpl();
37 
38   // Implementations of the DB interface
39   virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
40   virtual Status Delete(const WriteOptions&, const Slice& key);
41   virtual Status Write(const WriteOptions& options, WriteBatch* updates);
42   virtual Status Get(const ReadOptions& options, const Slice& key,
43                      std::string* value);
44   virtual Iterator* NewIterator(const ReadOptions&);
45   virtual const Snapshot* GetSnapshot();
46   virtual void ReleaseSnapshot(const Snapshot* snapshot);
47   virtual bool GetProperty(const Slice& property, std::string* value);
48   virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
49   virtual void CompactRange(const Slice* begin, const Slice* end);
50 
51   // Extra methods (for testing) that are not in the public DB interface
52 
53   // Compact any files in the named level that overlap [*begin,*end]
54   void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
55 
56   // Force current memtable contents to be compacted.
57   Status TEST_CompactMemTable();
58 
59   // Return an internal iterator over the current state of the database.
60   // The keys of this iterator are internal keys (see format.h).
61   // The returned iterator should be deleted when no longer needed.
62   Iterator* TEST_NewInternalIterator();
63 
64   // Return the maximum overlapping data (in bytes) at next level for any
65   // file at a level >= 1.
66   int64_t TEST_MaxNextLevelOverlappingBytes();
67 
68   // Record a sample of bytes read at the specified internal key.
69   // Samples are taken approximately once every config::kReadBytesPeriod
70   // bytes.
71   void RecordReadSample(Slice key);
72 
73  private:
74   friend class DB;
75   struct CompactionState;
76   struct Writer;
77 
78   // Information for a manual compaction
79   struct ManualCompaction {
80     int level;
81     bool done;
82     const InternalKey* begin;  // null means beginning of key range
83     const InternalKey* end;    // null means end of key range
84     InternalKey tmp_storage;   // Used to keep track of compaction progress
85   };
86 
87   // Per level compaction stats.  stats_[level] stores the stats for
88   // compactions that produced data for the specified "level".
89   struct CompactionStats {
CompactionStatsCompactionStats90     CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {}
91 
AddCompactionStats92     void Add(const CompactionStats& c) {
93       this->micros += c.micros;
94       this->bytes_read += c.bytes_read;
95       this->bytes_written += c.bytes_written;
96     }
97 
98     int64_t micros;
99     int64_t bytes_read;
100     int64_t bytes_written;
101   };
102 
103   Iterator* NewInternalIterator(const ReadOptions&,
104                                 SequenceNumber* latest_snapshot,
105                                 uint32_t* seed);
106 
107   Status NewDB();
108 
109   // Recover the descriptor from persistent storage.  May do a significant
110   // amount of work to recover recently logged updates.  Any changes to
111   // be made to the descriptor are added to *edit.
112   Status Recover(VersionEdit* edit, bool* save_manifest)
113       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
114 
115   void MaybeIgnoreError(Status* s) const;
116 
117   // Delete any unneeded files and stale in-memory entries.
118   void DeleteObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
119 
120   // Compact the in-memory write buffer to disk.  Switches to a new
121   // log-file/memtable and writes a new descriptor iff successful.
122   // Errors are recorded in bg_error_.
123   void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
124 
125   Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
126                         VersionEdit* edit, SequenceNumber* max_sequence)
127       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
128 
129   Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
130       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
131 
132   Status MakeRoomForWrite(bool force /* compact even if there is room? */)
133       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
134   WriteBatch* BuildBatchGroup(Writer** last_writer)
135       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
136 
137   void RecordBackgroundError(const Status& s);
138 
139   void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
140   static void BGWork(void* db);
141   void BackgroundCall();
142   void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
143   void CleanupCompaction(CompactionState* compact)
144       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
145   Status DoCompactionWork(CompactionState* compact)
146       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
147 
148   Status OpenCompactionOutputFile(CompactionState* compact);
149   Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
150   Status InstallCompactionResults(CompactionState* compact)
151       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
152 
user_comparator()153   const Comparator* user_comparator() const {
154     return internal_comparator_.user_comparator();
155   }
156 
157   // Constant after construction
158   Env* const env_;
159   const InternalKeyComparator internal_comparator_;
160   const InternalFilterPolicy internal_filter_policy_;
161   const Options options_;  // options_.comparator == &internal_comparator_
162   const bool owns_info_log_;
163   const bool owns_cache_;
164   const std::string dbname_;
165 
166   // table_cache_ provides its own synchronization
167   TableCache* const table_cache_;
168 
169   // Lock over the persistent DB state.  Non-null iff successfully acquired.
170   FileLock* db_lock_;
171 
172   // State below is protected by mutex_
173   port::Mutex mutex_;
174   std::atomic<bool> shutting_down_;
175   port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_);
176   MemTable* mem_;
177   MemTable* imm_ GUARDED_BY(mutex_);  // Memtable being compacted
178   std::atomic<bool> has_imm_;         // So bg thread can detect non-null imm_
179   WritableFile* logfile_;
180   uint64_t logfile_number_ GUARDED_BY(mutex_);
181   log::Writer* log_;
182   uint32_t seed_ GUARDED_BY(mutex_);  // For sampling.
183 
184   // Queue of writers.
185   std::deque<Writer*> writers_ GUARDED_BY(mutex_);
186   WriteBatch* tmp_batch_ GUARDED_BY(mutex_);
187 
188   SnapshotList snapshots_ GUARDED_BY(mutex_);
189 
190   // Set of table files to protect from deletion because they are
191   // part of ongoing compactions.
192   std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_);
193 
194   // Has a background compaction been scheduled or is running?
195   bool background_compaction_scheduled_ GUARDED_BY(mutex_);
196 
197   ManualCompaction* manual_compaction_ GUARDED_BY(mutex_);
198 
199   VersionSet* const versions_;
200 
201   // Have we encountered a background error in paranoid mode?
202   Status bg_error_ GUARDED_BY(mutex_);
203 
204   CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_);
205 };
206 
207 // Sanitize db options.  The caller should delete result.info_log if
208 // it is not equal to src.info_log.
209 Options SanitizeOptions(const std::string& db,
210                         const InternalKeyComparator* icmp,
211                         const InternalFilterPolicy* ipolicy,
212                         const Options& src);
213 
214 }  // namespace leveldb
215 
216 #endif  // STORAGE_LEVELDB_DB_DB_IMPL_H_
217