1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 5 #ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ 6 #define STORAGE_LEVELDB_DB_DB_IMPL_H_ 7 8 #include <atomic> 9 #include <deque> 10 #include <set> 11 #include <string> 12 13 #include "db/dbformat.h" 14 #include "db/log_writer.h" 15 #include "db/snapshot.h" 16 #include "leveldb/db.h" 17 #include "leveldb/env.h" 18 #include "port/port.h" 19 #include "port/thread_annotations.h" 20 21 namespace leveldb { 22 23 class MemTable; 24 class TableCache; 25 class Version; 26 class VersionEdit; 27 class VersionSet; 28 29 class DBImpl : public DB { 30 public: 31 DBImpl(const Options& options, const std::string& dbname); 32 33 DBImpl(const DBImpl&) = delete; 34 DBImpl& operator=(const DBImpl&) = delete; 35 36 virtual ~DBImpl(); 37 38 // Implementations of the DB interface 39 virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value); 40 virtual Status Delete(const WriteOptions&, const Slice& key); 41 virtual Status Write(const WriteOptions& options, WriteBatch* updates); 42 virtual Status Get(const ReadOptions& options, const Slice& key, 43 std::string* value); 44 virtual Iterator* NewIterator(const ReadOptions&); 45 virtual const Snapshot* GetSnapshot(); 46 virtual void ReleaseSnapshot(const Snapshot* snapshot); 47 virtual bool GetProperty(const Slice& property, std::string* value); 48 virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); 49 virtual void CompactRange(const Slice* begin, const Slice* end); 50 51 // Extra methods (for testing) that are not in the public DB interface 52 53 // Compact any files in the named level that overlap [*begin,*end] 54 void TEST_CompactRange(int level, const Slice* begin, const Slice* end); 55 56 // Force current memtable contents to be compacted. 57 Status TEST_CompactMemTable(); 58 59 // Return an internal iterator over the current state of the database. 60 // The keys of this iterator are internal keys (see format.h). 61 // The returned iterator should be deleted when no longer needed. 62 Iterator* TEST_NewInternalIterator(); 63 64 // Return the maximum overlapping data (in bytes) at next level for any 65 // file at a level >= 1. 66 int64_t TEST_MaxNextLevelOverlappingBytes(); 67 68 // Record a sample of bytes read at the specified internal key. 69 // Samples are taken approximately once every config::kReadBytesPeriod 70 // bytes. 71 void RecordReadSample(Slice key); 72 73 private: 74 friend class DB; 75 struct CompactionState; 76 struct Writer; 77 78 // Information for a manual compaction 79 struct ManualCompaction { 80 int level; 81 bool done; 82 const InternalKey* begin; // null means beginning of key range 83 const InternalKey* end; // null means end of key range 84 InternalKey tmp_storage; // Used to keep track of compaction progress 85 }; 86 87 // Per level compaction stats. stats_[level] stores the stats for 88 // compactions that produced data for the specified "level". 89 struct CompactionStats { CompactionStatsCompactionStats90 CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {} 91 AddCompactionStats92 void Add(const CompactionStats& c) { 93 this->micros += c.micros; 94 this->bytes_read += c.bytes_read; 95 this->bytes_written += c.bytes_written; 96 } 97 98 int64_t micros; 99 int64_t bytes_read; 100 int64_t bytes_written; 101 }; 102 103 Iterator* NewInternalIterator(const ReadOptions&, 104 SequenceNumber* latest_snapshot, 105 uint32_t* seed); 106 107 Status NewDB(); 108 109 // Recover the descriptor from persistent storage. May do a significant 110 // amount of work to recover recently logged updates. Any changes to 111 // be made to the descriptor are added to *edit. 112 Status Recover(VersionEdit* edit, bool* save_manifest) 113 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 114 115 void MaybeIgnoreError(Status* s) const; 116 117 // Delete any unneeded files and stale in-memory entries. 118 void DeleteObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_); 119 120 // Compact the in-memory write buffer to disk. Switches to a new 121 // log-file/memtable and writes a new descriptor iff successful. 122 // Errors are recorded in bg_error_. 123 void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_); 124 125 Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest, 126 VersionEdit* edit, SequenceNumber* max_sequence) 127 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 128 129 Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base) 130 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 131 132 Status MakeRoomForWrite(bool force /* compact even if there is room? */) 133 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 134 WriteBatch* BuildBatchGroup(Writer** last_writer) 135 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 136 137 void RecordBackgroundError(const Status& s); 138 139 void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); 140 static void BGWork(void* db); 141 void BackgroundCall(); 142 void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); 143 void CleanupCompaction(CompactionState* compact) 144 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 145 Status DoCompactionWork(CompactionState* compact) 146 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 147 148 Status OpenCompactionOutputFile(CompactionState* compact); 149 Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); 150 Status InstallCompactionResults(CompactionState* compact) 151 EXCLUSIVE_LOCKS_REQUIRED(mutex_); 152 user_comparator()153 const Comparator* user_comparator() const { 154 return internal_comparator_.user_comparator(); 155 } 156 157 // Constant after construction 158 Env* const env_; 159 const InternalKeyComparator internal_comparator_; 160 const InternalFilterPolicy internal_filter_policy_; 161 const Options options_; // options_.comparator == &internal_comparator_ 162 const bool owns_info_log_; 163 const bool owns_cache_; 164 const std::string dbname_; 165 166 // table_cache_ provides its own synchronization 167 TableCache* const table_cache_; 168 169 // Lock over the persistent DB state. Non-null iff successfully acquired. 170 FileLock* db_lock_; 171 172 // State below is protected by mutex_ 173 port::Mutex mutex_; 174 std::atomic<bool> shutting_down_; 175 port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_); 176 MemTable* mem_; 177 MemTable* imm_ GUARDED_BY(mutex_); // Memtable being compacted 178 std::atomic<bool> has_imm_; // So bg thread can detect non-null imm_ 179 WritableFile* logfile_; 180 uint64_t logfile_number_ GUARDED_BY(mutex_); 181 log::Writer* log_; 182 uint32_t seed_ GUARDED_BY(mutex_); // For sampling. 183 184 // Queue of writers. 185 std::deque<Writer*> writers_ GUARDED_BY(mutex_); 186 WriteBatch* tmp_batch_ GUARDED_BY(mutex_); 187 188 SnapshotList snapshots_ GUARDED_BY(mutex_); 189 190 // Set of table files to protect from deletion because they are 191 // part of ongoing compactions. 192 std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_); 193 194 // Has a background compaction been scheduled or is running? 195 bool background_compaction_scheduled_ GUARDED_BY(mutex_); 196 197 ManualCompaction* manual_compaction_ GUARDED_BY(mutex_); 198 199 VersionSet* const versions_; 200 201 // Have we encountered a background error in paranoid mode? 202 Status bg_error_ GUARDED_BY(mutex_); 203 204 CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_); 205 }; 206 207 // Sanitize db options. The caller should delete result.info_log if 208 // it is not equal to src.info_log. 209 Options SanitizeOptions(const std::string& db, 210 const InternalKeyComparator* icmp, 211 const InternalFilterPolicy* ipolicy, 212 const Options& src); 213 214 } // namespace leveldb 215 216 #endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ 217