1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #ifndef ROCKSDB_LITE
9 
10 #include <string>
11 
12 #include "port/port.h"
13 
14 #include "db/compaction/compaction.h"
15 #include "file/delete_scheduler.h"
16 #include "rocksdb/sst_file_manager.h"
17 
18 namespace ROCKSDB_NAMESPACE {
19 class ErrorHandler;
20 class FileSystem;
21 class SystemClock;
22 class Logger;
23 
24 // SstFileManager is used to track SST and blob files in the DB and control
25 // their deletion rate. All SstFileManager public functions are thread-safe.
26 class SstFileManagerImpl : public SstFileManager {
27  public:
28   explicit SstFileManagerImpl(const std::shared_ptr<SystemClock>& clock,
29                               const std::shared_ptr<FileSystem>& fs,
30                               const std::shared_ptr<Logger>& logger,
31                               int64_t rate_bytes_per_sec,
32                               double max_trash_db_ratio,
33                               uint64_t bytes_max_delete_chunk);
34 
35   ~SstFileManagerImpl();
36 
37   // DB will call OnAddFile whenever a new sst/blob file is added.
38   Status OnAddFile(const std::string& file_path);
39 
40   // Overload where size of the file is provided by the caller rather than
41   // queried from the filesystem. This is an optimization.
42   Status OnAddFile(const std::string& file_path, uint64_t file_size);
43 
44   // DB will call OnDeleteFile whenever a sst/blob file is deleted.
45   Status OnDeleteFile(const std::string& file_path);
46 
47   // DB will call OnMoveFile whenever a sst/blob file is move to a new path.
48   Status OnMoveFile(const std::string& old_path, const std::string& new_path,
49                     uint64_t* file_size = nullptr);
50 
51   // Update the maximum allowed space that should be used by RocksDB, if
52   // the total size of the SST and blob files exceeds max_allowed_space, writes
53   // to RocksDB will fail.
54   //
55   // Setting max_allowed_space to 0 will disable this feature, maximum allowed
56   // space will be infinite (Default value).
57   //
58   // thread-safe.
59   void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override;
60 
61   void SetCompactionBufferSize(uint64_t compaction_buffer_size) override;
62 
63   // Return true if the total size of SST and blob files exceeded the maximum
64   // allowed space usage.
65   //
66   // thread-safe.
67   bool IsMaxAllowedSpaceReached() override;
68 
69   bool IsMaxAllowedSpaceReachedIncludingCompactions() override;
70 
71   // Returns true is there is enough (approximate) space for the specified
72   // compaction. Space is approximate because this function conservatively
73   // estimates how much space is currently being used by compactions (i.e.
74   // if a compaction has started, this function bumps the used space by
75   // the full compaction size).
76   bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
77                                const std::vector<CompactionInputFiles>& inputs,
78                                const Status& bg_error);
79 
80   // Bookkeeping so total_file_sizes_ goes back to normal after compaction
81   // finishes
82   void OnCompactionCompletion(Compaction* c);
83 
84   uint64_t GetCompactionsReservedSize();
85 
86   // Return the total size of all tracked files.
87   uint64_t GetTotalSize() override;
88 
89   // Return a map containing all tracked files and there corresponding sizes.
90   std::unordered_map<std::string, uint64_t> GetTrackedFiles() override;
91 
92   // Return delete rate limit in bytes per second.
93   virtual int64_t GetDeleteRateBytesPerSecond() override;
94 
95   // Update the delete rate limit in bytes per second.
96   virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override;
97 
98   // Return trash/DB size ratio where new files will be deleted immediately
99   virtual double GetMaxTrashDBRatio() override;
100 
101   // Update trash/DB size ratio where new files will be deleted immediately
102   virtual void SetMaxTrashDBRatio(double ratio) override;
103 
104   // Return the total size of trash files
105   uint64_t GetTotalTrashSize() override;
106 
107   // Called by each DB instance using this sst file manager to reserve
108   // disk buffer space for recovery from out of space errors
109   void ReserveDiskBuffer(uint64_t buffer, const std::string& path);
110 
111   // Set a flag upon encountering disk full. May enqueue the ErrorHandler
112   // instance for background polling and recovery
113   void StartErrorRecovery(ErrorHandler* db, Status bg_error);
114 
115   // Remove the given Errorhandler instance from the recovery queue. Its
116   // not guaranteed
117   bool CancelErrorRecovery(ErrorHandler* db);
118 
119   // Mark file as trash and schedule it's deletion. If force_bg is set, it
120   // forces the file to be deleting in the background regardless of DB size,
121   // except when rate limited delete is disabled
122   virtual Status ScheduleFileDeletion(const std::string& file_path,
123                                       const std::string& dir_to_sync,
124                                       const bool force_bg = false);
125 
126   // Wait for all files being deleteing in the background to finish or for
127   // destructor to be called.
128   virtual void WaitForEmptyTrash();
129 
delete_scheduler()130   DeleteScheduler* delete_scheduler() { return &delete_scheduler_; }
131 
132   // Stop the error recovery background thread. This should be called only
133   // once in the object's lifetime, and before the destructor
134   void Close();
135 
SetStatisticsPtr(const std::shared_ptr<Statistics> & stats)136   void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) override {
137     stats_ = stats;
138     delete_scheduler_.SetStatisticsPtr(stats);
139   }
140 
141  private:
142   // REQUIRES: mutex locked
143   void OnAddFileImpl(const std::string& file_path, uint64_t file_size);
144   // REQUIRES: mutex locked
145   void OnDeleteFileImpl(const std::string& file_path);
146 
147   void ClearError();
CheckFreeSpace()148   bool CheckFreeSpace() {
149     return bg_err_.severity() == Status::Severity::kSoftError;
150   }
151 
152   std::shared_ptr<SystemClock> clock_;
153   std::shared_ptr<FileSystem> fs_;
154   std::shared_ptr<Logger> logger_;
155   // Mutex to protect tracked_files_, total_files_size_
156   port::Mutex mu_;
157   // The summation of the sizes of all files in tracked_files_ map
158   uint64_t total_files_size_;
159   // Compactions should only execute if they can leave at least
160   // this amount of buffer space for logs and flushes
161   uint64_t compaction_buffer_size_;
162   // Estimated size of the current ongoing compactions
163   uint64_t cur_compactions_reserved_size_;
164   // A map containing all tracked files and there sizes
165   //  file_path => file_size
166   std::unordered_map<std::string, uint64_t> tracked_files_;
167   // The maximum allowed space (in bytes) for sst and blob files.
168   uint64_t max_allowed_space_;
169   // DeleteScheduler used to throttle file deletition.
170   DeleteScheduler delete_scheduler_;
171   port::CondVar cv_;
172   // Flag to force error recovery thread to exit
173   bool closing_;
174   // Background error recovery thread
175   std::unique_ptr<port::Thread> bg_thread_;
176   // A path in the filesystem corresponding to this SFM. This is used for
177   // calling Env::GetFreeSpace. Posix requires a path in the filesystem
178   std::string path_;
179   // Save the current background error
180   Status bg_err_;
181   // Amount of free disk headroom before allowing recovery from hard errors
182   uint64_t reserved_disk_buffer_;
183   // For soft errors, amount of free disk space before we can allow
184   // compactions to run full throttle. If disk space is below this trigger,
185   // compactions will be gated by free disk space > input size
186   uint64_t free_space_trigger_;
187   // List of database error handler instances tracked by this SstFileManager.
188   std::list<ErrorHandler*> error_handler_list_;
189   // Pointer to ErrorHandler instance that is currently processing recovery
190   ErrorHandler* cur_instance_;
191   std::shared_ptr<Statistics> stats_;
192 };
193 
194 }  // namespace ROCKSDB_NAMESPACE
195 
196 #endif  // ROCKSDB_LITE
197