1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // 6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 7 // Use of this source code is governed by a BSD-style license that can be 8 // found in the LICENSE file. See the AUTHORS file for names of contributors. 9 10 #pragma once 11 #include <atomic> 12 #include <sstream> 13 #include <string> 14 15 #include "env/file_system_tracer.h" 16 #include "port/port.h" 17 #include "rocksdb/file_system.h" 18 #include "rocksdb/listener.h" 19 #include "rocksdb/options.h" 20 #include "rocksdb/rate_limiter.h" 21 #include "util/aligned_buffer.h" 22 23 namespace ROCKSDB_NAMESPACE { 24 class Statistics; 25 class HistogramImpl; 26 class SystemClock; 27 28 using AlignedBuf = std::unique_ptr<char[]>; 29 30 // Align the request r according to alignment and return the aligned result. 31 FSReadRequest Align(const FSReadRequest& r, size_t alignment); 32 33 // Try to merge src to dest if they have overlap. 34 // 35 // Each request represents an inclusive interval [offset, offset + len]. 36 // If the intervals have overlap, update offset and len to represent the 37 // merged interval, and return true. 38 // Otherwise, do nothing and return false. 39 bool TryMerge(FSReadRequest* dest, const FSReadRequest& src); 40 41 // RandomAccessFileReader is a wrapper on top of Env::RandomAccessFile. It is 42 // responsible for: 43 // - Handling Buffered and Direct reads appropriately. 44 // - Rate limiting compaction reads. 45 // - Notifying any interested listeners on the completion of a read. 46 // - Updating IO stats. 47 class RandomAccessFileReader { 48 private: 49 #ifndef ROCKSDB_LITE NotifyOnFileReadFinish(uint64_t offset,size_t length,const FileOperationInfo::StartTimePoint & start_ts,const FileOperationInfo::FinishTimePoint & finish_ts,const Status & status)50 void NotifyOnFileReadFinish( 51 uint64_t offset, size_t length, 52 const FileOperationInfo::StartTimePoint& start_ts, 53 const FileOperationInfo::FinishTimePoint& finish_ts, 54 const Status& status) const { 55 FileOperationInfo info(FileOperationType::kRead, file_name_, start_ts, 56 finish_ts, status); 57 info.offset = offset; 58 info.length = length; 59 60 for (auto& listener : listeners_) { 61 listener->OnFileReadFinish(info); 62 } 63 } 64 #endif // ROCKSDB_LITE 65 ShouldNotifyListeners()66 bool ShouldNotifyListeners() const { return !listeners_.empty(); } 67 68 FSRandomAccessFilePtr file_; 69 std::string file_name_; 70 SystemClock* clock_; 71 Statistics* stats_; 72 uint32_t hist_type_; 73 HistogramImpl* file_read_hist_; 74 RateLimiter* rate_limiter_; 75 std::vector<std::shared_ptr<EventListener>> listeners_; 76 77 public: 78 explicit RandomAccessFileReader( 79 std::unique_ptr<FSRandomAccessFile>&& raf, const std::string& _file_name, 80 SystemClock* clock = nullptr, 81 const std::shared_ptr<IOTracer>& io_tracer = nullptr, 82 Statistics* stats = nullptr, uint32_t hist_type = 0, 83 HistogramImpl* file_read_hist = nullptr, 84 RateLimiter* rate_limiter = nullptr, 85 const std::vector<std::shared_ptr<EventListener>>& listeners = {}) file_(std::move (raf),io_tracer,_file_name)86 : file_(std::move(raf), io_tracer, _file_name), 87 file_name_(std::move(_file_name)), 88 clock_(clock), 89 stats_(stats), 90 hist_type_(hist_type), 91 file_read_hist_(file_read_hist), 92 rate_limiter_(rate_limiter), 93 listeners_() { 94 #ifndef ROCKSDB_LITE 95 std::for_each(listeners.begin(), listeners.end(), 96 [this](const std::shared_ptr<EventListener>& e) { 97 if (e->ShouldBeNotifiedOnFileIO()) { 98 listeners_.emplace_back(e); 99 } 100 }); 101 #else // !ROCKSDB_LITE 102 (void)listeners; 103 #endif 104 } 105 106 static IOStatus Create(const std::shared_ptr<FileSystem>& fs, 107 const std::string& fname, const FileOptions& file_opts, 108 std::unique_ptr<RandomAccessFileReader>* reader, 109 IODebugContext* dbg); 110 RandomAccessFileReader(const RandomAccessFileReader&) = delete; 111 RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete; 112 113 // In non-direct IO mode, 114 // 1. if using mmap, result is stored in a buffer other than scratch; 115 // 2. if not using mmap, result is stored in the buffer starting from scratch. 116 // 117 // In direct IO mode, an aligned buffer is allocated internally. 118 // 1. If aligned_buf is null, then results are copied to the buffer 119 // starting from scratch; 120 // 2. Otherwise, scratch is not used and can be null, the aligned_buf owns 121 // the internally allocated buffer on return, and the result refers to a 122 // region in aligned_buf. 123 IOStatus Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result, 124 char* scratch, AlignedBuf* aligned_buf, 125 bool for_compaction = false) const; 126 127 // REQUIRES: 128 // num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing. 129 // In non-direct IO mode, aligned_buf should be null; 130 // In direct IO mode, aligned_buf stores the aligned buffer allocated inside 131 // MultiRead, the result Slices in reqs refer to aligned_buf. 132 IOStatus MultiRead(const IOOptions& opts, FSReadRequest* reqs, 133 size_t num_reqs, AlignedBuf* aligned_buf) const; 134 Prefetch(uint64_t offset,size_t n)135 IOStatus Prefetch(uint64_t offset, size_t n) const { 136 return file_->Prefetch(offset, n, IOOptions(), nullptr); 137 } 138 file()139 FSRandomAccessFile* file() { return file_.get(); } 140 file_name()141 const std::string& file_name() const { return file_name_; } 142 use_direct_io()143 bool use_direct_io() const { return file_->use_direct_io(); } 144 145 IOStatus PrepareIOOptions(const ReadOptions& ro, IOOptions& opts); 146 }; 147 } // namespace ROCKSDB_NAMESPACE 148