1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // 6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 7 // Use of this source code is governed by a BSD-style license that can be 8 // found in the LICENSE file. See the AUTHORS file for names of contributors. 9 // 10 // Thread-safe (provides internal synchronization) 11 12 #pragma once 13 #include <string> 14 #include <vector> 15 #include <stdint.h> 16 17 #include "db/dbformat.h" 18 #include "db/range_del_aggregator.h" 19 #include "options/cf_options.h" 20 #include "port/port.h" 21 #include "rocksdb/cache.h" 22 #include "rocksdb/env.h" 23 #include "rocksdb/options.h" 24 #include "rocksdb/table.h" 25 #include "table/table_reader.h" 26 #include "trace_replay/block_cache_tracer.h" 27 28 namespace ROCKSDB_NAMESPACE { 29 30 class Env; 31 class Arena; 32 struct FileDescriptor; 33 class GetContext; 34 class HistogramImpl; 35 36 // Manages caching for TableReader objects for a column family. The actual 37 // cache is allocated separately and passed to the constructor. TableCache 38 // wraps around the underlying SST file readers by providing Get(), 39 // MultiGet() and NewIterator() methods that hide the instantiation, 40 // caching and access to the TableReader. The main purpose of this is 41 // performance - by caching the TableReader, it avoids unnecessary file opens 42 // and object allocation and instantiation. One exception is compaction, where 43 // a new TableReader may be instantiated - see NewIterator() comments 44 // 45 // Another service provided by TableCache is managing the row cache - if the 46 // DB is configured with a row cache, and the lookup key is present in the row 47 // cache, lookup is very fast. The row cache is obtained from 48 // ioptions.row_cache 49 class TableCache { 50 public: 51 TableCache(const ImmutableCFOptions& ioptions, 52 const FileOptions& storage_options, Cache* cache, 53 BlockCacheTracer* const block_cache_tracer); 54 ~TableCache(); 55 56 // Return an iterator for the specified file number (the corresponding 57 // file length must be exactly "file_size" bytes). If "table_reader_ptr" 58 // is non-nullptr, also sets "*table_reader_ptr" to point to the Table object 59 // underlying the returned iterator, or nullptr if no Table object underlies 60 // the returned iterator. The returned "*table_reader_ptr" object is owned 61 // by the cache and should not be deleted, and is valid for as long as the 62 // returned iterator is live. 63 // @param range_del_agg If non-nullptr, adds range deletions to the 64 // aggregator. If an error occurs, returns it in a NewErrorInternalIterator 65 // @param for_compaction If true, a new TableReader may be allocated (but 66 // not cached), depending on the CF options 67 // @param skip_filters Disables loading/accessing the filter block 68 // @param level The level this table is at, -1 for "not set / don't know" 69 InternalIterator* NewIterator( 70 const ReadOptions& options, const FileOptions& toptions, 71 const InternalKeyComparator& internal_comparator, 72 const FileMetaData& file_meta, RangeDelAggregator* range_del_agg, 73 const SliceTransform* prefix_extractor, TableReader** table_reader_ptr, 74 HistogramImpl* file_read_hist, TableReaderCaller caller, Arena* arena, 75 bool skip_filters, int level, const InternalKey* smallest_compaction_key, 76 const InternalKey* largest_compaction_key); 77 78 // If a seek to internal key "k" in specified file finds an entry, 79 // call get_context->SaveValue() repeatedly until 80 // it returns false. As a side effect, it will insert the TableReader 81 // into the cache and potentially evict another entry 82 // @param get_context Context for get operation. The result of the lookup 83 // can be retrieved by calling get_context->State() 84 // @param file_read_hist If non-nullptr, the file reader statistics are 85 // recorded 86 // @param skip_filters Disables loading/accessing the filter block 87 // @param level The level this table is at, -1 for "not set / don't know" 88 Status Get(const ReadOptions& options, 89 const InternalKeyComparator& internal_comparator, 90 const FileMetaData& file_meta, const Slice& k, 91 GetContext* get_context, 92 const SliceTransform* prefix_extractor = nullptr, 93 HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, 94 int level = -1); 95 96 // Return the range delete tombstone iterator of the file specified by 97 // `file_meta`. 98 Status GetRangeTombstoneIterator( 99 const ReadOptions& options, 100 const InternalKeyComparator& internal_comparator, 101 const FileMetaData& file_meta, 102 std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter); 103 104 // If a seek to internal key "k" in specified file finds an entry, 105 // call get_context->SaveValue() repeatedly until 106 // it returns false. As a side effect, it will insert the TableReader 107 // into the cache and potentially evict another entry 108 // @param mget_range Pointer to the structure describing a batch of keys to 109 // be looked up in this table file. The result is stored 110 // in the embedded GetContext 111 // @param skip_filters Disables loading/accessing the filter block 112 // @param level The level this table is at, -1 for "not set / don't know" 113 Status MultiGet(const ReadOptions& options, 114 const InternalKeyComparator& internal_comparator, 115 const FileMetaData& file_meta, 116 const MultiGetContext::Range* mget_range, 117 const SliceTransform* prefix_extractor = nullptr, 118 HistogramImpl* file_read_hist = nullptr, 119 bool skip_filters = false, int level = -1); 120 121 // Evict any entry for the specified file number 122 static void Evict(Cache* cache, uint64_t file_number); 123 124 // Clean table handle and erase it from the table cache 125 // Used in DB close, or the file is not live anymore. 126 void EraseHandle(const FileDescriptor& fd, Cache::Handle* handle); 127 128 // Find table reader 129 // @param skip_filters Disables loading/accessing the filter block 130 // @param level == -1 means not specified 131 Status FindTable(const FileOptions& toptions, 132 const InternalKeyComparator& internal_comparator, 133 const FileDescriptor& file_fd, Cache::Handle**, 134 const SliceTransform* prefix_extractor = nullptr, 135 const bool no_io = false, bool record_read_stats = true, 136 HistogramImpl* file_read_hist = nullptr, 137 bool skip_filters = false, int level = -1, 138 bool prefetch_index_and_filter_in_cache = true); 139 140 // Get TableReader from a cache handle. 141 TableReader* GetTableReaderFromHandle(Cache::Handle* handle); 142 143 // Get the table properties of a given table. 144 // @no_io: indicates if we should load table to the cache if it is not present 145 // in table cache yet. 146 // @returns: `properties` will be reset on success. Please note that we will 147 // return Status::Incomplete() if table is not present in cache and 148 // we set `no_io` to be true. 149 Status GetTableProperties(const FileOptions& toptions, 150 const InternalKeyComparator& internal_comparator, 151 const FileDescriptor& file_meta, 152 std::shared_ptr<const TableProperties>* properties, 153 const SliceTransform* prefix_extractor = nullptr, 154 bool no_io = false); 155 156 // Return total memory usage of the table reader of the file. 157 // 0 if table reader of the file is not loaded. 158 size_t GetMemoryUsageByTableReader( 159 const FileOptions& toptions, 160 const InternalKeyComparator& internal_comparator, 161 const FileDescriptor& fd, 162 const SliceTransform* prefix_extractor = nullptr); 163 164 // Returns approximated offset of a key in a file represented by fd. 165 uint64_t ApproximateOffsetOf( 166 const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, 167 const InternalKeyComparator& internal_comparator, 168 const SliceTransform* prefix_extractor = nullptr); 169 170 // Returns approximated data size between start and end keys in a file 171 // represented by fd (the start key must not be greater than the end key). 172 uint64_t ApproximateSize(const Slice& start, const Slice& end, 173 const FileDescriptor& fd, TableReaderCaller caller, 174 const InternalKeyComparator& internal_comparator, 175 const SliceTransform* prefix_extractor = nullptr); 176 177 // Release the handle from a cache 178 void ReleaseHandle(Cache::Handle* handle); 179 get_cache()180 Cache* get_cache() const { return cache_; } 181 182 // Capacity of the backing Cache that indicates inifinite TableCache capacity. 183 // For example when max_open_files is -1 we set the backing Cache to this. 184 static const int kInfiniteCapacity = 0x400000; 185 186 // The tables opened with this TableCache will be immortal, i.e., their 187 // lifetime is as long as that of the DB. SetTablesAreImmortal()188 void SetTablesAreImmortal() { 189 if (cache_->GetCapacity() >= kInfiniteCapacity) { 190 immortal_tables_ = true; 191 } 192 } 193 194 private: 195 // Build a table reader 196 Status GetTableReader(const FileOptions& file_options, 197 const InternalKeyComparator& internal_comparator, 198 const FileDescriptor& fd, bool sequential_mode, 199 bool record_read_stats, HistogramImpl* file_read_hist, 200 std::unique_ptr<TableReader>* table_reader, 201 const SliceTransform* prefix_extractor = nullptr, 202 bool skip_filters = false, int level = -1, 203 bool prefetch_index_and_filter_in_cache = true); 204 205 // Create a key prefix for looking up the row cache. The prefix is of the 206 // format row_cache_id + fd_number + seq_no. Later, the user key can be 207 // appended to form the full key 208 void CreateRowCacheKeyPrefix(const ReadOptions& options, 209 const FileDescriptor& fd, 210 const Slice& internal_key, 211 GetContext* get_context, IterKey& row_cache_key); 212 213 // Helper function to lookup the row cache for a key. It appends the 214 // user key to row_cache_key at offset prefix_size 215 bool GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, 216 size_t prefix_size, GetContext* get_context); 217 218 const ImmutableCFOptions& ioptions_; 219 const FileOptions& file_options_; 220 Cache* const cache_; 221 std::string row_cache_id_; 222 bool immortal_tables_; 223 BlockCacheTracer* const block_cache_tracer_; 224 }; 225 226 } // namespace ROCKSDB_NAMESPACE 227