1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #pragma once
11 #include <cstdint>
12 #include <functional>
13 #include <limits>
14 #include <vector>
15 #include "memory/arena.h"
16 #include "port/port.h"
17 #include "util/autovector.h"
18 
19 namespace ROCKSDB_NAMESPACE {
20 
21 class Comparator;
22 struct FileMetaData;
23 struct FdWithKeyRange;
24 struct FileLevel;
25 
26 // The file tree structure in Version is prebuilt and the range of each file
27 // is known. On Version::Get(), it uses binary search to find a potential file
28 // and then check if a target key can be found in the file by comparing the key
29 // to each file's smallest and largest key. The results of these comparisons
30 // can be reused beyond checking if a key falls into a file's range.
31 // With some pre-calculated knowledge, each key comparison that has been done
32 // can serve as a hint to narrow down further searches: if a key compared to
33 // be smaller than a file's smallest or largest, that comparison can be used
34 // to find out the right bound of next binary search. Similarly, if a key
35 // compared to be larger than a file's smallest or largest, it can be utilized
36 // to find out the left bound of next binary search.
37 // With these hints: it can greatly reduce the range of binary search,
38 // especially for bottom levels, given that one file most likely overlaps with
39 // only N files from level below (where N is max_bytes_for_level_multiplier).
40 // So on level L, we will only look at ~N files instead of N^L files on the
41 // naive approach.
42 class FileIndexer {
43  public:
44   explicit FileIndexer(const Comparator* ucmp);
45 
46   size_t NumLevelIndex() const;
47 
48   size_t LevelIndexSize(size_t level) const;
49 
50   // Return a file index range in the next level to search for a key based on
51   // smallest and largest key comparison for the current file specified by
52   // level and file_index. When *left_index < *right_index, both index should
53   // be valid and fit in the vector size.
54   void GetNextLevelIndex(const size_t level, const size_t file_index,
55                          const int cmp_smallest, const int cmp_largest,
56                          int32_t* left_bound, int32_t* right_bound) const;
57 
58   void UpdateIndex(Arena* arena, const size_t num_levels,
59                    std::vector<FileMetaData*>* const files);
60 
61   enum {
62     // MSVC version 1800 still does not have constexpr for ::max()
63     kLevelMaxIndex = ROCKSDB_NAMESPACE::port::kMaxInt32
64   };
65 
66  private:
67   size_t num_levels_;
68   const Comparator* ucmp_;
69 
70   struct IndexUnit {
IndexUnitIndexUnit71     IndexUnit()
72       : smallest_lb(0), largest_lb(0), smallest_rb(-1), largest_rb(-1) {}
73     // During file search, a key is compared against smallest and largest
74     // from a FileMetaData. It can have 3 possible outcomes:
75     // (1) key is smaller than smallest, implying it is also smaller than
76     //     larger. Precalculated index based on "smallest < smallest" can
77     //     be used to provide right bound.
78     // (2) key is in between smallest and largest.
79     //     Precalculated index based on "smallest > greatest" can be used to
80     //     provide left bound.
81     //     Precalculated index based on "largest < smallest" can be used to
82     //     provide right bound.
83     // (3) key is larger than largest, implying it is also larger than smallest.
84     //     Precalculated index based on "largest > largest" can be used to
85     //     provide left bound.
86     //
87     // As a result, we will need to do:
88     // Compare smallest (<=) and largest keys from upper level file with
89     // smallest key from lower level to get a right bound.
90     // Compare smallest (>=) and largest keys from upper level file with
91     // largest key from lower level to get a left bound.
92     //
93     // Example:
94     //    level 1:              [50 - 60]
95     //    level 2:        [1 - 40], [45 - 55], [58 - 80]
96     // A key 35, compared to be less than 50, 3rd file on level 2 can be
97     // skipped according to rule (1). LB = 0, RB = 1.
98     // A key 53, sits in the middle 50 and 60. 1st file on level 2 can be
99     // skipped according to rule (2)-a, but the 3rd file cannot be skipped
100     // because 60 is greater than 58. LB = 1, RB = 2.
101     // A key 70, compared to be larger than 60. 1st and 2nd file can be skipped
102     // according to rule (3). LB = 2, RB = 2.
103     //
104     // Point to a left most file in a lower level that may contain a key,
105     // which compares greater than smallest of a FileMetaData (upper level)
106     int32_t smallest_lb;
107     // Point to a left most file in a lower level that may contain a key,
108     // which compares greater than largest of a FileMetaData (upper level)
109     int32_t largest_lb;
110     // Point to a right most file in a lower level that may contain a key,
111     // which compares smaller than smallest of a FileMetaData (upper level)
112     int32_t smallest_rb;
113     // Point to a right most file in a lower level that may contain a key,
114     // which compares smaller than largest of a FileMetaData (upper level)
115     int32_t largest_rb;
116   };
117 
118   // Data structure to store IndexUnits in a whole level
119   struct IndexLevel {
120     size_t num_index;
121     IndexUnit* index_units;
122 
IndexLevelIndexLevel123     IndexLevel() : num_index(0), index_units(nullptr) {}
124   };
125 
126   void CalculateLB(
127       const std::vector<FileMetaData*>& upper_files,
128       const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
129       std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
130       std::function<void(IndexUnit*, int32_t)> set_index);
131 
132   void CalculateRB(
133       const std::vector<FileMetaData*>& upper_files,
134       const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
135       std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
136       std::function<void(IndexUnit*, int32_t)> set_index);
137 
138   autovector<IndexLevel> next_level_index_;
139   int32_t* level_rb_;
140 };
141 
142 }  // namespace ROCKSDB_NAMESPACE
143