1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #include <algorithm>
9 #include <cstdint>
10 #include <functional>
11 #include "port/port.h"
12 #include "util/autovector.h"
13 
14 namespace ROCKSDB_NAMESPACE {
15 
16 // Binary heap implementation optimized for use in multi-way merge sort.
17 // Comparison to std::priority_queue:
18 // - In libstdc++, std::priority_queue::pop() usually performs just over logN
19 //   comparisons but never fewer.
20 // - std::priority_queue does not have a replace-top operation, requiring a
21 //   pop+push.  If the replacement element is the new top, this requires
22 //   around 2logN comparisons.
23 // - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN
24 //   comparisons.
25 // - This heap provides a replace_top() operation which requires [1, 2logN]
26 //   comparisons.  When the replacement element is also the new top, this
27 //   takes just 1 or 2 comparisons.
28 //
29 // The last property can yield an order-of-magnitude performance improvement
30 // when merge-sorting real-world non-random data.  If the merge operation is
31 // likely to take chunks of elements from the same input stream, only 1
32 // comparison per element is needed.  In RocksDB-land, this happens when
33 // compacting a database where keys are not randomly distributed across L0
34 // files but nearby keys are likely to be in the same L0 file.
35 //
36 // The container uses the same counterintuitive ordering as
37 // std::priority_queue: the comparison operator is expected to provide the
38 // less-than relation, but top() will return the maximum.
39 
40 template<typename T, typename Compare = std::less<T>>
41 class BinaryHeap {
42  public:
BinaryHeap()43   BinaryHeap() { }
BinaryHeap(Compare cmp)44   explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { }
45 
push(const T & value)46   void push(const T& value) {
47     data_.push_back(value);
48     upheap(data_.size() - 1);
49   }
50 
push(T && value)51   void push(T&& value) {
52     data_.push_back(std::move(value));
53     upheap(data_.size() - 1);
54   }
55 
top()56   const T& top() const {
57     assert(!empty());
58     return data_.front();
59   }
60 
replace_top(const T & value)61   void replace_top(const T& value) {
62     assert(!empty());
63     data_.front() = value;
64     downheap(get_root());
65   }
66 
replace_top(T && value)67   void replace_top(T&& value) {
68     assert(!empty());
69     data_.front() = std::move(value);
70     downheap(get_root());
71   }
72 
pop()73   void pop() {
74     assert(!empty());
75     data_.front() = std::move(data_.back());
76     data_.pop_back();
77     if (!empty()) {
78       downheap(get_root());
79     } else {
80       reset_root_cmp_cache();
81     }
82   }
83 
swap(BinaryHeap & other)84   void swap(BinaryHeap &other) {
85     std::swap(cmp_, other.cmp_);
86     data_.swap(other.data_);
87     std::swap(root_cmp_cache_, other.root_cmp_cache_);
88   }
89 
clear()90   void clear() {
91     data_.clear();
92     reset_root_cmp_cache();
93   }
94 
empty()95   bool empty() const { return data_.empty(); }
96 
size()97   size_t size() const { return data_.size(); }
98 
reset_root_cmp_cache()99   void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; }
100 
101  private:
get_root()102   static inline size_t get_root() { return 0; }
get_parent(size_t index)103   static inline size_t get_parent(size_t index) { return (index - 1) / 2; }
get_left(size_t index)104   static inline size_t get_left(size_t index) { return 2 * index + 1; }
get_right(size_t index)105   static inline size_t get_right(size_t index) { return 2 * index + 2; }
106 
upheap(size_t index)107   void upheap(size_t index) {
108     T v = std::move(data_[index]);
109     while (index > get_root()) {
110       const size_t parent = get_parent(index);
111       if (!cmp_(data_[parent], v)) {
112         break;
113       }
114       data_[index] = std::move(data_[parent]);
115       index = parent;
116     }
117     data_[index] = std::move(v);
118     reset_root_cmp_cache();
119   }
120 
downheap(size_t index)121   void downheap(size_t index) {
122     T v = std::move(data_[index]);
123 
124     size_t picked_child = port::kMaxSizet;
125     while (1) {
126       const size_t left_child = get_left(index);
127       if (get_left(index) >= data_.size()) {
128         break;
129       }
130       const size_t right_child = left_child + 1;
131       assert(right_child == get_right(index));
132       picked_child = left_child;
133       if (index == 0 && root_cmp_cache_ < data_.size()) {
134         picked_child = root_cmp_cache_;
135       } else if (right_child < data_.size() &&
136                  cmp_(data_[left_child], data_[right_child])) {
137         picked_child = right_child;
138       }
139       if (!cmp_(v, data_[picked_child])) {
140         break;
141       }
142       data_[index] = std::move(data_[picked_child]);
143       index = picked_child;
144     }
145 
146     if (index == 0) {
147       // We did not change anything in the tree except for the value
148       // of the root node, left and right child did not change, we can
149       // cache that `picked_child` is the smallest child
150       // so next time we compare againist it directly
151       root_cmp_cache_ = picked_child;
152     } else {
153       // the tree changed, reset cache
154       reset_root_cmp_cache();
155     }
156 
157     data_[index] = std::move(v);
158   }
159 
160   Compare cmp_;
161   autovector<T> data_;
162   // Used to reduce number of cmp_ calls in downheap()
163   size_t root_cmp_cache_ = port::kMaxSizet;
164 };
165 
166 }  // namespace ROCKSDB_NAMESPACE
167