1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 #pragma once 7 8 #include <algorithm> 9 #include <cstdint> 10 #include <functional> 11 #include "port/port.h" 12 #include "util/autovector.h" 13 14 namespace ROCKSDB_NAMESPACE { 15 16 // Binary heap implementation optimized for use in multi-way merge sort. 17 // Comparison to std::priority_queue: 18 // - In libstdc++, std::priority_queue::pop() usually performs just over logN 19 // comparisons but never fewer. 20 // - std::priority_queue does not have a replace-top operation, requiring a 21 // pop+push. If the replacement element is the new top, this requires 22 // around 2logN comparisons. 23 // - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN 24 // comparisons. 25 // - This heap provides a replace_top() operation which requires [1, 2logN] 26 // comparisons. When the replacement element is also the new top, this 27 // takes just 1 or 2 comparisons. 28 // 29 // The last property can yield an order-of-magnitude performance improvement 30 // when merge-sorting real-world non-random data. If the merge operation is 31 // likely to take chunks of elements from the same input stream, only 1 32 // comparison per element is needed. In RocksDB-land, this happens when 33 // compacting a database where keys are not randomly distributed across L0 34 // files but nearby keys are likely to be in the same L0 file. 35 // 36 // The container uses the same counterintuitive ordering as 37 // std::priority_queue: the comparison operator is expected to provide the 38 // less-than relation, but top() will return the maximum. 39 40 template<typename T, typename Compare = std::less<T>> 41 class BinaryHeap { 42 public: BinaryHeap()43 BinaryHeap() { } BinaryHeap(Compare cmp)44 explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { } 45 push(const T & value)46 void push(const T& value) { 47 data_.push_back(value); 48 upheap(data_.size() - 1); 49 } 50 push(T && value)51 void push(T&& value) { 52 data_.push_back(std::move(value)); 53 upheap(data_.size() - 1); 54 } 55 top()56 const T& top() const { 57 assert(!empty()); 58 return data_.front(); 59 } 60 replace_top(const T & value)61 void replace_top(const T& value) { 62 assert(!empty()); 63 data_.front() = value; 64 downheap(get_root()); 65 } 66 replace_top(T && value)67 void replace_top(T&& value) { 68 assert(!empty()); 69 data_.front() = std::move(value); 70 downheap(get_root()); 71 } 72 pop()73 void pop() { 74 assert(!empty()); 75 data_.front() = std::move(data_.back()); 76 data_.pop_back(); 77 if (!empty()) { 78 downheap(get_root()); 79 } else { 80 reset_root_cmp_cache(); 81 } 82 } 83 swap(BinaryHeap & other)84 void swap(BinaryHeap &other) { 85 std::swap(cmp_, other.cmp_); 86 data_.swap(other.data_); 87 std::swap(root_cmp_cache_, other.root_cmp_cache_); 88 } 89 clear()90 void clear() { 91 data_.clear(); 92 reset_root_cmp_cache(); 93 } 94 empty()95 bool empty() const { return data_.empty(); } 96 size()97 size_t size() const { return data_.size(); } 98 reset_root_cmp_cache()99 void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; } 100 101 private: get_root()102 static inline size_t get_root() { return 0; } get_parent(size_t index)103 static inline size_t get_parent(size_t index) { return (index - 1) / 2; } get_left(size_t index)104 static inline size_t get_left(size_t index) { return 2 * index + 1; } get_right(size_t index)105 static inline size_t get_right(size_t index) { return 2 * index + 2; } 106 upheap(size_t index)107 void upheap(size_t index) { 108 T v = std::move(data_[index]); 109 while (index > get_root()) { 110 const size_t parent = get_parent(index); 111 if (!cmp_(data_[parent], v)) { 112 break; 113 } 114 data_[index] = std::move(data_[parent]); 115 index = parent; 116 } 117 data_[index] = std::move(v); 118 reset_root_cmp_cache(); 119 } 120 downheap(size_t index)121 void downheap(size_t index) { 122 T v = std::move(data_[index]); 123 124 size_t picked_child = port::kMaxSizet; 125 while (1) { 126 const size_t left_child = get_left(index); 127 if (get_left(index) >= data_.size()) { 128 break; 129 } 130 const size_t right_child = left_child + 1; 131 assert(right_child == get_right(index)); 132 picked_child = left_child; 133 if (index == 0 && root_cmp_cache_ < data_.size()) { 134 picked_child = root_cmp_cache_; 135 } else if (right_child < data_.size() && 136 cmp_(data_[left_child], data_[right_child])) { 137 picked_child = right_child; 138 } 139 if (!cmp_(v, data_[picked_child])) { 140 break; 141 } 142 data_[index] = std::move(data_[picked_child]); 143 index = picked_child; 144 } 145 146 if (index == 0) { 147 // We did not change anything in the tree except for the value 148 // of the root node, left and right child did not change, we can 149 // cache that `picked_child` is the smallest child 150 // so next time we compare againist it directly 151 root_cmp_cache_ = picked_child; 152 } else { 153 // the tree changed, reset cache 154 reset_root_cmp_cache(); 155 } 156 157 data_[index] = std::move(v); 158 } 159 160 Compare cmp_; 161 autovector<T> data_; 162 // Used to reduce number of cmp_ calls in downheap() 163 size_t root_cmp_cache_ = port::kMaxSizet; 164 }; 165 166 } // namespace ROCKSDB_NAMESPACE 167