1 // Copyright (c) 2018-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 #pragma once 7 8 #include <list> 9 #include <memory> 10 #include <set> 11 #include <string> 12 #include <vector> 13 14 #include "db/dbformat.h" 15 #include "db/pinned_iterators_manager.h" 16 #include "rocksdb/status.h" 17 #include "table/internal_iterator.h" 18 19 namespace ROCKSDB_NAMESPACE { 20 21 struct FragmentedRangeTombstoneList { 22 public: 23 // A compact representation of a "stack" of range tombstone fragments, which 24 // start and end at the same user keys but have different sequence numbers. 25 // The members seq_start_idx and seq_end_idx are intended to be parameters to 26 // seq_iter(). 27 struct RangeTombstoneStack { RangeTombstoneStackFragmentedRangeTombstoneList::RangeTombstoneStack28 RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx, 29 size_t end_idx) 30 : start_key(start), 31 end_key(end), 32 seq_start_idx(start_idx), 33 seq_end_idx(end_idx) {} 34 35 Slice start_key; 36 Slice end_key; 37 size_t seq_start_idx; 38 size_t seq_end_idx; 39 }; 40 FragmentedRangeTombstoneList( 41 std::unique_ptr<InternalIterator> unfragmented_tombstones, 42 const InternalKeyComparator& icmp, bool for_compaction = false, 43 const std::vector<SequenceNumber>& snapshots = {}); 44 beginFragmentedRangeTombstoneList45 std::vector<RangeTombstoneStack>::const_iterator begin() const { 46 return tombstones_.begin(); 47 } 48 endFragmentedRangeTombstoneList49 std::vector<RangeTombstoneStack>::const_iterator end() const { 50 return tombstones_.end(); 51 } 52 seq_iterFragmentedRangeTombstoneList53 std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const { 54 return std::next(tombstone_seqs_.begin(), idx); 55 } 56 seq_beginFragmentedRangeTombstoneList57 std::vector<SequenceNumber>::const_iterator seq_begin() const { 58 return tombstone_seqs_.begin(); 59 } 60 seq_endFragmentedRangeTombstoneList61 std::vector<SequenceNumber>::const_iterator seq_end() const { 62 return tombstone_seqs_.end(); 63 } 64 emptyFragmentedRangeTombstoneList65 bool empty() const { return tombstones_.empty(); } 66 67 // Returns true if the stored tombstones contain with one with a sequence 68 // number in [lower, upper]. 69 bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const; 70 num_unfragmented_tombstonesFragmentedRangeTombstoneList71 uint64_t num_unfragmented_tombstones() const { 72 return num_unfragmented_tombstones_; 73 } 74 75 private: 76 // Given an ordered range tombstone iterator unfragmented_tombstones, 77 // "fragment" the tombstones into non-overlapping pieces, and store them in 78 // tombstones_ and tombstone_seqs_. 79 void FragmentTombstones( 80 std::unique_ptr<InternalIterator> unfragmented_tombstones, 81 const InternalKeyComparator& icmp, bool for_compaction, 82 const std::vector<SequenceNumber>& snapshots); 83 84 std::vector<RangeTombstoneStack> tombstones_; 85 std::vector<SequenceNumber> tombstone_seqs_; 86 std::set<SequenceNumber> seq_set_; 87 std::list<std::string> pinned_slices_; 88 PinnedIteratorsManager pinned_iters_mgr_; 89 uint64_t num_unfragmented_tombstones_; 90 }; 91 92 // FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del 93 // meta block into an iterator over non-overlapping tombstone fragments. The 94 // tombstone fragmentation process should be more efficient than the range 95 // tombstone collapsing algorithm in RangeDelAggregator because this leverages 96 // the internal key ordering already provided by the input iterator, if 97 // applicable (when the iterator is unsorted, a new sorted iterator is created 98 // before proceeding). If there are few overlaps, creating a 99 // FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator 100 // tombstone collapsing is always O(n log n). 101 class FragmentedRangeTombstoneIterator : public InternalIterator { 102 public: 103 FragmentedRangeTombstoneIterator( 104 const FragmentedRangeTombstoneList* tombstones, 105 const InternalKeyComparator& icmp, SequenceNumber upper_bound, 106 SequenceNumber lower_bound = 0); 107 FragmentedRangeTombstoneIterator( 108 const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones, 109 const InternalKeyComparator& icmp, SequenceNumber upper_bound, 110 SequenceNumber lower_bound = 0); 111 112 void SeekToFirst() override; 113 void SeekToLast() override; 114 115 void SeekToTopFirst(); 116 void SeekToTopLast(); 117 118 // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator 119 // seeking should behave. This is OK because they are not currently used, but 120 // eventually FragmentedRangeTombstoneIterator should no longer implement 121 // InternalIterator. 122 // 123 // Seeks to the range tombstone that covers target at a seqnum in the 124 // snapshot. If no such tombstone exists, seek to the earliest tombstone in 125 // the snapshot that ends after target. 126 void Seek(const Slice& target) override; 127 // Seeks to the range tombstone that covers target at a seqnum in the 128 // snapshot. If no such tombstone exists, seek to the latest tombstone in the 129 // snapshot that starts before target. 130 void SeekForPrev(const Slice& target) override; 131 132 void Next() override; 133 void Prev() override; 134 135 void TopNext(); 136 void TopPrev(); 137 138 bool Valid() const override; key()139 Slice key() const override { 140 MaybePinKey(); 141 return current_start_key_.Encode(); 142 } value()143 Slice value() const override { return pos_->end_key; } IsKeyPinned()144 bool IsKeyPinned() const override { return false; } IsValuePinned()145 bool IsValuePinned() const override { return true; } status()146 Status status() const override { return Status::OK(); } 147 empty()148 bool empty() const { return tombstones_->empty(); } Invalidate()149 void Invalidate() { 150 pos_ = tombstones_->end(); 151 seq_pos_ = tombstones_->seq_end(); 152 pinned_pos_ = tombstones_->end(); 153 pinned_seq_pos_ = tombstones_->seq_end(); 154 } 155 Tombstone()156 RangeTombstone Tombstone() const { 157 return RangeTombstone(start_key(), end_key(), seq()); 158 } start_key()159 Slice start_key() const { return pos_->start_key; } end_key()160 Slice end_key() const { return pos_->end_key; } seq()161 SequenceNumber seq() const { return *seq_pos_; } parsed_start_key()162 ParsedInternalKey parsed_start_key() const { 163 return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber, 164 kTypeRangeDeletion); 165 } parsed_end_key()166 ParsedInternalKey parsed_end_key() const { 167 return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber, 168 kTypeRangeDeletion); 169 } 170 171 SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); 172 173 // Splits the iterator into n+1 iterators (where n is the number of 174 // snapshots), each providing a view over a "stripe" of sequence numbers. The 175 // iterators are keyed by the upper bound of their ranges (the provided 176 // snapshots + kMaxSequenceNumber). 177 // 178 // NOTE: the iterators in the returned map are no longer valid if their 179 // parent iterator is deleted, since they do not modify the refcount of the 180 // underlying tombstone list. Therefore, this map should be deleted before 181 // the parent iterator. 182 std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>> 183 SplitBySnapshot(const std::vector<SequenceNumber>& snapshots); 184 upper_bound()185 SequenceNumber upper_bound() const { return upper_bound_; } lower_bound()186 SequenceNumber lower_bound() const { return lower_bound_; } 187 num_unfragmented_tombstones()188 uint64_t num_unfragmented_tombstones() const { 189 return tombstones_->num_unfragmented_tombstones(); 190 } 191 192 private: 193 using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; 194 195 struct RangeTombstoneStackStartComparator { RangeTombstoneStackStartComparatorRangeTombstoneStackStartComparator196 explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {} 197 operatorRangeTombstoneStackStartComparator198 bool operator()(const RangeTombstoneStack& a, 199 const RangeTombstoneStack& b) const { 200 return cmp->Compare(a.start_key, b.start_key) < 0; 201 } 202 operatorRangeTombstoneStackStartComparator203 bool operator()(const RangeTombstoneStack& a, const Slice& b) const { 204 return cmp->Compare(a.start_key, b) < 0; 205 } 206 operatorRangeTombstoneStackStartComparator207 bool operator()(const Slice& a, const RangeTombstoneStack& b) const { 208 return cmp->Compare(a, b.start_key) < 0; 209 } 210 211 const Comparator* cmp; 212 }; 213 214 struct RangeTombstoneStackEndComparator { RangeTombstoneStackEndComparatorRangeTombstoneStackEndComparator215 explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {} 216 operatorRangeTombstoneStackEndComparator217 bool operator()(const RangeTombstoneStack& a, 218 const RangeTombstoneStack& b) const { 219 return cmp->Compare(a.end_key, b.end_key) < 0; 220 } 221 operatorRangeTombstoneStackEndComparator222 bool operator()(const RangeTombstoneStack& a, const Slice& b) const { 223 return cmp->Compare(a.end_key, b) < 0; 224 } 225 operatorRangeTombstoneStackEndComparator226 bool operator()(const Slice& a, const RangeTombstoneStack& b) const { 227 return cmp->Compare(a, b.end_key) < 0; 228 } 229 230 const Comparator* cmp; 231 }; 232 MaybePinKey()233 void MaybePinKey() const { 234 if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() && 235 (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) { 236 current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion); 237 pinned_pos_ = pos_; 238 pinned_seq_pos_ = seq_pos_; 239 } 240 } 241 242 void SeekToCoveringTombstone(const Slice& key); 243 void SeekForPrevToCoveringTombstone(const Slice& key); 244 void ScanForwardToVisibleTombstone(); 245 void ScanBackwardToVisibleTombstone(); ValidPos()246 bool ValidPos() const { 247 return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx); 248 } 249 250 const RangeTombstoneStackStartComparator tombstone_start_cmp_; 251 const RangeTombstoneStackEndComparator tombstone_end_cmp_; 252 const InternalKeyComparator* icmp_; 253 const Comparator* ucmp_; 254 std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_; 255 const FragmentedRangeTombstoneList* tombstones_; 256 SequenceNumber upper_bound_; 257 SequenceNumber lower_bound_; 258 std::vector<RangeTombstoneStack>::const_iterator pos_; 259 std::vector<SequenceNumber>::const_iterator seq_pos_; 260 mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_; 261 mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_; 262 mutable InternalKey current_start_key_; 263 }; 264 265 } // namespace ROCKSDB_NAMESPACE 266