1 //  Copyright (c) 2018-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
#include <iterator>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "rocksdb/status.h"
#include "table/internal_iterator.h"
18 
19 namespace ROCKSDB_NAMESPACE {
20 
21 struct FragmentedRangeTombstoneList {
22  public:
23   // A compact representation of a "stack" of range tombstone fragments, which
24   // start and end at the same user keys but have different sequence numbers.
25   // The members seq_start_idx and seq_end_idx are intended to be parameters to
26   // seq_iter().
27   struct RangeTombstoneStack {
RangeTombstoneStackFragmentedRangeTombstoneList::RangeTombstoneStack28     RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx,
29                         size_t end_idx)
30         : start_key(start),
31           end_key(end),
32           seq_start_idx(start_idx),
33           seq_end_idx(end_idx) {}
34 
35     Slice start_key;
36     Slice end_key;
37     size_t seq_start_idx;
38     size_t seq_end_idx;
39   };
40   FragmentedRangeTombstoneList(
41       std::unique_ptr<InternalIterator> unfragmented_tombstones,
42       const InternalKeyComparator& icmp, bool for_compaction = false,
43       const std::vector<SequenceNumber>& snapshots = {});
44 
beginFragmentedRangeTombstoneList45   std::vector<RangeTombstoneStack>::const_iterator begin() const {
46     return tombstones_.begin();
47   }
48 
endFragmentedRangeTombstoneList49   std::vector<RangeTombstoneStack>::const_iterator end() const {
50     return tombstones_.end();
51   }
52 
seq_iterFragmentedRangeTombstoneList53   std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const {
54     return std::next(tombstone_seqs_.begin(), idx);
55   }
56 
seq_beginFragmentedRangeTombstoneList57   std::vector<SequenceNumber>::const_iterator seq_begin() const {
58     return tombstone_seqs_.begin();
59   }
60 
seq_endFragmentedRangeTombstoneList61   std::vector<SequenceNumber>::const_iterator seq_end() const {
62     return tombstone_seqs_.end();
63   }
64 
emptyFragmentedRangeTombstoneList65   bool empty() const { return tombstones_.empty(); }
66 
67   // Returns true if the stored tombstones contain with one with a sequence
68   // number in [lower, upper].
69   bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const;
70 
num_unfragmented_tombstonesFragmentedRangeTombstoneList71   uint64_t num_unfragmented_tombstones() const {
72     return num_unfragmented_tombstones_;
73   }
74 
75  private:
76   // Given an ordered range tombstone iterator unfragmented_tombstones,
77   // "fragment" the tombstones into non-overlapping pieces, and store them in
78   // tombstones_ and tombstone_seqs_.
79   void FragmentTombstones(
80       std::unique_ptr<InternalIterator> unfragmented_tombstones,
81       const InternalKeyComparator& icmp, bool for_compaction,
82       const std::vector<SequenceNumber>& snapshots);
83 
84   std::vector<RangeTombstoneStack> tombstones_;
85   std::vector<SequenceNumber> tombstone_seqs_;
86   std::set<SequenceNumber> seq_set_;
87   std::list<std::string> pinned_slices_;
88   PinnedIteratorsManager pinned_iters_mgr_;
89   uint64_t num_unfragmented_tombstones_;
90 };
91 
92 // FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
93 // meta block into an iterator over non-overlapping tombstone fragments. The
94 // tombstone fragmentation process should be more efficient than the range
95 // tombstone collapsing algorithm in RangeDelAggregator because this leverages
96 // the internal key ordering already provided by the input iterator, if
97 // applicable (when the iterator is unsorted, a new sorted iterator is created
98 // before proceeding). If there are few overlaps, creating a
99 // FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator
100 // tombstone collapsing is always O(n log n).
101 class FragmentedRangeTombstoneIterator : public InternalIterator {
102  public:
103   FragmentedRangeTombstoneIterator(
104       const FragmentedRangeTombstoneList* tombstones,
105       const InternalKeyComparator& icmp, SequenceNumber upper_bound,
106       SequenceNumber lower_bound = 0);
107   FragmentedRangeTombstoneIterator(
108       const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
109       const InternalKeyComparator& icmp, SequenceNumber upper_bound,
110       SequenceNumber lower_bound = 0);
111 
112   void SeekToFirst() override;
113   void SeekToLast() override;
114 
115   void SeekToTopFirst();
116   void SeekToTopLast();
117 
118   // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator
119   // seeking should behave. This is OK because they are not currently used, but
120   // eventually FragmentedRangeTombstoneIterator should no longer implement
121   // InternalIterator.
122   //
123   // Seeks to the range tombstone that covers target at a seqnum in the
124   // snapshot. If no such tombstone exists, seek to the earliest tombstone in
125   // the snapshot that ends after target.
126   void Seek(const Slice& target) override;
127   // Seeks to the range tombstone that covers target at a seqnum in the
128   // snapshot. If no such tombstone exists, seek to the latest tombstone in the
129   // snapshot that starts before target.
130   void SeekForPrev(const Slice& target) override;
131 
132   void Next() override;
133   void Prev() override;
134 
135   void TopNext();
136   void TopPrev();
137 
138   bool Valid() const override;
key()139   Slice key() const override {
140     MaybePinKey();
141     return current_start_key_.Encode();
142   }
value()143   Slice value() const override { return pos_->end_key; }
IsKeyPinned()144   bool IsKeyPinned() const override { return false; }
IsValuePinned()145   bool IsValuePinned() const override { return true; }
status()146   Status status() const override { return Status::OK(); }
147 
empty()148   bool empty() const { return tombstones_->empty(); }
Invalidate()149   void Invalidate() {
150     pos_ = tombstones_->end();
151     seq_pos_ = tombstones_->seq_end();
152     pinned_pos_ = tombstones_->end();
153     pinned_seq_pos_ = tombstones_->seq_end();
154   }
155 
Tombstone()156   RangeTombstone Tombstone() const {
157     return RangeTombstone(start_key(), end_key(), seq());
158   }
start_key()159   Slice start_key() const { return pos_->start_key; }
end_key()160   Slice end_key() const { return pos_->end_key; }
seq()161   SequenceNumber seq() const { return *seq_pos_; }
parsed_start_key()162   ParsedInternalKey parsed_start_key() const {
163     return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber,
164                              kTypeRangeDeletion);
165   }
parsed_end_key()166   ParsedInternalKey parsed_end_key() const {
167     return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber,
168                              kTypeRangeDeletion);
169   }
170 
171   SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key);
172 
173   // Splits the iterator into n+1 iterators (where n is the number of
174   // snapshots), each providing a view over a "stripe" of sequence numbers. The
175   // iterators are keyed by the upper bound of their ranges (the provided
176   // snapshots + kMaxSequenceNumber).
177   //
178   // NOTE: the iterators in the returned map are no longer valid if their
179   // parent iterator is deleted, since they do not modify the refcount of the
180   // underlying tombstone list. Therefore, this map should be deleted before
181   // the parent iterator.
182   std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
183   SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
184 
upper_bound()185   SequenceNumber upper_bound() const { return upper_bound_; }
lower_bound()186   SequenceNumber lower_bound() const { return lower_bound_; }
187 
num_unfragmented_tombstones()188   uint64_t num_unfragmented_tombstones() const {
189     return tombstones_->num_unfragmented_tombstones();
190   }
191 
192  private:
193   using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack;
194 
195   struct RangeTombstoneStackStartComparator {
RangeTombstoneStackStartComparatorRangeTombstoneStackStartComparator196     explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {}
197 
operatorRangeTombstoneStackStartComparator198     bool operator()(const RangeTombstoneStack& a,
199                     const RangeTombstoneStack& b) const {
200       return cmp->Compare(a.start_key, b.start_key) < 0;
201     }
202 
operatorRangeTombstoneStackStartComparator203     bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
204       return cmp->Compare(a.start_key, b) < 0;
205     }
206 
operatorRangeTombstoneStackStartComparator207     bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
208       return cmp->Compare(a, b.start_key) < 0;
209     }
210 
211     const Comparator* cmp;
212   };
213 
214   struct RangeTombstoneStackEndComparator {
RangeTombstoneStackEndComparatorRangeTombstoneStackEndComparator215     explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {}
216 
operatorRangeTombstoneStackEndComparator217     bool operator()(const RangeTombstoneStack& a,
218                     const RangeTombstoneStack& b) const {
219       return cmp->Compare(a.end_key, b.end_key) < 0;
220     }
221 
operatorRangeTombstoneStackEndComparator222     bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
223       return cmp->Compare(a.end_key, b) < 0;
224     }
225 
operatorRangeTombstoneStackEndComparator226     bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
227       return cmp->Compare(a, b.end_key) < 0;
228     }
229 
230     const Comparator* cmp;
231   };
232 
MaybePinKey()233   void MaybePinKey() const {
234     if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() &&
235         (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) {
236       current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion);
237       pinned_pos_ = pos_;
238       pinned_seq_pos_ = seq_pos_;
239     }
240   }
241 
242   void SeekToCoveringTombstone(const Slice& key);
243   void SeekForPrevToCoveringTombstone(const Slice& key);
244   void ScanForwardToVisibleTombstone();
245   void ScanBackwardToVisibleTombstone();
ValidPos()246   bool ValidPos() const {
247     return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx);
248   }
249 
250   const RangeTombstoneStackStartComparator tombstone_start_cmp_;
251   const RangeTombstoneStackEndComparator tombstone_end_cmp_;
252   const InternalKeyComparator* icmp_;
253   const Comparator* ucmp_;
254   std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_;
255   const FragmentedRangeTombstoneList* tombstones_;
256   SequenceNumber upper_bound_;
257   SequenceNumber lower_bound_;
258   std::vector<RangeTombstoneStack>::const_iterator pos_;
259   std::vector<SequenceNumber>::const_iterator seq_pos_;
260   mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
261   mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_;
262   mutable InternalKey current_start_key_;
263 };
264 
265 }  // namespace ROCKSDB_NAMESPACE
266