1 //  Copyright (c) 2018-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #include <list>
9 #include <memory>
10 #include <set>
11 #include <string>
12 #include <vector>
13 
14 #include "db/dbformat.h"
15 #include "db/pinned_iterators_manager.h"
16 #include "rocksdb/status.h"
17 #include "table/internal_iterator.h"
18 
19 namespace rocksdb {
20 
21 struct FragmentedRangeTombstoneList {
22  public:
23   // A compact representation of a "stack" of range tombstone fragments, which
24   // start and end at the same user keys but have different sequence numbers.
25   // The members seq_start_idx and seq_end_idx are intended to be parameters to
26   // seq_iter().
27   struct RangeTombstoneStack {
RangeTombstoneStackFragmentedRangeTombstoneList::RangeTombstoneStack28     RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx,
29                         size_t end_idx)
30         : start_key(start),
31           end_key(end),
32           seq_start_idx(start_idx),
33           seq_end_idx(end_idx) {}
34 
35     Slice start_key;
36     Slice end_key;
37     size_t seq_start_idx;
38     size_t seq_end_idx;
39   };
40   FragmentedRangeTombstoneList(
41       std::unique_ptr<InternalIterator> unfragmented_tombstones,
42       const InternalKeyComparator& icmp, bool for_compaction = false,
43       const std::vector<SequenceNumber>& snapshots = {});
44 
beginFragmentedRangeTombstoneList45   std::vector<RangeTombstoneStack>::const_iterator begin() const {
46     return tombstones_.begin();
47   }
48 
endFragmentedRangeTombstoneList49   std::vector<RangeTombstoneStack>::const_iterator end() const {
50     return tombstones_.end();
51   }
52 
seq_iterFragmentedRangeTombstoneList53   std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const {
54     return std::next(tombstone_seqs_.begin(), idx);
55   }
56 
seq_beginFragmentedRangeTombstoneList57   std::vector<SequenceNumber>::const_iterator seq_begin() const {
58     return tombstone_seqs_.begin();
59   }
60 
seq_endFragmentedRangeTombstoneList61   std::vector<SequenceNumber>::const_iterator seq_end() const {
62     return tombstone_seqs_.end();
63   }
64 
emptyFragmentedRangeTombstoneList65   bool empty() const { return tombstones_.empty(); }
66 
67   // Returns true if the stored tombstones contain with one with a sequence
68   // number in [lower, upper].
69   bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const;
70 
71  private:
72   // Given an ordered range tombstone iterator unfragmented_tombstones,
73   // "fragment" the tombstones into non-overlapping pieces, and store them in
74   // tombstones_ and tombstone_seqs_.
75   void FragmentTombstones(
76       std::unique_ptr<InternalIterator> unfragmented_tombstones,
77       const InternalKeyComparator& icmp, bool for_compaction,
78       const std::vector<SequenceNumber>& snapshots);
79 
80   std::vector<RangeTombstoneStack> tombstones_;
81   std::vector<SequenceNumber> tombstone_seqs_;
82   std::set<SequenceNumber> seq_set_;
83   std::list<std::string> pinned_slices_;
84   PinnedIteratorsManager pinned_iters_mgr_;
85 };
86 
87 // FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
88 // meta block into an iterator over non-overlapping tombstone fragments. The
89 // tombstone fragmentation process should be more efficient than the range
90 // tombstone collapsing algorithm in RangeDelAggregator because this leverages
91 // the internal key ordering already provided by the input iterator, if
92 // applicable (when the iterator is unsorted, a new sorted iterator is created
93 // before proceeding). If there are few overlaps, creating a
94 // FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator
95 // tombstone collapsing is always O(n log n).
96 class FragmentedRangeTombstoneIterator : public InternalIterator {
97  public:
98   FragmentedRangeTombstoneIterator(
99       const FragmentedRangeTombstoneList* tombstones,
100       const InternalKeyComparator& icmp, SequenceNumber upper_bound,
101       SequenceNumber lower_bound = 0);
102   FragmentedRangeTombstoneIterator(
103       const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
104       const InternalKeyComparator& icmp, SequenceNumber upper_bound,
105       SequenceNumber lower_bound = 0);
106 
107   void SeekToFirst() override;
108   void SeekToLast() override;
109 
110   void SeekToTopFirst();
111   void SeekToTopLast();
112 
113   // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator
114   // seeking should behave. This is OK because they are not currently used, but
115   // eventually FragmentedRangeTombstoneIterator should no longer implement
116   // InternalIterator.
117   //
118   // Seeks to the range tombstone that covers target at a seqnum in the
119   // snapshot. If no such tombstone exists, seek to the earliest tombstone in
120   // the snapshot that ends after target.
121   void Seek(const Slice& target) override;
122   // Seeks to the range tombstone that covers target at a seqnum in the
123   // snapshot. If no such tombstone exists, seek to the latest tombstone in the
124   // snapshot that starts before target.
125   void SeekForPrev(const Slice& target) override;
126 
127   void Next() override;
128   void Prev() override;
129 
130   void TopNext();
131   void TopPrev();
132 
133   bool Valid() const override;
key()134   Slice key() const override {
135     MaybePinKey();
136     return current_start_key_.Encode();
137   }
value()138   Slice value() const override { return pos_->end_key; }
IsKeyPinned()139   bool IsKeyPinned() const override { return false; }
IsValuePinned()140   bool IsValuePinned() const override { return true; }
status()141   Status status() const override { return Status::OK(); }
142 
empty()143   bool empty() const { return tombstones_->empty(); }
Invalidate()144   void Invalidate() {
145     pos_ = tombstones_->end();
146     seq_pos_ = tombstones_->seq_end();
147     pinned_pos_ = tombstones_->end();
148     pinned_seq_pos_ = tombstones_->seq_end();
149   }
150 
Tombstone()151   RangeTombstone Tombstone() const {
152     return RangeTombstone(start_key(), end_key(), seq());
153   }
start_key()154   Slice start_key() const { return pos_->start_key; }
end_key()155   Slice end_key() const { return pos_->end_key; }
seq()156   SequenceNumber seq() const { return *seq_pos_; }
parsed_start_key()157   ParsedInternalKey parsed_start_key() const {
158     return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber,
159                              kTypeRangeDeletion);
160   }
parsed_end_key()161   ParsedInternalKey parsed_end_key() const {
162     return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber,
163                              kTypeRangeDeletion);
164   }
165 
166   SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key);
167 
168   // Splits the iterator into n+1 iterators (where n is the number of
169   // snapshots), each providing a view over a "stripe" of sequence numbers. The
170   // iterators are keyed by the upper bound of their ranges (the provided
171   // snapshots + kMaxSequenceNumber).
172   //
173   // NOTE: the iterators in the returned map are no longer valid if their
174   // parent iterator is deleted, since they do not modify the refcount of the
175   // underlying tombstone list. Therefore, this map should be deleted before
176   // the parent iterator.
177   std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
178   SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
179 
upper_bound()180   SequenceNumber upper_bound() const { return upper_bound_; }
lower_bound()181   SequenceNumber lower_bound() const { return lower_bound_; }
182 
183  private:
184   using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack;
185 
186   struct RangeTombstoneStackStartComparator {
RangeTombstoneStackStartComparatorRangeTombstoneStackStartComparator187     explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {}
188 
operatorRangeTombstoneStackStartComparator189     bool operator()(const RangeTombstoneStack& a,
190                     const RangeTombstoneStack& b) const {
191       return cmp->Compare(a.start_key, b.start_key) < 0;
192     }
193 
operatorRangeTombstoneStackStartComparator194     bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
195       return cmp->Compare(a.start_key, b) < 0;
196     }
197 
operatorRangeTombstoneStackStartComparator198     bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
199       return cmp->Compare(a, b.start_key) < 0;
200     }
201 
202     const Comparator* cmp;
203   };
204 
205   struct RangeTombstoneStackEndComparator {
RangeTombstoneStackEndComparatorRangeTombstoneStackEndComparator206     explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {}
207 
operatorRangeTombstoneStackEndComparator208     bool operator()(const RangeTombstoneStack& a,
209                     const RangeTombstoneStack& b) const {
210       return cmp->Compare(a.end_key, b.end_key) < 0;
211     }
212 
operatorRangeTombstoneStackEndComparator213     bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
214       return cmp->Compare(a.end_key, b) < 0;
215     }
216 
operatorRangeTombstoneStackEndComparator217     bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
218       return cmp->Compare(a, b.end_key) < 0;
219     }
220 
221     const Comparator* cmp;
222   };
223 
MaybePinKey()224   void MaybePinKey() const {
225     if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() &&
226         (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) {
227       current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion);
228       pinned_pos_ = pos_;
229       pinned_seq_pos_ = seq_pos_;
230     }
231   }
232 
233   void SeekToCoveringTombstone(const Slice& key);
234   void SeekForPrevToCoveringTombstone(const Slice& key);
235   void ScanForwardToVisibleTombstone();
236   void ScanBackwardToVisibleTombstone();
ValidPos()237   bool ValidPos() const {
238     return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx);
239   }
240 
241   const RangeTombstoneStackStartComparator tombstone_start_cmp_;
242   const RangeTombstoneStackEndComparator tombstone_end_cmp_;
243   const InternalKeyComparator* icmp_;
244   const Comparator* ucmp_;
245   std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_;
246   const FragmentedRangeTombstoneList* tombstones_;
247   SequenceNumber upper_bound_;
248   SequenceNumber lower_bound_;
249   std::vector<RangeTombstoneStack>::const_iterator pos_;
250   std::vector<SequenceNumber>::const_iterator seq_pos_;
251   mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
252   mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_;
253   mutable InternalKey current_start_key_;
254 };
255 
256 }  // namespace rocksdb
257