1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 
7 #ifndef ROCKSDB_LITE
8 #include "memtable/hash_skiplist_rep.h"
9 
10 #include <atomic>
11 
12 #include "db/memtable.h"
13 #include "memory/arena.h"
14 #include "memtable/skiplist.h"
15 #include "port/port.h"
16 #include "rocksdb/memtablerep.h"
17 #include "rocksdb/slice.h"
18 #include "rocksdb/slice_transform.h"
19 #include "util/murmurhash.h"
20 
21 namespace ROCKSDB_NAMESPACE {
22 namespace {
23 
// Prefix-hashed memtable rep: each user key is mapped through a
// SliceTransform to a prefix, the prefix is hashed (MurmurHash) into a
// fixed-size bucket array, and each bucket is an independent skip list
// ordered by the full internal key. Point lookups therefore only touch keys
// sharing the lookup key's prefix; total-order iteration requires merging
// all buckets (see GetIterator).
class HashSkipListRep : public MemTableRep {
 public:
  HashSkipListRep(const MemTableRep::KeyComparator& compare,
                  Allocator* allocator, const SliceTransform* transform,
                  size_t bucket_size, int32_t skiplist_height,
                  int32_t skiplist_branching_factor);

  // Inserts the encoded entry into the skip list of the bucket its
  // transformed user key hashes to. REQUIRES: key is not already present.
  void Insert(KeyHandle handle) override;

  // Returns true iff an entry equal to key exists in its prefix's bucket.
  bool Contains(const char* key) const override;

  size_t ApproximateMemoryUsage() override;

  // Invokes callback_func(callback_args, entry) for successive entries in
  // the bucket of k's prefix, starting at k, until the callback returns
  // false or the bucket is exhausted.
  void Get(const LookupKey& k, void* callback_args,
           bool (*callback_func)(void* arg, const char* entry)) override;

  ~HashSkipListRep() override;

  // Total-order iterator over ALL entries; eagerly copies every bucket into
  // one freshly allocated skip list, so it is expensive (used e.g. at flush).
  MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override;

  // Prefix iterator that re-targets its bucket on every Seek; SeekToFirst /
  // SeekToLast are unsupported and yield an invalid iterator.
  MemTableRep::Iterator* GetDynamicPrefixIterator(
      Arena* arena = nullptr) override;

 private:
  friend class DynamicIterator;
  typedef SkipList<const char*, const MemTableRep::KeyComparator&> Bucket;

  size_t bucket_size_;

  const int32_t skiplist_height_;
  const int32_t skiplist_branching_factor_;

  // Maps slices (which are transformed user keys) to buckets of keys sharing
  // the same transform. Entries start as nullptr and buckets are created
  // lazily on first insert (see GetInitializedBucket).
  std::atomic<Bucket*>* buckets_;

  // The user-supplied transform whose domain is the user keys.
  const SliceTransform* transform_;

  const MemTableRep::KeyComparator& compare_;
  // immutable after construction
  Allocator* const allocator_;

  // Hashes a transformed key into an index in [0, bucket_size_).
  inline size_t GetHash(const Slice& slice) const {
    return MurmurHash(slice.data(), static_cast<int>(slice.size()), 0) %
           bucket_size_;
  }
  // Acquire-load pairs with the release-store in GetInitializedBucket, so a
  // non-null result is a fully constructed skip list.
  inline Bucket* GetBucket(size_t i) const {
    return buckets_[i].load(std::memory_order_acquire);
  }
  inline Bucket* GetBucket(const Slice& slice) const {
    return GetBucket(GetHash(slice));
  }
  // Get a bucket from buckets_. If the bucket hasn't been initialized yet,
  // initialize it before returning.
  Bucket* GetInitializedBucket(const Slice& transformed);

  // Iterator over a single Bucket. May optionally own both the bucket and
  // the Arena backing it (the merged-list case built by GetIterator).
  class Iterator : public MemTableRep::Iterator {
   public:
    explicit Iterator(Bucket* list, bool own_list = true,
                      Arena* arena = nullptr)
        : list_(list), iter_(list), own_list_(own_list), arena_(arena) {}

    ~Iterator() override {
      // if we own the list, we should also delete it
      if (own_list_) {
        assert(list_ != nullptr);
        delete list_;
      }
    }

    // Returns true iff the iterator is positioned at a valid node.
    bool Valid() const override { return list_ != nullptr && iter_.Valid(); }

    // Returns the key at the current position.
    // REQUIRES: Valid()
    const char* key() const override {
      assert(Valid());
      return iter_.key();
    }

    // Advances to the next position.
    // REQUIRES: Valid()
    void Next() override {
      assert(Valid());
      iter_.Next();
    }

    // Advances to the previous position.
    // REQUIRES: Valid()
    void Prev() override {
      assert(Valid());
      iter_.Prev();
    }

    // Advance to the first entry with a key >= target
    void Seek(const Slice& internal_key, const char* memtable_key) override {
      if (list_ != nullptr) {
        // Prefer the caller-provided pre-encoded key; otherwise encode the
        // internal key into tmp_ (reused across seeks).
        const char* encoded_key =
            (memtable_key != nullptr) ?
                memtable_key : EncodeKey(&tmp_, internal_key);
        iter_.Seek(encoded_key);
      }
    }

    // Retreat to the last entry with a key <= target
    void SeekForPrev(const Slice& /*internal_key*/,
                     const char* /*memtable_key*/) override {
      // not supported
      assert(false);
    }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToFirst() override {
      if (list_ != nullptr) {
        iter_.SeekToFirst();
      }
    }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToLast() override {
      if (list_ != nullptr) {
        iter_.SeekToLast();
      }
    }

   protected:
    // Re-targets the iterator at a different bucket (possibly nullptr for
    // "invalid"). Frees the previously owned list, if any; the new list is
    // never owned (buckets belong to the rep's allocator).
    void Reset(Bucket* list) {
      if (own_list_) {
        assert(list_ != nullptr);
        delete list_;
      }
      list_ = list;
      iter_.SetList(list);
      own_list_ = false;
    }
   private:
    // if list_ is nullptr, we should NEVER call any methods on iter_
    // if list_ is nullptr, this Iterator is not Valid()
    Bucket* list_;
    Bucket::Iterator iter_;
    // here we track if we own list_. If we own it, we are also
    // responsible for it's cleaning. This is a poor man's std::shared_ptr
    bool own_list_;
    // Arena backing an owned list_; released together with this iterator.
    std::unique_ptr<Arena> arena_;
    std::string tmp_;       // For passing to EncodeKey
  };

  // Prefix iterator: each Seek re-hashes the target key's prefix and binds
  // the base Iterator to that bucket before seeking within it.
  class DynamicIterator : public HashSkipListRep::Iterator {
   public:
    explicit DynamicIterator(const HashSkipListRep& memtable_rep)
      : HashSkipListRep::Iterator(nullptr, false),
        memtable_rep_(memtable_rep) {}

    // Advance to the first entry with a key >= target
    void Seek(const Slice& k, const char* memtable_key) override {
      auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k));
      Reset(memtable_rep_.GetBucket(transformed));
      HashSkipListRep::Iterator::Seek(k, memtable_key);
    }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToFirst() override {
      // Prefix iterator does not support total order.
      // We simply set the iterator to invalid state
      Reset(nullptr);
    }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToLast() override {
      // Prefix iterator does not support total order.
      // We simply set the iterator to invalid state
      Reset(nullptr);
    }

   private:
    // the underlying memtable
    const HashSkipListRep& memtable_rep_;
  };

  class EmptyIterator : public MemTableRep::Iterator {
    // This is used when there wasn't a bucket. It is cheaper than
    // instantiating an empty bucket over which to iterate.
   public:
    EmptyIterator() { }
    bool Valid() const override { return false; }
    const char* key() const override {
      assert(false);
      return nullptr;
    }
    void Next() override {}
    void Prev() override {}
    void Seek(const Slice& /*internal_key*/,
              const char* /*memtable_key*/) override {}
    void SeekForPrev(const Slice& /*internal_key*/,
                     const char* /*memtable_key*/) override {}
    void SeekToFirst() override {}
    void SeekToLast() override {}

   private:
  };
};
230 
HashSkipListRep(const MemTableRep::KeyComparator & compare,Allocator * allocator,const SliceTransform * transform,size_t bucket_size,int32_t skiplist_height,int32_t skiplist_branching_factor)231 HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare,
232                                  Allocator* allocator,
233                                  const SliceTransform* transform,
234                                  size_t bucket_size, int32_t skiplist_height,
235                                  int32_t skiplist_branching_factor)
236     : MemTableRep(allocator),
237       bucket_size_(bucket_size),
238       skiplist_height_(skiplist_height),
239       skiplist_branching_factor_(skiplist_branching_factor),
240       transform_(transform),
241       compare_(compare),
242       allocator_(allocator) {
243   auto mem = allocator->AllocateAligned(
244                sizeof(std::atomic<void*>) * bucket_size);
245   buckets_ = new (mem) std::atomic<Bucket*>[bucket_size];
246 
247   for (size_t i = 0; i < bucket_size_; ++i) {
248     buckets_[i].store(nullptr, std::memory_order_relaxed);
249   }
250 }
251 
~HashSkipListRep()252 HashSkipListRep::~HashSkipListRep() {
253 }
254 
GetInitializedBucket(const Slice & transformed)255 HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket(
256     const Slice& transformed) {
257   size_t hash = GetHash(transformed);
258   auto bucket = GetBucket(hash);
259   if (bucket == nullptr) {
260     auto addr = allocator_->AllocateAligned(sizeof(Bucket));
261     bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_,
262                                skiplist_branching_factor_);
263     buckets_[hash].store(bucket, std::memory_order_release);
264   }
265   return bucket;
266 }
267 
Insert(KeyHandle handle)268 void HashSkipListRep::Insert(KeyHandle handle) {
269   auto* key = static_cast<char*>(handle);
270   assert(!Contains(key));
271   auto transformed = transform_->Transform(UserKey(key));
272   auto bucket = GetInitializedBucket(transformed);
273   bucket->Insert(key);
274 }
275 
Contains(const char * key) const276 bool HashSkipListRep::Contains(const char* key) const {
277   auto transformed = transform_->Transform(UserKey(key));
278   auto bucket = GetBucket(transformed);
279   if (bucket == nullptr) {
280     return false;
281   }
282   return bucket->Contains(key);
283 }
284 
size_t HashSkipListRep::ApproximateMemoryUsage() {
  // All allocations go through allocator_, whose usage the memtable already
  // tracks, so this rep reports no additional memory of its own.
  return 0;
}
288 
Get(const LookupKey & k,void * callback_args,bool (* callback_func)(void * arg,const char * entry))289 void HashSkipListRep::Get(const LookupKey& k, void* callback_args,
290                           bool (*callback_func)(void* arg, const char* entry)) {
291   auto transformed = transform_->Transform(k.user_key());
292   auto bucket = GetBucket(transformed);
293   if (bucket != nullptr) {
294     Bucket::Iterator iter(bucket);
295     for (iter.Seek(k.memtable_key().data());
296          iter.Valid() && callback_func(callback_args, iter.key());
297          iter.Next()) {
298     }
299   }
300 }
301 
GetIterator(Arena * arena)302 MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) {
303   // allocate a new arena of similar size to the one currently in use
304   Arena* new_arena = new Arena(allocator_->BlockSize());
305   auto list = new Bucket(compare_, new_arena);
306   for (size_t i = 0; i < bucket_size_; ++i) {
307     auto bucket = GetBucket(i);
308     if (bucket != nullptr) {
309       Bucket::Iterator itr(bucket);
310       for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
311         list->Insert(itr.key());
312       }
313     }
314   }
315   if (arena == nullptr) {
316     return new Iterator(list, true, new_arena);
317   } else {
318     auto mem = arena->AllocateAligned(sizeof(Iterator));
319     return new (mem) Iterator(list, true, new_arena);
320   }
321 }
322 
GetDynamicPrefixIterator(Arena * arena)323 MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) {
324   if (arena == nullptr) {
325     return new DynamicIterator(*this);
326   } else {
327     auto mem = arena->AllocateAligned(sizeof(DynamicIterator));
328     return new (mem) DynamicIterator(*this);
329   }
330 }
331 
332 } // anon namespace
333 
// Constructs a HashSkipListRep configured from this factory's stored
// parameters. Ownership of the returned rep passes to the caller.
MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
    const MemTableRep::KeyComparator& compare, Allocator* allocator,
    const SliceTransform* transform, Logger* /*logger*/) {
  auto* rep = new HashSkipListRep(compare, allocator, transform, bucket_count_,
                                  skiplist_height_, skiplist_branching_factor_);
  return rep;
}
340 
NewHashSkipListRepFactory(size_t bucket_count,int32_t skiplist_height,int32_t skiplist_branching_factor)341 MemTableRepFactory* NewHashSkipListRepFactory(
342     size_t bucket_count, int32_t skiplist_height,
343     int32_t skiplist_branching_factor) {
344   return new HashSkipListRepFactory(bucket_count, skiplist_height,
345       skiplist_branching_factor);
346 }
347 
348 }  // namespace ROCKSDB_NAMESPACE
349 #endif  // ROCKSDB_LITE
350