1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 #include <string>
6 #include <vector>
7 
8 #include "rocksdb/slice.h"
9 #include "table/block_based/data_block_hash_index.h"
10 #include "util/coding.h"
11 #include "util/hash.h"
12 
13 namespace ROCKSDB_NAMESPACE {
14 
Add(const Slice & key,const size_t restart_index)15 void DataBlockHashIndexBuilder::Add(const Slice& key,
16                                     const size_t restart_index) {
17   assert(Valid());
18   if (restart_index > kMaxRestartSupportedByHashIndex) {
19     valid_ = false;
20     return;
21   }
22 
23   uint32_t hash_value = GetSliceHash(key);
24   hash_and_restart_pairs_.emplace_back(hash_value,
25                                        static_cast<uint8_t>(restart_index));
26   estimated_num_buckets_ += bucket_per_key_;
27 }
28 
Finish(std::string & buffer)29 void DataBlockHashIndexBuilder::Finish(std::string& buffer) {
30   assert(Valid());
31   uint16_t num_buckets = static_cast<uint16_t>(estimated_num_buckets_);
32 
33   if (num_buckets == 0) {
34     num_buckets = 1;  // sanity check
35   }
36 
37   // The build-in hash cannot well distribute strings when into different
38   // buckets when num_buckets is power of two, resulting in high hash
39   // collision.
40   // We made the num_buckets to be odd to avoid this issue.
41   num_buckets |= 1;
42 
43   std::vector<uint8_t> buckets(num_buckets, kNoEntry);
44   // write the restart_index array
45   for (auto& entry : hash_and_restart_pairs_) {
46     uint32_t hash_value = entry.first;
47     uint8_t restart_index = entry.second;
48     uint16_t buck_idx = static_cast<uint16_t>(hash_value % num_buckets);
49     if (buckets[buck_idx] == kNoEntry) {
50       buckets[buck_idx] = restart_index;
51     } else if (buckets[buck_idx] != restart_index) {
52       // same bucket cannot store two different restart_index, mark collision
53       buckets[buck_idx] = kCollision;
54     }
55   }
56 
57   for (uint8_t restart_index : buckets) {
58     buffer.append(
59         const_cast<const char*>(reinterpret_cast<char*>(&restart_index)),
60         sizeof(restart_index));
61   }
62 
63   // write NUM_BUCK
64   PutFixed16(&buffer, num_buckets);
65 
66   assert(buffer.size() <= kMaxBlockSizeSupportedByHashIndex);
67 }
68 
Reset()69 void DataBlockHashIndexBuilder::Reset() {
70   estimated_num_buckets_ = 0;
71   valid_ = true;
72   hash_and_restart_pairs_.clear();
73 }
74 
Initialize(const char * data,uint16_t size,uint16_t * map_offset)75 void DataBlockHashIndex::Initialize(const char* data, uint16_t size,
76                                     uint16_t* map_offset) {
77   assert(size >= sizeof(uint16_t));  // NUM_BUCKETS
78   num_buckets_ = DecodeFixed16(data + size - sizeof(uint16_t));
79   assert(num_buckets_ > 0);
80   assert(size > num_buckets_ * sizeof(uint8_t));
81   *map_offset = static_cast<uint16_t>(size - sizeof(uint16_t) -
82                                       num_buckets_ * sizeof(uint8_t));
83 }
84 
Lookup(const char * data,uint32_t map_offset,const Slice & key) const85 uint8_t DataBlockHashIndex::Lookup(const char* data, uint32_t map_offset,
86                                    const Slice& key) const {
87   uint32_t hash_value = GetSliceHash(key);
88   uint16_t idx = static_cast<uint16_t>(hash_value % num_buckets_);
89   const char* bucket_table = data + map_offset;
90   return static_cast<uint8_t>(*(bucket_table + idx * sizeof(uint8_t)));
91 }
92 
93 }  // namespace ROCKSDB_NAMESPACE
94