1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11 
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
23 
24 namespace llvm {
25 
26 namespace pdb {
27 
28 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
29 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
30 
31 template <typename ValueT> class HashTable;
32 
33 template <typename ValueT>
34 class HashTableIterator
35     : public iterator_facade_base<HashTableIterator<ValueT>,
36                                   std::forward_iterator_tag,
37                                   const std::pair<uint32_t, ValueT>> {
38   using BaseT = typename HashTableIterator::iterator_facade_base;
39   friend HashTable<ValueT>;
40 
41   HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
42                     bool IsEnd)
43       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
44 
45 public:
46   HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
47     int I = Map.Present.find_first();
48     if (I == -1) {
49       Index = 0;
50       IsEnd = true;
51     } else {
52       Index = static_cast<uint32_t>(I);
53       IsEnd = false;
54     }
55   }
56 
57   HashTableIterator(const HashTableIterator &R) = default;
58   HashTableIterator &operator=(const HashTableIterator &R) {
59     Map = R.Map;
60     return *this;
61   }
62   bool operator==(const HashTableIterator &R) const {
63     if (IsEnd && R.IsEnd)
64       return true;
65     if (IsEnd != R.IsEnd)
66       return false;
67 
68     return (Map == R.Map) && (Index == R.Index);
69   }
70   const std::pair<uint32_t, ValueT> &operator*() const {
71     assert(Map->Present.test(Index));
72     return Map->Buckets[Index];
73   }
74 
75   // Implement postfix op++ in terms of prefix op++ by using the superclass
76   // implementation.
77   using BaseT::operator++;
78   HashTableIterator &operator++() {
79     while (Index < Map->Buckets.size()) {
80       ++Index;
81       if (Map->Present.test(Index))
82         return *this;
83     }
84 
85     IsEnd = true;
86     return *this;
87   }
88 
89 private:
90   bool isEnd() const { return IsEnd; }
91   uint32_t index() const { return Index; }
92 
93   const HashTable<ValueT> *Map;
94   uint32_t Index;
95   bool IsEnd;
96 };
97 
98 template <typename ValueT>
99 class HashTable {
100   struct Header {
101     support::ulittle32_t Size;
102     support::ulittle32_t Capacity;
103   };
104 
105   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
106 
107 public:
108   using const_iterator = HashTableIterator<ValueT>;
109   friend const_iterator;
110 
111   HashTable() { Buckets.resize(8); }
112   explicit HashTable(uint32_t Capacity) {
113     Buckets.resize(Capacity);
114   }
115 
116   Error load(BinaryStreamReader &Stream) {
117     const Header *H;
118     if (auto EC = Stream.readObject(H))
119       return EC;
120     if (H->Capacity == 0)
121       return make_error<RawError>(raw_error_code::corrupt_file,
122                                   "Invalid Hash Table Capacity");
123     if (H->Size > maxLoad(H->Capacity))
124       return make_error<RawError>(raw_error_code::corrupt_file,
125                                   "Invalid Hash Table Size");
126 
127     Buckets.resize(H->Capacity);
128 
129     if (auto EC = readSparseBitVector(Stream, Present))
130       return EC;
131     if (Present.count() != H->Size)
132       return make_error<RawError>(raw_error_code::corrupt_file,
133                                   "Present bit vector does not match size!");
134 
135     if (auto EC = readSparseBitVector(Stream, Deleted))
136       return EC;
137     if (Present.intersects(Deleted))
138       return make_error<RawError>(raw_error_code::corrupt_file,
139                                   "Present bit vector intersects deleted!");
140 
141     for (uint32_t P : Present) {
142       if (auto EC = Stream.readInteger(Buckets[P].first))
143         return EC;
144       const ValueT *Value;
145       if (auto EC = Stream.readObject(Value))
146         return EC;
147       Buckets[P].second = *Value;
148     }
149 
150     return Error::success();
151   }
152 
153   uint32_t calculateSerializedLength() const {
154     uint32_t Size = sizeof(Header);
155 
156     constexpr int BitsPerWord = 8 * sizeof(uint32_t);
157 
158     int NumBitsP = Present.find_last() + 1;
159     int NumBitsD = Deleted.find_last() + 1;
160 
161     uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
162     uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
163 
164     // Present bit set number of words (4 bytes), followed by that many actual
165     // words (4 bytes each).
166     Size += sizeof(uint32_t);
167     Size += NumWordsP * sizeof(uint32_t);
168 
169     // Deleted bit set number of words (4 bytes), followed by that many actual
170     // words (4 bytes each).
171     Size += sizeof(uint32_t);
172     Size += NumWordsD * sizeof(uint32_t);
173 
174     // One (Key, ValueT) pair for each entry Present.
175     Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
176 
177     return Size;
178   }
179 
180   Error commit(BinaryStreamWriter &Writer) const {
181     Header H;
182     H.Size = size();
183     H.Capacity = capacity();
184     if (auto EC = Writer.writeObject(H))
185       return EC;
186 
187     if (auto EC = writeSparseBitVector(Writer, Present))
188       return EC;
189 
190     if (auto EC = writeSparseBitVector(Writer, Deleted))
191       return EC;
192 
193     for (const auto &Entry : *this) {
194       if (auto EC = Writer.writeInteger(Entry.first))
195         return EC;
196       if (auto EC = Writer.writeObject(Entry.second))
197         return EC;
198     }
199     return Error::success();
200   }
201 
202   void clear() {
203     Buckets.resize(8);
204     Present.clear();
205     Deleted.clear();
206   }
207 
208   bool empty() const { return size() == 0; }
209   uint32_t capacity() const { return Buckets.size(); }
210   uint32_t size() const { return Present.count(); }
211 
212   const_iterator begin() const { return const_iterator(*this); }
213   const_iterator end() const { return const_iterator(*this, 0, true); }
214 
215   /// Find the entry whose key has the specified hash value, using the specified
216   /// traits defining hash function and equality.
217   template <typename Key, typename TraitsT>
218   const_iterator find_as(const Key &K, TraitsT &Traits) const {
219     uint32_t H = Traits.hashLookupKey(K) % capacity();
220     uint32_t I = H;
221     Optional<uint32_t> FirstUnused;
222     do {
223       if (isPresent(I)) {
224         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
225           return const_iterator(*this, I, false);
226       } else {
227         if (!FirstUnused)
228           FirstUnused = I;
229         // Insertion occurs via linear probing from the slot hint, and will be
230         // inserted at the first empty / deleted location.  Therefore, if we are
231         // probing and find a location that is neither present nor deleted, then
232         // nothing must have EVER been inserted at this location, and thus it is
233         // not possible for a matching value to occur later.
234         if (!isDeleted(I))
235           break;
236       }
237       I = (I + 1) % capacity();
238     } while (I != H);
239 
240     // The only way FirstUnused would not be set is if every single entry in the
241     // table were Present.  But this would violate the load factor constraints
242     // that we impose, so it should never happen.
243     assert(FirstUnused);
244     return const_iterator(*this, *FirstUnused, true);
245   }
246 
247   /// Set the entry using a key type that the specified Traits can convert
248   /// from a real key to an internal key.
249   template <typename Key, typename TraitsT>
250   bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
251     return set_as_internal(K, std::move(V), Traits, None);
252   }
253 
254   template <typename Key, typename TraitsT>
255   ValueT get(const Key &K, TraitsT &Traits) const {
256     auto Iter = find_as(K, Traits);
257     assert(Iter != end());
258     return (*Iter).second;
259   }
260 
261 protected:
262   bool isPresent(uint32_t K) const { return Present.test(K); }
263   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
264 
265   BucketList Buckets;
266   mutable SparseBitVector<> Present;
267   mutable SparseBitVector<> Deleted;
268 
269 private:
270   /// Set the entry using a key type that the specified Traits can convert
271   /// from a real key to an internal key.
272   template <typename Key, typename TraitsT>
273   bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
274                        Optional<uint32_t> InternalKey) {
275     auto Entry = find_as(K, Traits);
276     if (Entry != end()) {
277       assert(isPresent(Entry.index()));
278       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
279       // We're updating, no need to do anything special.
280       Buckets[Entry.index()].second = V;
281       return false;
282     }
283 
284     auto &B = Buckets[Entry.index()];
285     assert(!isPresent(Entry.index()));
286     assert(Entry.isEnd());
287     B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
288     B.second = V;
289     Present.set(Entry.index());
290     Deleted.reset(Entry.index());
291 
292     grow(Traits);
293 
294     assert((find_as(K, Traits)) != end());
295     return true;
296   }
297 
298   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
299 
300   template <typename TraitsT>
301   void grow(TraitsT &Traits) {
302     uint32_t S = size();
303     uint32_t MaxLoad = maxLoad(capacity());
304     if (S < maxLoad(capacity()))
305       return;
306     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
307 
308     uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
309 
310     // Growing requires rebuilding the table and re-hashing every item.  Make a
311     // copy with a larger capacity, insert everything into the copy, then swap
312     // it in.
313     HashTable NewMap(NewCapacity);
314     for (auto I : Present) {
315       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
316       NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
317                              Buckets[I].first);
318     }
319 
320     Buckets.swap(NewMap.Buckets);
321     std::swap(Present, NewMap.Present);
322     std::swap(Deleted, NewMap.Deleted);
323     assert(capacity() == NewCapacity);
324     assert(size() == S);
325   }
326 };
327 
328 } // end namespace pdb
329 
330 } // end namespace llvm
331 
332 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
333