1 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines CachedHashString and CachedHashStringRef.  These are
11 /// owning and not-owning string types that store their hash in addition to
12 /// their string data.
13 ///
14 /// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
15 /// (because, unlike std::string, CachedHashString lets us have empty and
16 /// tombstone values).
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_ADT_CACHEDHASHSTRING_H
21 #define LLVM_ADT_CACHEDHASHSTRING_H
22 
23 #include "llvm/ADT/DenseMapInfo.h"
24 #include "llvm/ADT/StringRef.h"
25 
26 namespace llvm {
27 
28 /// A container which contains a StringRef plus a precomputed hash.
29 class CachedHashStringRef {
30   const char *P;
31   uint32_t Size;
32   uint32_t Hash;
33 
34 public:
35   // Explicit because hashing a string isn't free.
CachedHashStringRef(StringRef S)36   explicit CachedHashStringRef(StringRef S)
37       : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
38 
CachedHashStringRef(StringRef S,uint32_t Hash)39   CachedHashStringRef(StringRef S, uint32_t Hash)
40       : P(S.data()), Size(S.size()), Hash(Hash) {
41     assert(S.size() <= std::numeric_limits<uint32_t>::max());
42   }
43 
val()44   StringRef val() const { return StringRef(P, Size); }
data()45   const char *data() const { return P; }
size()46   uint32_t size() const { return Size; }
hash()47   uint32_t hash() const { return Hash; }
48 };
49 
50 template <> struct DenseMapInfo<CachedHashStringRef> {
51   static CachedHashStringRef getEmptyKey() {
52     return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0);
53   }
54   static CachedHashStringRef getTombstoneKey() {
55     return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1);
56   }
57   static unsigned getHashValue(const CachedHashStringRef &S) {
58     assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
59     assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
60     return S.hash();
61   }
62   static bool isEqual(const CachedHashStringRef &LHS,
63                       const CachedHashStringRef &RHS) {
64     return LHS.hash() == RHS.hash() &&
65            DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val());
66   }
67 };
68 
69 /// A container which contains a string, which it owns, plus a precomputed hash.
70 ///
71 /// We do not null-terminate the string.
72 class CachedHashString {
73   friend struct DenseMapInfo<CachedHashString>;
74 
75   char *P;
76   uint32_t Size;
77   uint32_t Hash;
78 
79   static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
80   static char *getTombstoneKeyPtr() {
81     return DenseMapInfo<char *>::getTombstoneKey();
82   }
83 
84   bool isEmptyOrTombstone() const {
85     return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
86   }
87 
88   struct ConstructEmptyOrTombstoneTy {};
89 
90   CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr)
91       : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
92     assert(isEmptyOrTombstone());
93   }
94 
95   // TODO: Use small-string optimization to avoid allocating.
96 
97 public:
98   explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {}
99 
100   // Explicit because copying and hashing a string isn't free.
101   explicit CachedHashString(StringRef S)
102       : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
103 
104   CachedHashString(StringRef S, uint32_t Hash)
105       : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
106     memcpy(P, S.data(), S.size());
107   }
108 
109   // Ideally this class would not be copyable.  But SetVector requires copyable
110   // keys, and we want this to be usable there.
111   CachedHashString(const CachedHashString &Other)
112       : Size(Other.Size), Hash(Other.Hash) {
113     if (Other.isEmptyOrTombstone()) {
114       P = Other.P;
115     } else {
116       P = new char[Size];
117       memcpy(P, Other.P, Size);
118     }
119   }
120 
121   CachedHashString &operator=(CachedHashString Other) {
122     swap(*this, Other);
123     return *this;
124   }
125 
126   CachedHashString(CachedHashString &&Other) noexcept
127       : P(Other.P), Size(Other.Size), Hash(Other.Hash) {
128     Other.P = getEmptyKeyPtr();
129   }
130 
131   ~CachedHashString() {
132     if (!isEmptyOrTombstone())
133       delete[] P;
134   }
135 
136   StringRef val() const { return StringRef(P, Size); }
137   uint32_t size() const { return Size; }
138   uint32_t hash() const { return Hash; }
139 
140   operator StringRef() const { return val(); }
141   operator CachedHashStringRef() const {
142     return CachedHashStringRef(val(), Hash);
143   }
144 
145   friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
146     using std::swap;
147     swap(LHS.P, RHS.P);
148     swap(LHS.Size, RHS.Size);
149     swap(LHS.Hash, RHS.Hash);
150   }
151 };
152 
153 template <> struct DenseMapInfo<CachedHashString> {
154   static CachedHashString getEmptyKey() {
155     return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
156                             CachedHashString::getEmptyKeyPtr());
157   }
158   static CachedHashString getTombstoneKey() {
159     return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
160                             CachedHashString::getTombstoneKeyPtr());
161   }
162   static unsigned getHashValue(const CachedHashString &S) {
163     assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
164     assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
165     return S.hash();
166   }
167   static bool isEqual(const CachedHashString &LHS,
168                       const CachedHashString &RHS) {
169     if (LHS.hash() != RHS.hash())
170       return false;
171     if (LHS.P == CachedHashString::getEmptyKeyPtr())
172       return RHS.P == CachedHashString::getEmptyKeyPtr();
173     if (LHS.P == CachedHashString::getTombstoneKeyPtr())
174       return RHS.P == CachedHashString::getTombstoneKeyPtr();
175 
176     // This is safe because if RHS.P is the empty or tombstone key, it will have
177     // length 0, so we'll never dereference its pointer.
178     return LHS.val() == RHS.val();
179   }
180 };
181 
182 } // namespace llvm
183 
184 #endif
185