1 /* 2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_WTF_TEXT_STRING_HASHER_H_ 23 #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_WTF_TEXT_STRING_HASHER_H_ 24 25 #include "third_party/blink/renderer/platform/wtf/allocator/allocator.h" 26 #include "third_party/blink/renderer/platform/wtf/text/unicode.h" 27 28 namespace WTF { 29 30 // Paul Hsieh's SuperFastHash 31 // http://www.azillionmonkeys.com/qed/hash.html 32 33 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). 34 35 // NOTE: The hash computation here must stay in sync with 36 // build/scripts/hasher.py. 37 38 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash 39 // value of zero. 40 static const unsigned kStringHashingStartValue = 0x9E3779B9U; 41 42 class StringHasher { 43 DISALLOW_NEW(); 44 45 public: 46 static const unsigned kFlagCount = 47 8; // Save 8 bits for StringImpl to use as flags. 48 StringHasher()49 StringHasher() 50 : hash_(kStringHashingStartValue), 51 has_pending_character_(false), 52 pending_character_(0) {} 53 54 // The hasher hashes two characters at a time, and thus an "aligned" hasher is 55 // one where an even number of characters have been added. Callers that 56 // always add characters two at a time can use the "assuming aligned" 57 // functions. AddCharactersAssumingAligned(UChar a,UChar b)58 void AddCharactersAssumingAligned(UChar a, UChar b) { 59 DCHECK(!has_pending_character_); 60 hash_ += a; 61 hash_ = (hash_ << 16) ^ ((b << 11) ^ hash_); 62 hash_ += hash_ >> 11; 63 } 64 AddCharacter(UChar character)65 void AddCharacter(UChar character) { 66 if (has_pending_character_) { 67 has_pending_character_ = false; 68 AddCharactersAssumingAligned(pending_character_, character); 69 return; 70 } 71 72 pending_character_ = character; 73 has_pending_character_ = true; 74 } 75 AddCharacters(UChar a,UChar b)76 void AddCharacters(UChar a, UChar b) { 77 if (has_pending_character_) { 78 #if DCHECK_IS_ON() 79 has_pending_character_ = false; 80 #endif 81 AddCharactersAssumingAligned(pending_character_, a); 82 pending_character_ = b; 83 #if DCHECK_IS_ON() 84 has_pending_character_ = true; 85 #endif 86 return; 87 } 88 89 AddCharactersAssumingAligned(a, b); 90 } 91 92 template <typename T, UChar Converter(T)> AddCharactersAssumingAligned(const T * data,unsigned length)93 void AddCharactersAssumingAligned(const T* data, unsigned length) { 94 DCHECK(!has_pending_character_); 95 96 bool remainder = length & 1; 97 length >>= 1; 98 99 while (length--) { 100 AddCharactersAssumingAligned(Converter(data[0]), Converter(data[1])); 101 data += 2; 102 } 103 104 if (remainder) 105 AddCharacter(Converter(*data)); 106 } 107 108 template <typename T> AddCharactersAssumingAligned(const T * data,unsigned length)109 void AddCharactersAssumingAligned(const T* data, unsigned length) { 110 AddCharactersAssumingAligned<T, DefaultConverter>(data, length); 111 } 112 113 template <typename T, UChar Converter(T)> AddCharacters(const T * data,unsigned length)114 void AddCharacters(const T* data, unsigned length) { 115 if (has_pending_character_ && length) { 116 has_pending_character_ = false; 117 AddCharactersAssumingAligned(pending_character_, Converter(*data++)); 118 --length; 119 } 120 AddCharactersAssumingAligned<T, Converter>(data, length); 121 } 122 123 template <typename T> AddCharacters(const T * data,unsigned length)124 void AddCharacters(const T* data, unsigned length) { 125 AddCharacters<T, DefaultConverter>(data, length); 126 } 127 HashWithTop8BitsMasked()128 unsigned HashWithTop8BitsMasked() const { 129 unsigned result = AvalancheBits(); 130 131 // Reserving space from the high bits for flags preserves most of the hash's 132 // value, since hash lookup typically masks out the high bits anyway. 133 result &= (1U << (sizeof(result) * 8 - kFlagCount)) - 1; 134 135 // This avoids ever returning a hash code of 0, since that is used to 136 // signal "hash not computed yet". Setting the high bit maintains 137 // reasonable fidelity to a hash code of 0 because it is likely to yield 138 // exactly 0 when hash lookup masks out the high bits. 139 if (!result) 140 result = 0x80000000 >> kFlagCount; 141 142 return result; 143 } 144 GetHash()145 unsigned GetHash() const { 146 unsigned result = AvalancheBits(); 147 148 // This avoids ever returning a hash code of 0, since that is used to 149 // signal "hash not computed yet". Setting the high bit maintains 150 // reasonable fidelity to a hash code of 0 because it is likely to yield 151 // exactly 0 when hash lookup masks out the high bits. 152 if (!result) 153 result = 0x80000000; 154 155 return result; 156 } 157 158 template <typename T, UChar Converter(T)> ComputeHashAndMaskTop8Bits(const T * data,unsigned length)159 static unsigned ComputeHashAndMaskTop8Bits(const T* data, unsigned length) { 160 StringHasher hasher; 161 hasher.AddCharactersAssumingAligned<T, Converter>(data, length); 162 return hasher.HashWithTop8BitsMasked(); 163 } 164 165 template <typename T> ComputeHashAndMaskTop8Bits(const T * data,unsigned length)166 static unsigned ComputeHashAndMaskTop8Bits(const T* data, unsigned length) { 167 return ComputeHashAndMaskTop8Bits<T, DefaultConverter>(data, length); 168 } 169 170 template <typename T, UChar Converter(T)> ComputeHash(const T * data,unsigned length)171 static unsigned ComputeHash(const T* data, unsigned length) { 172 StringHasher hasher; 173 hasher.AddCharactersAssumingAligned<T, Converter>(data, length); 174 return hasher.GetHash(); 175 } 176 177 template <typename T> ComputeHash(const T * data,unsigned length)178 static unsigned ComputeHash(const T* data, unsigned length) { 179 return ComputeHash<T, DefaultConverter>(data, length); 180 } 181 HashMemory(const void * data,unsigned length)182 static unsigned HashMemory(const void* data, unsigned length) { 183 // FIXME: Why does this function use the version of the hash that drops the 184 // top 8 bits? We want that for all string hashing so we can use those 185 // bits in StringImpl and hash strings consistently, but I don't see why 186 // we'd want that for general memory hashing. 187 DCHECK(!(length % 2)); 188 return ComputeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data), 189 length / sizeof(UChar)); 190 } 191 192 template <size_t length> HashMemory(const void * data)193 static unsigned HashMemory(const void* data) { 194 static_assert(!(length % 2), "length must be a multiple of two"); 195 return HashMemory(data, length); 196 } 197 198 private: 199 // The StringHasher works on UChar so all converters should normalize input 200 // data into being a UChar. DefaultConverter(UChar character)201 static UChar DefaultConverter(UChar character) { return character; } DefaultConverter(LChar character)202 static UChar DefaultConverter(LChar character) { return character; } 203 AvalancheBits()204 unsigned AvalancheBits() const { 205 unsigned result = hash_; 206 207 // Handle end case. 208 if (has_pending_character_) { 209 result += pending_character_; 210 result ^= result << 11; 211 result += result >> 17; 212 } 213 214 // Force "avalanching" of final 31 bits. 215 result ^= result << 3; 216 result += result >> 5; 217 result ^= result << 2; 218 result += result >> 15; 219 result ^= result << 10; 220 221 return result; 222 } 223 224 unsigned hash_; 225 bool has_pending_character_; 226 UChar pending_character_; 227 }; 228 229 } // namespace WTF 230 231 using WTF::StringHasher; 232 233 #endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_WTF_STRING_HASHER_H_ 234