1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef vm_SharedImmutableStringsCache_h 8 #define vm_SharedImmutableStringsCache_h 9 10 #include "mozilla/Maybe.h" 11 #include "mozilla/UniquePtr.h" 12 13 #include <cstring> 14 #include <new> // for placement new 15 #include <utility> // std::move 16 17 #include "js/AllocPolicy.h" 18 #include "js/HashTable.h" 19 #include "js/UniquePtr.h" 20 #include "js/Utility.h" 21 22 #include "threading/ExclusiveData.h" 23 24 #include "vm/MutexIDs.h" 25 26 namespace js { 27 28 class SharedImmutableString; 29 class SharedImmutableTwoByteString; 30 31 /** 32 * The `SharedImmutableStringsCache` allows safely sharing and deduplicating 33 * immutable strings (either `const char*` [any encoding, not restricted to 34 * only Latin-1 or only UTF-8] or `const char16_t*`) between threads. 35 * 36 * The locking mechanism is dead-simple and coarse grained: a single lock guards 37 * all of the internal table itself, the table's entries, and the entries' 38 * reference counts. It is only safe to perform any mutation on the cache or any 39 * data stored within the cache when this lock is acquired. 40 */ 41 class SharedImmutableStringsCache { 42 friend class SharedImmutableString; 43 friend class SharedImmutableTwoByteString; 44 struct Hasher; 45 46 public: 47 using OwnedChars = JS::UniqueChars; 48 using OwnedTwoByteChars = JS::UniqueTwoByteChars; 49 50 /** 51 * Get the canonical, shared, and de-duplicated version of the given `const 52 * char*` string. If such a string does not exist, call `intoOwnedChars` and 53 * add the string it returns to the cache. 54 * 55 * `intoOwnedChars` must create an owned version of the given string, and 56 * must have one of the following types: 57 * 58 * JS::UniqueChars intoOwnedChars(); 59 * JS::UniqueChars&& intoOwnedChars(); 60 * 61 * It can be used by callers to elide a copy of the string when it is safe 62 * to give up ownership of the lookup string to the cache. It must return a 63 * `nullptr` on failure. 64 * 65 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 66 * returned. 67 */ 68 template <typename IntoOwnedChars> 69 [[nodiscard]] SharedImmutableString getOrCreate( 70 const char* chars, size_t length, IntoOwnedChars intoOwnedChars); 71 72 /** 73 * Take ownership of the given `chars` and return the canonical, shared and 74 * de-duplicated version. 75 * 76 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 77 * returned. 78 */ 79 [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars, 80 size_t length); 81 82 /** 83 * Do not take ownership of the given `chars`. Return the canonical, shared 84 * and de-duplicated version. If there is no extant shared version of 85 * `chars`, make a copy and insert it into the cache. 86 * 87 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 88 * returned. 89 */ 90 [[nodiscard]] SharedImmutableString getOrCreate(const char* chars, 91 size_t length); 92 93 /** 94 * Get the canonical, shared, and de-duplicated version of the given `const 95 * char16_t*` string. If such a string does not exist, call `intoOwnedChars` 96 * and add the string it returns to the cache. 97 * 98 * `intoOwnedTwoByteChars` must create an owned version of the given string, 99 * and must have one of the following types: 100 * 101 * JS::UniqueTwoByteChars intoOwnedTwoByteChars(); 102 * JS::UniqueTwoByteChars&& intoOwnedTwoByteChars(); 103 * 104 * It can be used by callers to elide a copy of the string when it is safe 105 * to give up ownership of the lookup string to the cache. It must return a 106 * `nullptr` on failure. 107 * 108 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 109 * returned. 110 */ 111 template <typename IntoOwnedTwoByteChars> 112 [[nodiscard]] SharedImmutableTwoByteString getOrCreate( 113 const char16_t* chars, size_t length, 114 IntoOwnedTwoByteChars intoOwnedTwoByteChars); 115 116 /** 117 * Take ownership of the given `chars` and return the canonical, shared and 118 * de-duplicated version. 119 * 120 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 121 * returned. 122 */ 123 [[nodiscard]] SharedImmutableTwoByteString getOrCreate( 124 OwnedTwoByteChars&& chars, size_t length); 125 126 /** 127 * Do not take ownership of the given `chars`. Return the canonical, shared 128 * and de-duplicated version. If there is no extant shared version of 129 * `chars`, then make a copy and insert it into the cache. 130 * 131 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 132 * returned. 133 */ 134 [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars, 135 size_t length); 136 sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)137 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const { 138 MOZ_ASSERT(inner_); 139 size_t n = mallocSizeOf(inner_); 140 141 auto locked = inner_->lock(); 142 143 // Size of the table. 144 n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf); 145 146 // Sizes of the strings and their boxes. 147 for (auto r = locked->set.all(); !r.empty(); r.popFront()) { 148 n += mallocSizeOf(r.front().get()); 149 if (const char* chars = r.front()->chars()) { 150 n += mallocSizeOf(chars); 151 } 152 } 153 154 return n; 155 } 156 157 /** 158 * Construct a new cache of shared, immutable strings. Returns 159 * `mozilla::Nothing` on out of memory failure. 160 */ Create()161 static mozilla::Maybe<SharedImmutableStringsCache> Create() { 162 auto inner = 163 js_new<ExclusiveData<Inner>>(mutexid::SharedImmutableStringsCache); 164 if (!inner) { 165 return mozilla::Nothing(); 166 } 167 168 auto locked = inner->lock(); 169 return mozilla::Some(SharedImmutableStringsCache(locked)); 170 } 171 SharedImmutableStringsCache(SharedImmutableStringsCache && rhs)172 SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) 173 : inner_(rhs.inner_) { 174 MOZ_ASSERT(inner_); 175 rhs.inner_ = nullptr; 176 } 177 178 SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) { 179 MOZ_ASSERT(this != &rhs, "self move not allowed"); 180 new (this) SharedImmutableStringsCache(std::move(rhs)); 181 return *this; 182 } 183 184 SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = 185 delete; 186 clone()187 SharedImmutableStringsCache clone() { 188 MOZ_ASSERT(inner_); 189 auto locked = inner_->lock(); 190 return SharedImmutableStringsCache(locked); 191 } 192 ~SharedImmutableStringsCache()193 ~SharedImmutableStringsCache() { 194 if (!inner_) { 195 return; 196 } 197 198 bool shouldDestroy = false; 199 { 200 // ~ExclusiveData takes the lock, so be sure to drop the lock before 201 // attempting to destroy the inner. 202 auto locked = inner_->lock(); 203 MOZ_ASSERT(locked->refcount > 0); 204 locked->refcount--; 205 if (locked->refcount == 0) { 206 shouldDestroy = true; 207 } 208 } 209 if (shouldDestroy) { 210 js_delete(inner_); 211 } 212 } 213 214 /** 215 * Purge the cache of all refcount == 0 entries. 216 */ purge()217 void purge() { 218 auto locked = inner_->lock(); 219 MOZ_ASSERT(locked->refcount > 0); 220 221 for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) { 222 if (e.front()->refcount == 0) { 223 // The chars should be eagerly freed when refcount reaches zero. 224 MOZ_ASSERT(!e.front()->chars()); 225 e.removeFront(); 226 } else { 227 // The chars should exist as long as the refcount is non-zero. 228 MOZ_ASSERT(e.front()->chars()); 229 } 230 } 231 } 232 233 private: 234 struct Inner; 235 class StringBox { 236 friend class SharedImmutableString; 237 238 OwnedChars chars_; 239 size_t length_; 240 const ExclusiveData<Inner>* cache_; 241 242 public: 243 mutable size_t refcount; 244 245 using Ptr = js::UniquePtr<StringBox>; 246 StringBox(OwnedChars && chars,size_t length,const ExclusiveData<Inner> * cache)247 StringBox(OwnedChars&& chars, size_t length, 248 const ExclusiveData<Inner>* cache) 249 : chars_(std::move(chars)), 250 length_(length), 251 cache_(cache), 252 refcount(0) { 253 MOZ_ASSERT(chars_); 254 } 255 Create(OwnedChars && chars,size_t length,const ExclusiveData<Inner> * cache)256 static Ptr Create(OwnedChars&& chars, size_t length, 257 const ExclusiveData<Inner>* cache) { 258 return js::MakeUnique<StringBox>(std::move(chars), length, cache); 259 } 260 261 StringBox(const StringBox&) = delete; 262 StringBox& operator=(const StringBox&) = delete; 263 ~StringBox()264 ~StringBox() { 265 MOZ_RELEASE_ASSERT( 266 refcount == 0, 267 "There are `SharedImmutable[TwoByte]String`s outliving their " 268 "associated cache! This always leads to use-after-free in the " 269 "`~SharedImmutableString` destructor!"); 270 } 271 chars()272 const char* chars() const { return chars_.get(); } length()273 size_t length() const { return length_; } 274 }; 275 276 struct Hasher { 277 /** 278 * A structure used when querying for a `const char*` string in the cache. 279 */ 280 class Lookup { 281 friend struct Hasher; 282 283 HashNumber hash_; 284 const char* chars_; 285 size_t length_; 286 287 public: LookupHasher288 Lookup(HashNumber hash, const char* chars, size_t length) 289 : hash_(hash), chars_(chars), length_(length) { 290 MOZ_ASSERT(chars_); 291 MOZ_ASSERT(hash == Hasher::hashLongString(chars, length)); 292 } 293 LookupHasher294 Lookup(HashNumber hash, const char16_t* chars, size_t length) 295 : Lookup(hash, reinterpret_cast<const char*>(chars), 296 length * sizeof(char16_t)) {} 297 }; 298 299 static const size_t SHORT_STRING_MAX_LENGTH = 8192; 300 static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2; 301 302 // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the 303 // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the 304 // string. This increases the risk of collisions, but in practice it 305 // should be rare, and it yields a large speedup for hashing long 306 // strings. hashLongStringHasher307 static HashNumber hashLongString(const char* chars, size_t length) { 308 MOZ_ASSERT(chars); 309 return length <= SHORT_STRING_MAX_LENGTH 310 ? mozilla::HashString(chars, length) 311 : mozilla::AddToHash( 312 mozilla::HashString(chars, HASH_CHUNK_LENGTH), 313 mozilla::HashString(chars + length - HASH_CHUNK_LENGTH, 314 HASH_CHUNK_LENGTH)); 315 } 316 hashHasher317 static HashNumber hash(const Lookup& lookup) { return lookup.hash_; } 318 matchHasher319 static bool match(const StringBox::Ptr& key, const Lookup& lookup) { 320 MOZ_ASSERT(lookup.chars_); 321 322 if (!key->chars() || key->length() != lookup.length_) { 323 return false; 324 } 325 326 if (key->chars() == lookup.chars_) { 327 return true; 328 } 329 330 return memcmp(key->chars(), lookup.chars_, key->length()) == 0; 331 } 332 }; 333 334 // The `Inner` struct contains the actual cached contents, and is reference 335 // counted and shared between all `SharedImmutableStringsCache` and 336 // `SharedImmutable[TwoByte]String` holders. 337 struct Inner { 338 using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>; 339 340 size_t refcount; 341 Set set; 342 InnerInner343 Inner() : refcount(0), set() {} 344 345 Inner(const Inner&) = delete; 346 Inner& operator=(const Inner&) = delete; 347 ~InnerInner348 ~Inner() { MOZ_ASSERT(refcount == 0); } 349 }; 350 351 const ExclusiveData<Inner>* inner_; 352 SharedImmutableStringsCache(ExclusiveData<Inner>::Guard & locked)353 explicit SharedImmutableStringsCache(ExclusiveData<Inner>::Guard& locked) 354 : inner_(locked.parent()) { 355 locked->refcount++; 356 } 357 }; 358 359 /** 360 * The `SharedImmutableString` class holds a reference to a `const char*` string 361 * from the `SharedImmutableStringsCache` and releases the reference upon 362 * destruction. 363 */ 364 class SharedImmutableString { 365 friend class SharedImmutableStringsCache; 366 friend class SharedImmutableTwoByteString; 367 368 mutable SharedImmutableStringsCache::StringBox* box_; 369 370 explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box); 371 372 public: 373 /** 374 * `SharedImmutableString`s are move-able. It is an error to use a 375 * `SharedImmutableString` after it has been moved. 376 */ 377 SharedImmutableString(SharedImmutableString&& rhs); 378 SharedImmutableString& operator=(SharedImmutableString&& rhs); SharedImmutableString()379 SharedImmutableString() { box_ = nullptr; } 380 381 /** 382 * Create another shared reference to the underlying string. 383 */ 384 SharedImmutableString clone() const; 385 386 // If you want a copy, take one explicitly with `clone`! 387 SharedImmutableString& operator=(const SharedImmutableString&) = delete; 388 explicit operator bool() const { return box_ != nullptr; } 389 390 ~SharedImmutableString(); 391 392 /** 393 * Get a raw pointer to the underlying string. It is only safe to use the 394 * resulting pointer while this `SharedImmutableString` exists. 395 */ chars()396 const char* chars() const { 397 MOZ_ASSERT(box_); 398 MOZ_ASSERT(box_->refcount > 0); 399 MOZ_ASSERT(box_->chars()); 400 return box_->chars(); 401 } 402 403 /** 404 * Get the length of the underlying string. 405 */ length()406 size_t length() const { 407 MOZ_ASSERT(box_); 408 MOZ_ASSERT(box_->refcount > 0); 409 MOZ_ASSERT(box_->chars()); 410 return box_->length(); 411 } 412 }; 413 414 /** 415 * The `SharedImmutableTwoByteString` class holds a reference to a `const 416 * char16_t*` string from the `SharedImmutableStringsCache` and releases the 417 * reference upon destruction. 418 */ 419 class SharedImmutableTwoByteString { 420 friend class SharedImmutableStringsCache; 421 422 // If a `char*` string and `char16_t*` string happen to have the same bytes, 423 // the bytes will be shared but handed out as different types. 424 SharedImmutableString string_; 425 426 explicit SharedImmutableTwoByteString(SharedImmutableString&& string); 427 explicit SharedImmutableTwoByteString( 428 SharedImmutableStringsCache::StringBox* box); 429 430 public: 431 /** 432 * `SharedImmutableTwoByteString`s are move-able. It is an error to use a 433 * `SharedImmutableTwoByteString` after it has been moved. 434 */ 435 SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs); 436 SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs); SharedImmutableTwoByteString()437 SharedImmutableTwoByteString() { string_.box_ = nullptr; } 438 439 /** 440 * Create another shared reference to the underlying string. 441 */ 442 SharedImmutableTwoByteString clone() const; 443 444 // If you want a copy, take one explicitly with `clone`! 445 SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = 446 delete; 447 explicit operator bool() const { return string_.box_ != nullptr; } 448 /** 449 * Get a raw pointer to the underlying string. It is only safe to use the 450 * resulting pointer while this `SharedImmutableTwoByteString` exists. 451 */ chars()452 const char16_t* chars() const { 453 return reinterpret_cast<const char16_t*>(string_.chars()); 454 } 455 456 /** 457 * Get the length of the underlying string. 458 */ length()459 size_t length() const { return string_.length() / sizeof(char16_t); } 460 }; 461 462 } // namespace js 463 464 #endif // vm_SharedImmutableStringsCache_h 465