1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef vm_SharedImmutableStringsCache_h
8 #define vm_SharedImmutableStringsCache_h
9 
10 #include "mozilla/Maybe.h"
11 #include "mozilla/UniquePtr.h"
12 
13 #include <cstring>
14 #include <new>      // for placement new
15 #include <utility>  // std::move
16 
17 #include "js/AllocPolicy.h"
18 #include "js/HashTable.h"
19 #include "js/UniquePtr.h"
20 #include "js/Utility.h"
21 
22 #include "threading/ExclusiveData.h"
23 
24 #include "vm/MutexIDs.h"
25 
26 namespace js {
27 
28 class SharedImmutableString;
29 class SharedImmutableTwoByteString;
30 
31 /**
32  * The `SharedImmutableStringsCache` allows safely sharing and deduplicating
33  * immutable strings (either `const char*` [any encoding, not restricted to
34  * only Latin-1 or only UTF-8] or `const char16_t*`) between threads.
35  *
36  * The locking mechanism is dead-simple and coarse grained: a single lock guards
37  * all of the internal table itself, the table's entries, and the entries'
38  * reference counts. It is only safe to perform any mutation on the cache or any
39  * data stored within the cache when this lock is acquired.
40  */
41 class SharedImmutableStringsCache {
42   friend class SharedImmutableString;
43   friend class SharedImmutableTwoByteString;
44   struct Hasher;
45 
46  public:
47   using OwnedChars = JS::UniqueChars;
48   using OwnedTwoByteChars = JS::UniqueTwoByteChars;
49 
50   /**
51    * Get the canonical, shared, and de-duplicated version of the given `const
52    * char*` string. If such a string does not exist, call `intoOwnedChars` and
53    * add the string it returns to the cache.
54    *
55    * `intoOwnedChars` must create an owned version of the given string, and
56    * must have one of the following types:
57    *
58    *     JS::UniqueChars   intoOwnedChars();
59    *     JS::UniqueChars&& intoOwnedChars();
60    *
61    * It can be used by callers to elide a copy of the string when it is safe
62    * to give up ownership of the lookup string to the cache. It must return a
63    * `nullptr` on failure.
64    *
65    * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
66    * returned.
67    */
68   template <typename IntoOwnedChars>
69   [[nodiscard]] SharedImmutableString getOrCreate(
70       const char* chars, size_t length, IntoOwnedChars intoOwnedChars);
71 
72   /**
73    * Take ownership of the given `chars` and return the canonical, shared and
74    * de-duplicated version.
75    *
76    * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
77    * returned.
78    */
79   [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars,
80                                                   size_t length);
81 
82   /**
83    * Do not take ownership of the given `chars`. Return the canonical, shared
84    * and de-duplicated version. If there is no extant shared version of
85    * `chars`, make a copy and insert it into the cache.
86    *
87    * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
88    * returned.
89    */
90   [[nodiscard]] SharedImmutableString getOrCreate(const char* chars,
91                                                   size_t length);
92 
93   /**
94    * Get the canonical, shared, and de-duplicated version of the given `const
95    * char16_t*` string. If such a string does not exist, call `intoOwnedChars`
96    * and add the string it returns to the cache.
97    *
98    * `intoOwnedTwoByteChars` must create an owned version of the given string,
99    * and must have one of the following types:
100    *
101    *     JS::UniqueTwoByteChars   intoOwnedTwoByteChars();
102    *     JS::UniqueTwoByteChars&& intoOwnedTwoByteChars();
103    *
104    * It can be used by callers to elide a copy of the string when it is safe
105    * to give up ownership of the lookup string to the cache. It must return a
106    * `nullptr` on failure.
107    *
108    * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
109    * returned.
110    */
111   template <typename IntoOwnedTwoByteChars>
112   [[nodiscard]] SharedImmutableTwoByteString getOrCreate(
113       const char16_t* chars, size_t length,
114       IntoOwnedTwoByteChars intoOwnedTwoByteChars);
115 
116   /**
117    * Take ownership of the given `chars` and return the canonical, shared and
118    * de-duplicated version.
119    *
120    * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
121    * returned.
122    */
123   [[nodiscard]] SharedImmutableTwoByteString getOrCreate(
124       OwnedTwoByteChars&& chars, size_t length);
125 
126   /**
127    * Do not take ownership of the given `chars`. Return the canonical, shared
128    * and de-duplicated version. If there is no extant shared version of
129    * `chars`, then make a copy and insert it into the cache.
130    *
131    * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
132    * returned.
133    */
134   [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars,
135                                                          size_t length);
136 
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)137   size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
138     MOZ_ASSERT(inner_);
139     size_t n = mallocSizeOf(inner_);
140 
141     auto locked = inner_->lock();
142 
143     // Size of the table.
144     n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf);
145 
146     // Sizes of the strings and their boxes.
147     for (auto r = locked->set.all(); !r.empty(); r.popFront()) {
148       n += mallocSizeOf(r.front().get());
149       if (const char* chars = r.front()->chars()) {
150         n += mallocSizeOf(chars);
151       }
152     }
153 
154     return n;
155   }
156 
157   /**
158    * Construct a new cache of shared, immutable strings. Returns
159    * `mozilla::Nothing` on out of memory failure.
160    */
Create()161   static mozilla::Maybe<SharedImmutableStringsCache> Create() {
162     auto inner =
163         js_new<ExclusiveData<Inner>>(mutexid::SharedImmutableStringsCache);
164     if (!inner) {
165       return mozilla::Nothing();
166     }
167 
168     auto locked = inner->lock();
169     return mozilla::Some(SharedImmutableStringsCache(locked));
170   }
171 
SharedImmutableStringsCache(SharedImmutableStringsCache && rhs)172   SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs)
173       : inner_(rhs.inner_) {
174     MOZ_ASSERT(inner_);
175     rhs.inner_ = nullptr;
176   }
177 
178   SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) {
179     MOZ_ASSERT(this != &rhs, "self move not allowed");
180     new (this) SharedImmutableStringsCache(std::move(rhs));
181     return *this;
182   }
183 
184   SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) =
185       delete;
186 
clone()187   SharedImmutableStringsCache clone() {
188     MOZ_ASSERT(inner_);
189     auto locked = inner_->lock();
190     return SharedImmutableStringsCache(locked);
191   }
192 
~SharedImmutableStringsCache()193   ~SharedImmutableStringsCache() {
194     if (!inner_) {
195       return;
196     }
197 
198     bool shouldDestroy = false;
199     {
200       // ~ExclusiveData takes the lock, so be sure to drop the lock before
201       // attempting to destroy the inner.
202       auto locked = inner_->lock();
203       MOZ_ASSERT(locked->refcount > 0);
204       locked->refcount--;
205       if (locked->refcount == 0) {
206         shouldDestroy = true;
207       }
208     }
209     if (shouldDestroy) {
210       js_delete(inner_);
211     }
212   }
213 
214   /**
215    * Purge the cache of all refcount == 0 entries.
216    */
purge()217   void purge() {
218     auto locked = inner_->lock();
219     MOZ_ASSERT(locked->refcount > 0);
220 
221     for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) {
222       if (e.front()->refcount == 0) {
223         // The chars should be eagerly freed when refcount reaches zero.
224         MOZ_ASSERT(!e.front()->chars());
225         e.removeFront();
226       } else {
227         // The chars should exist as long as the refcount is non-zero.
228         MOZ_ASSERT(e.front()->chars());
229       }
230     }
231   }
232 
233  private:
234   struct Inner;
235   class StringBox {
236     friend class SharedImmutableString;
237 
238     OwnedChars chars_;
239     size_t length_;
240     const ExclusiveData<Inner>* cache_;
241 
242    public:
243     mutable size_t refcount;
244 
245     using Ptr = js::UniquePtr<StringBox>;
246 
StringBox(OwnedChars && chars,size_t length,const ExclusiveData<Inner> * cache)247     StringBox(OwnedChars&& chars, size_t length,
248               const ExclusiveData<Inner>* cache)
249         : chars_(std::move(chars)),
250           length_(length),
251           cache_(cache),
252           refcount(0) {
253       MOZ_ASSERT(chars_);
254     }
255 
Create(OwnedChars && chars,size_t length,const ExclusiveData<Inner> * cache)256     static Ptr Create(OwnedChars&& chars, size_t length,
257                       const ExclusiveData<Inner>* cache) {
258       return js::MakeUnique<StringBox>(std::move(chars), length, cache);
259     }
260 
261     StringBox(const StringBox&) = delete;
262     StringBox& operator=(const StringBox&) = delete;
263 
~StringBox()264     ~StringBox() {
265       MOZ_RELEASE_ASSERT(
266           refcount == 0,
267           "There are `SharedImmutable[TwoByte]String`s outliving their "
268           "associated cache! This always leads to use-after-free in the "
269           "`~SharedImmutableString` destructor!");
270     }
271 
chars()272     const char* chars() const { return chars_.get(); }
length()273     size_t length() const { return length_; }
274   };
275 
276   struct Hasher {
277     /**
278      * A structure used when querying for a `const char*` string in the cache.
279      */
280     class Lookup {
281       friend struct Hasher;
282 
283       HashNumber hash_;
284       const char* chars_;
285       size_t length_;
286 
287      public:
LookupHasher288       Lookup(HashNumber hash, const char* chars, size_t length)
289           : hash_(hash), chars_(chars), length_(length) {
290         MOZ_ASSERT(chars_);
291         MOZ_ASSERT(hash == Hasher::hashLongString(chars, length));
292       }
293 
LookupHasher294       Lookup(HashNumber hash, const char16_t* chars, size_t length)
295           : Lookup(hash, reinterpret_cast<const char*>(chars),
296                    length * sizeof(char16_t)) {}
297     };
298 
299     static const size_t SHORT_STRING_MAX_LENGTH = 8192;
300     static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2;
301 
302     // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the
303     // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the
304     // string. This increases the risk of collisions, but in practice it
305     // should be rare, and it yields a large speedup for hashing long
306     // strings.
hashLongStringHasher307     static HashNumber hashLongString(const char* chars, size_t length) {
308       MOZ_ASSERT(chars);
309       return length <= SHORT_STRING_MAX_LENGTH
310                  ? mozilla::HashString(chars, length)
311                  : mozilla::AddToHash(
312                        mozilla::HashString(chars, HASH_CHUNK_LENGTH),
313                        mozilla::HashString(chars + length - HASH_CHUNK_LENGTH,
314                                            HASH_CHUNK_LENGTH));
315     }
316 
hashHasher317     static HashNumber hash(const Lookup& lookup) { return lookup.hash_; }
318 
matchHasher319     static bool match(const StringBox::Ptr& key, const Lookup& lookup) {
320       MOZ_ASSERT(lookup.chars_);
321 
322       if (!key->chars() || key->length() != lookup.length_) {
323         return false;
324       }
325 
326       if (key->chars() == lookup.chars_) {
327         return true;
328       }
329 
330       return memcmp(key->chars(), lookup.chars_, key->length()) == 0;
331     }
332   };
333 
334   // The `Inner` struct contains the actual cached contents, and is reference
335   // counted and shared between all `SharedImmutableStringsCache` and
336   // `SharedImmutable[TwoByte]String` holders.
337   struct Inner {
338     using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>;
339 
340     size_t refcount;
341     Set set;
342 
InnerInner343     Inner() : refcount(0), set() {}
344 
345     Inner(const Inner&) = delete;
346     Inner& operator=(const Inner&) = delete;
347 
~InnerInner348     ~Inner() { MOZ_ASSERT(refcount == 0); }
349   };
350 
351   const ExclusiveData<Inner>* inner_;
352 
SharedImmutableStringsCache(ExclusiveData<Inner>::Guard & locked)353   explicit SharedImmutableStringsCache(ExclusiveData<Inner>::Guard& locked)
354       : inner_(locked.parent()) {
355     locked->refcount++;
356   }
357 };
358 
359 /**
360  * The `SharedImmutableString` class holds a reference to a `const char*` string
361  * from the `SharedImmutableStringsCache` and releases the reference upon
362  * destruction.
363  */
364 class SharedImmutableString {
365   friend class SharedImmutableStringsCache;
366   friend class SharedImmutableTwoByteString;
367 
368   mutable SharedImmutableStringsCache::StringBox* box_;
369 
370   explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box);
371 
372  public:
373   /**
374    * `SharedImmutableString`s are move-able. It is an error to use a
375    * `SharedImmutableString` after it has been moved.
376    */
377   SharedImmutableString(SharedImmutableString&& rhs);
378   SharedImmutableString& operator=(SharedImmutableString&& rhs);
SharedImmutableString()379   SharedImmutableString() { box_ = nullptr; }
380 
381   /**
382    * Create another shared reference to the underlying string.
383    */
384   SharedImmutableString clone() const;
385 
386   // If you want a copy, take one explicitly with `clone`!
387   SharedImmutableString& operator=(const SharedImmutableString&) = delete;
388   explicit operator bool() const { return box_ != nullptr; }
389 
390   ~SharedImmutableString();
391 
392   /**
393    * Get a raw pointer to the underlying string. It is only safe to use the
394    * resulting pointer while this `SharedImmutableString` exists.
395    */
chars()396   const char* chars() const {
397     MOZ_ASSERT(box_);
398     MOZ_ASSERT(box_->refcount > 0);
399     MOZ_ASSERT(box_->chars());
400     return box_->chars();
401   }
402 
403   /**
404    * Get the length of the underlying string.
405    */
length()406   size_t length() const {
407     MOZ_ASSERT(box_);
408     MOZ_ASSERT(box_->refcount > 0);
409     MOZ_ASSERT(box_->chars());
410     return box_->length();
411   }
412 };
413 
414 /**
415  * The `SharedImmutableTwoByteString` class holds a reference to a `const
416  * char16_t*` string from the `SharedImmutableStringsCache` and releases the
417  * reference upon destruction.
418  */
419 class SharedImmutableTwoByteString {
420   friend class SharedImmutableStringsCache;
421 
422   // If a `char*` string and `char16_t*` string happen to have the same bytes,
423   // the bytes will be shared but handed out as different types.
424   SharedImmutableString string_;
425 
426   explicit SharedImmutableTwoByteString(SharedImmutableString&& string);
427   explicit SharedImmutableTwoByteString(
428       SharedImmutableStringsCache::StringBox* box);
429 
430  public:
431   /**
432    * `SharedImmutableTwoByteString`s are move-able. It is an error to use a
433    * `SharedImmutableTwoByteString` after it has been moved.
434    */
435   SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs);
436   SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs);
SharedImmutableTwoByteString()437   SharedImmutableTwoByteString() { string_.box_ = nullptr; }
438 
439   /**
440    * Create another shared reference to the underlying string.
441    */
442   SharedImmutableTwoByteString clone() const;
443 
444   // If you want a copy, take one explicitly with `clone`!
445   SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) =
446       delete;
447   explicit operator bool() const { return string_.box_ != nullptr; }
448   /**
449    * Get a raw pointer to the underlying string. It is only safe to use the
450    * resulting pointer while this `SharedImmutableTwoByteString` exists.
451    */
chars()452   const char16_t* chars() const {
453     return reinterpret_cast<const char16_t*>(string_.chars());
454   }
455 
456   /**
457    * Get the length of the underlying string.
458    */
length()459   size_t length() const { return string_.length() / sizeof(char16_t); }
460 };
461 
462 }  // namespace js
463 
464 #endif  // vm_SharedImmutableStringsCache_h
465