1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * Improved thread local storage for non-trivial types (similar speed as
19  * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
20  * than boost::thread_specific_ptr).
21  *
22  * Also includes an accessor interface to walk all the thread local child
23  * objects of a parent.  accessAllThreads() initializes an accessor which holds
24  * a global lock *that blocks all creation and destruction of ThreadLocal
25  * objects with the same Tag* and can be used as an iterable container.
26  * accessAllThreads() can race with destruction of thread-local elements. We
27  * provide a strict mode which is dangerous because it requires the access lock
28  * to be held while destroying thread-local elements which could cause
29  * deadlocks. We gate this mode behind the AccessModeStrict template parameter.
30  *
31  * Intended use is for frequent write, infrequent read data access patterns such
32  * as counters.
33  *
34  * There are two classes here - ThreadLocal and ThreadLocalPtr.  ThreadLocalPtr
35  * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
36  * wrapper around ThreadLocalPtr that manages allocation automatically.
37  *
38  * @author Spencer Ahrens (sahrens)
39  */
40 
41 #pragma once
42 
43 #include <iterator>
44 #include <thread>
45 #include <type_traits>
46 #include <utility>
47 
48 #include <folly/Likely.h>
49 #include <folly/Portability.h>
50 #include <folly/ScopeGuard.h>
51 #include <folly/SharedMutex.h>
52 #include <folly/detail/ThreadLocalDetail.h>
53 
54 namespace folly {
55 
56 template <class T, class Tag, class AccessMode>
57 class ThreadLocalPtr;
58 
59 template <class T, class Tag = void, class AccessMode = void>
60 class ThreadLocal {
61  public:
ThreadLocal()62   constexpr ThreadLocal() : constructor_([]() { return new T(); }) {}
63 
64   template <typename F, std::enable_if_t<is_invocable_r_v<T*, F>, int> = 0>
ThreadLocal(F && constructor)65   explicit ThreadLocal(F&& constructor)
66       : constructor_(std::forward<F>(constructor)) {}
67 
get()68   FOLLY_ERASE T* get() const {
69     auto const ptr = tlp_.get();
70     return FOLLY_LIKELY(!!ptr) ? ptr : makeTlp();
71   }
72 
73   // may return null
getIfExist()74   FOLLY_ERASE T* getIfExist() const { return tlp_.get(); }
75 
76   T* operator->() const { return get(); }
77 
78   T& operator*() const { return *get(); }
79 
80   void reset(T* newPtr = nullptr) { tlp_.reset(newPtr); }
81 
82   typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor;
accessAllThreads()83   Accessor accessAllThreads() const { return tlp_.accessAllThreads(); }
84 
85   // movable
86   ThreadLocal(ThreadLocal&&) = default;
87   ThreadLocal& operator=(ThreadLocal&&) = default;
88 
89  private:
90   // non-copyable
91   ThreadLocal(const ThreadLocal&) = delete;
92   ThreadLocal& operator=(const ThreadLocal&) = delete;
93 
makeTlp()94   FOLLY_NOINLINE T* makeTlp() const {
95     auto const ptr = constructor_();
96     tlp_.reset(ptr);
97     return ptr;
98   }
99 
100   mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_;
101   std::function<T*()> constructor_;
102 };
103 
104 /*
105  * The idea here is that __thread is faster than pthread_getspecific, so we
106  * keep a __thread array of pointers to objects (ThreadEntry::elements) where
107  * each array has an index for each unique instance of the ThreadLocalPtr
108  * object.  Each ThreadLocalPtr object has a unique id that is an index into
109  * these arrays so we can fetch the correct object from thread local storage
110  * very efficiently.
111  *
112  * In order to prevent unbounded growth of the id space and thus huge
113  * ThreadEntry::elements, arrays, for example due to continuous creation and
114  * destruction of ThreadLocalPtr objects, we keep a set of all active
115  * instances.  When an instance is destroyed we remove it from the active
116  * set and insert the id into freeIds_ for reuse.  These operations require a
117  * global mutex, but only happen at construction and destruction time.
118  *
119  * We use a single global pthread_key_t per Tag to manage object destruction and
120  * memory cleanup upon thread exit because there is a finite number of
121  * pthread_key_t's available per machine.
122  *
123  * NOTE: Apple platforms don't support the same semantics for __thread that
124  *       Linux does (and it's only supported at all on i386). For these, use
125  *       pthread_setspecific()/pthread_getspecific() for the per-thread
126  *       storage.  Windows (MSVC and GCC) does support the same semantics
127  *       with __declspec(thread)
128  */
129 
130 template <class T, class Tag = void, class AccessMode = void>
131 class ThreadLocalPtr {
132  private:
133   typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta;
134 
135   using AccessAllThreadsEnabled = Negation<std::is_same<Tag, void>>;
136 
137  public:
ThreadLocalPtr()138   constexpr ThreadLocalPtr() : id_() {}
139 
ThreadLocalPtr(ThreadLocalPtr && other)140   ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(std::move(other.id_)) {}
141 
142   ThreadLocalPtr& operator=(ThreadLocalPtr&& other) {
143     assert(this != &other);
144     destroy();
145     id_ = std::move(other.id_);
146     return *this;
147   }
148 
~ThreadLocalPtr()149   ~ThreadLocalPtr() { destroy(); }
150 
get()151   T* get() const {
152     threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
153     return static_cast<T*>(w.ptr);
154   }
155 
156   T* operator->() const { return get(); }
157 
158   T& operator*() const { return *get(); }
159 
release()160   T* release() {
161     auto rlock = getAccessAllThreadsLockReadHolderIfEnabled();
162 
163     threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
164 
165     return static_cast<T*>(w.release());
166   }
167 
168   void reset(T* newPtr = nullptr) {
169     auto rlock = getAccessAllThreadsLockReadHolderIfEnabled();
170 
171     auto guard = makeGuard([&] { delete newPtr; });
172     threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
173 
174     w->dispose(TLPDestructionMode::THIS_THREAD);
175     // need to get a new ptr since the
176     // ThreadEntry::elements array can be reallocated
177     w = &StaticMeta::get(&id_);
178     w->cleanup();
179     guard.dismiss();
180     w->set(newPtr);
181   }
182 
183   explicit operator bool() const { return get() != nullptr; }
184 
185   /**
186    * reset() that transfers ownership from a smart pointer
187    */
188   template <
189       typename SourceT,
190       typename Deleter,
191       typename = typename std::enable_if<
192           std::is_convertible<SourceT*, T*>::value>::type>
reset(std::unique_ptr<SourceT,Deleter> source)193   void reset(std::unique_ptr<SourceT, Deleter> source) {
194     auto deleter = [delegate = source.get_deleter()](
195                        T* ptr, TLPDestructionMode) { delegate(ptr); };
196     reset(source.release(), deleter);
197   }
198 
199   /**
200    * reset() that transfers ownership from a smart pointer with the default
201    * deleter
202    */
203   template <
204       typename SourceT,
205       typename = typename std::enable_if<
206           std::is_convertible<SourceT*, T*>::value>::type>
reset(std::unique_ptr<SourceT> source)207   void reset(std::unique_ptr<SourceT> source) {
208     reset(source.release());
209   }
210 
211   /**
212    * reset() with a custom deleter:
213    * deleter(T* ptr, TLPDestructionMode mode)
214    * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus
215    * deleting pointers for all threads), and THIS_THREAD if we're only deleting
216    * the member for one thread (because of thread exit or reset()).
217    * Invoking the deleter must not throw.
218    */
219   template <class Deleter>
reset(T * newPtr,const Deleter & deleter)220   void reset(T* newPtr, const Deleter& deleter) {
221     auto rlock = getAccessAllThreadsLockReadHolderIfEnabled();
222 
223     auto guard = makeGuard([&] {
224       if (newPtr) {
225         deleter(newPtr, TLPDestructionMode::THIS_THREAD);
226       }
227     });
228     threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
229     w->dispose(TLPDestructionMode::THIS_THREAD);
230     // need to get a new ptr since the
231     // ThreadEntry::elements array can be reallocated
232     w = &StaticMeta::get(&id_);
233     w->cleanup();
234     guard.dismiss();
235     w->set(newPtr, deleter);
236   }
237 
238   // Holds a global lock for iteration through all thread local child objects.
239   // Can be used as an iterable container.
240   // Use accessAllThreads() to obtain one.
241   class Accessor {
242     friend class ThreadLocalPtr<T, Tag, AccessMode>;
243 
244     threadlocal_detail::StaticMetaBase& meta_;
245     SharedMutex* accessAllThreadsLock_;
246     std::mutex* lock_;
247     uint32_t id_;
248 
249    public:
250     class Iterator;
251     friend class Iterator;
252 
253     // The iterators obtained from Accessor are bidirectional iterators.
254     class Iterator {
255       friend class Accessor;
256       const Accessor* accessor_{nullptr};
257       threadlocal_detail::ThreadEntryNode* e_{nullptr};
258 
increment()259       void increment() {
260         e_ = e_->getNext();
261         incrementToValid();
262       }
263 
decrement()264       void decrement() {
265         e_ = e_->getPrev();
266         decrementToValid();
267       }
268 
dereference()269       const T& dereference() const {
270         return *static_cast<T*>(
271             e_->getThreadEntry()->elements[accessor_->id_].ptr);
272       }
273 
dereference()274       T& dereference() {
275         return *static_cast<T*>(
276             e_->getThreadEntry()->elements[accessor_->id_].ptr);
277       }
278 
equal(const Iterator & other)279       bool equal(const Iterator& other) const {
280         return (accessor_->id_ == other.accessor_->id_ && e_ == other.e_);
281       }
282 
Iterator(const Accessor * accessor)283       explicit Iterator(const Accessor* accessor)
284           : accessor_(accessor),
285             e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {}
286 
287       // we just need to check the ptr since it can be set to nullptr
288       // even if the entry is part of the list
valid()289       bool valid() const {
290         return (e_->getThreadEntry()->elements[accessor_->id_].ptr);
291       }
292 
incrementToValid()293       void incrementToValid() {
294         for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
295              !valid();
296              e_ = e_->getNext()) {
297         }
298       }
299 
decrementToValid()300       void decrementToValid() {
301         for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
302              !valid();
303              e_ = e_->getPrev()) {
304         }
305       }
306 
307      public:
308       using difference_type = ssize_t;
309       using value_type = T;
310       using reference = T const&;
311       using pointer = T const*;
312       using iterator_category = std::bidirectional_iterator_tag;
313 
314       Iterator() = default;
315 
316       Iterator& operator++() {
317         increment();
318         return *this;
319       }
320 
321       Iterator& operator++(int) {
322         Iterator copy(*this);
323         increment();
324         return copy;
325       }
326 
327       Iterator& operator--() {
328         decrement();
329         return *this;
330       }
331 
332       Iterator& operator--(int) {
333         Iterator copy(*this);
334         decrement();
335         return copy;
336       }
337 
338       T& operator*() { return dereference(); }
339 
340       T const& operator*() const { return dereference(); }
341 
342       T* operator->() { return &dereference(); }
343 
344       T const* operator->() const { return &dereference(); }
345 
346       bool operator==(Iterator const& rhs) const { return equal(rhs); }
347 
348       bool operator!=(Iterator const& rhs) const { return !equal(rhs); }
349 
getThreadId()350       std::thread::id getThreadId() const {
351         return e_->getThreadEntry()->tid();
352       }
353 
getOSThreadId()354       uint64_t getOSThreadId() const { return e_->getThreadEntry()->tid_os; }
355     };
356 
~Accessor()357     ~Accessor() { release(); }
358 
begin()359     Iterator begin() const { return ++Iterator(this); }
360 
end()361     Iterator end() const { return Iterator(this); }
362 
363     Accessor(const Accessor&) = delete;
364     Accessor& operator=(const Accessor&) = delete;
365 
Accessor(Accessor && other)366     Accessor(Accessor&& other) noexcept
367         : meta_(other.meta_),
368           accessAllThreadsLock_(other.accessAllThreadsLock_),
369           lock_(other.lock_),
370           id_(other.id_) {
371       other.id_ = 0;
372       other.accessAllThreadsLock_ = nullptr;
373       other.lock_ = nullptr;
374     }
375 
376     Accessor& operator=(Accessor&& other) noexcept {
377       // Each Tag has its own unique meta, and accessors with different Tags
378       // have different types.  So either *this is empty, or this and other
379       // have the same tag.  But if they have the same tag, they have the same
380       // meta (and lock), so they'd both hold the lock at the same time,
381       // which is impossible, which leaves only one possible scenario --
382       // *this is empty.  Assert it.
383       assert(&meta_ == &other.meta_);
384       assert(lock_ == nullptr);
385       using std::swap;
386       swap(accessAllThreadsLock_, other.accessAllThreadsLock_);
387       swap(lock_, other.lock_);
388       swap(id_, other.id_);
389     }
390 
Accessor()391     Accessor()
392         : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
393           accessAllThreadsLock_(nullptr),
394           lock_(nullptr),
395           id_(0) {}
396 
397    private:
Accessor(uint32_t id)398     explicit Accessor(uint32_t id)
399         : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
400           accessAllThreadsLock_(&meta_.accessAllThreadsLock_),
401           lock_(&meta_.lock_) {
402       accessAllThreadsLock_->lock();
403       lock_->lock();
404       id_ = id;
405     }
406 
release()407     void release() {
408       if (lock_) {
409         lock_->unlock();
410         DCHECK(accessAllThreadsLock_ != nullptr);
411         accessAllThreadsLock_->unlock();
412         id_ = 0;
413         lock_ = nullptr;
414         accessAllThreadsLock_ = nullptr;
415       }
416     }
417   };
418 
419   // accessor allows a client to iterate through all thread local child
420   // elements of this ThreadLocal instance.  Holds a global lock for each <Tag>
accessAllThreads()421   Accessor accessAllThreads() const {
422     static_assert(
423         AccessAllThreadsEnabled::value,
424         "Must use a unique Tag to use the accessAllThreads feature");
425     return Accessor(id_.getOrAllocate(StaticMeta::instance()));
426   }
427 
428  private:
destroy()429   void destroy() { StaticMeta::instance().destroy(&id_); }
430 
431   // non-copyable
432   ThreadLocalPtr(const ThreadLocalPtr&) = delete;
433   ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
434 
getAccessAllThreadsLockReadHolderIfEnabled()435   static auto getAccessAllThreadsLockReadHolderIfEnabled() {
436     return SharedMutex::ReadHolder(
437         AccessAllThreadsEnabled::value
438             ? &StaticMeta::instance().accessAllThreadsLock_
439             : nullptr);
440   }
441 
442   mutable typename StaticMeta::EntryID id_;
443 };
444 
445 namespace threadlocal_detail {
446 template <typename>
447 struct static_meta_of;
448 
449 template <typename T, typename Tag, typename AccessMode>
450 struct static_meta_of<ThreadLocalPtr<T, Tag, AccessMode>> {
451   using type = StaticMeta<Tag, AccessMode>;
452 };
453 
454 template <typename T, typename Tag, typename AccessMode>
455 struct static_meta_of<ThreadLocal<T, Tag, AccessMode>> {
456   using type = StaticMeta<Tag, AccessMode>;
457 };
458 
459 } // namespace threadlocal_detail
460 } // namespace folly
461