/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Improved thread local storage for non-trivial types (similar speed as
 * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
 * than boost::thread_specific_ptr).
 *
 * Also includes an accessor interface to walk all the thread local child
 * objects of a parent. accessAllThreads() initializes an accessor which holds
 * a global lock that blocks all creation and destruction of ThreadLocal
 * objects with the same Tag and can be used as an iterable container.
 * accessAllThreads() can race with destruction of thread-local elements. We
 * provide a strict mode which is dangerous because it requires the access lock
 * to be held while destroying thread-local elements which could cause
 * deadlocks. We gate this mode behind the AccessModeStrict template parameter.
 *
 * Intended use is for frequent write, infrequent read data access patterns
 * such as counters.
 *
 * There are two classes here - ThreadLocal and ThreadLocalPtr. ThreadLocalPtr
 * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
 * wrapper around ThreadLocalPtr that manages allocation automatically.
37 * 38 * @author Spencer Ahrens (sahrens) 39 */ 40 41 #pragma once 42 43 #include <iterator> 44 #include <thread> 45 #include <type_traits> 46 #include <utility> 47 48 #include <folly/Likely.h> 49 #include <folly/Portability.h> 50 #include <folly/ScopeGuard.h> 51 #include <folly/SharedMutex.h> 52 #include <folly/detail/ThreadLocalDetail.h> 53 54 namespace folly { 55 56 template <class T, class Tag, class AccessMode> 57 class ThreadLocalPtr; 58 59 template <class T, class Tag = void, class AccessMode = void> 60 class ThreadLocal { 61 public: ThreadLocal()62 constexpr ThreadLocal() : constructor_([]() { return new T(); }) {} 63 64 template <typename F, std::enable_if_t<is_invocable_r_v<T*, F>, int> = 0> ThreadLocal(F && constructor)65 explicit ThreadLocal(F&& constructor) 66 : constructor_(std::forward<F>(constructor)) {} 67 get()68 FOLLY_ERASE T* get() const { 69 auto const ptr = tlp_.get(); 70 return FOLLY_LIKELY(!!ptr) ? ptr : makeTlp(); 71 } 72 73 // may return null getIfExist()74 FOLLY_ERASE T* getIfExist() const { return tlp_.get(); } 75 76 T* operator->() const { return get(); } 77 78 T& operator*() const { return *get(); } 79 80 void reset(T* newPtr = nullptr) { tlp_.reset(newPtr); } 81 82 typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor; accessAllThreads()83 Accessor accessAllThreads() const { return tlp_.accessAllThreads(); } 84 85 // movable 86 ThreadLocal(ThreadLocal&&) = default; 87 ThreadLocal& operator=(ThreadLocal&&) = default; 88 89 private: 90 // non-copyable 91 ThreadLocal(const ThreadLocal&) = delete; 92 ThreadLocal& operator=(const ThreadLocal&) = delete; 93 makeTlp()94 FOLLY_NOINLINE T* makeTlp() const { 95 auto const ptr = constructor_(); 96 tlp_.reset(ptr); 97 return ptr; 98 } 99 100 mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_; 101 std::function<T*()> constructor_; 102 }; 103 104 /* 105 * The idea here is that __thread is faster than pthread_getspecific, so we 106 * keep a __thread array of pointers to objects 
(ThreadEntry::elements) where 107 * each array has an index for each unique instance of the ThreadLocalPtr 108 * object. Each ThreadLocalPtr object has a unique id that is an index into 109 * these arrays so we can fetch the correct object from thread local storage 110 * very efficiently. 111 * 112 * In order to prevent unbounded growth of the id space and thus huge 113 * ThreadEntry::elements, arrays, for example due to continuous creation and 114 * destruction of ThreadLocalPtr objects, we keep a set of all active 115 * instances. When an instance is destroyed we remove it from the active 116 * set and insert the id into freeIds_ for reuse. These operations require a 117 * global mutex, but only happen at construction and destruction time. 118 * 119 * We use a single global pthread_key_t per Tag to manage object destruction and 120 * memory cleanup upon thread exit because there is a finite number of 121 * pthread_key_t's available per machine. 122 * 123 * NOTE: Apple platforms don't support the same semantics for __thread that 124 * Linux does (and it's only supported at all on i386). For these, use 125 * pthread_setspecific()/pthread_getspecific() for the per-thread 126 * storage. 
Windows (MSVC and GCC) does support the same semantics 127 * with __declspec(thread) 128 */ 129 130 template <class T, class Tag = void, class AccessMode = void> 131 class ThreadLocalPtr { 132 private: 133 typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta; 134 135 using AccessAllThreadsEnabled = Negation<std::is_same<Tag, void>>; 136 137 public: ThreadLocalPtr()138 constexpr ThreadLocalPtr() : id_() {} 139 ThreadLocalPtr(ThreadLocalPtr && other)140 ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(std::move(other.id_)) {} 141 142 ThreadLocalPtr& operator=(ThreadLocalPtr&& other) { 143 assert(this != &other); 144 destroy(); 145 id_ = std::move(other.id_); 146 return *this; 147 } 148 ~ThreadLocalPtr()149 ~ThreadLocalPtr() { destroy(); } 150 get()151 T* get() const { 152 threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); 153 return static_cast<T*>(w.ptr); 154 } 155 156 T* operator->() const { return get(); } 157 158 T& operator*() const { return *get(); } 159 release()160 T* release() { 161 auto rlock = getAccessAllThreadsLockReadHolderIfEnabled(); 162 163 threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); 164 165 return static_cast<T*>(w.release()); 166 } 167 168 void reset(T* newPtr = nullptr) { 169 auto rlock = getAccessAllThreadsLockReadHolderIfEnabled(); 170 171 auto guard = makeGuard([&] { delete newPtr; }); 172 threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_); 173 174 w->dispose(TLPDestructionMode::THIS_THREAD); 175 // need to get a new ptr since the 176 // ThreadEntry::elements array can be reallocated 177 w = &StaticMeta::get(&id_); 178 w->cleanup(); 179 guard.dismiss(); 180 w->set(newPtr); 181 } 182 183 explicit operator bool() const { return get() != nullptr; } 184 185 /** 186 * reset() that transfers ownership from a smart pointer 187 */ 188 template < 189 typename SourceT, 190 typename Deleter, 191 typename = typename std::enable_if< 192 std::is_convertible<SourceT*, T*>::value>::type> 
reset(std::unique_ptr<SourceT,Deleter> source)193 void reset(std::unique_ptr<SourceT, Deleter> source) { 194 auto deleter = [delegate = source.get_deleter()]( 195 T* ptr, TLPDestructionMode) { delegate(ptr); }; 196 reset(source.release(), deleter); 197 } 198 199 /** 200 * reset() that transfers ownership from a smart pointer with the default 201 * deleter 202 */ 203 template < 204 typename SourceT, 205 typename = typename std::enable_if< 206 std::is_convertible<SourceT*, T*>::value>::type> reset(std::unique_ptr<SourceT> source)207 void reset(std::unique_ptr<SourceT> source) { 208 reset(source.release()); 209 } 210 211 /** 212 * reset() with a custom deleter: 213 * deleter(T* ptr, TLPDestructionMode mode) 214 * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus 215 * deleting pointers for all threads), and THIS_THREAD if we're only deleting 216 * the member for one thread (because of thread exit or reset()). 217 * Invoking the deleter must not throw. 218 */ 219 template <class Deleter> reset(T * newPtr,const Deleter & deleter)220 void reset(T* newPtr, const Deleter& deleter) { 221 auto rlock = getAccessAllThreadsLockReadHolderIfEnabled(); 222 223 auto guard = makeGuard([&] { 224 if (newPtr) { 225 deleter(newPtr, TLPDestructionMode::THIS_THREAD); 226 } 227 }); 228 threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_); 229 w->dispose(TLPDestructionMode::THIS_THREAD); 230 // need to get a new ptr since the 231 // ThreadEntry::elements array can be reallocated 232 w = &StaticMeta::get(&id_); 233 w->cleanup(); 234 guard.dismiss(); 235 w->set(newPtr, deleter); 236 } 237 238 // Holds a global lock for iteration through all thread local child objects. 239 // Can be used as an iterable container. 240 // Use accessAllThreads() to obtain one. 
241 class Accessor { 242 friend class ThreadLocalPtr<T, Tag, AccessMode>; 243 244 threadlocal_detail::StaticMetaBase& meta_; 245 SharedMutex* accessAllThreadsLock_; 246 std::mutex* lock_; 247 uint32_t id_; 248 249 public: 250 class Iterator; 251 friend class Iterator; 252 253 // The iterators obtained from Accessor are bidirectional iterators. 254 class Iterator { 255 friend class Accessor; 256 const Accessor* accessor_{nullptr}; 257 threadlocal_detail::ThreadEntryNode* e_{nullptr}; 258 increment()259 void increment() { 260 e_ = e_->getNext(); 261 incrementToValid(); 262 } 263 decrement()264 void decrement() { 265 e_ = e_->getPrev(); 266 decrementToValid(); 267 } 268 dereference()269 const T& dereference() const { 270 return *static_cast<T*>( 271 e_->getThreadEntry()->elements[accessor_->id_].ptr); 272 } 273 dereference()274 T& dereference() { 275 return *static_cast<T*>( 276 e_->getThreadEntry()->elements[accessor_->id_].ptr); 277 } 278 equal(const Iterator & other)279 bool equal(const Iterator& other) const { 280 return (accessor_->id_ == other.accessor_->id_ && e_ == other.e_); 281 } 282 Iterator(const Accessor * accessor)283 explicit Iterator(const Accessor* accessor) 284 : accessor_(accessor), 285 e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {} 286 287 // we just need to check the ptr since it can be set to nullptr 288 // even if the entry is part of the list valid()289 bool valid() const { 290 return (e_->getThreadEntry()->elements[accessor_->id_].ptr); 291 } 292 incrementToValid()293 void incrementToValid() { 294 for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node && 295 !valid(); 296 e_ = e_->getNext()) { 297 } 298 } 299 decrementToValid()300 void decrementToValid() { 301 for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node && 302 !valid(); 303 e_ = e_->getPrev()) { 304 } 305 } 306 307 public: 308 using difference_type = ssize_t; 309 using value_type = T; 310 using reference = T const&; 311 using pointer = T const*; 
312 using iterator_category = std::bidirectional_iterator_tag; 313 314 Iterator() = default; 315 316 Iterator& operator++() { 317 increment(); 318 return *this; 319 } 320 321 Iterator& operator++(int) { 322 Iterator copy(*this); 323 increment(); 324 return copy; 325 } 326 327 Iterator& operator--() { 328 decrement(); 329 return *this; 330 } 331 332 Iterator& operator--(int) { 333 Iterator copy(*this); 334 decrement(); 335 return copy; 336 } 337 338 T& operator*() { return dereference(); } 339 340 T const& operator*() const { return dereference(); } 341 342 T* operator->() { return &dereference(); } 343 344 T const* operator->() const { return &dereference(); } 345 346 bool operator==(Iterator const& rhs) const { return equal(rhs); } 347 348 bool operator!=(Iterator const& rhs) const { return !equal(rhs); } 349 getThreadId()350 std::thread::id getThreadId() const { 351 return e_->getThreadEntry()->tid(); 352 } 353 getOSThreadId()354 uint64_t getOSThreadId() const { return e_->getThreadEntry()->tid_os; } 355 }; 356 ~Accessor()357 ~Accessor() { release(); } 358 begin()359 Iterator begin() const { return ++Iterator(this); } 360 end()361 Iterator end() const { return Iterator(this); } 362 363 Accessor(const Accessor&) = delete; 364 Accessor& operator=(const Accessor&) = delete; 365 Accessor(Accessor && other)366 Accessor(Accessor&& other) noexcept 367 : meta_(other.meta_), 368 accessAllThreadsLock_(other.accessAllThreadsLock_), 369 lock_(other.lock_), 370 id_(other.id_) { 371 other.id_ = 0; 372 other.accessAllThreadsLock_ = nullptr; 373 other.lock_ = nullptr; 374 } 375 376 Accessor& operator=(Accessor&& other) noexcept { 377 // Each Tag has its own unique meta, and accessors with different Tags 378 // have different types. So either *this is empty, or this and other 379 // have the same tag. 
But if they have the same tag, they have the same 380 // meta (and lock), so they'd both hold the lock at the same time, 381 // which is impossible, which leaves only one possible scenario -- 382 // *this is empty. Assert it. 383 assert(&meta_ == &other.meta_); 384 assert(lock_ == nullptr); 385 using std::swap; 386 swap(accessAllThreadsLock_, other.accessAllThreadsLock_); 387 swap(lock_, other.lock_); 388 swap(id_, other.id_); 389 } 390 Accessor()391 Accessor() 392 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()), 393 accessAllThreadsLock_(nullptr), 394 lock_(nullptr), 395 id_(0) {} 396 397 private: Accessor(uint32_t id)398 explicit Accessor(uint32_t id) 399 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()), 400 accessAllThreadsLock_(&meta_.accessAllThreadsLock_), 401 lock_(&meta_.lock_) { 402 accessAllThreadsLock_->lock(); 403 lock_->lock(); 404 id_ = id; 405 } 406 release()407 void release() { 408 if (lock_) { 409 lock_->unlock(); 410 DCHECK(accessAllThreadsLock_ != nullptr); 411 accessAllThreadsLock_->unlock(); 412 id_ = 0; 413 lock_ = nullptr; 414 accessAllThreadsLock_ = nullptr; 415 } 416 } 417 }; 418 419 // accessor allows a client to iterate through all thread local child 420 // elements of this ThreadLocal instance. Holds a global lock for each <Tag> accessAllThreads()421 Accessor accessAllThreads() const { 422 static_assert( 423 AccessAllThreadsEnabled::value, 424 "Must use a unique Tag to use the accessAllThreads feature"); 425 return Accessor(id_.getOrAllocate(StaticMeta::instance())); 426 } 427 428 private: destroy()429 void destroy() { StaticMeta::instance().destroy(&id_); } 430 431 // non-copyable 432 ThreadLocalPtr(const ThreadLocalPtr&) = delete; 433 ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete; 434 getAccessAllThreadsLockReadHolderIfEnabled()435 static auto getAccessAllThreadsLockReadHolderIfEnabled() { 436 return SharedMutex::ReadHolder( 437 AccessAllThreadsEnabled::value 438 ? 
&StaticMeta::instance().accessAllThreadsLock_ 439 : nullptr); 440 } 441 442 mutable typename StaticMeta::EntryID id_; 443 }; 444 445 namespace threadlocal_detail { 446 template <typename> 447 struct static_meta_of; 448 449 template <typename T, typename Tag, typename AccessMode> 450 struct static_meta_of<ThreadLocalPtr<T, Tag, AccessMode>> { 451 using type = StaticMeta<Tag, AccessMode>; 452 }; 453 454 template <typename T, typename Tag, typename AccessMode> 455 struct static_meta_of<ThreadLocal<T, Tag, AccessMode>> { 456 using type = StaticMeta<Tag, AccessMode>; 457 }; 458 459 } // namespace threadlocal_detail 460 } // namespace folly 461