1 #pragma once 2 3 /// \file ThreadLocal.h 4 /// \brief Template for thread-local storage 5 /// \author Pavel Sevecek (sevecek at sirrah.troja.mff.cuni.cz) 6 /// \date 2016-2021 7 8 #include "objects/containers/Array.h" 9 #include "objects/utility/Iterator.h" 10 #include "objects/wrappers/Optional.h" 11 #include "thread/Scheduler.h" 12 13 NAMESPACE_SPH_BEGIN 14 15 namespace Detail { 16 17 struct ValueInitTag {}; 18 struct FunctorInitTag {}; 19 20 template <typename... TArgs> 21 struct ParamTraits { 22 using Tag = ValueInitTag; 23 }; 24 template <typename TArg> 25 struct ParamTraits<TArg> { 26 using Tag = std::conditional_t<IsCallable<TArg>::value, FunctorInitTag, ValueInitTag>; 27 }; 28 29 } // namespace Detail 30 31 /// \brief Template for storing a copy of a value for every thread in given scheduler. 32 /// 33 /// While C++ provides thread_local keyword for creating thread-local storages with static duration, 34 /// ThreadLocal template can be used for local variables or (non-static) member variables of classes. 35 template <typename Type> 36 class ThreadLocal { 37 // befriend other ThreadLocal classes 38 template <typename> 39 friend class ThreadLocal; 40 41 private: 42 struct Local { 43 uint8_t padd1[64]; 44 Type value; 45 uint8_t padd2[64]; 46 47 template <typename... TArgs> 48 Local(TArgs&&... args) 49 : value(std::forward<TArgs>(args)...) {} 50 }; 51 52 /// Array of thread-local values 53 Array<Local> locals; 54 55 /// Associated scheduler; one value is allocated for each thread of the scheduler. 56 IScheduler& scheduler; 57 58 struct Sum { 59 INLINE constexpr Type operator()(const Type& t1, const Type& t2) const { 60 return t1 + t2; 61 } 62 }; 63 64 public: 65 /// \brief Constructs a thread-local storage from a list of values 66 /// 67 /// \param scheduler Scheduler associated with the object. 68 /// \param args List of parameters that are passed into the constructor of each thread-local storage. 69 template <typename... TArgs> 70 ThreadLocal(IScheduler& scheduler, TArgs&&... args) 71 : scheduler(scheduler) { 72 initialize(typename Detail::ParamTraits<TArgs...>::Tag{}, std::forward<TArgs>(args)...); 73 } 74 75 /// \brief Constructs a thread-local storage using a functor. 76 /// 77 /// \param scheduler Scheduler associated with the object. 78 /// \param functor Functor used to initialize each thread-local object. 79 template <typename TFunctor> 80 ThreadLocal(IScheduler& scheduler, TFunctor&& functor) 81 : scheduler(scheduler) { 82 initialize(typename Detail::ParamTraits<TFunctor>::Tag{}, std::forward<TFunctor>(functor)); 83 } 84 85 /// \brief Return a value for current thread. 86 /// 87 /// This thread must belong the the thread pool given in constructor, checked by assert. 88 INLINE Type& local() { 89 const Optional<Size> idx = scheduler.getThreadIdx(); 90 SPH_ASSERT(idx && idx.value() < locals.size()); 91 return locals[idx.value()].value; 92 } 93 94 /// \copydoc local 95 INLINE const Type& local() const { 96 const Optional<Size> idx = scheduler.getThreadIdx(); 97 SPH_ASSERT(idx && idx.value() < locals.size()); 98 return locals[idx.value()].value; 99 } 100 101 /// \brief Returns the storage corresponding to the thread with given index. 102 /// 103 /// Can be called from any thread. There is no synchronization, so accessing the storage from the 104 /// associated worker at the same time might cause a race condition. 105 INLINE Type& value(const Size threadId) { 106 return locals[threadId].value; 107 } 108 109 /// \brief Performs an accumulation of thread-local values. 110 /// 111 /// Uses operator + to sum up the elements. 112 /// \param initial Value to which the accumulated result is initialized. 113 Type accumulate(const Type& initial = Type(0._f)) const { 114 return this->accumulate(initial, Sum{}); 115 } 116 117 /// \brief Performs an accumulation of thread-local values. 118 /// 119 /// Uses provided binary predicate to accumulate the values. 120 /// \param initial Value to which the accumulated result is initialized. 121 /// \param predicate Callable object with signature Type operator()(const Type&, const Type&). 122 template <typename TPredicate> 123 Type accumulate(const Type& initial, const TPredicate& predicate) const { 124 Type sum = initial; 125 for (const Type& value : *this) { 126 sum = predicate(sum, value); 127 } 128 return sum; 129 } 130 131 template <typename T> 132 class LocalIterator : public Iterator<T> { 133 public: 134 LocalIterator(Iterator<T> iter) 135 : Iterator<T>(iter) {} 136 137 using Return = std::conditional_t<std::is_const<T>::value, const Type&, Type&>; 138 139 INLINE Return operator*() const { 140 return this->data->value; 141 } 142 }; 143 144 /// \brief Returns the iterator to the first element in the thread-local storage. 145 LocalIterator<Local> begin() { 146 return locals.begin(); 147 } 148 149 /// \copydoc begin 150 LocalIterator<const Local> begin() const { 151 return locals.begin(); 152 } 153 154 /// \brief Returns the iterator to the first element in the thread-local storage. 155 LocalIterator<Local> end() { 156 return locals.end(); 157 } 158 159 /// \copydoc end 160 LocalIterator<const Local> end() const { 161 return locals.end(); 162 } 163 164 private: 165 template <typename... TArgs> 166 void initialize(Detail::ValueInitTag, TArgs&&... args) { 167 const Size threadCnt = scheduler.getThreadCnt(); 168 locals.reserve(threadCnt); 169 for (Size i = 0; i < threadCnt; ++i) { 170 // intentionally not forwarded, we cannot move parameters if we have more than one object 171 locals.emplaceBack(args...); 172 } 173 } 174 175 template <typename TFunctor> 176 void initialize(Detail::FunctorInitTag, TFunctor&& functor) { 177 const Size threadCnt = scheduler.getThreadCnt(); 178 locals.reserve(threadCnt); 179 for (Size i = 0; i < threadCnt; ++i) { 180 locals.emplaceBack(functor()); 181 } 182 } 183 }; 184 185 /// \brief Overload of parallelFor that passes thread-local storage into the functor. 186 template <typename Type, typename TFunctor> 187 INLINE void parallelFor(IScheduler& scheduler, 188 ThreadLocal<Type>& storage, 189 const Size from, 190 const Size to, 191 TFunctor&& functor) { 192 const Size granularity = scheduler.getRecommendedGranularity(); 193 parallelFor(scheduler, storage, from, to, granularity, std::forward<TFunctor>(functor)); 194 } 195 196 /// \brief Overload of parallelFor that passes thread-local storage into the functor. 197 template <typename Type, typename TFunctor> 198 INLINE void parallelFor(IScheduler& scheduler, 199 ThreadLocal<Type>& storage, 200 const Size from, 201 const Size to, 202 const Size granularity, 203 TFunctor&& functor) { 204 SPH_ASSERT(from <= to); 205 206 scheduler.parallelFor(from, to, granularity, [&storage, &functor](Size n1, Size n2) { 207 SPH_ASSERT(n1 < n2); 208 Type& value = storage.local(); 209 for (Size i = n1; i < n2; ++i) { 210 functor(i, value); 211 } 212 }); 213 } 214 215 NAMESPACE_SPH_END 216