1 #pragma once
2 
3 /// \file ThreadLocal.h
4 /// \brief Template for thread-local storage
5 /// \author Pavel Sevecek (sevecek at sirrah.troja.mff.cuni.cz)
6 /// \date 2016-2021
7 
8 #include "objects/containers/Array.h"
9 #include "objects/utility/Iterator.h"
10 #include "objects/wrappers/Optional.h"
11 #include "thread/Scheduler.h"
12 
13 NAMESPACE_SPH_BEGIN
14 
15 namespace Detail {
16 
17 struct ValueInitTag {};
18 struct FunctorInitTag {};
19 
20 template <typename... TArgs>
21 struct ParamTraits {
22     using Tag = ValueInitTag;
23 };
24 template <typename TArg>
25 struct ParamTraits<TArg> {
26     using Tag = std::conditional_t<IsCallable<TArg>::value, FunctorInitTag, ValueInitTag>;
27 };
28 
29 } // namespace Detail
30 
31 /// \brief Template for storing a copy of a value for every thread in given scheduler.
32 ///
33 /// While C++ provides thread_local keyword for creating thread-local storages with static duration,
34 /// ThreadLocal template can be used for local variables or (non-static) member variables of classes.
35 template <typename Type>
36 class ThreadLocal {
37     // befriend other ThreadLocal classes
38     template <typename>
39     friend class ThreadLocal;
40 
41 private:
42     struct Local {
43         uint8_t padd1[64];
44         Type value;
45         uint8_t padd2[64];
46 
47         template <typename... TArgs>
48         Local(TArgs&&... args)
49             : value(std::forward<TArgs>(args)...) {}
50     };
51 
52     /// Array of thread-local values
53     Array<Local> locals;
54 
55     /// Associated scheduler; one value is allocated for each thread of the scheduler.
56     IScheduler& scheduler;
57 
58     struct Sum {
59         INLINE constexpr Type operator()(const Type& t1, const Type& t2) const {
60             return t1 + t2;
61         }
62     };
63 
64 public:
65     /// \brief Constructs a thread-local storage from a list of values
66     ///
67     /// \param scheduler Scheduler associated with the object.
68     /// \param args List of parameters that are passed into the constructor of each thread-local storage.
69     template <typename... TArgs>
70     ThreadLocal(IScheduler& scheduler, TArgs&&... args)
71         : scheduler(scheduler) {
72         initialize(typename Detail::ParamTraits<TArgs...>::Tag{}, std::forward<TArgs>(args)...);
73     }
74 
75     /// \brief Constructs a thread-local storage using a functor.
76     ///
77     /// \param scheduler Scheduler associated with the object.
78     /// \param functor Functor used to initialize each thread-local object.
79     template <typename TFunctor>
80     ThreadLocal(IScheduler& scheduler, TFunctor&& functor)
81         : scheduler(scheduler) {
82         initialize(typename Detail::ParamTraits<TFunctor>::Tag{}, std::forward<TFunctor>(functor));
83     }
84 
85     /// \brief Return a value for current thread.
86     ///
87     /// This thread must belong the the thread pool given in constructor, checked by assert.
88     INLINE Type& local() {
89         const Optional<Size> idx = scheduler.getThreadIdx();
90         SPH_ASSERT(idx && idx.value() < locals.size());
91         return locals[idx.value()].value;
92     }
93 
94     /// \copydoc local
95     INLINE const Type& local() const {
96         const Optional<Size> idx = scheduler.getThreadIdx();
97         SPH_ASSERT(idx && idx.value() < locals.size());
98         return locals[idx.value()].value;
99     }
100 
101     /// \brief Returns the storage corresponding to the thread with given index.
102     ///
103     /// Can be called from any thread. There is no synchronization, so accessing the storage from the
104     /// associated worker at the same time might cause a race condition.
105     INLINE Type& value(const Size threadId) {
106         return locals[threadId].value;
107     }
108 
109     /// \brief Performs an accumulation of thread-local values.
110     ///
111     /// Uses operator + to sum up the elements.
112     /// \param initial Value to which the accumulated result is initialized.
113     Type accumulate(const Type& initial = Type(0._f)) const {
114         return this->accumulate(initial, Sum{});
115     }
116 
117     /// \brief Performs an accumulation of thread-local values.
118     ///
119     /// Uses provided binary predicate to accumulate the values.
120     /// \param initial Value to which the accumulated result is initialized.
121     /// \param predicate Callable object with signature Type operator()(const Type&, const Type&).
122     template <typename TPredicate>
123     Type accumulate(const Type& initial, const TPredicate& predicate) const {
124         Type sum = initial;
125         for (const Type& value : *this) {
126             sum = predicate(sum, value);
127         }
128         return sum;
129     }
130 
131     template <typename T>
132     class LocalIterator : public Iterator<T> {
133     public:
134         LocalIterator(Iterator<T> iter)
135             : Iterator<T>(iter) {}
136 
137         using Return = std::conditional_t<std::is_const<T>::value, const Type&, Type&>;
138 
139         INLINE Return operator*() const {
140             return this->data->value;
141         }
142     };
143 
144     /// \brief Returns the iterator to the first element in the thread-local storage.
145     LocalIterator<Local> begin() {
146         return locals.begin();
147     }
148 
149     /// \copydoc begin
150     LocalIterator<const Local> begin() const {
151         return locals.begin();
152     }
153 
154     /// \brief Returns the iterator to the first element in the thread-local storage.
155     LocalIterator<Local> end() {
156         return locals.end();
157     }
158 
159     /// \copydoc end
160     LocalIterator<const Local> end() const {
161         return locals.end();
162     }
163 
164 private:
165     template <typename... TArgs>
166     void initialize(Detail::ValueInitTag, TArgs&&... args) {
167         const Size threadCnt = scheduler.getThreadCnt();
168         locals.reserve(threadCnt);
169         for (Size i = 0; i < threadCnt; ++i) {
170             // intentionally not forwarded, we cannot move parameters if we have more than one object
171             locals.emplaceBack(args...);
172         }
173     }
174 
175     template <typename TFunctor>
176     void initialize(Detail::FunctorInitTag, TFunctor&& functor) {
177         const Size threadCnt = scheduler.getThreadCnt();
178         locals.reserve(threadCnt);
179         for (Size i = 0; i < threadCnt; ++i) {
180             locals.emplaceBack(functor());
181         }
182     }
183 };
184 
185 /// \brief Overload of parallelFor that passes thread-local storage into the functor.
186 template <typename Type, typename TFunctor>
187 INLINE void parallelFor(IScheduler& scheduler,
188     ThreadLocal<Type>& storage,
189     const Size from,
190     const Size to,
191     TFunctor&& functor) {
192     const Size granularity = scheduler.getRecommendedGranularity();
193     parallelFor(scheduler, storage, from, to, granularity, std::forward<TFunctor>(functor));
194 }
195 
196 /// \brief Overload of parallelFor that passes thread-local storage into the functor.
197 template <typename Type, typename TFunctor>
198 INLINE void parallelFor(IScheduler& scheduler,
199     ThreadLocal<Type>& storage,
200     const Size from,
201     const Size to,
202     const Size granularity,
203     TFunctor&& functor) {
204     SPH_ASSERT(from <= to);
205 
206     scheduler.parallelFor(from, to, granularity, [&storage, &functor](Size n1, Size n2) {
207         SPH_ASSERT(n1 < n2);
208         Type& value = storage.local();
209         for (Size i = n1; i < n2; ++i) {
210             functor(i, value);
211         }
212     });
213 }
214 
215 NAMESPACE_SPH_END
216