/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H

#define __TBB_parallel_reduce_H_include_area
#include "internal/_warning_suppress_enable_notice.h"

#include <new>
#include "task.h"
#include "aligned_space.h"
#include "partitioner.h"
#include "tbb_profiling.h"

namespace tbb {

namespace interface9 {
//! @cond INTERNAL
namespace internal {

    using namespace tbb::internal;

    /** Values for reduction_context. */
    enum {
        root_task, left_child, right_child
    };

    /** Represented as a char, not enum, for compactness. */
    typedef char reduction_context;

    //! Task type used to combine the partial results of parallel_reduce.
    /** @ingroup algorithms */
    template<typename Body>
    class finish_reduce: public flag_task {
        //! True if the right child constructed a Body copy in zombie_space.
        bool has_right_zombie;
        const reduction_context my_context;
        //! Pointer to the body, or NULL if the left child has not yet finished.
        Body* my_body;
        aligned_space<Body> zombie_space;
        finish_reduce( reduction_context context_ ) :
            has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
            my_context(context_),
            my_body(NULL)
        {
        }
        ~finish_reduce() {
            if( has_right_zombie )
                zombie_space.begin()->~Body();
        }
        task* execute() __TBB_override {
            if( has_right_zombie ) {
                // Right child was stolen.
                Body* s = zombie_space.begin();
                my_body->join( *s );
                // Body::join() won't be called if canceled. Defer destruction to destructor
            }
            if( my_context==left_child )
                itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
            return NULL;
        }
        template<typename Range,typename Body_, typename Partitioner>
        friend class start_reduce;
    };

    //! allocate right task with new parent
    void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes);

    //! Task type used to split the work of parallel_reduce.
    /** @ingroup algorithms */
    template<typename Range, typename Body, typename Partitioner>
    class start_reduce: public task {
        typedef finish_reduce<Body> finish_type;
        Body* my_body;
        Range my_range;
        typename Partitioner::task_partition_type my_partition;
        reduction_context my_context;
        task* execute() __TBB_override;
        //! Update affinity info, if any
        void note_affinity( affinity_id id ) __TBB_override {
            my_partition.note_affinity( id );
        }
        template<typename Body_>
        friend class finish_reduce;

public:
        //! Constructor used for root task
        start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
            my_body(body),
            my_range(range),
            my_partition(partitioner),
            my_context(root_task)
        {
        }
        //! Splitting constructor used to generate children.
        /** parent_ becomes left child.  Newly constructed object is right child. */
        start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj ) :
            my_body(parent_.my_body),
            my_range(parent_.my_range, split_obj),
            my_partition(parent_.my_partition, split_obj),
            my_context(right_child)
        {
            my_partition.set_affinity(*this);
            parent_.my_context = left_child;
        }
        //! Construct right child from the given range as response to the demand.
        /** parent_ remains left child.  Newly constructed object is right child. */
        start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
            my_body(parent_.my_body),
            my_range(r),
            my_partition(parent_.my_partition, split()),
            my_context(right_child)
        {
            my_partition.set_affinity(*this);
            my_partition.align_depth( d ); // TODO: move into constructor of partitioner
            parent_.my_context = left_child;
        }
        static void run( const Range& range, Body& body, Partitioner& partitioner ) {
            if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
                task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
#else
                // A bound context prevents exceptions thrown from the body from affecting nesting or sibling algorithms,
                // and allows users to handle exceptions safely by wrapping parallel_reduce in a try-block.
                task_group_context context(PARALLEL_REDUCE);
                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
            }
        }
#if __TBB_TASK_GROUP_CONTEXT
        static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
            if( !range.empty() )
                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
        }
#endif /* __TBB_TASK_GROUP_CONTEXT */
        //! Run body for range
        void run_body( Range &r ) { (*my_body)( r ); }

        //! spawn right task, serves as callback for partitioner
        // TODO: remove code duplication from 'offer_work' methods
        void offer_work(typename Partitioner::split_type& split_obj) {
            task *tasks[2];
            allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
            new((void*)tasks[0]) finish_type(my_context);
            new((void*)tasks[1]) start_reduce(*this, split_obj);
            spawn(*tasks[1]);
        }
        //! spawn right task, serves as callback for partitioner
        void offer_work(const Range& r, depth_t d = 0) {
            task *tasks[2];
            allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
            new((void*)tasks[0]) finish_type(my_context);
            new((void*)tasks[1]) start_reduce(*this, r, d);
            spawn(*tasks[1]);
        }
    };

    //! allocate right task with new parent
    // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
    inline void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes) {
        tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes);
        start_reduce_task->set_parent(tasks[0]);
        tasks[0]->set_ref_count(2);
        tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes);
    }

    template<typename Range, typename Body, typename Partitioner>
    task* start_reduce<Range,Body,Partitioner>::execute() {
        my_partition.check_being_stolen( *this );
        if( my_context==right_child ) {
            finish_type* parent_ptr = static_cast<finish_type*>(parent());
            if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
                my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
                parent_ptr->has_right_zombie = true;
            }
        } else __TBB_ASSERT(my_context==root_task,NULL); // because the left leaf spawns right leaves without recycling
        my_partition.execute(*this, my_range);
        if( my_context==left_child ) {
            finish_type* parent_ptr = static_cast<finish_type*>(parent());
            __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
            itt_store_word_with_release(parent_ptr->my_body, my_body );
        }
        return NULL;
    }

    //! Task type used to combine the partial results of parallel_deterministic_reduce.
    /** @ingroup algorithms */
    template<typename Body>
    class finish_deterministic_reduce: public task {
        Body &my_left_body;
        Body my_right_body;

        finish_deterministic_reduce( Body &body ) :
            my_left_body( body ),
            my_right_body( body, split() )
        {
        }
        task* execute() __TBB_override {
            my_left_body.join( my_right_body );
            return NULL;
        }
        template<typename Range,typename Body_, typename Partitioner>
        friend class start_deterministic_reduce;
    };

    //! Task type used to split the work of parallel_deterministic_reduce.
    /** @ingroup algorithms */
    template<typename Range, typename Body, typename Partitioner>
    class start_deterministic_reduce: public task {
        typedef finish_deterministic_reduce<Body> finish_type;
        Body &my_body;
        Range my_range;
        typename Partitioner::task_partition_type my_partition;
        task* execute() __TBB_override;

        //! Constructor used for root task
        start_deterministic_reduce( const Range& range, Body& body, Partitioner& partitioner ) :
            my_body( body ),
            my_range( range ),
            my_partition( partitioner )
        {
        }
        //! Splitting constructor used to generate children.
        /** parent_ becomes left child.  Newly constructed object is right child. */
        start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c, typename Partitioner::split_type& split_obj ) :
            my_body( c.my_right_body ),
            my_range( parent_.my_range, split_obj ),
            my_partition( parent_.my_partition, split_obj )
        {
        }

public:
        static void run( const Range& range, Body& body, Partitioner& partitioner ) {
            if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
                task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,body,partitioner) );
#else
                // A bound context prevents exceptions thrown from the body from affecting nesting or sibling algorithms,
                // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce in a try-block.
                task_group_context context(PARALLEL_REDUCE);
                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
            }
        }
#if __TBB_TASK_GROUP_CONTEXT
        static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
            if( !range.empty() )
                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body,partitioner) );
        }
#endif /* __TBB_TASK_GROUP_CONTEXT */

        void offer_work( typename Partitioner::split_type& split_obj) {
            task* tasks[2];
            allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_deterministic_reduce), sizeof(finish_type));
            new((void*)tasks[0]) finish_type(my_body);
            new((void*)tasks[1]) start_deterministic_reduce(*this, *static_cast<finish_type*>(tasks[0]), split_obj);
            spawn(*tasks[1]);
        }

        void run_body( Range &r ) { my_body(r); }
    };

    template<typename Range, typename Body, typename Partitioner>
    task* start_deterministic_reduce<Range,Body, Partitioner>::execute() {
        my_partition.execute(*this, my_range);
        return NULL;
    }
} // namespace internal
//! @endcond
} // namespace interface9

//! @cond INTERNAL
namespace internal {
    using interface9::internal::start_reduce;
    using interface9::internal::start_deterministic_reduce;
    //! Auxiliary class for parallel_reduce; for internal use only.
    /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
        using given \ref parallel_reduce_lambda_req "anonymous function objects".
     **/
    /** @ingroup algorithms */
    template<typename Range, typename Value, typename RealBody, typename Reduction>
    class lambda_reduce_body {

//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
//       (might require some performance measurements)

        const Value&     identity_element;
        const RealBody&  my_real_body;
        const Reduction& my_reduction;
        Value            my_value;
        lambda_reduce_body& operator= ( const lambda_reduce_body& other );
    public:
        lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
            : identity_element(identity)
            , my_real_body(body)
            , my_reduction(reduction)
            , my_value(identity)
        { }
        lambda_reduce_body( const lambda_reduce_body& other )
            : identity_element(other.identity_element)
            , my_real_body(other.my_real_body)
            , my_reduction(other.my_reduction)
            , my_value(other.my_value)
        { }
        lambda_reduce_body( lambda_reduce_body& other, tbb::split )
            : identity_element(other.identity_element)
            , my_real_body(other.my_real_body)
            , my_reduction(other.my_reduction)
            , my_value(other.identity_element)
        { }
        void operator()(Range& range) {
            my_value = my_real_body(range, const_cast<const Value&>(my_value));
        }
        void join( lambda_reduce_body& rhs ) {
            my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
        }
        Value result() const {
            return my_value;
        }
    };

} // namespace internal
//! @endcond

// Requirements on Range concept are documented in blocked_range.h

/** \page parallel_reduce_body_req Requirements on parallel_reduce body
    Class \c Body implementing the concept of parallel_reduce body must define:
    - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
                                                        Must be able to run concurrently with operator() and method \c join
    - \code Body::~Body(); \endcode                     Destructor
    - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
                                                        and accumulating the result
    - \code void Body::join( Body& b ); \endcode        Join results.
                                                        The result in \c b should be merged into the result of \c this
**/
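
/*  Illustrative sketch only: a minimal Body that satisfies the requirements above,
    summing a blocked_range<const float*>.  The names SumBody and my_sum are
    examples chosen here, not part of the library.

        struct SumBody {
            float my_sum;
            SumBody() : my_sum(0) {}
            SumBody( SumBody&, tbb::split ) : my_sum(0) {}        // splitting constructor: start a fresh partial sum
            void operator()( const tbb::blocked_range<const float*>& r ) {
                for( const float* p=r.begin(); p!=r.end(); ++p )
                    my_sum += *p;                                  // accumulate the subrange into the partial result
            }
            void join( SumBody& rhs ) { my_sum += rhs.my_sum; }    // fold in the right child's partial result
        };

    A typical call would be:  SumBody b; tbb::parallel_reduce( tbb::blocked_range<const float*>(first,last), b );
    after which b.my_sum holds the total.
*/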

/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    The "functional" form of parallel_reduce (see class internal::lambda_reduce_body) expects two function objects:
    - \code Value RealBody::operator()( const Range& r, const Value& x ) const; \endcode
                                                        Accumulate subrange \c r into partial result \c x and return the new value
    - \code Value Reduction::operator()( const Value& x, const Value& y ) const; \endcode
                                                        Combine two partial results and return the combined value
**/

/** \name parallel_reduce
    See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
//@{

//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body ) {
    internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}

//! Parallel iteration with reduction and simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}

#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
    internal::start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
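
/*  Illustrative only: a typical call of the functional form below, summing a range
    of floats.  The names data and n stand in for the caller's own container.

        float total = tbb::parallel_reduce(
            tbb::blocked_range<const float*>( data, data+n ),
            0.f,                                                   // identity element
            []( const tbb::blocked_range<const float*>& r, float x ) {
                for( const float* p=r.begin(); p!=r.end(); ++p )
                    x += *p;                                       // accumulate subrange into the partial result
                return x;
            },
            []( float x, float y ) { return x + y; } );            // combine two partial results
*/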

//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}

//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run(range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
    return body.result();
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

//! Parallel iteration with deterministic reduction and default simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    internal::start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
}

#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
    internal::start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    internal::start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

/** parallel_deterministic_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
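
/*  Illustrative only: the functional form below takes the same arguments as the
    parallel_reduce overloads above, but the range is split deterministically, so
    a floating-point sum is reproducible from run to run for the same input, e.g.

        float total = tbb::parallel_deterministic_reduce(
            tbb::blocked_range<const float*>( data, data+n ), 0.f,
            []( const tbb::blocked_range<const float*>& r, float x ) {
                for( const float* p=r.begin(); p!=r.end(); ++p ) x += *p;
                return x;
            },
            []( float x, float y ) { return x + y; } );            // same signatures as for parallel_reduce
*/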

//! Parallel iteration with deterministic reduction and default simple partitioner.
// TODO: consider making static_partitioner the default
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
                          ::run(range, body, partitioner);
    return body.result();
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
        ::run(range, body, partitioner);
    return body.result();
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
    task_group_context& context ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
    const simple_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
        ::run(range, body, partitioner, context);
    return body.result();
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
    const static_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
        ::run(range, body, partitioner, context);
    return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}

} // namespace tbb

#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_reduce_H_include_area

#endif /* __TBB_parallel_reduce_H */