1 /*
2  *  Copyright 2008-2013 NVIDIA Corporation
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16 
17 
18 /*! \file reduce.h
19  *  \brief Functions for reducing a range to a single value
20  */
21 
22 #pragma once
23 
24 #include <thrust/detail/config.h>
25 #include <thrust/detail/execution_policy.h>
26 #include <thrust/iterator/iterator_traits.h>
27 #include <thrust/pair.h>
28 
29 namespace thrust
30 {
31 
32 
33 /*! \addtogroup reductions
34  *  \{
35  */
36 
37 
38 /*! \p reduce is a generalization of summation: it computes the sum (or some
39  *  other binary operation) of all the elements in the range <tt>[first,
40  *  last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
41  *  reduction. \p reduce is similar to the C++ Standard Template Library's
42  *  <tt>std::accumulate</tt>. The primary difference between the two functions
43  *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
44  *  \p reduce requires associativity of the binary operation to parallelize
45  *  the reduction.
46  *
47  *  Note that \p reduce also assumes that the binary reduction operator (in this
48  *  case operator+) is commutative.  If the reduction operator is not commutative
49  *  then \p thrust::reduce should not be used.  Instead, one could use
50  *  \p inclusive_scan (which does not require commutativity) and select the
51  *  last element of the output array.
52  *
53  *  The algorithm's execution is parallelized as determined by \p exec.
54  *
55  *  \param exec The execution policy to use for parallelization.
56  *  \param first The beginning of the sequence.
57  *  \param last The end of the sequence.
58  *  \return The result of the reduction.
59  *
60  *  \tparam DerivedPolicy The name of the derived execution policy.
61  *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
62  *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
63  *          then <tt>x + y</tt> is defined and is convertible to \p InputIterator's
64  *          \c value_type. If \c T is \c InputIterator's \c value_type, then
65  *          <tt>T(0)</tt> is defined.
66  *
67  *  The following code snippet demonstrates how to use \p reduce to compute
68  *  the sum of a sequence of integers using the \p thrust::host execution policy for parallelization:
69  *
70  *  \code
71  *  #include <thrust/reduce.h>
72  *  #include <thrust/execution_policy.h>
73  *  ...
74  *  int data[6] = {1, 0, 2, 2, 1, 3};
75  *  int result = thrust::reduce(thrust::host, data, data + 6);
76  *
77  *  // result == 9
78  *  \endcode
79  *
80  *  \see http://www.sgi.com/tech/stl/accumulate.html
81  */
82 template<typename DerivedPolicy, typename InputIterator>
83 __host__ __device__
84   typename thrust::iterator_traits<InputIterator>::value_type
85     reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last);
86 
87 
88 /*! \p reduce is a generalization of summation: it computes the sum (or some
89  *  other binary operation) of all the elements in the range <tt>[first,
90  *  last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
91  *  reduction. \p reduce is similar to the C++ Standard Template Library's
92  *  <tt>std::accumulate</tt>. The primary difference between the two functions
93  *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
94  *  \p reduce requires associativity of the binary operation to parallelize
95  *  the reduction.
96  *
97  *  Note that \p reduce also assumes that the binary reduction operator (in this
98  *  case operator+) is commutative.  If the reduction operator is not commutative
99  *  then \p thrust::reduce should not be used.  Instead, one could use
100  *  \p inclusive_scan (which does not require commutativity) and select the
101  *  last element of the output array.
102  *
103  *  \param first The beginning of the sequence.
104  *  \param last The end of the sequence.
105  *  \return The result of the reduction.
106  *
107  *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
108  *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
109  *          then <tt>x + y</tt> is defined and is convertible to \p InputIterator's
110  *          \c value_type. If \c T is \c InputIterator's \c value_type, then
111  *          <tt>T(0)</tt> is defined.
112  *
113  *  The following code snippet demonstrates how to use \p reduce to compute
114  *  the sum of a sequence of integers.
115  *
116  *  \code
117  *  #include <thrust/reduce.h>
118  *  ...
119  *  int data[6] = {1, 0, 2, 2, 1, 3};
120  *  int result = thrust::reduce(data, data + 6);
121  *
122  *  // result == 9
123  *  \endcode
124  *
125  *  \see http://www.sgi.com/tech/stl/accumulate.html
126  */
127 template<typename InputIterator> typename
128   thrust::iterator_traits<InputIterator>::value_type reduce(InputIterator first, InputIterator last);
129 
130 
131 /*! \p reduce is a generalization of summation: it computes the sum (or some
132  *  other binary operation) of all the elements in the range <tt>[first,
133  *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
134  *  reduction. \p reduce is similar to the C++ Standard Template Library's
135  *  <tt>std::accumulate</tt>. The primary difference between the two functions
136  *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
137  *  \p reduce requires associativity of the binary operation to parallelize
138  *  the reduction.
139  *
140  *  Note that \p reduce also assumes that the binary reduction operator (in this
141  *  case operator+) is commutative.  If the reduction operator is not commutative
142  *  then \p thrust::reduce should not be used.  Instead, one could use
143  *  \p inclusive_scan (which does not require commutativity) and select the
144  *  last element of the output array.
145  *
146  *  The algorithm's execution is parallelized as determined by \p exec.
147  *
148  *  \param exec The execution policy to use for parallelization.
149  *  \param first The beginning of the input sequence.
150  *  \param last The end of the input sequence.
151  *  \param init The initial value.
152  *  \return The result of the reduction.
153  *
154  *  \tparam DerivedPolicy The name of the derived execution policy.
155  *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
156  *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
157  *          then <tt>x + y</tt> is defined and is convertible to \p T.
158  *  \tparam T is convertible to \p InputIterator's \c value_type.
159  *
160  *  The following code snippet demonstrates how to use \p reduce to compute
161  *  the sum of a sequence of integers including an intialization value using the \p thrust::host
162  *  execution policy for parallelization:
163  *
164  *  \code
165  *  #include <thrust/reduce.h>
166  *  #include <thrust/execution_policy.h>
167  *  ...
168  *  int data[6] = {1, 0, 2, 2, 1, 3};
169  *  int result = thrust::reduce(thrust::host, data, data + 6, 1);
170  *
171  *  // result == 10
172  *  \endcode
173  *
174  *  \see http://www.sgi.com/tech/stl/accumulate.html
175  */
176 template<typename DerivedPolicy, typename InputIterator, typename T>
177 __host__ __device__
178   T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
179            InputIterator first,
180            InputIterator last,
181            T init);
182 
183 
184 /*! \p reduce is a generalization of summation: it computes the sum (or some
185  *  other binary operation) of all the elements in the range <tt>[first,
186  *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
187  *  reduction. \p reduce is similar to the C++ Standard Template Library's
188  *  <tt>std::accumulate</tt>. The primary difference between the two functions
189  *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
190  *  \p reduce requires associativity of the binary operation to parallelize
191  *  the reduction.
192  *
193  *  Note that \p reduce also assumes that the binary reduction operator (in this
194  *  case operator+) is commutative.  If the reduction operator is not commutative
195  *  then \p thrust::reduce should not be used.  Instead, one could use
196  *  \p inclusive_scan (which does not require commutativity) and select the
197  *  last element of the output array.
198  *
199  *  \param first The beginning of the input sequence.
200  *  \param last The end of the input sequence.
201  *  \param init The initial value.
202  *  \return The result of the reduction.
203  *
204  *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
205  *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
206  *          then <tt>x + y</tt> is defined and is convertible to \p T.
207  *  \tparam T is convertible to \p InputIterator's \c value_type.
208  *
209  *  The following code snippet demonstrates how to use \p reduce to compute
210  *  the sum of a sequence of integers including an intialization value.
211  *
212  *  \code
213  *  #include <thrust/reduce.h>
214  *  ...
215  *  int data[6] = {1, 0, 2, 2, 1, 3};
216  *  int result = thrust::reduce(data, data + 6, 1);
217  *
218  *  // result == 10
219  *  \endcode
220  *
221  *  \see http://www.sgi.com/tech/stl/accumulate.html
222  */
223 template<typename InputIterator, typename T>
224   T reduce(InputIterator first,
225            InputIterator last,
226            T init);
227 
228 
229 /*! \p reduce is a generalization of summation: it computes the sum (or some
230  *  other binary operation) of all the elements in the range <tt>[first,
231  *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
232  *  reduction and \p binary_op as the binary function used for summation. \p reduce
233  *  is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
234  *  The primary difference between the two functions is that <tt>std::accumulate</tt>
235  *  guarantees the order of summation, while \p reduce requires associativity of
236  *  \p binary_op to parallelize the reduction.
237  *
238  *  Note that \p reduce also assumes that the binary reduction operator (in this
239  *  case \p binary_op) is commutative.  If the reduction operator is not commutative
240  *  then \p thrust::reduce should not be used.  Instead, one could use
241  *  \p inclusive_scan (which does not require commutativity) and select the
242  *  last element of the output array.
243  *
244  *  The algorithm's execution is parallelized as determined by \p exec.
245  *
246  *  \param exec The execution policy to use for parallelization.
247  *  \param first The beginning of the input sequence.
248  *  \param last The end of the input sequence.
249  *  \param init The initial value.
250  *  \param binary_op The binary function used to 'sum' values.
251  *  \return The result of the reduction.
252  *
253  *  \tparam DerivedPolicy The name of the derived execution policy.
254  *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
255  *          and \c InputIterator's \c value_type is convertible to \c T.
256  *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
257  *          and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
258  *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
259  *          and \p BinaryFunction's \c result_type is convertible to \p OutputType.
260  *
261  *  The following code snippet demonstrates how to use \p reduce to
262  *  compute the maximum value of a sequence of integers using the \p thrust::host execution policy
263  *  for parallelization:
264  *
265  *  \code
266  *  #include <thrust/reduce.h>
267  *  #include <thrust/functional.h>
268  *  #include <thrust/execution_policy.h>
269  *  ...
270  *  int data[6] = {1, 0, 2, 2, 1, 3};
271  *  int result = thrust::reduce(thrust::host,
272  *                              data, data + 6,
273  *                              -1,
274  *                              thrust::maximum<int>());
275  *  // result == 3
276  *  \endcode
277  *
278  *  \see http://www.sgi.com/tech/stl/accumulate.html
279  *  \see transform_reduce
280  */
281 template<typename DerivedPolicy,
282          typename InputIterator,
283          typename T,
284          typename BinaryFunction>
285 __host__ __device__
286   T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
287            InputIterator first,
288            InputIterator last,
289            T init,
290            BinaryFunction binary_op);
291 
292 
293 /*! \p reduce is a generalization of summation: it computes the sum (or some
294  *  other binary operation) of all the elements in the range <tt>[first,
295  *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
296  *  reduction and \p binary_op as the binary function used for summation. \p reduce
297  *  is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
298  *  The primary difference between the two functions is that <tt>std::accumulate</tt>
299  *  guarantees the order of summation, while \p reduce requires associativity of
300  *  \p binary_op to parallelize the reduction.
301  *
302  *  Note that \p reduce also assumes that the binary reduction operator (in this
303  *  case \p binary_op) is commutative.  If the reduction operator is not commutative
304  *  then \p thrust::reduce should not be used.  Instead, one could use
305  *  \p inclusive_scan (which does not require commutativity) and select the
306  *  last element of the output array.
307  *
308  *  \param first The beginning of the input sequence.
309  *  \param last The end of the input sequence.
310  *  \param init The initial value.
311  *  \param binary_op The binary function used to 'sum' values.
312  *  \return The result of the reduction.
313  *
314  *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
315  *          and \c InputIterator's \c value_type is convertible to \c T.
316  *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
317  *          and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
318  *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
319  *          and \p BinaryFunction's \c result_type is convertible to \p OutputType.
320  *
321  *  The following code snippet demonstrates how to use \p reduce to
322  *  compute the maximum value of a sequence of integers.
323  *
324  *  \code
325  *  #include <thrust/reduce.h>
326  *  #include <thrust/functional.h>
327  *  ...
328  *  int data[6] = {1, 0, 2, 2, 1, 3};
329  *  int result = thrust::reduce(data, data + 6,
330  *                              -1,
331  *                              thrust::maximum<int>());
332  *  // result == 3
333  *  \endcode
334  *
335  *  \see http://www.sgi.com/tech/stl/accumulate.html
336  *  \see transform_reduce
337  */
338 template<typename InputIterator,
339          typename T,
340          typename BinaryFunction>
341   T reduce(InputIterator first,
342            InputIterator last,
343            T init,
344            BinaryFunction binary_op);
345 
346 
347 /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
348  *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
349  *  that are equal, \p reduce_by_key copies the first element of the group to the
350  *  \c keys_output. The corresponding values in the range are reduced using the
351  *  \c plus and the result copied to \c values_output.
352  *
353  *  This version of \p reduce_by_key uses the function object \c equal_to
354  *  to test for equality and \c plus to reduce values with equal keys.
355  *
356  *  The algorithm's execution is parallelized as determined by \p exec.
357  *
358  *  \param exec The execution policy to use for parallelization.
359  *  \param keys_first The beginning of the input key range.
360  *  \param keys_last  The end of the input key range.
361  *  \param values_first The beginning of the input value range.
362  *  \param keys_output The beginning of the output key range.
363  *  \param values_output The beginning of the output value range.
364  *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
365  *
366  *  \tparam DerivedPolicy The name of the derived execution policy.
367  *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
368  *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
369  *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
370  *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
371  *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
372  *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
373  *
374  *  \pre The input ranges shall not overlap either output range.
375  *
376  *  The following code snippet demonstrates how to use \p reduce_by_key to
377  *  compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
378  *  execution policy for parallelization:
379  *
380  *  \code
381  *  #include <thrust/reduce.h>
382  *  #include <thrust/execution_policy.h>
383  *  ...
384  *  const int N = 7;
385  *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
386  *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
387  *  int C[N];                         // output keys
388  *  int D[N];                         // output values
389  *
390  *  thrust::pair<int*,int*> new_end;
391  *  new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D);
392  *
393  *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
394  *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
395  *  \endcode
396  *
397  *  \see reduce
398  *  \see unique_copy
399  *  \see unique_by_key
400  *  \see unique_by_key_copy
401  */
402 template<typename DerivedPolicy,
403          typename InputIterator1,
404          typename InputIterator2,
405          typename OutputIterator1,
406          typename OutputIterator2>
407 __host__ __device__
408   thrust::pair<OutputIterator1,OutputIterator2>
409   reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
410                 InputIterator1 keys_first,
411                 InputIterator1 keys_last,
412                 InputIterator2 values_first,
413                 OutputIterator1 keys_output,
414                 OutputIterator2 values_output);
415 
416 
417 /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
418  *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
419  *  that are equal, \p reduce_by_key copies the first element of the group to the
420  *  \c keys_output. The corresponding values in the range are reduced using the
421  *  \c plus and the result copied to \c values_output.
422  *
423  *  This version of \p reduce_by_key uses the function object \c equal_to
424  *  to test for equality and \c plus to reduce values with equal keys.
425  *
426  *  \param keys_first The beginning of the input key range.
427  *  \param keys_last  The end of the input key range.
428  *  \param values_first The beginning of the input value range.
429  *  \param keys_output The beginning of the output key range.
430  *  \param values_output The beginning of the output value range.
431  *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
432  *
433  *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
434  *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
435  *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
436  *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
437  *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
438  *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
439  *
440  *  \pre The input ranges shall not overlap either output range.
441  *
442  *  The following code snippet demonstrates how to use \p reduce_by_key to
443  *  compact a sequence of key/value pairs and sum values with equal keys.
444  *
445  *  \code
446  *  #include <thrust/reduce.h>
447  *  ...
448  *  const int N = 7;
449  *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
450  *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
451  *  int C[N];                         // output keys
452  *  int D[N];                         // output values
453  *
454  *  thrust::pair<int*,int*> new_end;
455  *  new_end = thrust::reduce_by_key(A, A + N, B, C, D);
456  *
457  *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
458  *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
459  *  \endcode
460  *
461  *  \see reduce
462  *  \see unique_copy
463  *  \see unique_by_key
464  *  \see unique_by_key_copy
465  */
466 template<typename InputIterator1,
467          typename InputIterator2,
468          typename OutputIterator1,
469          typename OutputIterator2>
470   thrust::pair<OutputIterator1,OutputIterator2>
471   reduce_by_key(InputIterator1 keys_first,
472                 InputIterator1 keys_last,
473                 InputIterator2 values_first,
474                 OutputIterator1 keys_output,
475                 OutputIterator2 values_output);
476 
477 
478 /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
479  *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
480  *  that are equal, \p reduce_by_key copies the first element of the group to the
481  *  \c keys_output. The corresponding values in the range are reduced using the
482  *  \c plus and the result copied to \c values_output.
483  *
484  *  This version of \p reduce_by_key uses the function object \c binary_pred
485  *  to test for equality and \c plus to reduce values with equal keys.
486  *
487  *  The algorithm's execution is parallelized as determined by \p exec.
488  *
489  *  \param exec The execution policy to use for parallelization.
490  *  \param keys_first The beginning of the input key range.
491  *  \param keys_last  The end of the input key range.
492  *  \param values_first The beginning of the input value range.
493  *  \param keys_output The beginning of the output key range.
494  *  \param values_output The beginning of the output value range.
495  *  \param binary_pred  The binary predicate used to determine equality.
496  *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
497  *
498  *  \tparam DerivedPolicy The name of the derived execution policy.
499  *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
500  *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
501  *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
502  *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
503  *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
504  *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
505  *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
506  *
507  *  \pre The input ranges shall not overlap either output range.
508  *
509  *  The following code snippet demonstrates how to use \p reduce_by_key to
510  *  compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
511  *  execution policy for parallelization:
512  *
513  *  \code
514  *  #include <thrust/reduce.h>
515  *  #include <thrust/execution_policy.h>
516  *  ...
517  *  const int N = 7;
518  *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
519  *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
520  *  int C[N];                         // output keys
521  *  int D[N];                         // output values
522  *
523  *  thrust::pair<int*,int*> new_end;
524  *  thrust::equal_to<int> binary_pred;
525  *  new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred);
526  *
527  *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
528  *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
529  *  \endcode
530  *
531  *  \see reduce
532  *  \see unique_copy
533  *  \see unique_by_key
534  *  \see unique_by_key_copy
535  */
536 template<typename DerivedPolicy,
537          typename InputIterator1,
538          typename InputIterator2,
539          typename OutputIterator1,
540          typename OutputIterator2,
541          typename BinaryPredicate>
542 __host__ __device__
543   thrust::pair<OutputIterator1,OutputIterator2>
544   reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
545                 InputIterator1 keys_first,
546                 InputIterator1 keys_last,
547                 InputIterator2 values_first,
548                 OutputIterator1 keys_output,
549                 OutputIterator2 values_output,
550                 BinaryPredicate binary_pred);
551 
552 
553 /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
554  *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
555  *  that are equal, \p reduce_by_key copies the first element of the group to the
556  *  \c keys_output. The corresponding values in the range are reduced using the
557  *  \c plus and the result copied to \c values_output.
558  *
559  *  This version of \p reduce_by_key uses the function object \c binary_pred
560  *  to test for equality and \c plus to reduce values with equal keys.
561  *
562  *  \param keys_first The beginning of the input key range.
563  *  \param keys_last  The end of the input key range.
564  *  \param values_first The beginning of the input value range.
565  *  \param keys_output The beginning of the output key range.
566  *  \param values_output The beginning of the output value range.
567  *  \param binary_pred  The binary predicate used to determine equality.
568  *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
569  *
570  *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
571  *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
572  *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
573  *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
574  *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
575  *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
576  *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
577  *
578  *  \pre The input ranges shall not overlap either output range.
579  *
580  *  The following code snippet demonstrates how to use \p reduce_by_key to
581  *  compact a sequence of key/value pairs and sum values with equal keys.
582  *
583  *  \code
584  *  #include <thrust/reduce.h>
585  *  ...
586  *  const int N = 7;
587  *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
588  *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
589  *  int C[N];                         // output keys
590  *  int D[N];                         // output values
591  *
592  *  thrust::pair<int*,int*> new_end;
593  *  thrust::equal_to<int> binary_pred;
594  *  new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred);
595  *
596  *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
597  *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
598  *  \endcode
599  *
600  *  \see reduce
601  *  \see unique_copy
602  *  \see unique_by_key
603  *  \see unique_by_key_copy
604  */
605 template<typename InputIterator1,
606          typename InputIterator2,
607          typename OutputIterator1,
608          typename OutputIterator2,
609          typename BinaryPredicate>
610   thrust::pair<OutputIterator1,OutputIterator2>
611   reduce_by_key(InputIterator1 keys_first,
612                 InputIterator1 keys_last,
613                 InputIterator2 values_first,
614                 OutputIterator1 keys_output,
615                 OutputIterator2 values_output,
616                 BinaryPredicate binary_pred);
617 
618 
619 /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
620  *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
621  *  that are equal, \p reduce_by_key copies the first element of the group to the
622  *  \c keys_output. The corresponding values in the range are reduced using the
623  *  \c BinaryFunction \c binary_op and the result copied to \c values_output.
624  *  Specifically, if consecutive key iterators \c i and \c (i + 1) are
625  *  such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
626  *  values are reduced to a single value with \c binary_op.
627  *
628  *  This version of \p reduce_by_key uses the function object \c binary_pred
629  *  to test for equality and \c binary_op to reduce values with equal keys.
630  *
631  *  The algorithm's execution is parallelized as determined by \p exec.
632  *
633  *  \param exec The execution policy to use for parallelization.
634  *  \param keys_first The beginning of the input key range.
635  *  \param keys_last  The end of the input key range.
636  *  \param values_first The beginning of the input value range.
637  *  \param keys_output The beginning of the output key range.
638  *  \param values_output The beginning of the output value range.
639  *  \param binary_pred  The binary predicate used to determine equality.
640  *  \param binary_op The binary function used to accumulate values.
641  *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
642  *
643  *  \tparam DerivedPolicy The name of the derived execution policy.
644  *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
645  *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
646  *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
647  *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
648  *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
649  *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
650  *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
651  *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
652  *          and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type.
653  *
654  *  \pre The input ranges shall not overlap either output range.
655  *
656  *  The following code snippet demonstrates how to use \p reduce_by_key to
657  *  compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
658  *  execution policy for parallelization:
659  *
660  *  \code
661  *  #include <thrust/reduce.h>
662  *  #include <thrust/execution_policy.h>
663  *  ...
664  *  const int N = 7;
665  *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
666  *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
667  *  int C[N];                         // output keys
668  *  int D[N];                         // output values
669  *
670  *  thrust::pair<int*,int*> new_end;
671  *  thrust::equal_to<int> binary_pred;
672  *  thrust::plus<int> binary_op;
673  *  new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred, binary_op);
674  *
675  *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
676  *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
677  *  \endcode
678  *
679  *  \see reduce
680  *  \see unique_copy
681  *  \see unique_by_key
682  *  \see unique_by_key_copy
683  */
684 template<typename DerivedPolicy,
685          typename InputIterator1,
686          typename InputIterator2,
687          typename OutputIterator1,
688          typename OutputIterator2,
689          typename BinaryPredicate,
690          typename BinaryFunction>
691 __host__ __device__
692   thrust::pair<OutputIterator1,OutputIterator2>
693   reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
694                 InputIterator1 keys_first,
695                 InputIterator1 keys_last,
696                 InputIterator2 values_first,
697                 OutputIterator1 keys_output,
698                 OutputIterator2 values_output,
699                 BinaryPredicate binary_pred,
700                 BinaryFunction binary_op);
701 
702 
703 /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
704  *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
705  *  that are equal, \p reduce_by_key copies the first element of the group to the
706  *  \c keys_output. The corresponding values in the range are reduced using the
707  *  \c BinaryFunction \c binary_op and the result copied to \c values_output.
708  *  Specifically, if consecutive key iterators \c i and \c (i + 1) are
709  *  such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
710  *  values are reduced to a single value with \c binary_op.
711  *
712  *  This version of \p reduce_by_key uses the function object \c binary_pred
713  *  to test for equality and \c binary_op to reduce values with equal keys.
714  *
715  *  \param keys_first The beginning of the input key range.
716  *  \param keys_last  The end of the input key range.
717  *  \param values_first The beginning of the input value range.
718  *  \param keys_output The beginning of the output key range.
719  *  \param values_output The beginning of the output value range.
720  *  \param binary_pred  The binary predicate used to determine equality.
721  *  \param binary_op The binary function used to accumulate values.
722  *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
723  *
724  *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
725  *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
726  *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
727  *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
728  *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
729  *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
730  *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
731  *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
732  *          and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type.
733  *
734  *  \pre The input ranges shall not overlap either output range.
735  *
736  *  The following code snippet demonstrates how to use \p reduce_by_key to
737  *  compact a sequence of key/value pairs and sum values with equal keys.
738  *
739  *  \code
740  *  #include <thrust/reduce.h>
741  *  ...
742  *  const int N = 7;
743  *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
744  *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
745  *  int C[N];                         // output keys
746  *  int D[N];                         // output values
747  *
748  *  thrust::pair<int*,int*> new_end;
749  *  thrust::equal_to<int> binary_pred;
750  *  thrust::plus<int> binary_op;
751  *  new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred, binary_op);
752  *
753  *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
754  *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
755  *  \endcode
756  *
757  *  \see reduce
758  *  \see unique_copy
759  *  \see unique_by_key
760  *  \see unique_by_key_copy
761  */
762 template<typename InputIterator1,
763          typename InputIterator2,
764          typename OutputIterator1,
765          typename OutputIterator2,
766          typename BinaryPredicate,
767          typename BinaryFunction>
768   thrust::pair<OutputIterator1,OutputIterator2>
769   reduce_by_key(InputIterator1 keys_first,
770                 InputIterator1 keys_last,
771                 InputIterator2 values_first,
772                 OutputIterator1 keys_output,
773                 OutputIterator2 values_output,
774                 BinaryPredicate binary_pred,
775                 BinaryFunction binary_op);
776 
777 
778 /*! \} // end reductions
779  */
780 
781 
782 } // end namespace thrust
783 
784 #include <thrust/detail/reduce.inl>
785 
786