1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
10 #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
11 
12 #include <__algorithm/pstl_backends/cpu_backends/backend.h>
13 #include <__config>
14 #include <__iterator/iterator_traits.h>
15 #include <__numeric/transform_reduce.h>
16 #include <__type_traits/is_arithmetic.h>
17 #include <__type_traits/is_execution_policy.h>
18 #include <__type_traits/operation_traits.h>
19 #include <__utility/move.h>
20 #include <__utility/terminate_on_exception.h>
21 #include <new>
22 
23 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
24 #  pragma GCC system_header
25 #endif
26 
27 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
28 
29 _LIBCPP_BEGIN_NAMESPACE_STD
30 
31 template <
32     typename _DifferenceType,
33     typename _Tp,
34     typename _BinaryOperation,
35     typename _UnaryOperation,
36     __enable_if_t<__is_trivial_plus_operation<_BinaryOperation, _Tp, _Tp>::value && is_arithmetic_v<_Tp>, int> = 0>
37 _LIBCPP_HIDE_FROM_ABI _Tp
38 __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept {
39   _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init)
40   for (_DifferenceType __i = 0; __i < __n; ++__i)
41     __init += __f(__i);
42   return __init;
43 }
44 
45 template <
46     typename _Size,
47     typename _Tp,
48     typename _BinaryOperation,
49     typename _UnaryOperation,
50     __enable_if_t<!(__is_trivial_plus_operation<_BinaryOperation, _Tp, _Tp>::value && is_arithmetic_v<_Tp>), int> = 0>
51 _LIBCPP_HIDE_FROM_ABI _Tp
52 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
53   const _Size __block_size = __lane_size / sizeof(_Tp);
54   if (__n > 2 * __block_size && __block_size > 1) {
55     alignas(__lane_size) char __lane_buffer[__lane_size];
56     _Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer);
57 
58     // initializer
59     _PSTL_PRAGMA_SIMD
60     for (_Size __i = 0; __i < __block_size; ++__i) {
61       ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i)));
62     }
63     // main loop
64     _Size __i                    = 2 * __block_size;
65     const _Size __last_iteration = __block_size * (__n / __block_size);
66     for (; __i < __last_iteration; __i += __block_size) {
67       _PSTL_PRAGMA_SIMD
68       for (_Size __j = 0; __j < __block_size; ++__j) {
69         __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j));
70       }
71     }
72     // remainder
73     _PSTL_PRAGMA_SIMD
74     for (_Size __j = 0; __j < __n - __last_iteration; ++__j) {
75       __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j));
76     }
77     // combiner
78     for (_Size __j = 0; __j < __block_size; ++__j) {
79       __init = __binary_op(std::move(__init), std::move(__lane[__j]));
80     }
81     // destroyer
82     _PSTL_PRAGMA_SIMD
83     for (_Size __j = 0; __j < __block_size; ++__j) {
84       __lane[__j].~_Tp();
85     }
86   } else {
87     for (_Size __i = 0; __i < __n; ++__i) {
88       __init = __binary_op(std::move(__init), __f(__i));
89     }
90   }
91   return __init;
92 }
93 
94 template <class _ExecutionPolicy,
95           class _ForwardIterator1,
96           class _ForwardIterator2,
97           class _Tp,
98           class _BinaryOperation1,
99           class _BinaryOperation2>
100 _LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce(
101     __cpu_backend_tag,
102     _ForwardIterator1 __first1,
103     _ForwardIterator1 __last1,
104     _ForwardIterator2 __first2,
105     _Tp __init,
106     _BinaryOperation1 __reduce,
107     _BinaryOperation2 __transform) {
108   if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
109                 __has_random_access_iterator_category<_ForwardIterator1>::value &&
110                 __has_random_access_iterator_category<_ForwardIterator2>::value) {
111     return std::__terminate_on_exception([&] {
112       return __par_backend::__parallel_transform_reduce(
113           __first1,
114           std::move(__last1),
115           [__first1, __first2, __transform](_ForwardIterator1 __iter) {
116             return __transform(*__iter, *(__first2 + (__iter - __first1)));
117           },
118           std::move(__init),
119           std::move(__reduce),
120           [__first1, __first2, __reduce, __transform](
121               _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) {
122             return std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>(
123                 __cpu_backend_tag{},
124                 __brick_first,
125                 std::move(__brick_last),
126                 __first2 + (__brick_first - __first1),
127                 std::move(__brick_init),
128                 std::move(__reduce),
129                 std::move(__transform));
130           });
131     });
132   } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
133                        __has_random_access_iterator_category<_ForwardIterator1>::value &&
134                        __has_random_access_iterator_category<_ForwardIterator2>::value) {
135     return std::__simd_transform_reduce(
136         __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
137           return __transform(__first1[__i], __first2[__i]);
138         });
139   } else {
140     return std::transform_reduce(
141         std::move(__first1),
142         std::move(__last1),
143         std::move(__first2),
144         std::move(__init),
145         std::move(__reduce),
146         std::move(__transform));
147   }
148 }
149 
150 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
151 _LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce(
152     __cpu_backend_tag,
153     _ForwardIterator __first,
154     _ForwardIterator __last,
155     _Tp __init,
156     _BinaryOperation __reduce,
157     _UnaryOperation __transform) {
158   if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
159                 __has_random_access_iterator_category<_ForwardIterator>::value) {
160     return std::__terminate_on_exception([&] {
161       return __par_backend::__parallel_transform_reduce(
162           std::move(__first),
163           std::move(__last),
164           [__transform](_ForwardIterator __iter) { return __transform(*__iter); },
165           std::move(__init),
166           __reduce,
167           [__transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) {
168             return std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>(
169                 __cpu_backend_tag{},
170                 std::move(__brick_first),
171                 std::move(__brick_last),
172                 std::move(__brick_init),
173                 std::move(__reduce),
174                 std::move(__transform));
175           });
176     });
177   } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
178                        __has_random_access_iterator_category<_ForwardIterator>::value) {
179     return std::__simd_transform_reduce(
180         __last - __first,
181         std::move(__init),
182         std::move(__reduce),
183         [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); });
184   } else {
185     return std::transform_reduce(
186         std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform));
187   }
188 }
189 
190 _LIBCPP_END_NAMESPACE_STD
191 
192 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
193 
194 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
195