1 #ifndef PYTHONIC_NUMPY_REDUCE_HPP
2 #define PYTHONIC_NUMPY_REDUCE_HPP
3 
4 #include "pythonic/include/numpy/reduce.hpp"
5 
6 #include "pythonic/types/ndarray.hpp"
7 #include "pythonic/builtins/None.hpp"
8 #include "pythonic/builtins/ValueError.hpp"
9 #include "pythonic/utils/neutral.hpp"
10 
11 #ifdef USE_XSIMD
12 #include <xsimd/xsimd.hpp>
13 #endif
14 
15 #include <algorithm>
16 
17 PYTHONIC_NS_BEGIN
18 
19 namespace numpy
20 {
21   template <class Op, size_t N, class vector_form>
22   struct _reduce {
23     template <class E, class F>
operator ()numpy::_reduce24     F operator()(E &&e, F acc)
25     {
26       for (auto &&value : std::forward<E>(e))
27         acc = _reduce<Op, N - 1, vector_form>{}(
28             std::forward<decltype(value)>(value), acc);
29 
30       return acc;
31     }
32   };
33 
34   template <class Op, class vector_form>
35   struct _reduce<Op, 1, vector_form> {
36     template <class E, class F>
operator ()numpy::_reduce37     F operator()(E &&e, F acc)
38     {
39       for (auto value : std::forward<E>(e)) {
40         Op{}(acc, value);
41       }
42 
43       return acc;
44     }
45   };
46 
47   template <class Op, size_t N>
48   struct _reduce<Op, N, types::novectorize_nobroadcast> {
49     template <class E, class F, class... Indices>
50     F operator()(E &&e, F acc, Indices... indices)
51     {
52       for (long i = 0, n = e.template shape<std::decay<E>::type::value - N>();
53            i < n; ++i) {
54         acc = _reduce<Op, N - 1, types::novectorize_nobroadcast>{}(
55             e, acc, indices..., i);
56       }
57       return acc;
58     }
59   };
60 
61   template <class Op>
62   struct _reduce<Op, 1, types::novectorize_nobroadcast> {
63     template <class E, class F, class... Indices>
64     F operator()(E &&e, F acc, Indices... indices)
65     {
66       for (long i = 0, n = e.template shape<std::decay<E>::type::value - 1>();
67            i < n; ++i) {
68         Op{}(acc, e.load(indices..., i));
69       }
70       return acc;
71     }
72   };
73 
74 #ifdef USE_XSIMD
75   template <class vectorizer, class Op, class E, class F>
vreduce(E e,F acc)76   F vreduce(E e, F acc)
77   {
78     using T = typename E::dtype;
79     using vT = xsimd::batch<T>;
80     static const size_t vN = vT::size;
81     const long n = e.size();
82     auto viter = vectorizer::vbegin(e), vend = vectorizer::vend(e);
83     const long bound = std::distance(viter, vend);
84     if (bound > 0) {
85       auto vacc = *viter;
86       for (++viter; viter != vend; ++viter)
87         Op{}(vacc, *viter);
88       alignas(sizeof(vT)) T stored[vN];
89       vacc.store_aligned(&stored[0]);
90       for (size_t j = 0; j < vN; ++j)
91         Op{}(acc, stored[j]);
92     }
93     auto iter = e.begin() + bound * vN;
94 
95     for (long i = bound * vN; i < n; ++i, ++iter) {
96       Op{}(acc, *iter);
97     }
98     return acc;
99   }
100 
101   template <class Op>
102   struct _reduce<Op, 1, types::vectorizer> {
103     template <class E, class F>
operator ()numpy::_reduce104     F operator()(E &&e, F acc)
105     {
106       return vreduce<types::vectorizer, Op>(std::forward<E>(e), acc);
107     }
108   };
109   template <class Op>
110   struct _reduce<Op, 1, types::vectorizer_nobroadcast> {
111     template <class E, class F>
operator ()numpy::_reduce112     F operator()(E &&e, F acc)
113     {
114       return vreduce<types::vectorizer_nobroadcast, Op>(std::forward<E>(e),
115                                                         acc);
116     }
117   };
118 #else
119   template <class Op, size_t N>
120   struct _reduce<Op, N, types::vectorizer_nobroadcast>
121       : _reduce<Op, N, types::novectorize_nobroadcast> {
122   };
123   template <class Op>
124   struct _reduce<Op, 1, types::vectorizer_nobroadcast>
125       : _reduce<Op, 1, types::novectorize_nobroadcast> {
126   };
127 #endif
128   template <class Op, class E, bool vector_form>
129   struct reduce_helper;
130 
131   template <class Op, class E>
132   struct reduce_helper<Op, E, false> {
133     template <class T>
operator ()numpy::reduce_helper134     reduce_result_type<Op, E> operator()(E const &expr, T p) const
135     {
136       if (utils::no_broadcast_ex(expr))
137         return _reduce<Op, E::value, types::novectorize_nobroadcast>{}(expr, p);
138       else
139         return _reduce<Op, E::value, types::novectorize>{}(expr, p);
140     }
141   };
142   template <class Op, class E>
143   struct reduce_helper<Op, E, true> {
144     template <class T>
operator ()numpy::reduce_helper145     reduce_result_type<Op, E> operator()(E const &expr, T p) const
146     {
147       if (utils::no_broadcast_vectorize(expr))
148         return _reduce<Op, E::value, types::vectorizer_nobroadcast>{}(expr, p);
149       else
150         return _reduce<Op, E::value, types::vectorizer>{}(expr, p);
151     }
152   };
153   template <class Op, class E>
154   typename std::enable_if<
155       std::is_scalar<E>::value || types::is_complex<E>::value, E>::type
reduce(E const & expr,types::none_type)156   reduce(E const &expr, types::none_type)
157   {
158     return expr;
159   }
160 
161   template <class Op, class E>
162   typename std::enable_if<
163       std::is_scalar<E>::value || types::is_complex<E>::value, E>::type
reduce(E const & array,long axis)164   reduce(E const &array, long axis)
165   {
166     if (axis != 0)
167       throw types::ValueError("axis out of bounds");
168     return reduce<Op>(array);
169   }
170 
171   template <class Op, class E, class dtype>
172   typename std::enable_if<types::is_numexpr_arg<E>::value,
173                           reduce_result_type<Op, E, dtype>>::type
reduce(E const & expr,types::none_type axis,dtype)174   reduce(E const &expr, types::none_type axis, dtype)
175   {
176     using rrt = reduce_result_type<Op, E, dtype>;
177     bool constexpr is_vectorizable =
178         E::is_vectorizable && !std::is_same<typename E::dtype, bool>::value &&
179         std::is_same<rrt, typename E::dtype>::value;
180     rrt p = utils::neutral<Op, rrt>::value;
181     return reduce_helper<Op, E, is_vectorizable>{}(expr, p);
182   }
183 
184   template <class Op, class E, class dtype>
185   typename std::enable_if<E::value == 1, reduce_result_type<Op, E, dtype>>::type
reduce(E const & array,long axis,dtype d,types::none_type)186   reduce(E const &array, long axis, dtype d, types::none_type)
187   {
188     if (axis != 0)
189       throw types::ValueError("axis out of bounds");
190     return reduce<Op>(array, types::none_type{}, d);
191   }
192 
193   template <class Op, class E, class Out>
194   typename std::enable_if<E::value == 1, reduce_result_type<Op, E>>::type
reduce(E const & array,long axis,types::none_type,Out && out)195   reduce(E const &array, long axis, types::none_type, Out &&out)
196   {
197     if (axis != 0)
198       throw types::ValueError("axis out of bounds");
199     return std::forward<Out>(out) = reduce<Op>(array);
200   }
201 
202   template <class Op, size_t N>
203   struct _reduce_axisb {
204     template <class E, class F, class EIndices, class FIndices>
operator ()numpy::_reduce_axisb205     void operator()(E &&e, F &&f, long axis, EIndices &&e_indices,
206                     FIndices &&f_indices)
207     {
208       for (long i = 0, n = e.template shape<std::decay<E>::type::value - N>();
209            i < n; ++i) {
210         _reduce_axisb<Op, N - 1>{}(
211             e, f, axis, std::tuple_cat(e_indices, std::make_tuple(i)),
212             std::tuple_cat(f_indices, std::make_tuple(i)));
213       }
214     }
215   };
216 
217   template <class Op>
218   struct _reduce_axisb<Op, 0> {
219     template <class E, class F, class EIndices, class FIndices, size_t... Es,
220               size_t... Fs>
helpernumpy::_reduce_axisb221     void helper(E &&e, F &&f, EIndices &&e_indices, FIndices &&f_indices,
222                 utils::index_sequence<Es...>, utils::index_sequence<Fs...>)
223     {
224       f.template update<Op>(e.load(std::get<Es>(e_indices)...),
225                             (long)std::get<Fs>(f_indices)...);
226     }
227     template <class E, class F, class EIndices, class FIndices>
operator ()numpy::_reduce_axisb228     void operator()(E &&e, F &&f, long axis, EIndices &&e_indices,
229                     FIndices &&f_indices)
230     {
231       helper(
232           std::forward<E>(e), std::forward<F>(f), e_indices, f_indices,
233           utils::make_index_sequence<
234               std::tuple_size<typename std::decay<EIndices>::type>::value>(),
235           utils::make_index_sequence<
236               std::tuple_size<typename std::decay<FIndices>::type>::value>());
237     }
238   };
239 
240   template <class Op, size_t N>
241   struct _reduce_axis {
242     template <class E, class F, class EIndices, class FIndices>
operator ()numpy::_reduce_axis243     void operator()(E &&e, F &&f, long axis, EIndices &&e_indices,
244                     FIndices &&f_indices)
245     {
246       if (axis == std::decay<E>::type::value - N) {
247         for (long i = 0, n = e.template shape<std::decay<E>::type::value - N>();
248              i < n; ++i) {
249           _reduce_axisb<Op, N - 1>{}(
250               e, f, axis, std::tuple_cat(e_indices, std::make_tuple(i)),
251               std::forward<FIndices>(f_indices));
252         }
253       } else {
254         for (long i = 0, n = e.template shape<std::decay<E>::type::value - N>();
255              i < n; ++i) {
256           _reduce_axis<Op, N - 1>{}(
257               e, f, axis, std::tuple_cat(e_indices, std::make_tuple(i)),
258               std::tuple_cat(f_indices, std::make_tuple(i)));
259         }
260       }
261     }
262   };
263   template <class Op>
264   struct _reduce_axis<Op, 0> {
265     template <class E, class F, class EIndices, class FIndices>
operator ()numpy::_reduce_axis266     void operator()(E &&e, F &&f, long axis, EIndices &&e_indices,
267                     FIndices &&f_indices)
268     {
269     }
270   };
271 
272   template <class Op, class E, class dtype>
273   typename std::enable_if<E::value != 1, reduced_type<E, Op, dtype>>::type
reduce(E const & array,long axis,dtype,types::none_type)274   reduce(E const &array, long axis, dtype, types::none_type)
275   {
276     if (axis < 0)
277       axis += E::value;
278     if (axis < 0 || size_t(axis) >= E::value)
279       throw types::ValueError("axis out of bounds");
280     types::array<long, E::value - 1> shp;
281     auto tmp = sutils::getshape(array);
282     auto next = std::copy(tmp.begin(), tmp.begin() + axis, shp.begin());
283     std::copy(tmp.begin() + axis + 1, tmp.end(), next);
284     reduced_type<E, Op, dtype> out{shp, builtins::None};
285     std::fill(out.begin(), out.end(),
286               utils::neutral<Op, typename E::dtype>::value);
287     return reduce<Op>(array, axis, types::none_type{}, out);
288   }
289   template <class Op, class E, class Out>
290   typename std::enable_if<E::value != 1, reduced_type<E, Op>>::type
reduce(E const & array,long axis,types::none_type,Out && out)291   reduce(E const &array, long axis, types::none_type, Out &&out)
292   {
293     if (axis < 0)
294       axis += E::value;
295     if (axis < 0 || size_t(axis) >= E::value)
296       throw types::ValueError("axis out of bounds");
297     if (utils::no_broadcast(array)) {
298       std::fill(out.begin(), out.end(),
299                 utils::neutral<Op, typename E::dtype>::value);
300       _reduce_axis<Op, E::value>{}(array, std::forward<Out>(out), axis,
301                                    std::make_tuple(), std::make_tuple());
302       return std::forward<Out>(out);
303     } else {
304       if (axis == 0) {
305         std::fill(out.begin(), out.end(),
306                   utils::neutral<Op, typename E::dtype>::value);
307         return _reduce<Op, 1, types::novectorize /* not on scalars*/>{}(
308             array, std::forward<Out>(out));
309       } else {
310         std::transform(array.begin(), array.end(), out.begin(),
311                        [axis](typename E::const_iterator::value_type other) {
312                          return reduce<Op>(other, axis - 1);
313                        });
314         return std::forward<Out>(out);
315       }
316     }
317   }
318 }
319 PYTHONIC_NS_END
320 
321 #endif
322