1 #ifndef PYTHONIC_NUMPY_ARGMINMAX_HPP
2 #define PYTHONIC_NUMPY_ARGMINMAX_HPP
3 
4 #include "pythonic/utils/functor.hpp"
5 #include "pythonic/types/ndarray.hpp"
6 #include "pythonic/numpy/asarray.hpp"
7 #include "pythonic/builtins/ValueError.hpp"
8 
9 PYTHONIC_NS_BEGIN
10 
11 namespace numpy
12 {
13   namespace details
14   {
15     template <class P, size_t... Is>
16     P iota(utils::index_sequence<Is...>)
17     {
18       return {static_cast<typename P::value_type>(Is)...};
19     }
20 
21     template <class P>
iota()22     P iota()
23     {
24       return iota<P>(utils::make_index_sequence<P::size>());
25     }
26   }
27   template <class Op, class E, class T>
_argminmax_seq(E const & elts,T & minmax_elts)28   long _argminmax_seq(E const &elts, T &minmax_elts)
29   {
30     long index = 0;
31     long res = -1;
32     for (auto const &elt : elts) {
33       if (Op::value(elt, minmax_elts)) {
34         minmax_elts = elt;
35         res = index;
36       }
37       ++index;
38     }
39 
40     return res;
41   }
42   template <class Op, class E, class T>
43 #ifdef USE_XSIMD
44   typename std::enable_if<
45       !E::is_vectorizable ||
46           !types::is_vector_op<typename Op::op, T, T>::value ||
47           std::is_same<typename E::dtype, bool>::value,
48       long>::type
49 #else
50   long
51 #endif
_argminmax(E const & elts,T & minmax_elts,utils::int_<1>)52   _argminmax(E const &elts, T &minmax_elts, utils::int_<1>)
53   {
54     return _argminmax_seq<Op>(elts, minmax_elts);
55   }
56 
57   template <class Op, class E, class T, class... Indices>
_argminmax_fast(E const & elts,T & minmax_elts,long current_pos,utils::int_<1>,Indices...indices)58   std::tuple<long, long> _argminmax_fast(E const &elts, T &minmax_elts,
59                                          long current_pos, utils::int_<1>,
60                                          Indices... indices)
61   {
62     long res = -1;
63     long n = elts.template shape<std::decay<E>::type::value - 1>();
64     for (long i = 0; i < n; ++i) {
65       auto elt = elts.load(indices..., i);
66       if (Op::value(elt, minmax_elts)) {
67         minmax_elts = elt;
68         res = current_pos + i;
69       }
70     }
71 
72     return std::make_tuple(res, current_pos + n);
73   }
74 
75 #ifdef USE_XSIMD
76   template <bool IsInt>
77   struct bool_caster;
78   template <>
79   struct bool_caster<true> {
80     template <class T>
operator ()numpy::bool_caster81     auto operator()(T const &value) -> decltype(xsimd::bool_cast(value))
82     {
83       return xsimd::bool_cast(value);
84     }
85   };
86   template <>
87   struct bool_caster<false> {
88     template <class T>
operator ()numpy::bool_caster89     T operator()(T const &value)
90     {
91       return value;
92     }
93   };
94 
95   template <class Op, class E, class T>
96   typename std::enable_if<
97       E::is_vectorizable && types::is_vector_op<typename Op::op, T, T>::value &&
98           !std::is_same<typename E::dtype, bool>::value,
99       long>::type
_argminmax(E const & elts,T & minmax_elts,utils::int_<1>)100   _argminmax(E const &elts, T &minmax_elts, utils::int_<1>)
101   {
102     using vT = xsimd::batch<T>;
103     using iT = xsimd::as_integer_t<T>;
104     static const size_t vN = vT::size;
105     const long n = elts.size();
106     if (n >= std::numeric_limits<iT>::max()) {
107       return _argminmax_seq<Op>(elts, minmax_elts);
108     }
109 
110     auto viter = types::vectorizer_nobroadcast::vbegin(elts),
111          vend = types::vectorizer_nobroadcast::vend(elts);
112 
113     const long bound = std::distance(viter, vend);
114     long minmax_index = -1;
115     if (bound > 0) {
116       auto vacc = *viter;
117       iT iota[vN] = {0};
118       for (long i = 0; i < (long)vN; ++i)
119         iota[i] = i;
120       xsimd::batch<iT> curr = xsimd::load_unaligned(iota);
121       xsimd::batch<iT> indices = curr;
122       xsimd::batch<iT> step(vN);
123 
124       for (++viter; viter != vend; ++viter) {
125         curr += step;
126         auto c = *viter;
127         vacc = typename Op::op{}(vacc, c);
128         auto mask = c == vacc;
129         indices =
130             xsimd::select(bool_caster<std::is_floating_point<T>::value>{}(mask),
131                           curr, indices);
132       }
133 
134       alignas(sizeof(vT)) T stored[vN];
135       vacc.store_aligned(&stored[0]);
136       alignas(sizeof(vT)) long indexed[vN];
137       indices.store_aligned(&indexed[0]);
138 
139       for (size_t j = 0; j < vN; ++j) {
140         if (Op::value(stored[j], minmax_elts)) {
141           minmax_elts = stored[j];
142           minmax_index = indexed[j];
143         }
144       }
145     }
146     auto iter = elts.begin() + bound * vN;
147 
148     for (long i = bound * vN; i < n; ++i, ++iter) {
149       if (Op::value(*iter, minmax_elts)) {
150         minmax_elts = *iter;
151         minmax_index = i;
152       }
153     }
154     return minmax_index;
155   }
156 #endif
157 
158   template <class Op, class E, size_t N, class T>
_argminmax(E const & elts,T & minmax_elts,utils::int_<N>)159   long _argminmax(E const &elts, T &minmax_elts, utils::int_<N>)
160   {
161     long current_pos = 0;
162     long current_minmaxarg = 0;
163     for (auto &&elt : elts) {
164       long v = _argminmax<Op>(elt, minmax_elts, utils::int_<N - 1>());
165       if (v >= 0)
166         current_minmaxarg = current_pos + v;
167       current_pos += elt.flat_size();
168     }
169     return current_minmaxarg;
170   }
171   template <class Op, class E, size_t N, class T, class... Indices>
172   typename std::enable_if<N != 1, std::tuple<long, long>>::type
_argminmax_fast(E const & elts,T & minmax_elts,long current_pos,utils::int_<N>,Indices...indices)173   _argminmax_fast(E const &elts, T &minmax_elts, long current_pos,
174                   utils::int_<N>, Indices... indices)
175   {
176     long current_minmaxarg = 0;
177     for (long i = 0, n = elts.template shape<std::decay<E>::type::value - N>();
178          i < n; ++i) {
179       long v;
180       std::tie(v, current_pos) = _argminmax_fast<Op>(
181           elts, minmax_elts, current_pos, utils::int_<N - 1>(), indices..., i);
182       if (v >= 0)
183         current_minmaxarg = v;
184     }
185     return std::make_tuple(current_minmaxarg, current_pos);
186   }
187 
188   template <class Op, class E>
argminmax(E const & expr)189   long argminmax(E const &expr)
190   {
191     if (!expr.flat_size())
192       throw types::ValueError("empty sequence");
193     using elt_type = typename E::dtype;
194     elt_type argminmax_value = Op::limit();
195 #ifndef USE_XSIMD
196     if (utils::no_broadcast_ex(expr)) {
197       return std::get<0>(_argminmax_fast<Op>(expr, argminmax_value, 0,
198                                              utils::int_<E::value>()));
199     } else
200 #endif
201       return _argminmax<Op>(expr, argminmax_value, utils::int_<E::value>());
202   }
203 
204   template <class Op, size_t Dim, size_t Axis, class T, class E, class V>
_argminmax_tail(T && out,E const & expr,long curr,V && curr_minmax,std::integral_constant<size_t,0>)205   void _argminmax_tail(T &&out, E const &expr, long curr, V &&curr_minmax,
206                        std::integral_constant<size_t, 0>)
207   {
208     if (Op::value(expr, curr_minmax)) {
209       out = curr;
210       curr_minmax = expr;
211     }
212   }
213 
214   template <class Op, size_t Dim, size_t Axis, class T, class E, class V,
215             size_t N>
216   typename std::enable_if<Axis != (Dim - N), void>::type
_argminmax_tail(T && out,E const & expr,long curr,V && curr_minmax,std::integral_constant<size_t,N>)217   _argminmax_tail(T &&out, E const &expr, long curr, V &&curr_minmax,
218                   std::integral_constant<size_t, N>)
219   {
220     static_assert(N >= 1, "specialization ok");
221     long i = 0;
222     for (auto &&elt : expr) {
223       _argminmax_tail<Op, Dim, Axis>(out.fast(i), elt, curr,
224                                      curr_minmax.fast(i),
225                                      std::integral_constant<size_t, N - 1>());
226       ++i;
227     }
228   }
229 
230   template <class Op, size_t Dim, size_t Axis, class T, class E>
231   typename std::enable_if<Axis == (Dim - 1), void>::type
_argminmax_head(T && out,E const & expr,std::integral_constant<size_t,1>)232   _argminmax_head(T &&out, E const &expr, std::integral_constant<size_t, 1>)
233   {
234     typename E::dtype val = Op::limit();
235     long i = 0;
236     for (auto &&elt : expr)
237       _argminmax_tail<Op, Dim, Axis>(out, elt, i++, val,
238                                      std::integral_constant<size_t, 0>());
239   }
240 
241   template <class Op, size_t Dim, size_t Axis, class T, class E, size_t N>
242   typename std::enable_if<Axis == (Dim - N), void>::type
_argminmax_head(T && out,E const & expr,std::integral_constant<size_t,N>)243   _argminmax_head(T &&out, E const &expr, std::integral_constant<size_t, N>)
244   {
245     static_assert(N > 1, "specialization ok");
246     types::ndarray<typename E::dtype, types::array<long, N - 1>> val{
247         sutils::getshape(out), Op::limit()};
248     long i = 0;
249     for (auto &&elt : expr) {
250       _argminmax_tail<Op, Dim, Axis>(out, elt, i++, val,
251                                      std::integral_constant<size_t, N - 1>());
252     }
253   }
254 
255   template <class Op, size_t Dim, size_t Axis, class T, class E, size_t N>
256   typename std::enable_if<Axis != (Dim - N), void>::type
_argminmax_head(T && out,E const & expr,std::integral_constant<size_t,N>)257   _argminmax_head(T &&out, E const &expr, std::integral_constant<size_t, N>)
258   {
259     static_assert(N >= 1, "specialization ok");
260     auto out_iter = out.begin();
261     for (auto &&elt : expr) {
262       _argminmax_head<Op, Dim, Axis>(*out_iter, elt,
263                                      std::integral_constant<size_t, N - 1>());
264       ++out_iter;
265     }
266   }
267 
268   template <class Op, size_t N, class T, class E, size_t... Axis>
_argminmax_pick_axis(long axis,T && out,E const & expr,utils::index_sequence<Axis...>)269   void _argminmax_pick_axis(long axis, T &&out, E const &expr,
270                             utils::index_sequence<Axis...>)
271   {
272     (void)std::initializer_list<bool>{
273         ((Axis == axis) && (_argminmax_head<Op, N, Axis>(
274                                 out, expr, std::integral_constant<size_t, N>()),
275                             true))...};
276   }
277 
278   template <class Op, class E>
279   types::ndarray<long, types::array<long, E::value - 1>>
argminmax(E const & array,long axis)280   argminmax(E const &array, long axis)
281   {
282     if (axis < 0)
283       axis += E::value;
284     if (axis < 0 || size_t(axis) >= E::value)
285       throw types::ValueError("axis out of bounds");
286     auto shape = sutils::getshape(array);
287     types::array<long, E::value - 1> shp;
288     auto next = std::copy(shape.begin(), shape.begin() + axis, shp.begin());
289     std::copy(shape.begin() + axis + 1, shape.end(), next);
290     types::ndarray<long, types::array<long, E::value - 1>> out{shp,
291                                                                builtins::None};
292     _argminmax_pick_axis<Op, E::value>(axis, out, array,
293                                        utils::make_index_sequence<E::value>());
294     return out;
295   }
296 }
297 PYTHONIC_NS_END
298 
299 #endif
300