1 #ifndef PYTHONIC_NUMPY_ARGMINMAX_HPP 2 #define PYTHONIC_NUMPY_ARGMINMAX_HPP 3 4 #include "pythonic/utils/functor.hpp" 5 #include "pythonic/types/ndarray.hpp" 6 #include "pythonic/numpy/asarray.hpp" 7 #include "pythonic/builtins/ValueError.hpp" 8 9 PYTHONIC_NS_BEGIN 10 11 namespace numpy 12 { 13 namespace details 14 { 15 template <class P, size_t... Is> 16 P iota(utils::index_sequence<Is...>) 17 { 18 return {static_cast<typename P::value_type>(Is)...}; 19 } 20 21 template <class P> iota()22 P iota() 23 { 24 return iota<P>(utils::make_index_sequence<P::size>()); 25 } 26 } 27 template <class Op, class E, class T> _argminmax_seq(E const & elts,T & minmax_elts)28 long _argminmax_seq(E const &elts, T &minmax_elts) 29 { 30 long index = 0; 31 long res = -1; 32 for (auto const &elt : elts) { 33 if (Op::value(elt, minmax_elts)) { 34 minmax_elts = elt; 35 res = index; 36 } 37 ++index; 38 } 39 40 return res; 41 } 42 template <class Op, class E, class T> 43 #ifdef USE_XSIMD 44 typename std::enable_if< 45 !E::is_vectorizable || 46 !types::is_vector_op<typename Op::op, T, T>::value || 47 std::is_same<typename E::dtype, bool>::value, 48 long>::type 49 #else 50 long 51 #endif _argminmax(E const & elts,T & minmax_elts,utils::int_<1>)52 _argminmax(E const &elts, T &minmax_elts, utils::int_<1>) 53 { 54 return _argminmax_seq<Op>(elts, minmax_elts); 55 } 56 57 template <class Op, class E, class T, class... Indices> _argminmax_fast(E const & elts,T & minmax_elts,long current_pos,utils::int_<1>,Indices...indices)58 std::tuple<long, long> _argminmax_fast(E const &elts, T &minmax_elts, 59 long current_pos, utils::int_<1>, 60 Indices... indices) 61 { 62 long res = -1; 63 long n = elts.template shape<std::decay<E>::type::value - 1>(); 64 for (long i = 0; i < n; ++i) { 65 auto elt = elts.load(indices..., i); 66 if (Op::value(elt, minmax_elts)) { 67 minmax_elts = elt; 68 res = current_pos + i; 69 } 70 } 71 72 return std::make_tuple(res, current_pos + n); 73 } 74 75 #ifdef USE_XSIMD 76 template <bool IsInt> 77 struct bool_caster; 78 template <> 79 struct bool_caster<true> { 80 template <class T> operator ()numpy::bool_caster81 auto operator()(T const &value) -> decltype(xsimd::bool_cast(value)) 82 { 83 return xsimd::bool_cast(value); 84 } 85 }; 86 template <> 87 struct bool_caster<false> { 88 template <class T> operator ()numpy::bool_caster89 T operator()(T const &value) 90 { 91 return value; 92 } 93 }; 94 95 template <class Op, class E, class T> 96 typename std::enable_if< 97 E::is_vectorizable && types::is_vector_op<typename Op::op, T, T>::value && 98 !std::is_same<typename E::dtype, bool>::value, 99 long>::type _argminmax(E const & elts,T & minmax_elts,utils::int_<1>)100 _argminmax(E const &elts, T &minmax_elts, utils::int_<1>) 101 { 102 using vT = xsimd::batch<T>; 103 using iT = xsimd::as_integer_t<T>; 104 static const size_t vN = vT::size; 105 const long n = elts.size(); 106 if (n >= std::numeric_limits<iT>::max()) { 107 return _argminmax_seq<Op>(elts, minmax_elts); 108 } 109 110 auto viter = types::vectorizer_nobroadcast::vbegin(elts), 111 vend = types::vectorizer_nobroadcast::vend(elts); 112 113 const long bound = std::distance(viter, vend); 114 long minmax_index = -1; 115 if (bound > 0) { 116 auto vacc = *viter; 117 iT iota[vN] = {0}; 118 for (long i = 0; i < (long)vN; ++i) 119 iota[i] = i; 120 xsimd::batch<iT> curr = xsimd::load_unaligned(iota); 121 xsimd::batch<iT> indices = curr; 122 xsimd::batch<iT> step(vN); 123 124 for (++viter; viter != vend; ++viter) { 125 curr += step; 126 auto c = *viter; 127 vacc = typename Op::op{}(vacc, c); 128 auto mask = c == vacc; 129 indices = 130 xsimd::select(bool_caster<std::is_floating_point<T>::value>{}(mask), 131 curr, indices); 132 } 133 134 alignas(sizeof(vT)) T stored[vN]; 135 vacc.store_aligned(&stored[0]); 136 alignas(sizeof(vT)) long indexed[vN]; 137 indices.store_aligned(&indexed[0]); 138 139 for (size_t j = 0; j < vN; ++j) { 140 if (Op::value(stored[j], minmax_elts)) { 141 minmax_elts = stored[j]; 142 minmax_index = indexed[j]; 143 } 144 } 145 } 146 auto iter = elts.begin() + bound * vN; 147 148 for (long i = bound * vN; i < n; ++i, ++iter) { 149 if (Op::value(*iter, minmax_elts)) { 150 minmax_elts = *iter; 151 minmax_index = i; 152 } 153 } 154 return minmax_index; 155 } 156 #endif 157 158 template <class Op, class E, size_t N, class T> _argminmax(E const & elts,T & minmax_elts,utils::int_<N>)159 long _argminmax(E const &elts, T &minmax_elts, utils::int_<N>) 160 { 161 long current_pos = 0; 162 long current_minmaxarg = 0; 163 for (auto &&elt : elts) { 164 long v = _argminmax<Op>(elt, minmax_elts, utils::int_<N - 1>()); 165 if (v >= 0) 166 current_minmaxarg = current_pos + v; 167 current_pos += elt.flat_size(); 168 } 169 return current_minmaxarg; 170 } 171 template <class Op, class E, size_t N, class T, class... Indices> 172 typename std::enable_if<N != 1, std::tuple<long, long>>::type _argminmax_fast(E const & elts,T & minmax_elts,long current_pos,utils::int_<N>,Indices...indices)173 _argminmax_fast(E const &elts, T &minmax_elts, long current_pos, 174 utils::int_<N>, Indices... indices) 175 { 176 long current_minmaxarg = 0; 177 for (long i = 0, n = elts.template shape<std::decay<E>::type::value - N>(); 178 i < n; ++i) { 179 long v; 180 std::tie(v, current_pos) = _argminmax_fast<Op>( 181 elts, minmax_elts, current_pos, utils::int_<N - 1>(), indices..., i); 182 if (v >= 0) 183 current_minmaxarg = v; 184 } 185 return std::make_tuple(current_minmaxarg, current_pos); 186 } 187 188 template <class Op, class E> argminmax(E const & expr)189 long argminmax(E const &expr) 190 { 191 if (!expr.flat_size()) 192 throw types::ValueError("empty sequence"); 193 using elt_type = typename E::dtype; 194 elt_type argminmax_value = Op::limit(); 195 #ifndef USE_XSIMD 196 if (utils::no_broadcast_ex(expr)) { 197 return std::get<0>(_argminmax_fast<Op>(expr, argminmax_value, 0, 198 utils::int_<E::value>())); 199 } else 200 #endif 201 return _argminmax<Op>(expr, argminmax_value, utils::int_<E::value>()); 202 } 203 204 template <class Op, size_t Dim, size_t Axis, class T, class E, class V> _argminmax_tail(T && out,E const & expr,long curr,V && curr_minmax,std::integral_constant<size_t,0>)205 void _argminmax_tail(T &&out, E const &expr, long curr, V &&curr_minmax, 206 std::integral_constant<size_t, 0>) 207 { 208 if (Op::value(expr, curr_minmax)) { 209 out = curr; 210 curr_minmax = expr; 211 } 212 } 213 214 template <class Op, size_t Dim, size_t Axis, class T, class E, class V, 215 size_t N> 216 typename std::enable_if<Axis != (Dim - N), void>::type _argminmax_tail(T && out,E const & expr,long curr,V && curr_minmax,std::integral_constant<size_t,N>)217 _argminmax_tail(T &&out, E const &expr, long curr, V &&curr_minmax, 218 std::integral_constant<size_t, N>) 219 { 220 static_assert(N >= 1, "specialization ok"); 221 long i = 0; 222 for (auto &&elt : expr) { 223 _argminmax_tail<Op, Dim, Axis>(out.fast(i), elt, curr, 224 curr_minmax.fast(i), 225 std::integral_constant<size_t, N - 1>()); 226 ++i; 227 } 228 } 229 230 template <class Op, size_t Dim, size_t Axis, class T, class E> 231 typename std::enable_if<Axis == (Dim - 1), void>::type _argminmax_head(T && out,E const & expr,std::integral_constant<size_t,1>)232 _argminmax_head(T &&out, E const &expr, std::integral_constant<size_t, 1>) 233 { 234 typename E::dtype val = Op::limit(); 235 long i = 0; 236 for (auto &&elt : expr) 237 _argminmax_tail<Op, Dim, Axis>(out, elt, i++, val, 238 std::integral_constant<size_t, 0>()); 239 } 240 241 template <class Op, size_t Dim, size_t Axis, class T, class E, size_t N> 242 typename std::enable_if<Axis == (Dim - N), void>::type _argminmax_head(T && out,E const & expr,std::integral_constant<size_t,N>)243 _argminmax_head(T &&out, E const &expr, std::integral_constant<size_t, N>) 244 { 245 static_assert(N > 1, "specialization ok"); 246 types::ndarray<typename E::dtype, types::array<long, N - 1>> val{ 247 sutils::getshape(out), Op::limit()}; 248 long i = 0; 249 for (auto &&elt : expr) { 250 _argminmax_tail<Op, Dim, Axis>(out, elt, i++, val, 251 std::integral_constant<size_t, N - 1>()); 252 } 253 } 254 255 template <class Op, size_t Dim, size_t Axis, class T, class E, size_t N> 256 typename std::enable_if<Axis != (Dim - N), void>::type _argminmax_head(T && out,E const & expr,std::integral_constant<size_t,N>)257 _argminmax_head(T &&out, E const &expr, std::integral_constant<size_t, N>) 258 { 259 static_assert(N >= 1, "specialization ok"); 260 auto out_iter = out.begin(); 261 for (auto &&elt : expr) { 262 _argminmax_head<Op, Dim, Axis>(*out_iter, elt, 263 std::integral_constant<size_t, N - 1>()); 264 ++out_iter; 265 } 266 } 267 268 template <class Op, size_t N, class T, class E, size_t... Axis> _argminmax_pick_axis(long axis,T && out,E const & expr,utils::index_sequence<Axis...>)269 void _argminmax_pick_axis(long axis, T &&out, E const &expr, 270 utils::index_sequence<Axis...>) 271 { 272 (void)std::initializer_list<bool>{ 273 ((Axis == axis) && (_argminmax_head<Op, N, Axis>( 274 out, expr, std::integral_constant<size_t, N>()), 275 true))...}; 276 } 277 278 template <class Op, class E> 279 types::ndarray<long, types::array<long, E::value - 1>> argminmax(E const & array,long axis)280 argminmax(E const &array, long axis) 281 { 282 if (axis < 0) 283 axis += E::value; 284 if (axis < 0 || size_t(axis) >= E::value) 285 throw types::ValueError("axis out of bounds"); 286 auto shape = sutils::getshape(array); 287 types::array<long, E::value - 1> shp; 288 auto next = std::copy(shape.begin(), shape.begin() + axis, shp.begin()); 289 std::copy(shape.begin() + axis + 1, shape.end(), next); 290 types::ndarray<long, types::array<long, E::value - 1>> out{shp, 291 builtins::None}; 292 _argminmax_pick_axis<Op, E::value>(axis, out, array, 293 utils::make_index_sequence<E::value>()); 294 return out; 295 } 296 } 297 PYTHONIC_NS_END 298 299 #endif 300