1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_SORT_HPP_
46 #define KOKKOS_SORT_HPP_
47 
48 #include <Kokkos_Core.hpp>
49 
50 #include <algorithm>
51 
52 namespace Kokkos {
53 
54 namespace Impl {
55 
56 template <class DstViewType, class SrcViewType, int Rank = DstViewType::Rank>
57 struct CopyOp;
58 
59 template <class DstViewType, class SrcViewType>
60 struct CopyOp<DstViewType, SrcViewType, 1> {
61   KOKKOS_INLINE_FUNCTION
copyKokkos::Impl::CopyOp62   static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src,
63                    size_t i_src) {
64     dst(i_dst) = src(i_src);
65   }
66 };
67 
68 template <class DstViewType, class SrcViewType>
69 struct CopyOp<DstViewType, SrcViewType, 2> {
70   KOKKOS_INLINE_FUNCTION
copyKokkos::Impl::CopyOp71   static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src,
72                    size_t i_src) {
73     for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j);
74   }
75 };
76 
77 template <class DstViewType, class SrcViewType>
78 struct CopyOp<DstViewType, SrcViewType, 3> {
79   KOKKOS_INLINE_FUNCTION
copyKokkos::Impl::CopyOp80   static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src,
81                    size_t i_src) {
82     for (int j = 0; j < dst.extent(1); j++)
83       for (int k = 0; k < dst.extent(2); k++)
84         dst(i_dst, j, k) = src(i_src, j, k);
85   }
86 };
87 }  // namespace Impl
88 
89 //----------------------------------------------------------------------------
90 
91 template <class KeyViewType, class BinSortOp,
92           class Space    = typename KeyViewType::device_type,
93           class SizeType = typename KeyViewType::memory_space::size_type>
94 class BinSort {
95  public:
96   template <class DstViewType, class SrcViewType>
97   struct copy_functor {
98     using src_view_type = typename SrcViewType::const_type;
99 
100     using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
101 
102     DstViewType dst_values;
103     src_view_type src_values;
104     int dst_offset;
105 
copy_functorKokkos::BinSort::copy_functor106     copy_functor(DstViewType const& dst_values_, int const& dst_offset_,
107                  SrcViewType const& src_values_)
108         : dst_values(dst_values_),
109           src_values(src_values_),
110           dst_offset(dst_offset_) {}
111 
112     KOKKOS_INLINE_FUNCTION
operator ()Kokkos::BinSort::copy_functor113     void operator()(const int& i) const {
114       copy_op::copy(dst_values, i + dst_offset, src_values, i);
115     }
116   };
117 
118   template <class DstViewType, class PermuteViewType, class SrcViewType>
119   struct copy_permute_functor {
120     // If a Kokkos::View then can generate constant random access
121     // otherwise can only use the constant type.
122 
123     using src_view_type = typename std::conditional<
124         Kokkos::is_view<SrcViewType>::value,
125         Kokkos::View<typename SrcViewType::const_data_type,
126                      typename SrcViewType::array_layout,
127                      typename SrcViewType::device_type,
128                      Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
129         typename SrcViewType::const_type>::type;
130 
131     using perm_view_type = typename PermuteViewType::const_type;
132 
133     using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
134 
135     DstViewType dst_values;
136     perm_view_type sort_order;
137     src_view_type src_values;
138     int src_offset;
139 
copy_permute_functorKokkos::BinSort::copy_permute_functor140     copy_permute_functor(DstViewType const& dst_values_,
141                          PermuteViewType const& sort_order_,
142                          SrcViewType const& src_values_, int const& src_offset_)
143         : dst_values(dst_values_),
144           sort_order(sort_order_),
145           src_values(src_values_),
146           src_offset(src_offset_) {}
147 
148     KOKKOS_INLINE_FUNCTION
operator ()Kokkos::BinSort::copy_permute_functor149     void operator()(const int& i) const {
150       copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i));
151     }
152   };
153 
154   using execution_space = typename Space::execution_space;
155   using bin_op_type     = BinSortOp;
156 
157   struct bin_count_tag {};
158   struct bin_offset_tag {};
159   struct bin_binning_tag {};
160   struct bin_sort_bins_tag {};
161 
162  public:
163   using size_type  = SizeType;
164   using value_type = size_type;
165 
166   using offset_type    = Kokkos::View<size_type*, Space>;
167   using bin_count_type = Kokkos::View<const int*, Space>;
168 
169   using const_key_view_type = typename KeyViewType::const_type;
170 
171   // If a Kokkos::View then can generate constant random access
172   // otherwise can only use the constant type.
173 
174   using const_rnd_key_view_type = typename std::conditional<
175       Kokkos::is_view<KeyViewType>::value,
176       Kokkos::View<typename KeyViewType::const_data_type,
177                    typename KeyViewType::array_layout,
178                    typename KeyViewType::device_type,
179                    Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
180       const_key_view_type>::type;
181 
182   using non_const_key_scalar = typename KeyViewType::non_const_value_type;
183   using const_key_scalar     = typename KeyViewType::const_value_type;
184 
185   using bin_count_atomic_type =
186       Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;
187 
188  private:
189   const_key_view_type keys;
190   const_rnd_key_view_type keys_rnd;
191 
192  public:
193   BinSortOp bin_op;
194   offset_type bin_offsets;
195   bin_count_atomic_type bin_count_atomic;
196   bin_count_type bin_count_const;
197   offset_type sort_order;
198 
199   int range_begin;
200   int range_end;
201   bool sort_within_bins;
202 
203  public:
204   BinSort() = default;
205 
206   //----------------------------------------
207   // Constructor: takes the keys, the binning_operator and optionally whether to
208   // sort within bins (default false)
BinSort(const_key_view_type keys_,int range_begin_,int range_end_,BinSortOp bin_op_,bool sort_within_bins_=false)209   BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
210           BinSortOp bin_op_, bool sort_within_bins_ = false)
211       : keys(keys_),
212         keys_rnd(keys_),
213         bin_op(bin_op_),
214         bin_offsets(),
215         bin_count_atomic(),
216         bin_count_const(),
217         sort_order(),
218         range_begin(range_begin_),
219         range_end(range_end_),
220         sort_within_bins(sort_within_bins_) {
221     bin_count_atomic = Kokkos::View<int*, Space>(
222         "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
223     bin_count_const = bin_count_atomic;
224     bin_offsets =
225         offset_type(view_alloc(WithoutInitializing,
226                                "Kokkos::SortImpl::BinSortFunctor::bin_offsets"),
227                     bin_op.max_bins());
228     sort_order =
229         offset_type(view_alloc(WithoutInitializing,
230                                "Kokkos::SortImpl::BinSortFunctor::sort_order"),
231                     range_end - range_begin);
232   }
233 
BinSort(const_key_view_type keys_,BinSortOp bin_op_,bool sort_within_bins_=false)234   BinSort(const_key_view_type keys_, BinSortOp bin_op_,
235           bool sort_within_bins_ = false)
236       : BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
237 
238   //----------------------------------------
239   // Create the permutation vector, the bin_offset array and the bin_count
240   // array. Can be called again if keys changed
create_permute_vector()241   void create_permute_vector() {
242     const size_t len = range_end - range_begin;
243     Kokkos::parallel_for(
244         "Kokkos::Sort::BinCount",
245         Kokkos::RangePolicy<execution_space, bin_count_tag>(0, len), *this);
246     Kokkos::parallel_scan("Kokkos::Sort::BinOffset",
247                           Kokkos::RangePolicy<execution_space, bin_offset_tag>(
248                               0, bin_op.max_bins()),
249                           *this);
250 
251     Kokkos::deep_copy(bin_count_atomic, 0);
252     Kokkos::parallel_for(
253         "Kokkos::Sort::BinBinning",
254         Kokkos::RangePolicy<execution_space, bin_binning_tag>(0, len), *this);
255 
256     if (sort_within_bins)
257       Kokkos::parallel_for(
258           "Kokkos::Sort::BinSort",
259           Kokkos::RangePolicy<execution_space, bin_sort_bins_tag>(
260               0, bin_op.max_bins()),
261           *this);
262   }
263 
264   // Sort a subset of a view with respect to the first dimension using the
265   // permutation array
266   template <class ValuesViewType>
sort(ValuesViewType const & values,int values_range_begin,int values_range_end) const267   void sort(ValuesViewType const& values, int values_range_begin,
268             int values_range_end) const {
269     using scratch_view_type =
270         Kokkos::View<typename ValuesViewType::data_type,
271                      typename ValuesViewType::array_layout,
272                      typename ValuesViewType::device_type>;
273 
274     const size_t len        = range_end - range_begin;
275     const size_t values_len = values_range_end - values_range_begin;
276     if (len != values_len) {
277       Kokkos::abort(
278           "BinSort::sort: values range length != permutation vector length");
279     }
280 
281     scratch_view_type sorted_values(
282         view_alloc(WithoutInitializing,
283                    "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
284         values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
285         values.rank_dynamic > 1 ? values.extent(1)
286                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
287         values.rank_dynamic > 2 ? values.extent(2)
288                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
289         values.rank_dynamic > 3 ? values.extent(3)
290                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
291         values.rank_dynamic > 4 ? values.extent(4)
292                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
293         values.rank_dynamic > 5 ? values.extent(5)
294                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
295         values.rank_dynamic > 6 ? values.extent(6)
296                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
297         values.rank_dynamic > 7 ? values.extent(7)
298                                 : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
299 
300     {
301       copy_permute_functor<scratch_view_type /* DstViewType */
302                            ,
303                            offset_type /* PermuteViewType */
304                            ,
305                            ValuesViewType /* SrcViewType */
306                            >
307           functor(sorted_values, sort_order, values,
308                   values_range_begin - range_begin);
309 
310       parallel_for("Kokkos::Sort::CopyPermute",
311                    Kokkos::RangePolicy<execution_space>(0, len), functor);
312     }
313 
314     {
315       copy_functor<ValuesViewType, scratch_view_type> functor(
316           values, range_begin, sorted_values);
317 
318       parallel_for("Kokkos::Sort::Copy",
319                    Kokkos::RangePolicy<execution_space>(0, len), functor);
320     }
321 
322     execution_space().fence();
323   }
324 
325   template <class ValuesViewType>
sort(ValuesViewType const & values) const326   void sort(ValuesViewType const& values) const {
327     this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin);
328   }
329 
330   // Get the permutation vector
331   KOKKOS_INLINE_FUNCTION
get_permute_vector() const332   offset_type get_permute_vector() const { return sort_order; }
333 
334   // Get the start offsets for each bin
335   KOKKOS_INLINE_FUNCTION
get_bin_offsets() const336   offset_type get_bin_offsets() const { return bin_offsets; }
337 
338   // Get the count for each bin
339   KOKKOS_INLINE_FUNCTION
get_bin_count() const340   bin_count_type get_bin_count() const { return bin_count_const; }
341 
342  public:
343   KOKKOS_INLINE_FUNCTION
operator ()(const bin_count_tag &,const int i) const344   void operator()(const bin_count_tag& /*tag*/, const int i) const {
345     const int j = range_begin + i;
346     bin_count_atomic(bin_op.bin(keys, j))++;
347   }
348 
349   KOKKOS_INLINE_FUNCTION
operator ()(const bin_offset_tag &,const int i,value_type & offset,const bool & final) const350   void operator()(const bin_offset_tag& /*tag*/, const int i,
351                   value_type& offset, const bool& final) const {
352     if (final) {
353       bin_offsets(i) = offset;
354     }
355     offset += bin_count_const(i);
356   }
357 
358   KOKKOS_INLINE_FUNCTION
operator ()(const bin_binning_tag &,const int i) const359   void operator()(const bin_binning_tag& /*tag*/, const int i) const {
360     const int j     = range_begin + i;
361     const int bin   = bin_op.bin(keys, j);
362     const int count = bin_count_atomic(bin)++;
363 
364     sort_order(bin_offsets(bin) + count) = j;
365   }
366 
367   KOKKOS_INLINE_FUNCTION
operator ()(const bin_sort_bins_tag &,const int i) const368   void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const {
369     auto bin_size = bin_count_const(i);
370     if (bin_size <= 1) return;
371     int upper_bound = bin_offsets(i) + bin_size;
372     bool sorted     = false;
373     while (!sorted) {
374       sorted      = true;
375       int old_idx = sort_order(bin_offsets(i));
376       int new_idx = 0;
377       for (int k = bin_offsets(i) + 1; k < upper_bound; k++) {
378         new_idx = sort_order(k);
379 
380         if (!bin_op(keys_rnd, old_idx, new_idx)) {
381           sort_order(k - 1) = new_idx;
382           sort_order(k)     = old_idx;
383           sorted            = false;
384         } else {
385           old_idx = new_idx;
386         }
387       }
388       upper_bound--;
389     }
390   }
391 };
392 
393 //----------------------------------------------------------------------------
394 
395 template <class KeyViewType>
396 struct BinOp1D {
397   int max_bins_;
398   double mul_;
399   typename KeyViewType::const_value_type range_;
400   typename KeyViewType::const_value_type min_;
401 
BinOp1DKokkos::BinOp1D402   BinOp1D()
403       : max_bins_(0),
404         mul_(0.0),
405         range_(typename KeyViewType::const_value_type()),
406         min_(typename KeyViewType::const_value_type()) {}
407 
408   // Construct BinOp with number of bins, minimum value and maxuimum value
BinOp1DKokkos::BinOp1D409   BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
410           typename KeyViewType::const_value_type max)
411       : max_bins_(max_bins__ + 1),
412         mul_(1.0 * max_bins__ / (max - min)),
413         range_(max - min),
414         min_(min) {}
415 
416   // Determine bin index from key value
417   template <class ViewType>
binKokkos::BinOp1D418   KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
419     return int(mul_ * (keys(i) - min_));
420   }
421 
422   // Return maximum bin index + 1
423   KOKKOS_INLINE_FUNCTION
max_binsKokkos::BinOp1D424   int max_bins() const { return max_bins_; }
425 
426   // Compare to keys within a bin if true new_val will be put before old_val
427   template <class ViewType, typename iType1, typename iType2>
operator ()Kokkos::BinOp1D428   KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1,
429                                          iType2& i2) const {
430     return keys(i1) < keys(i2);
431   }
432 };
433 
434 template <class KeyViewType>
435 struct BinOp3D {
436   int max_bins_[3];
437   double mul_[3];
438   typename KeyViewType::non_const_value_type range_[3];
439   typename KeyViewType::non_const_value_type min_[3];
440 
441   BinOp3D() = default;
442 
BinOp3DKokkos::BinOp3D443   BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
444           typename KeyViewType::const_value_type max[]) {
445     max_bins_[0] = max_bins__[0];
446     max_bins_[1] = max_bins__[1];
447     max_bins_[2] = max_bins__[2];
448     mul_[0]      = 1.0 * max_bins__[0] / (max[0] - min[0]);
449     mul_[1]      = 1.0 * max_bins__[1] / (max[1] - min[1]);
450     mul_[2]      = 1.0 * max_bins__[2] / (max[2] - min[2]);
451     range_[0]    = max[0] - min[0];
452     range_[1]    = max[1] - min[1];
453     range_[2]    = max[2] - min[2];
454     min_[0]      = min[0];
455     min_[1]      = min[1];
456     min_[2]      = min[2];
457   }
458 
459   template <class ViewType>
binKokkos::BinOp3D460   KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
461     return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) +
462                  int(mul_[1] * (keys(i, 1) - min_[1]))) *
463                 max_bins_[2]) +
464                int(mul_[2] * (keys(i, 2) - min_[2])));
465   }
466 
467   KOKKOS_INLINE_FUNCTION
max_binsKokkos::BinOp3D468   int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; }
469 
470   template <class ViewType, typename iType1, typename iType2>
operator ()Kokkos::BinOp3D471   KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1,
472                                          iType2& i2) const {
473     if (keys(i1, 0) > keys(i2, 0))
474       return true;
475     else if (keys(i1, 0) == keys(i2, 0)) {
476       if (keys(i1, 1) > keys(i2, 1))
477         return true;
478       else if (keys(i1, 1) == keys(i2, 1)) {
479         if (keys(i1, 2) > keys(i2, 2)) return true;
480       }
481     }
482     return false;
483   }
484 };
485 
486 namespace Impl {
487 
488 template <class ViewType>
try_std_sort(ViewType view)489 bool try_std_sort(ViewType view) {
490   bool possible    = true;
491   size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(),
492                       view.stride_3(), view.stride_4(), view.stride_5(),
493                       view.stride_6(), view.stride_7()};
494   possible         = possible &&
495              std::is_same<typename ViewType::memory_space, HostSpace>::value;
496   possible = possible && (ViewType::Rank == 1);
497   possible = possible && (stride[0] == 1);
498   if (possible) {
499     std::sort(view.data(), view.data() + view.extent(0));
500   }
501   return possible;
502 }
503 
504 template <class ViewType>
505 struct min_max_functor {
506   using minmax_scalar =
507       Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;
508 
509   ViewType view;
min_max_functorKokkos::Impl::min_max_functor510   min_max_functor(const ViewType& view_) : view(view_) {}
511 
512   KOKKOS_INLINE_FUNCTION
operator ()Kokkos::Impl::min_max_functor513   void operator()(const size_t& i, minmax_scalar& minmax) const {
514     if (view(i) < minmax.min_val) minmax.min_val = view(i);
515     if (view(i) > minmax.max_val) minmax.max_val = view(i);
516   }
517 };
518 
519 }  // namespace Impl
520 
521 template <class ViewType>
sort(ViewType const & view,bool const always_use_kokkos_sort=false)522 void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
523   if (!always_use_kokkos_sort) {
524     if (Impl::try_std_sort(view)) return;
525   }
526   using CompType = BinOp1D<ViewType>;
527 
528   Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
529   Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
530   parallel_reduce("Kokkos::Sort::FindExtent",
531                   Kokkos::RangePolicy<typename ViewType::execution_space>(
532                       0, view.extent(0)),
533                   Impl::min_max_functor<ViewType>(view), reducer);
534   if (result.min_val == result.max_val) return;
535   BinSort<ViewType, CompType> bin_sort(
536       view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true);
537   bin_sort.create_permute_vector();
538   bin_sort.sort(view);
539 }
540 
541 template <class ViewType>
sort(ViewType view,size_t const begin,size_t const end)542 void sort(ViewType view, size_t const begin, size_t const end) {
543   using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
544   using CompType     = BinOp1D<ViewType>;
545 
546   Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
547   Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
548 
549   parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end),
550                   Impl::min_max_functor<ViewType>(view), reducer);
551 
552   if (result.min_val == result.max_val) return;
553 
554   BinSort<ViewType, CompType> bin_sort(
555       view, begin, end,
556       CompType((end - begin) / 2, result.min_val, result.max_val), true);
557 
558   bin_sort.create_permute_vector();
559   bin_sort.sort(view, begin, end);
560 }
561 
562 }  // namespace Kokkos
563 
564 #endif
565