1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #ifndef KOKKOS_SORT_HPP_
46 #define KOKKOS_SORT_HPP_
47
48 #include <Kokkos_Core.hpp>
49
50 #include <algorithm>
51
52 namespace Kokkos {
53
54 namespace Impl {
55
56 template <class DstViewType, class SrcViewType, int Rank = DstViewType::Rank>
57 struct CopyOp;
58
59 template <class DstViewType, class SrcViewType>
60 struct CopyOp<DstViewType, SrcViewType, 1> {
61 KOKKOS_INLINE_FUNCTION
copyKokkos::Impl::CopyOp62 static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src,
63 size_t i_src) {
64 dst(i_dst) = src(i_src);
65 }
66 };
67
68 template <class DstViewType, class SrcViewType>
69 struct CopyOp<DstViewType, SrcViewType, 2> {
70 KOKKOS_INLINE_FUNCTION
copyKokkos::Impl::CopyOp71 static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src,
72 size_t i_src) {
73 for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j);
74 }
75 };
76
77 template <class DstViewType, class SrcViewType>
78 struct CopyOp<DstViewType, SrcViewType, 3> {
79 KOKKOS_INLINE_FUNCTION
copyKokkos::Impl::CopyOp80 static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src,
81 size_t i_src) {
82 for (int j = 0; j < dst.extent(1); j++)
83 for (int k = 0; k < dst.extent(2); k++)
84 dst(i_dst, j, k) = src(i_src, j, k);
85 }
86 };
87 } // namespace Impl
88
89 //----------------------------------------------------------------------------
90
91 template <class KeyViewType, class BinSortOp,
92 class Space = typename KeyViewType::device_type,
93 class SizeType = typename KeyViewType::memory_space::size_type>
94 class BinSort {
95 public:
96 template <class DstViewType, class SrcViewType>
97 struct copy_functor {
98 using src_view_type = typename SrcViewType::const_type;
99
100 using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
101
102 DstViewType dst_values;
103 src_view_type src_values;
104 int dst_offset;
105
copy_functorKokkos::BinSort::copy_functor106 copy_functor(DstViewType const& dst_values_, int const& dst_offset_,
107 SrcViewType const& src_values_)
108 : dst_values(dst_values_),
109 src_values(src_values_),
110 dst_offset(dst_offset_) {}
111
112 KOKKOS_INLINE_FUNCTION
operator ()Kokkos::BinSort::copy_functor113 void operator()(const int& i) const {
114 copy_op::copy(dst_values, i + dst_offset, src_values, i);
115 }
116 };
117
118 template <class DstViewType, class PermuteViewType, class SrcViewType>
119 struct copy_permute_functor {
120 // If a Kokkos::View then can generate constant random access
121 // otherwise can only use the constant type.
122
123 using src_view_type = typename std::conditional<
124 Kokkos::is_view<SrcViewType>::value,
125 Kokkos::View<typename SrcViewType::const_data_type,
126 typename SrcViewType::array_layout,
127 typename SrcViewType::device_type,
128 Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
129 typename SrcViewType::const_type>::type;
130
131 using perm_view_type = typename PermuteViewType::const_type;
132
133 using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
134
135 DstViewType dst_values;
136 perm_view_type sort_order;
137 src_view_type src_values;
138 int src_offset;
139
copy_permute_functorKokkos::BinSort::copy_permute_functor140 copy_permute_functor(DstViewType const& dst_values_,
141 PermuteViewType const& sort_order_,
142 SrcViewType const& src_values_, int const& src_offset_)
143 : dst_values(dst_values_),
144 sort_order(sort_order_),
145 src_values(src_values_),
146 src_offset(src_offset_) {}
147
148 KOKKOS_INLINE_FUNCTION
operator ()Kokkos::BinSort::copy_permute_functor149 void operator()(const int& i) const {
150 copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i));
151 }
152 };
153
154 using execution_space = typename Space::execution_space;
155 using bin_op_type = BinSortOp;
156
157 struct bin_count_tag {};
158 struct bin_offset_tag {};
159 struct bin_binning_tag {};
160 struct bin_sort_bins_tag {};
161
162 public:
163 using size_type = SizeType;
164 using value_type = size_type;
165
166 using offset_type = Kokkos::View<size_type*, Space>;
167 using bin_count_type = Kokkos::View<const int*, Space>;
168
169 using const_key_view_type = typename KeyViewType::const_type;
170
171 // If a Kokkos::View then can generate constant random access
172 // otherwise can only use the constant type.
173
174 using const_rnd_key_view_type = typename std::conditional<
175 Kokkos::is_view<KeyViewType>::value,
176 Kokkos::View<typename KeyViewType::const_data_type,
177 typename KeyViewType::array_layout,
178 typename KeyViewType::device_type,
179 Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
180 const_key_view_type>::type;
181
182 using non_const_key_scalar = typename KeyViewType::non_const_value_type;
183 using const_key_scalar = typename KeyViewType::const_value_type;
184
185 using bin_count_atomic_type =
186 Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;
187
188 private:
189 const_key_view_type keys;
190 const_rnd_key_view_type keys_rnd;
191
192 public:
193 BinSortOp bin_op;
194 offset_type bin_offsets;
195 bin_count_atomic_type bin_count_atomic;
196 bin_count_type bin_count_const;
197 offset_type sort_order;
198
199 int range_begin;
200 int range_end;
201 bool sort_within_bins;
202
203 public:
204 BinSort() = default;
205
206 //----------------------------------------
207 // Constructor: takes the keys, the binning_operator and optionally whether to
208 // sort within bins (default false)
BinSort(const_key_view_type keys_,int range_begin_,int range_end_,BinSortOp bin_op_,bool sort_within_bins_=false)209 BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
210 BinSortOp bin_op_, bool sort_within_bins_ = false)
211 : keys(keys_),
212 keys_rnd(keys_),
213 bin_op(bin_op_),
214 bin_offsets(),
215 bin_count_atomic(),
216 bin_count_const(),
217 sort_order(),
218 range_begin(range_begin_),
219 range_end(range_end_),
220 sort_within_bins(sort_within_bins_) {
221 bin_count_atomic = Kokkos::View<int*, Space>(
222 "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
223 bin_count_const = bin_count_atomic;
224 bin_offsets =
225 offset_type(view_alloc(WithoutInitializing,
226 "Kokkos::SortImpl::BinSortFunctor::bin_offsets"),
227 bin_op.max_bins());
228 sort_order =
229 offset_type(view_alloc(WithoutInitializing,
230 "Kokkos::SortImpl::BinSortFunctor::sort_order"),
231 range_end - range_begin);
232 }
233
BinSort(const_key_view_type keys_,BinSortOp bin_op_,bool sort_within_bins_=false)234 BinSort(const_key_view_type keys_, BinSortOp bin_op_,
235 bool sort_within_bins_ = false)
236 : BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
237
238 //----------------------------------------
239 // Create the permutation vector, the bin_offset array and the bin_count
240 // array. Can be called again if keys changed
create_permute_vector()241 void create_permute_vector() {
242 const size_t len = range_end - range_begin;
243 Kokkos::parallel_for(
244 "Kokkos::Sort::BinCount",
245 Kokkos::RangePolicy<execution_space, bin_count_tag>(0, len), *this);
246 Kokkos::parallel_scan("Kokkos::Sort::BinOffset",
247 Kokkos::RangePolicy<execution_space, bin_offset_tag>(
248 0, bin_op.max_bins()),
249 *this);
250
251 Kokkos::deep_copy(bin_count_atomic, 0);
252 Kokkos::parallel_for(
253 "Kokkos::Sort::BinBinning",
254 Kokkos::RangePolicy<execution_space, bin_binning_tag>(0, len), *this);
255
256 if (sort_within_bins)
257 Kokkos::parallel_for(
258 "Kokkos::Sort::BinSort",
259 Kokkos::RangePolicy<execution_space, bin_sort_bins_tag>(
260 0, bin_op.max_bins()),
261 *this);
262 }
263
264 // Sort a subset of a view with respect to the first dimension using the
265 // permutation array
266 template <class ValuesViewType>
sort(ValuesViewType const & values,int values_range_begin,int values_range_end) const267 void sort(ValuesViewType const& values, int values_range_begin,
268 int values_range_end) const {
269 using scratch_view_type =
270 Kokkos::View<typename ValuesViewType::data_type,
271 typename ValuesViewType::array_layout,
272 typename ValuesViewType::device_type>;
273
274 const size_t len = range_end - range_begin;
275 const size_t values_len = values_range_end - values_range_begin;
276 if (len != values_len) {
277 Kokkos::abort(
278 "BinSort::sort: values range length != permutation vector length");
279 }
280
281 scratch_view_type sorted_values(
282 view_alloc(WithoutInitializing,
283 "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
284 values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
285 values.rank_dynamic > 1 ? values.extent(1)
286 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
287 values.rank_dynamic > 2 ? values.extent(2)
288 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
289 values.rank_dynamic > 3 ? values.extent(3)
290 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
291 values.rank_dynamic > 4 ? values.extent(4)
292 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
293 values.rank_dynamic > 5 ? values.extent(5)
294 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
295 values.rank_dynamic > 6 ? values.extent(6)
296 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
297 values.rank_dynamic > 7 ? values.extent(7)
298 : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
299
300 {
301 copy_permute_functor<scratch_view_type /* DstViewType */
302 ,
303 offset_type /* PermuteViewType */
304 ,
305 ValuesViewType /* SrcViewType */
306 >
307 functor(sorted_values, sort_order, values,
308 values_range_begin - range_begin);
309
310 parallel_for("Kokkos::Sort::CopyPermute",
311 Kokkos::RangePolicy<execution_space>(0, len), functor);
312 }
313
314 {
315 copy_functor<ValuesViewType, scratch_view_type> functor(
316 values, range_begin, sorted_values);
317
318 parallel_for("Kokkos::Sort::Copy",
319 Kokkos::RangePolicy<execution_space>(0, len), functor);
320 }
321
322 execution_space().fence();
323 }
324
325 template <class ValuesViewType>
sort(ValuesViewType const & values) const326 void sort(ValuesViewType const& values) const {
327 this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin);
328 }
329
330 // Get the permutation vector
331 KOKKOS_INLINE_FUNCTION
get_permute_vector() const332 offset_type get_permute_vector() const { return sort_order; }
333
334 // Get the start offsets for each bin
335 KOKKOS_INLINE_FUNCTION
get_bin_offsets() const336 offset_type get_bin_offsets() const { return bin_offsets; }
337
338 // Get the count for each bin
339 KOKKOS_INLINE_FUNCTION
get_bin_count() const340 bin_count_type get_bin_count() const { return bin_count_const; }
341
342 public:
343 KOKKOS_INLINE_FUNCTION
operator ()(const bin_count_tag &,const int i) const344 void operator()(const bin_count_tag& /*tag*/, const int i) const {
345 const int j = range_begin + i;
346 bin_count_atomic(bin_op.bin(keys, j))++;
347 }
348
349 KOKKOS_INLINE_FUNCTION
operator ()(const bin_offset_tag &,const int i,value_type & offset,const bool & final) const350 void operator()(const bin_offset_tag& /*tag*/, const int i,
351 value_type& offset, const bool& final) const {
352 if (final) {
353 bin_offsets(i) = offset;
354 }
355 offset += bin_count_const(i);
356 }
357
358 KOKKOS_INLINE_FUNCTION
operator ()(const bin_binning_tag &,const int i) const359 void operator()(const bin_binning_tag& /*tag*/, const int i) const {
360 const int j = range_begin + i;
361 const int bin = bin_op.bin(keys, j);
362 const int count = bin_count_atomic(bin)++;
363
364 sort_order(bin_offsets(bin) + count) = j;
365 }
366
367 KOKKOS_INLINE_FUNCTION
operator ()(const bin_sort_bins_tag &,const int i) const368 void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const {
369 auto bin_size = bin_count_const(i);
370 if (bin_size <= 1) return;
371 int upper_bound = bin_offsets(i) + bin_size;
372 bool sorted = false;
373 while (!sorted) {
374 sorted = true;
375 int old_idx = sort_order(bin_offsets(i));
376 int new_idx = 0;
377 for (int k = bin_offsets(i) + 1; k < upper_bound; k++) {
378 new_idx = sort_order(k);
379
380 if (!bin_op(keys_rnd, old_idx, new_idx)) {
381 sort_order(k - 1) = new_idx;
382 sort_order(k) = old_idx;
383 sorted = false;
384 } else {
385 old_idx = new_idx;
386 }
387 }
388 upper_bound--;
389 }
390 }
391 };
392
393 //----------------------------------------------------------------------------
394
395 template <class KeyViewType>
396 struct BinOp1D {
397 int max_bins_;
398 double mul_;
399 typename KeyViewType::const_value_type range_;
400 typename KeyViewType::const_value_type min_;
401
BinOp1DKokkos::BinOp1D402 BinOp1D()
403 : max_bins_(0),
404 mul_(0.0),
405 range_(typename KeyViewType::const_value_type()),
406 min_(typename KeyViewType::const_value_type()) {}
407
408 // Construct BinOp with number of bins, minimum value and maxuimum value
BinOp1DKokkos::BinOp1D409 BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
410 typename KeyViewType::const_value_type max)
411 : max_bins_(max_bins__ + 1),
412 mul_(1.0 * max_bins__ / (max - min)),
413 range_(max - min),
414 min_(min) {}
415
416 // Determine bin index from key value
417 template <class ViewType>
binKokkos::BinOp1D418 KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
419 return int(mul_ * (keys(i) - min_));
420 }
421
422 // Return maximum bin index + 1
423 KOKKOS_INLINE_FUNCTION
max_binsKokkos::BinOp1D424 int max_bins() const { return max_bins_; }
425
426 // Compare to keys within a bin if true new_val will be put before old_val
427 template <class ViewType, typename iType1, typename iType2>
operator ()Kokkos::BinOp1D428 KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1,
429 iType2& i2) const {
430 return keys(i1) < keys(i2);
431 }
432 };
433
434 template <class KeyViewType>
435 struct BinOp3D {
436 int max_bins_[3];
437 double mul_[3];
438 typename KeyViewType::non_const_value_type range_[3];
439 typename KeyViewType::non_const_value_type min_[3];
440
441 BinOp3D() = default;
442
BinOp3DKokkos::BinOp3D443 BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
444 typename KeyViewType::const_value_type max[]) {
445 max_bins_[0] = max_bins__[0];
446 max_bins_[1] = max_bins__[1];
447 max_bins_[2] = max_bins__[2];
448 mul_[0] = 1.0 * max_bins__[0] / (max[0] - min[0]);
449 mul_[1] = 1.0 * max_bins__[1] / (max[1] - min[1]);
450 mul_[2] = 1.0 * max_bins__[2] / (max[2] - min[2]);
451 range_[0] = max[0] - min[0];
452 range_[1] = max[1] - min[1];
453 range_[2] = max[2] - min[2];
454 min_[0] = min[0];
455 min_[1] = min[1];
456 min_[2] = min[2];
457 }
458
459 template <class ViewType>
binKokkos::BinOp3D460 KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
461 return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) +
462 int(mul_[1] * (keys(i, 1) - min_[1]))) *
463 max_bins_[2]) +
464 int(mul_[2] * (keys(i, 2) - min_[2])));
465 }
466
467 KOKKOS_INLINE_FUNCTION
max_binsKokkos::BinOp3D468 int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; }
469
470 template <class ViewType, typename iType1, typename iType2>
operator ()Kokkos::BinOp3D471 KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1,
472 iType2& i2) const {
473 if (keys(i1, 0) > keys(i2, 0))
474 return true;
475 else if (keys(i1, 0) == keys(i2, 0)) {
476 if (keys(i1, 1) > keys(i2, 1))
477 return true;
478 else if (keys(i1, 1) == keys(i2, 1)) {
479 if (keys(i1, 2) > keys(i2, 2)) return true;
480 }
481 }
482 return false;
483 }
484 };
485
486 namespace Impl {
487
488 template <class ViewType>
try_std_sort(ViewType view)489 bool try_std_sort(ViewType view) {
490 bool possible = true;
491 size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(),
492 view.stride_3(), view.stride_4(), view.stride_5(),
493 view.stride_6(), view.stride_7()};
494 possible = possible &&
495 std::is_same<typename ViewType::memory_space, HostSpace>::value;
496 possible = possible && (ViewType::Rank == 1);
497 possible = possible && (stride[0] == 1);
498 if (possible) {
499 std::sort(view.data(), view.data() + view.extent(0));
500 }
501 return possible;
502 }
503
504 template <class ViewType>
505 struct min_max_functor {
506 using minmax_scalar =
507 Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;
508
509 ViewType view;
min_max_functorKokkos::Impl::min_max_functor510 min_max_functor(const ViewType& view_) : view(view_) {}
511
512 KOKKOS_INLINE_FUNCTION
operator ()Kokkos::Impl::min_max_functor513 void operator()(const size_t& i, minmax_scalar& minmax) const {
514 if (view(i) < minmax.min_val) minmax.min_val = view(i);
515 if (view(i) > minmax.max_val) minmax.max_val = view(i);
516 }
517 };
518
519 } // namespace Impl
520
521 template <class ViewType>
sort(ViewType const & view,bool const always_use_kokkos_sort=false)522 void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
523 if (!always_use_kokkos_sort) {
524 if (Impl::try_std_sort(view)) return;
525 }
526 using CompType = BinOp1D<ViewType>;
527
528 Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
529 Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
530 parallel_reduce("Kokkos::Sort::FindExtent",
531 Kokkos::RangePolicy<typename ViewType::execution_space>(
532 0, view.extent(0)),
533 Impl::min_max_functor<ViewType>(view), reducer);
534 if (result.min_val == result.max_val) return;
535 BinSort<ViewType, CompType> bin_sort(
536 view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true);
537 bin_sort.create_permute_vector();
538 bin_sort.sort(view);
539 }
540
541 template <class ViewType>
sort(ViewType view,size_t const begin,size_t const end)542 void sort(ViewType view, size_t const begin, size_t const end) {
543 using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
544 using CompType = BinOp1D<ViewType>;
545
546 Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
547 Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
548
549 parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end),
550 Impl::min_max_functor<ViewType>(view), reducer);
551
552 if (result.min_val == result.max_val) return;
553
554 BinSort<ViewType, CompType> bin_sort(
555 view, begin, end,
556 CompType((end - begin) / 2, result.min_val, result.max_val), true);
557
558 bin_sort.create_permute_vector();
559 bin_sort.sort(view, begin, end);
560 }
561
562 } // namespace Kokkos
563
564 #endif
565