/*!
 * Copyright 2017-2021 XGBoost contributors
 */
#include <cstddef>
#include <cstdint>
#include <limits>
#include <thrust/device_vector.h>
#include <vector>
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/quantile.h"
#include "../helpers.h"
#include "gtest/gtest.h"

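// dh::SumReduction performs a device-wide sum; reducing 100 ones should give
// 100 within floating-point tolerance.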
TEST(SumReduce, Test) {
  thrust::device_vector<float> data(100, 1.0f);
  auto sum = dh::SumReduction(data.data().get(), data.size());
  ASSERT_NEAR(sum, 100.0f, 1e-5);
}

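// CUDA's built-in atomicAdd overloads do not cover size_t on every platform,
// so device_helpers.cuh supplies one. Each of kThreads threads increments a
// single shared counter once; the result must equal the thread count.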
void TestAtomicSizeT() {
  size_t constexpr kThreads = 235;
  dh::device_vector<size_t> out(1, 0);
  auto d_out = dh::ToSpan(out);
  dh::LaunchN(kThreads, [=] __device__(size_t idx) {
    atomicAdd(&d_out[0], static_cast<size_t>(1));
  });
  ASSERT_EQ(out[0], kThreads);
}

TEST(AtomicAdd, SizeT) {
  TestAtomicSizeT();
}

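// dh::SegmentId maps a global element index to the segment containing it,
// given cumulative segment boundaries. With boundaries {0, 1, 3}, index 0
// belongs to segment 0 while indices 1 and 2 belong to segment 1.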
void TestSegmentID() {
  std::vector<size_t> segments{0, 1, 3};
  thrust::device_vector<size_t> d_segments(segments);
  auto s_segments = dh::ToSpan(d_segments);
  dh::LaunchN(1, [=] __device__(size_t idx) {
    auto id = dh::SegmentId(s_segments, 0);
    SPAN_CHECK(id == 0);
    id = dh::SegmentId(s_segments, 1);
    SPAN_CHECK(id == 1);
    id = dh::SegmentId(s_segments, 2);
    SPAN_CHECK(id == 1);
  });
}

TEST(SegmentID, Basic) {
  TestSegmentID();
}

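// dh::SegmentedUnique removes consecutive duplicate values within each
// segment and rewrites the segment offsets for the compacted output. The
// first call collapses the duplicated value inside segment [3, 6); the second
// moves the boundary between the two duplicates, so they no longer share a
// segment and nothing is removed.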
TEST(SegmentedUnique, Basic) {
  std::vector<float> values{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.62448811531066895f, 0.4f};
  std::vector<size_t> segments{0, 3, 6};

  thrust::device_vector<float> d_values(values);
  thrust::device_vector<xgboost::bst_feature_t> d_segments{segments};

  thrust::device_vector<xgboost::bst_feature_t> d_segs_out(d_segments.size());
  thrust::device_vector<float> d_vals_out(d_values.size());

  size_t n_uniques = dh::SegmentedUnique(
      d_segments.data().get(), d_segments.data().get() + d_segments.size(),
      d_values.data().get(), d_values.data().get() + d_values.size(),
      d_segs_out.data().get(), d_vals_out.data().get(),
      thrust::equal_to<float>{});
  ASSERT_EQ(n_uniques, 5);

  std::vector<float> values_sol{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.4f};
  for (size_t i = 0; i < values_sol.size(); ++i) {
    ASSERT_EQ(d_vals_out[i], values_sol[i]);
  }

  std::vector<xgboost::bst_feature_t> segments_sol{0, 3, 5};
  for (size_t i = 0; i < d_segments.size(); ++i) {
    ASSERT_EQ(segments_sol[i], d_segs_out[i]);
  }

  // Split the duplicated pair across a segment boundary; nothing should be
  // removed this time.
  d_segments[1] = 4;
  d_segments[2] = 6;
  n_uniques = dh::SegmentedUnique(
      d_segments.data().get(), d_segments.data().get() + d_segments.size(),
      d_values.data().get(), d_values.data().get() + d_values.size(),
      d_segs_out.data().get(), d_vals_out.data().get(),
      thrust::equal_to<float>{});
  ASSERT_EQ(n_uniques, values.size());
  for (size_t i = 0; i < values.size(); ++i) {
    ASSERT_EQ(d_vals_out[i], values[i]);
  }
}

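// Helpers for the regression tests below: SketchUnique treats sketch entries
// with equal values as duplicates, IsSorted orders entries by value.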
namespace {
using SketchEntry = xgboost::common::WQSummary<float, float>::Entry;
struct SketchUnique {
  bool __device__ operator()(SketchEntry const& a, SketchEntry const& b) const {
    return a.value - b.value == 0;
  }
};
struct IsSorted {
  bool __device__ operator()(SketchEntry const& a, SketchEntry const& b) const {
    return a.value < b.value;
  }
};
}  // namespace

namespace xgboost {
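// Runs SegmentedUnique in place (the value buffer doubles as the output) over
// a single segment of sketch entries, then verifies the unique count, the
// ordering of the surviving entries, and the adjusted segment end.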
void TestSegmentedUniqueRegression(std::vector<SketchEntry> values, size_t n_duplicated) {
  std::vector<bst_feature_t> segments{0, static_cast<bst_feature_t>(values.size())};

  thrust::device_vector<SketchEntry> d_values(values);
  thrust::device_vector<bst_feature_t> d_segments(segments);
  thrust::device_vector<bst_feature_t> d_segments_out(segments.size());

  size_t n_uniques = dh::SegmentedUnique(
      d_segments.data().get(), d_segments.data().get() + d_segments.size(), d_values.data().get(),
      d_values.data().get() + d_values.size(), d_segments_out.data().get(), d_values.data().get(),
      SketchUnique{});
  ASSERT_EQ(n_uniques, values.size() - n_duplicated);
  ASSERT_TRUE(thrust::is_sorted(thrust::device, d_values.begin(),
                                d_values.begin() + n_uniques, IsSorted{}));
  ASSERT_EQ(segments.at(0), d_segments_out[0]);
  ASSERT_EQ(segments.at(1), d_segments_out[1] + n_duplicated);
}

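// Reduces a counting sequence of nearly 2^32 elements. dh::Reduce processes
// inputs this large in batches (hence the name `batched`); the maximum of
// 0..kSize-1 must be kSize - 1.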
TEST(DeviceHelpers, Reduce) {
  size_t kSize = std::numeric_limits<uint32_t>::max();
  auto it = thrust::make_counting_iterator(0ul);
  dh::XGBCachingDeviceAllocator<char> alloc;
  auto batched = dh::Reduce(thrust::cuda::par(alloc), it, it + kSize, 0ul,
                            thrust::maximum<size_t>{});
  ASSERT_EQ(batched, kSize - 1);
}

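// Regression cases for SegmentedUnique over WQSummary sketch entries: an
// input containing a run of duplicated values, an already-unique input, and
// an empty input.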
TEST(SegmentedUnique, Regression) {
  {
    std::vector<SketchEntry> values{{3149, 3150, 1, 0.62392902374267578},
                                    {3151, 3152, 1, 0.62418866157531738},
                                    {3152, 3153, 1, 0.62419462203979492},
                                    {3153, 3154, 1, 0.62431186437606812},
                                    {3154, 3155, 1, 0.6244881153106689453125},
                                    {3155, 3156, 1, 0.6244881153106689453125},
                                    {3155, 3156, 1, 0.6244881153106689453125},
                                    {3155, 3156, 1, 0.6244881153106689453125},
                                    {3157, 3158, 1, 0.62552797794342041},
                                    {3158, 3159, 1, 0.6256556510925293},
                                    {3159, 3160, 1, 0.62571090459823608},
                                    {3160, 3161, 1, 0.62577134370803833}};
    TestSegmentedUniqueRegression(values, 3);
  }
  {
    std::vector<SketchEntry> values{{3149, 3150, 1, 0.62392902374267578},
                                    {3151, 3152, 1, 0.62418866157531738},
                                    {3152, 3153, 1, 0.62419462203979492},
                                    {3153, 3154, 1, 0.62431186437606812},
                                    {3154, 3155, 1, 0.6244881153106689453125},
                                    {3157, 3158, 1, 0.62552797794342041},
                                    {3158, 3159, 1, 0.6256556510925293},
                                    {3159, 3160, 1, 0.62571090459823608},
                                    {3160, 3161, 1, 0.62577134370803833}};
    TestSegmentedUniqueRegression(values, 0);
  }
  {
    std::vector<SketchEntry> values;
    TestSegmentedUniqueRegression(values, 0);
  }
}

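// Allocating several times the available device memory must fail with a
// dmlc::Error rather than crashing, for both vector types.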
TEST(Allocator, OOM) {
  auto size = dh::AvailableMemory(0) * 4;
  ASSERT_THROW({ dh::caching_device_vector<char> vec(size); }, dmlc::Error);
  ASSERT_THROW({ dh::device_vector<char> vec(size); }, dmlc::Error);
  // Clear last error so we don't fail subsequent tests
  cudaGetLastError();
}

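// dh::ArgSort<false> writes the index permutation that orders the values
// descending; dh::SegmentedArgSort<false> does the same independently within
// each segment, so the combined index vector is descending per segment but
// not globally.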
TEST(DeviceHelpers, ArgSort) {
  dh::device_vector<float> values(20);
  dh::Iota(dh::ToSpan(values));  // ascending
  dh::device_vector<size_t> sorted_idx(20);
  dh::ArgSort<false>(dh::ToSpan(values), dh::ToSpan(sorted_idx));  // sort to descending
  ASSERT_TRUE(thrust::is_sorted(thrust::device, sorted_idx.begin(),
                                sorted_idx.end(), thrust::greater<size_t>{}));

  dh::Iota(dh::ToSpan(values));
  dh::device_vector<size_t> groups(3);
  groups[0] = 0;
  groups[1] = 10;
  groups[2] = 20;
  dh::SegmentedArgSort<false>(dh::ToSpan(values), dh::ToSpan(groups),
                              dh::ToSpan(sorted_idx));
  ASSERT_FALSE(thrust::is_sorted(thrust::device, sorted_idx.begin(),
                                 sorted_idx.end(), thrust::greater<size_t>{}));
  ASSERT_TRUE(thrust::is_sorted(sorted_idx.begin(), sorted_idx.begin() + 10,
                                thrust::greater<size_t>{}));
  ASSERT_TRUE(thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(),
                                thrust::greater<size_t>{}));
}

namespace {
// Reference atomic add for int64_t, implemented by reinterpreting the
// operands as uint64_t and forwarding to the built-in unsigned overload.
XGBOOST_DEV_INLINE int64_t atomicAdd(int64_t *dst, int64_t src) {  // NOLINT
  uint64_t* u_dst = reinterpret_cast<uint64_t*>(dst);
  uint64_t u_src = *reinterpret_cast<uint64_t*>(&src);
  uint64_t ret = ::atomicAdd(u_dst, u_src);
  return *reinterpret_cast<int64_t*>(&ret);
}
}  // namespace

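// dh::AtomicAdd64As32 builds a 64-bit signed atomic add out of 32-bit atomic
// operations. Each case below runs it alongside the reference 64-bit
// atomicAdd defined above and checks that the two results agree, including
// sums that overflow 32 bits in either direction.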
void TestAtomicAdd() {
  size_t n_elements = 1024;
  dh::device_vector<int64_t> result_a(1, 0);
  auto d_result_a = result_a.data().get();

  dh::device_vector<int64_t> result_b(1, 0);
  auto d_result_b = result_b.data().get();

  /**
   * Test for simple inputs.
   */
  std::vector<int64_t> h_inputs(n_elements);
  for (size_t i = 0; i < h_inputs.size(); ++i) {
    h_inputs[i] = (i % 2 == 0) ? static_cast<int64_t>(i) : -static_cast<int64_t>(i);
  }
  dh::device_vector<int64_t> inputs(h_inputs);
  auto d_inputs = inputs.data().get();

  dh::LaunchN(n_elements, [=] __device__(size_t i) {
    dh::AtomicAdd64As32(d_result_a, d_inputs[i]);
    atomicAdd(d_result_b, d_inputs[i]);
  });
  ASSERT_EQ(result_a[0], result_b[0]);

  /**
   * Test for positive values that don't fit into a 32-bit integer.
   */
  thrust::fill(inputs.begin(), inputs.end(),
               (std::numeric_limits<uint32_t>::max() / 2));
  thrust::fill(result_a.begin(), result_a.end(), 0);
  thrust::fill(result_b.begin(), result_b.end(), 0);
  dh::LaunchN(n_elements, [=] __device__(size_t i) {
    dh::AtomicAdd64As32(d_result_a, d_inputs[i]);
    atomicAdd(d_result_b, d_inputs[i]);
  });
  ASSERT_EQ(result_a[0], result_b[0]);
  ASSERT_GT(result_a[0], std::numeric_limits<uint32_t>::max());
  ASSERT_EQ(thrust::reduce(inputs.begin(), inputs.end(), int64_t(0)), result_a[0]);

  /**
   * Test for negative values that don't fit into a 32-bit integer.
   */
  thrust::fill(inputs.begin(), inputs.end(),
               (std::numeric_limits<int32_t>::min() / 2));
  thrust::fill(result_a.begin(), result_a.end(), 0);
  thrust::fill(result_b.begin(), result_b.end(), 0);
  dh::LaunchN(n_elements, [=] __device__(size_t i) {
    dh::AtomicAdd64As32(d_result_a, d_inputs[i]);
    atomicAdd(d_result_b, d_inputs[i]);
  });
  ASSERT_EQ(result_a[0], result_b[0]);
  ASSERT_LT(result_a[0], std::numeric_limits<int32_t>::min());
  ASSERT_EQ(thrust::reduce(inputs.begin(), inputs.end(), int64_t(0)), result_a[0]);
}

TEST(AtomicAdd, Int64) {
  TestAtomicAdd();
}
}  // namespace xgboost