1 /*!
2 * Copyright 2017-2021 XGBoost contributors
3 */
#include <cstddef>
#include <cstdint>
#include <limits>
#include <thrust/device_vector.h>
#include <vector>
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/quantile.h"
#include "../helpers.h"
#include "gtest/gtest.h"
13
// Summing a device vector of 100 ones must give (approximately) 100.
TEST(SumReduce, Test) {
  constexpr size_t kCount = 100;
  thrust::device_vector<float> ones(kCount, 1.0f);
  auto const total = dh::SumReduction(ones.data().get(), ones.size());
  ASSERT_NEAR(total, 100.0f, 1e-5);
}
19
// Launch a thread count that is not a multiple of the warp size and let
// every thread bump a size_t counter once; the final value must equal
// the number of threads launched.
void TestAtomicSizeT() {
  constexpr size_t kThreads = 235;
  dh::device_vector<size_t> counter(1, 0);
  auto d_counter = dh::ToSpan(counter);
  dh::LaunchN(kThreads, [=] __device__(size_t) {
    atomicAdd(&d_counter[0], static_cast<size_t>(1));
  });
  ASSERT_EQ(counter[0], kThreads);
}
29
// Exercises the size_t flavour of atomicAdd.
TEST(AtomicAdd, SizeT) { TestAtomicSizeT(); }
33
// dh::SegmentId maps a global element index to the segment containing
// it, given CSR-style boundaries {0, 1, 3}: segment 0 is [0, 1) and
// segment 1 is [1, 3).
void TestSegmentID() {
  std::vector<size_t> h_segments{0, 1, 3};
  thrust::device_vector<size_t> segments(h_segments);
  auto d_segments = dh::ToSpan(segments);
  dh::LaunchN(1, [=] __device__(size_t) {
    // Element 0 lives in the first segment.
    SPAN_CHECK(dh::SegmentId(d_segments, 0) == 0);
    // Elements 1 and 2 both live in the second segment.
    SPAN_CHECK(dh::SegmentId(d_segments, 1) == 1);
    SPAN_CHECK(dh::SegmentId(d_segments, 2) == 1);
  });
}
47
// Basic device-side checks for dh::SegmentId.
TEST(SegmentID, Basic) { TestSegmentID(); }
51
// SegmentedUnique removes duplicates *within* each segment and rewrites
// the segment boundaries to describe the compacted output.  Duplicates
// that straddle a segment boundary must be kept.
TEST(SegmentedUnique, Basic) {
  std::vector<float> values{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.62448811531066895f, 0.4f};
  std::vector<size_t> segments{0, 3, 6};

  thrust::device_vector<float> d_values(values);
  thrust::device_vector<xgboost::bst_feature_t> d_segments{segments};

  thrust::device_vector<xgboost::bst_feature_t> d_segs_out(d_segments.size());
  thrust::device_vector<float> d_vals_out(d_values.size());

  // Segments [0, 3) and [3, 6): the duplicated pair sits entirely inside
  // the second segment, so exactly one element is dropped.
  size_t n_uniques = dh::SegmentedUnique(
      d_segments.data().get(), d_segments.data().get() + d_segments.size(),
      d_values.data().get(), d_values.data().get() + d_values.size(),
      d_segs_out.data().get(), d_vals_out.data().get(),
      thrust::equal_to<float>{});
  // Use ASSERT_EQ consistently with the rest of this test (was CHECK_EQ).
  ASSERT_EQ(n_uniques, 5);

  std::vector<float> values_sol{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.4f};
  // size_t index avoids the signed/unsigned comparison of `auto i = 0`.
  for (size_t i = 0; i < values_sol.size(); ++i) {
    ASSERT_EQ(d_vals_out[i], values_sol[i]);
  }

  std::vector<xgboost::bst_feature_t> segments_sol{0, 3, 5};
  for (size_t i = 0; i < d_segments.size(); ++i) {
    ASSERT_EQ(segments_sol[i], d_segs_out[i]);
  }

  // Move the boundary so the duplicated values now straddle two
  // segments ([0, 4) and [4, 6)); cross-segment duplicates survive.
  d_segments[1] = 4;
  d_segments[2] = 6;
  n_uniques = dh::SegmentedUnique(
      d_segments.data().get(), d_segments.data().get() + d_segments.size(),
      d_values.data().get(), d_values.data().get() + d_values.size(),
      d_segs_out.data().get(), d_vals_out.data().get(),
      thrust::equal_to<float>{});
  ASSERT_EQ(n_uniques, values.size());
  for (size_t i = 0; i < values.size(); ++i) {
    ASSERT_EQ(d_vals_out[i], values[i]);
  }
}
91
namespace {
using SketchEntry = xgboost::common::WQSummary<float, float>::Entry;

// Equality predicate on the entry value, expressed as `a - b == 0`.
struct SketchUnique {
  bool __device__ operator()(SketchEntry const& a, SketchEntry const& b) const {
    return a.value - b.value == 0;
  }
};

// Strict ordering on the entry value, for use with thrust::is_sorted.
struct IsSorted {
  bool __device__ operator()(SketchEntry const& a, SketchEntry const& b) const {
    return a.value < b.value;
  }
};
}  // namespace
105
106 namespace xgboost {
// Runs SegmentedUnique over a single segment spanning all of `values`,
// then checks that exactly `n_duplicated` entries were removed and the
// surviving prefix is sorted by value.
void TestSegmentedUniqueRegression(std::vector<SketchEntry> values, size_t n_duplicated) {
  std::vector<bst_feature_t> segments{0, static_cast<bst_feature_t>(values.size())};

  thrust::device_vector<SketchEntry> d_entries(values);
  thrust::device_vector<bst_feature_t> d_segs_in(segments);
  thrust::device_vector<bst_feature_t> d_segs_out(segments.size());

  // Input and output value ranges alias on purpose: unique entries are
  // compacted into the front of d_entries.
  size_t n_unique = dh::SegmentedUnique(
      d_segs_in.data().get(), d_segs_in.data().get() + d_segs_in.size(), d_entries.data().get(),
      d_entries.data().get() + d_entries.size(), d_segs_out.data().get(), d_entries.data().get(),
      SketchUnique{});
  ASSERT_EQ(n_unique, values.size() - n_duplicated);
  ASSERT_TRUE(thrust::is_sorted(thrust::device, d_entries.begin(),
                                d_entries.begin() + n_unique, IsSorted{}));
  // The single output segment shrinks by the number of removed entries.
  ASSERT_EQ(segments.at(0), d_segs_out[0]);
  ASSERT_EQ(segments.at(1), d_segs_out[1] + n_duplicated);
}
124
// Reduce a counting range holding max(uint32) elements; the maximum of
// 0 .. kSize-1 is kSize - 1.  Exercises inputs longer than a 32-bit
// index can address.
TEST(DeviceHelpers, Reduce) {
  size_t const kSize = std::numeric_limits<uint32_t>::max();
  auto first = thrust::make_counting_iterator(0ul);
  dh::XGBCachingDeviceAllocator<char> alloc;
  auto result =
      dh::Reduce(thrust::cuda::par(alloc), first, first + kSize, 0ul, thrust::maximum<size_t>{});
  CHECK_EQ(result, kSize - 1);
}
132
133
// Regression cases for SegmentedUnique on WQSummary sketch entries.
TEST(SegmentedUnique, Regression) {
  {
    // Four consecutive entries share the value 0.6244881153106689453125,
    // so three duplicates must be removed.
    std::vector<SketchEntry> values{{3149, 3150, 1, 0.62392902374267578},
                                    {3151, 3152, 1, 0.62418866157531738},
                                    {3152, 3153, 1, 0.62419462203979492},
                                    {3153, 3154, 1, 0.62431186437606812},
                                    {3154, 3155, 1, 0.6244881153106689453125},
                                    {3155, 3156, 1, 0.6244881153106689453125},
                                    {3155, 3156, 1, 0.6244881153106689453125},
                                    {3155, 3156, 1, 0.6244881153106689453125},
                                    {3157, 3158, 1, 0.62552797794342041},
                                    {3158, 3159, 1, 0.6256556510925293},
                                    {3159, 3160, 1, 0.62571090459823608},
                                    {3160, 3161, 1, 0.62577134370803833}};
    TestSegmentedUniqueRegression(values, 3);
  }
  {
    // All values distinct: nothing is removed.
    std::vector<SketchEntry> values{{3149, 3150, 1, 0.62392902374267578},
                                    {3151, 3152, 1, 0.62418866157531738},
                                    {3152, 3153, 1, 0.62419462203979492},
                                    {3153, 3154, 1, 0.62431186437606812},
                                    {3154, 3155, 1, 0.6244881153106689453125},
                                    {3157, 3158, 1, 0.62552797794342041},
                                    {3158, 3159, 1, 0.6256556510925293},
                                    {3159, 3160, 1, 0.62571090459823608},
                                    {3160, 3161, 1, 0.62577134370803833}};
    TestSegmentedUniqueRegression(values, 0);
  }
  {
    // Empty input must be handled without error.
    std::vector<SketchEntry> values;
    TestSegmentedUniqueRegression(values, 0);
  }
}
167
// Requesting four times the device's available memory must throw
// dmlc::Error from both allocator flavours rather than crash.
TEST(Allocator, OOM) {
  auto size = dh::AvailableMemory(0) * 4;
  ASSERT_THROW({ dh::caching_device_vector<char> vec(size); }, dmlc::Error);
  ASSERT_THROW({ dh::device_vector<char> vec(size); }, dmlc::Error);
  // Clear the sticky CUDA error so subsequent tests start clean.
  cudaGetLastError();
}
175
// ArgSort / SegmentedArgSort produce index permutations; for ascending
// input 0..19, a descending sort yields a descending index sequence.
TEST(DeviceHelpers, ArgSort) {
  dh::device_vector<float> values(20);
  dh::Iota(dh::ToSpan(values)); // ascending: 0, 1, ..., 19
  dh::device_vector<size_t> sorted_idx(20);
  dh::ArgSort<false>(dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
  ASSERT_TRUE(thrust::is_sorted(thrust::device, sorted_idx.begin(),
                                sorted_idx.end(), thrust::greater<size_t>{}));

  dh::Iota(dh::ToSpan(values));
  // Two segments, [0, 10) and [10, 20), sorted independently.
  dh::device_vector<size_t> groups(3);
  groups[0] = 0;
  groups[1] = 10;
  groups[2] = 20;
  dh::SegmentedArgSort<false>(dh::ToSpan(values), dh::ToSpan(groups),
                              dh::ToSpan(sorted_idx));
  // The full index array is no longer globally descending ...
  ASSERT_FALSE(thrust::is_sorted(thrust::device, sorted_idx.begin(),
                                 sorted_idx.end(), thrust::greater<size_t>{}));
  // ... but each segment is descending on its own.
  ASSERT_TRUE(thrust::is_sorted(sorted_idx.begin(), sorted_idx.begin() + 10,
                                thrust::greater<size_t>{}));
  ASSERT_TRUE(thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(),
                                thrust::greater<size_t>{}));
}
198
namespace {
// Reference signed 64-bit atomicAdd, implemented by bit-casting to the
// unsigned 64-bit overload (two's-complement addition is bit-identical).
// Used below to validate dh::AtomicAdd64As32.
// NOTE(review): the reinterpret_cast punning of the value arguments is
// type punning — presumably fine under nvcc, but worth confirming.
XGBOOST_DEV_INLINE int64_t atomicAdd(int64_t *dst, int64_t src) { // NOLINT
  uint64_t* u_dst = reinterpret_cast<uint64_t*>(dst);
  uint64_t u_src = *reinterpret_cast<uint64_t*>(&src);
  uint64_t ret = ::atomicAdd(u_dst, u_src);
  return *reinterpret_cast<int64_t*>(&ret);
}
}  // namespace
208
// Compares dh::AtomicAdd64As32 against the reference 64-bit atomicAdd
// defined above on three input classes: mixed-sign small values, large
// positive sums, and large negative sums.
void TestAtomicAdd() {
  constexpr size_t kElements = 1024;
  dh::device_vector<int64_t> sum_split(1, 0);  // accumulated via AtomicAdd64As32
  auto d_sum_split = sum_split.data().get();

  dh::device_vector<int64_t> sum_ref(1, 0);    // accumulated via reference atomicAdd
  auto d_sum_ref = sum_ref.data().get();

  /**
   * Alternating positive/negative values that fit in 32 bits.
   */
  std::vector<int64_t> h_inputs(kElements);
  for (size_t i = 0; i < h_inputs.size(); ++i) {
    h_inputs[i] = (i % 2 == 0) ? static_cast<int64_t>(i) : -static_cast<int64_t>(i);
  }
  dh::device_vector<int64_t> inputs(h_inputs);
  auto d_inputs = inputs.data().get();

  dh::LaunchN(kElements, [=] __device__(size_t i) {
    dh::AtomicAdd64As32(d_sum_split, d_inputs[i]);
    atomicAdd(d_sum_ref, d_inputs[i]);
  });
  ASSERT_EQ(sum_split[0], sum_ref[0]);

  /**
   * Positive values whose sum exceeds 32 bits.
   */
  thrust::fill(inputs.begin(), inputs.end(),
               (std::numeric_limits<uint32_t>::max() / 2));
  thrust::fill(sum_split.begin(), sum_split.end(), 0);
  thrust::fill(sum_ref.begin(), sum_ref.end(), 0);
  dh::LaunchN(kElements, [=] __device__(size_t i) {
    dh::AtomicAdd64As32(d_sum_split, d_inputs[i]);
    atomicAdd(d_sum_ref, d_inputs[i]);
  });
  ASSERT_EQ(sum_split[0], sum_ref[0]);
  ASSERT_GT(sum_split[0], std::numeric_limits<uint32_t>::max());
  CHECK_EQ(thrust::reduce(inputs.begin(), inputs.end(), int64_t(0)), sum_split[0]);

  /**
   * Negative values whose sum does not fit into a 32-bit integer.
   */
  thrust::fill(inputs.begin(), inputs.end(),
               (std::numeric_limits<int32_t>::min() / 2));
  thrust::fill(sum_split.begin(), sum_split.end(), 0);
  thrust::fill(sum_ref.begin(), sum_ref.end(), 0);
  dh::LaunchN(kElements, [=] __device__(size_t i) {
    dh::AtomicAdd64As32(d_sum_split, d_inputs[i]);
    atomicAdd(d_sum_ref, d_inputs[i]);
  });
  ASSERT_EQ(sum_split[0], sum_ref[0]);
  ASSERT_LT(sum_split[0], std::numeric_limits<int32_t>::min());
  CHECK_EQ(thrust::reduce(inputs.begin(), inputs.end(), int64_t(0)), sum_split[0]);
}
263
// Signed 64-bit atomic adds must match the reference implementation.
TEST(AtomicAdd, Int64) { TestAtomicAdd(); }
267 } // namespace xgboost
268