1 /*!
2  * Copyright 2019-2021 by XGBoost Contributors
3  *
4  * \file data.cu
5  * \brief Handles setting metainfo from array interface.
6  */
7 #include "xgboost/data.h"
8 #include "xgboost/logging.h"
9 #include "xgboost/json.h"
10 #include "array_interface.h"
11 #include "../common/device_helpers.cuh"
12 #include "device_adapter.cuh"
13 #include "simple_dmatrix.h"
14 
15 namespace xgboost {
16 
CopyInfoImpl(ArrayInterface column,HostDeviceVector<float> * out)17 void CopyInfoImpl(ArrayInterface column, HostDeviceVector<float>* out) {
18   auto SetDeviceToPtr = [](void* ptr) {
19     cudaPointerAttributes attr;
20     dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));
21     int32_t ptr_device = attr.device;
22     if (ptr_device >= 0) {
23       dh::safe_cuda(cudaSetDevice(ptr_device));
24     }
25     return ptr_device;
26   };
27   auto ptr_device = SetDeviceToPtr(column.data);
28 
29   if (column.num_rows == 0) {
30     return;
31   }
32   out->SetDevice(ptr_device);
33   out->Resize(column.num_rows);
34 
35   auto p_dst = thrust::device_pointer_cast(out->DevicePointer());
36 
37   dh::LaunchN(column.num_rows, [=] __device__(size_t idx) {
38     p_dst[idx] = column.GetElement(idx, 0);
39   });
40 }
41 
42 namespace {
SetDeviceToPtr(void * ptr)43 auto SetDeviceToPtr(void *ptr) {
44   cudaPointerAttributes attr;
45   dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));
46   int32_t ptr_device = attr.device;
47   dh::safe_cuda(cudaSetDevice(ptr_device));
48   return ptr_device;
49 }
50 }  // anonymous namespace
51 
CopyGroupInfoImpl(ArrayInterface column,std::vector<bst_group_t> * out)52 void CopyGroupInfoImpl(ArrayInterface column, std::vector<bst_group_t>* out) {
53   CHECK(column.type != ArrayInterface::kF4 && column.type != ArrayInterface::kF8)
54       << "Expected integer for group info.";
55 
56   auto ptr_device = SetDeviceToPtr(column.data);
57   CHECK_EQ(ptr_device, dh::CurrentDevice());
58   dh::TemporaryArray<bst_group_t> temp(column.num_rows);
59   auto d_tmp = temp.data();
60 
61   dh::LaunchN(column.num_rows, [=] __device__(size_t idx) {
62     d_tmp[idx] = column.GetElement<size_t>(idx, 0);
63   });
64   auto length = column.num_rows;
65   out->resize(length + 1);
66   out->at(0) = 0;
67   thrust::copy(temp.data(), temp.data() + length, out->begin() + 1);
68   std::partial_sum(out->begin(), out->end(), out->begin());
69 }
70 
CopyQidImpl(ArrayInterface array_interface,std::vector<bst_group_t> * p_group_ptr)71 void CopyQidImpl(ArrayInterface array_interface,
72                  std::vector<bst_group_t> *p_group_ptr) {
73   auto &group_ptr_ = *p_group_ptr;
74   auto it = dh::MakeTransformIterator<uint32_t>(
75       thrust::make_counting_iterator(0ul),
76       [array_interface] __device__(size_t i) {
77         return array_interface.GetElement<uint32_t>(i, 0);
78       });
79   dh::caching_device_vector<bool> flag(1);
80   auto d_flag = dh::ToSpan(flag);
81   auto d = SetDeviceToPtr(array_interface.data);
82   dh::LaunchN(1, [=] __device__(size_t) { d_flag[0] = true; });
83   dh::LaunchN(array_interface.num_rows - 1, [=] __device__(size_t i) {
84     if (array_interface.GetElement<uint32_t>(i, 0) >
85         array_interface.GetElement<uint32_t>(i + 1, 0)) {
86       d_flag[0] = false;
87     }
88   });
89   bool non_dec = true;
90   dh::safe_cuda(cudaMemcpy(&non_dec, flag.data().get(), sizeof(bool),
91                            cudaMemcpyDeviceToHost));
92   CHECK(non_dec) << "`qid` must be sorted in increasing order along with data.";
93   size_t bytes = 0;
94   dh::caching_device_vector<uint32_t> out(array_interface.num_rows);
95   dh::caching_device_vector<uint32_t> cnt(array_interface.num_rows);
96   HostDeviceVector<int> d_num_runs_out(1, 0, d);
97   cub::DeviceRunLengthEncode::Encode(
98       nullptr, bytes, it, out.begin(), cnt.begin(),
99       d_num_runs_out.DevicePointer(), array_interface.num_rows);
100   dh::caching_device_vector<char> tmp(bytes);
101   cub::DeviceRunLengthEncode::Encode(
102       tmp.data().get(), bytes, it, out.begin(), cnt.begin(),
103       d_num_runs_out.DevicePointer(), array_interface.num_rows);
104 
105   auto h_num_runs_out = d_num_runs_out.HostSpan()[0];
106   group_ptr_.clear();
107   group_ptr_.resize(h_num_runs_out + 1, 0);
108   dh::XGBCachingDeviceAllocator<char> alloc;
109   thrust::inclusive_scan(thrust::cuda::par(alloc), cnt.begin(),
110                          cnt.begin() + h_num_runs_out, cnt.begin());
111   thrust::copy(cnt.begin(), cnt.begin() + h_num_runs_out,
112                group_ptr_.begin() + 1);
113 }
114 
115 namespace {
116 // thrust::all_of tries to copy lambda function.
117 struct LabelsCheck {
operator ()xgboost::__anon2e69fb450311::LabelsCheck118   __device__ bool operator()(float y) { return ::isnan(y) || ::isinf(y); }
119 };
120 struct WeightsCheck {
operator ()xgboost::__anon2e69fb450311::WeightsCheck121   __device__ bool operator()(float w) { return LabelsCheck{}(w) || w < 0; }  // NOLINT
122 };
123 }  // anonymous namespace
124 
SetInfo(const char * c_key,std::string const & interface_str)125 void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
126   Json j_interface = Json::Load({interface_str.c_str(), interface_str.size()});
127   auto const& j_arr = get<Array>(j_interface);
128   CHECK_EQ(j_arr.size(), 1)
129       << "MetaInfo: " << c_key << ". " << ArrayInterfaceErrors::Dimension(1);
130   ArrayInterface array_interface(interface_str);
131   std::string key{c_key};
132   if (!((array_interface.num_cols == 1 && array_interface.num_rows == 0) ||
133         (array_interface.num_cols == 0 && array_interface.num_rows == 1))) {
134     // Not an empty column, transform it.
135     array_interface.AsColumnVector();
136   }
137 
138   CHECK(!array_interface.valid.Data())
139       << "Meta info " << key << " should be dense, found validity mask";
140   if (array_interface.num_rows == 0) {
141     return;
142   }
143 
144   if (key == "label") {
145     CopyInfoImpl(array_interface, &labels_);
146     auto ptr = labels_.ConstDevicePointer();
147     auto valid = thrust::none_of(thrust::device, ptr, ptr + labels_.Size(),
148                                  LabelsCheck{});
149     CHECK(valid) << "Label contains NaN, infinity or a value too large.";
150   } else if (key == "weight") {
151     CopyInfoImpl(array_interface, &weights_);
152     auto ptr = weights_.ConstDevicePointer();
153     auto valid = thrust::none_of(thrust::device, ptr, ptr + weights_.Size(),
154                                  WeightsCheck{});
155     CHECK(valid) << "Weights must be positive values.";
156   } else if (key == "base_margin") {
157     CopyInfoImpl(array_interface, &base_margin_);
158   } else if (key == "group") {
159     CopyGroupInfoImpl(array_interface, &group_ptr_);
160     return;
161   } else if (key == "qid") {
162     CopyQidImpl(array_interface, &group_ptr_);
163     return;
164   } else if (key == "label_lower_bound") {
165     CopyInfoImpl(array_interface, &labels_lower_bound_);
166     return;
167   } else if (key == "label_upper_bound") {
168     CopyInfoImpl(array_interface, &labels_upper_bound_);
169     return;
170   } else if (key == "feature_weights") {
171     CopyInfoImpl(array_interface, &feature_weigths);
172     auto d_feature_weights = feature_weigths.ConstDeviceSpan();
173     auto valid = thrust::none_of(
174         thrust::device, d_feature_weights.data(),
175         d_feature_weights.data() + d_feature_weights.size(), WeightsCheck{});
176     CHECK(valid) << "Feature weight must be greater than 0.";
177     return;
178   } else {
179     LOG(FATAL) << "Unknown metainfo: " << key;
180   }
181 }
182 
183 template <typename AdapterT>
Create(AdapterT * adapter,float missing,int nthread,const std::string & cache_prefix)184 DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
185                          const std::string& cache_prefix) {
186   CHECK_EQ(cache_prefix.size(), 0)
187       << "Device memory construction is not currently supported with external "
188          "memory.";
189   return new data::SimpleDMatrix(adapter, missing, nthread);
190 }
191 
192 template DMatrix* DMatrix::Create<data::CudfAdapter>(
193     data::CudfAdapter* adapter, float missing, int nthread,
194     const std::string& cache_prefix);
195 template DMatrix* DMatrix::Create<data::CupyAdapter>(
196     data::CupyAdapter* adapter, float missing, int nthread,
197     const std::string& cache_prefix);
198 }  // namespace xgboost
199