1 /*!
2 * Copyright 2017-2020 XGBoost contributors
3 */
4 #include <gtest/gtest.h>
5 #include <vector>
6 #include <thread>
7 #include "helpers.h"
8 #include <dmlc/filesystem.h>
9
10 #include <xgboost/learner.h>
11 #include <xgboost/version_config.h>
12 #include "xgboost/json.h"
13 #include "../../src/common/io.h"
14 #include "../../src/common/random.h"
15
16 namespace xgboost {
17
TEST(Learner, Basic) {
  // Smoke test: a learner can be created from a random DMatrix and accepts a
  // basic training parameter without error.
  auto dmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix();
  std::unique_ptr<Learner> learner{Learner::Create({dmat})};
  learner->SetParams(Args{{"tree_method", "exact"}});

  // The version macros must expand to integral constants, not strings.
  auto ver_major = XGBOOST_VER_MAJOR;
  auto ver_minor = XGBOOST_VER_MINOR;
  auto ver_patch = XGBOOST_VER_PATCH;

  static_assert(std::is_integral<decltype(ver_major)>::value, "Wrong major version type");
  static_assert(std::is_integral<decltype(ver_minor)>::value, "Wrong minor version type");
  static_assert(std::is_integral<decltype(ver_patch)>::value, "Wrong patch version type");
}
34
TEST(Learner, ParameterValidation) {
  // With validate_parameters enabled, unknown parameters are reported on
  // stderr during Configure().
  ConsoleLogger::Configure({{"verbosity", "2"}});
  size_t constexpr kRows = 1;
  size_t constexpr kCols = 1;
  auto p_mat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();

  std::unique_ptr<Learner> learner{Learner::Create({p_mat})};
  learner->SetParam("validate_parameters", "1");
  learner->SetParam("Knock-Knock", "Who's-there?");
  learner->SetParam("Silence", "....");
  learner->SetParam("tree_method", "exact");

  testing::internal::CaptureStderr();
  learner->Configure();
  auto const captured = testing::internal::GetCapturedStderr();

  // Both bogus parameters must appear in the warning message.
  ASSERT_NE(captured.find(R"(Parameters: { "Knock-Knock", "Silence" })"), std::string::npos);

  // A parameter name containing whitespace is rejected outright.
  learner->SetParam("tree method", "exact");
  EXPECT_THROW(learner->Configure(), dmlc::Error);
}
57
TEST(Learner, CheckGroup) {
  // Ranking objectives require the group sizes to sum to the row count.
  size_t constexpr kNumGroups = 4;
  size_t constexpr kNumRows = 17;
  bst_feature_t constexpr kNumCols = 15;

  std::shared_ptr<DMatrix> p_mat{
      RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
  std::vector<bst_float> weight(kNumGroups);  // zero-initialized group weights
  std::vector<bst_int> group = {2, 3, 7, 5};  // sums to kNumRows
  std::vector<bst_float> labels(kNumRows);
  for (size_t i = 0; i < kNumRows; ++i) {
    labels[i] = i % 2;
  }

  p_mat->Info().SetInfo(
      "weight", static_cast<void*>(weight.data()), DataType::kFloat32, kNumGroups);
  p_mat->Info().SetInfo(
      "group", group.data(), DataType::kUInt32, kNumGroups);
  p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);

  std::unique_ptr<Learner> learner{Learner::Create({p_mat})};
  learner->SetParams(Args{{"objective", "rank:pairwise"}});
  // Consistent grouping: training succeeds.
  EXPECT_NO_THROW(learner->UpdateOneIter(0, p_mat));

  // Five groups whose sizes no longer partition the rows consistently with
  // the previous setup: training must fail.
  group = {2, 3, 7, 4, 1};
  p_mat->Info().SetInfo("group", group.data(), DataType::kUInt32, kNumGroups + 1);
  EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
}
94
TEST(Learner, SLOW_CheckMultiBatch) {  // NOLINT
  // Create sufficiently large data so the external-memory DMatrix is split
  // into more than one row page, then train one iteration over it.
  dmlc::TemporaryDirectory tempdir;
  std::string const tmp_file = tempdir.path + "/big.libsvm";
  CreateBigTestData(tmp_file, 50000);
  std::shared_ptr<DMatrix> dmat{xgboost::DMatrix::Load(
      tmp_file + "#" + tmp_file + ".cache", true, false, "auto")};
  EXPECT_FALSE(dmat->SingleColBlock());

  size_t const n_rows = dmat->Info().num_row_;
  std::vector<bst_float> labels(n_rows);
  for (size_t i = 0; i < n_rows; ++i) {
    labels[i] = i % 2;
  }
  dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, n_rows);

  auto learner = std::unique_ptr<Learner>(Learner::Create({dmat}));
  learner->SetParams(Args{{"objective", "binary:logistic"}});
  learner->UpdateOneIter(0, dmat);
}
114
TEST(Learner, Configuration) {
  // `eval_metric` is handled specially and must not leak into the stored
  // configuration arguments, whether set one at a time or in a batch.
  std::string const emetric = "eval_metric";
  {
    std::unique_ptr<Learner> learner{Learner::Create({nullptr})};
    learner->SetParam(emetric, "auc");
    learner->SetParam(emetric, "rmsle");
    learner->SetParam("foo", "bar");

    auto cfg = learner->GetConfigurationArguments();
    ASSERT_EQ(cfg.size(), 1ul);
    ASSERT_TRUE(cfg.find(emetric) == cfg.cend());
    ASSERT_EQ(cfg.at("foo"), "bar");
  }

  {
    std::unique_ptr<Learner> learner{Learner::Create({nullptr})};
    learner->SetParams({{"foo", "bar"}, {emetric, "auc"}, {emetric, "entropy"}, {emetric, "KL"}});
    auto cfg = learner->GetConfigurationArguments();
    ASSERT_EQ(cfg.size(), 1ul);
    ASSERT_EQ(cfg.at("foo"), "bar");
  }
}
138
TEST(Learner, JsonModelIO) {
  // Round-trip the model through JSON and compare the Json objects directly.
  size_t constexpr kRows = 8;
  int32_t constexpr kIters = 4;

  std::shared_ptr<DMatrix> p_dmat{
      RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()};
  p_dmat->Info().labels_.Resize(kRows);
  CHECK_NE(p_dmat->Info().num_col_, 0);

  {
    // Save -> write to disk -> read back -> load -> save again; the two
    // serialized models must be identical.
    std::unique_ptr<Learner> learner { Learner::Create({p_dmat}) };
    learner->Configure();
    Json out { Object() };
    learner->SaveModel(&out);

    dmlc::TemporaryDirectory tmpdir;

    std::ofstream fout (tmpdir.path + "/model.json");
    fout << out;
    fout.close();  // flush before reading the file back

    auto loaded_str = common::LoadSequentialFile(tmpdir.path + "/model.json");
    Json loaded = Json::Load(StringView{loaded_str.c_str(), loaded_str.size()});

    learner->LoadModel(loaded);
    learner->Configure();

    Json new_in { Object() };
    learner->SaveModel(&new_in);
    ASSERT_EQ(new_in, out);
  }

  {
    // Same round-trip (in memory only) after actual training, and with a
    // user attribute set; the attribute must survive the round-trip.
    std::unique_ptr<Learner> learner { Learner::Create({p_dmat}) };
    for (int32_t iter = 0; iter < kIters; ++iter) {
      learner->UpdateOneIter(iter, p_dmat);
    }
    learner->SetAttr("best_score", "15.2");

    Json out { Object() };
    learner->SaveModel(&out);

    learner->LoadModel(out);
    Json new_in { Object() };
    learner->Configure();
    learner->SaveModel(&new_in);

    // Exactly the one attribute set above is serialized.
    ASSERT_TRUE(IsA<Object>(out["learner"]["attributes"]));
    ASSERT_EQ(get<Object>(out["learner"]["attributes"]).size(), 1ul);
    ASSERT_EQ(out, new_in);
  }
}
192
// Crashes the test runner if there are race conditions.
194 //
195 // Build with additional cmake flags to enable thread sanitizer
196 // which definitely catches problems. Note that OpenMP needs to be
197 // disabled, otherwise thread sanitizer will also report false
198 // positives.
199 //
200 // ```
201 // -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=thread -DUSE_OPENMP=OFF
202 // ```
TEST(Learner, MultiThreadedPredict) {
  // Hammer Predict() from many threads concurrently; see the comment above
  // for how to build with thread sanitizer to make races deterministic.
  size_t constexpr kRows = 1000;
  size_t constexpr kCols = 100;

  std::shared_ptr<DMatrix> p_dmat{
      RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
  p_dmat->Info().labels_.Resize(kRows);
  CHECK_NE(p_dmat->Info().num_col_, 0);

  // Separate matrix used only for prediction, shared across all threads.
  std::shared_ptr<DMatrix> p_data{
      RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
  CHECK_NE(p_data->Info().num_col_, 0);

  std::shared_ptr<Learner> learner{Learner::Create({p_dmat})};
  learner->Configure();

  std::vector<std::thread> threads;
  // Oversubscribe: twice as many threads as hardware concurrency.
  for (uint32_t thread_id = 0;
       thread_id < 2 * std::thread::hardware_concurrency(); ++thread_id) {
    // Capture shared_ptrs by value so each thread shares ownership.
    threads.emplace_back([learner, p_data] {
      size_t constexpr kIters = 10;
      // Per-thread prediction cache entry provided by the learner.
      auto &entry = learner->GetThreadLocal().prediction_entry;
      HostDeviceVector<float> predictions;
      for (size_t iter = 0; iter < kIters; ++iter) {
        learner->Predict(p_data, false, &entry.predictions, 0, 0);

        learner->Predict(p_data, false, &predictions, 0, 0, false, true);  // leaf
        learner->Predict(p_data, false, &predictions, 0, 0, false, false, true);  // contribs
      }
    });
  }
  for (auto &thread : threads) {
    thread.join();
  }
}
238
TEST(Learner, BinaryModelIO) {
  // Round-trip a trained model through the binary serialization format; the
  // configured eval metric must survive, and loading must emit no warnings.
  size_t constexpr kRows = 8;
  int32_t constexpr kIters = 4;
  auto p_dmat = RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix();
  p_dmat->Info().labels_.Resize(kRows);

  std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
  learner->SetParam("eval_metric", "rmsle");
  learner->Configure();
  for (int32_t iter = 0; iter < kIters; ++iter) {
    learner->UpdateOneIter(iter, p_dmat);
  }
  dmlc::TemporaryDirectory tempdir;
  // Fix: include the path separator so the model file lives *inside* the
  // temporary directory (and is removed with it).  The previous
  // `tempdir.path + "binary_model_io.bin"` created an un-cleaned sibling file
  // whose name was the directory path with the suffix appended.
  std::string const fname = tempdir.path + "/binary_model_io.bin";
  {
    // Scope the stream so the write is flushed and closed before loading.
    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
    learner->SaveModel(fo.get());
  }

  // Load into a fresh learner and inspect its saved configuration.
  learner.reset(Learner::Create({p_dmat}));
  std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
  learner->LoadModel(fi.get());
  learner->Configure();
  Json config { Object() };
  learner->SaveConfig(&config);
  std::string config_str;
  Json::Dump(config, &config_str);
  ASSERT_NE(config_str.find("rmsle"), std::string::npos);
  ASSERT_EQ(config_str.find("WARNING"), std::string::npos);
}
270
271 #if defined(XGBOOST_USE_CUDA)
272 // Tests for automatic GPU configuration.
// Each sub-block checks what `gpu_id` the learner ends up with after one
// training iteration under a different parameter combination.
TEST(Learner, GPUConfiguration) {
  using Arg = std::pair<std::string, std::string>;
  size_t constexpr kRows = 10;
  auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatrix();
  std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
  std::vector<bst_float> labels(kRows);
  for (size_t i = 0; i < labels.size(); ++i) {
    labels[i] = i;
  }
  p_dmat->Info().labels_.HostVector() = labels;
  {
    // GPU linear updater selects device 0 automatically.
    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"booster", "gblinear"},
                        Arg{"updater", "gpu_coord_descent"}});
    learner->UpdateOneIter(0, p_dmat);
    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
  }
  {
    // gpu_hist selects device 0 automatically.
    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"tree_method", "gpu_hist"}});
    learner->UpdateOneIter(0, p_dmat);
    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
  }
  {
    // NOTE(review): even with an explicit gpu_id of -1, gpu_hist is expected
    // to end up on device 0 — presumably because the GPU algorithm requires a
    // device; confirm against the configuration logic if this changes.
    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"tree_method", "gpu_hist"},
                        Arg{"gpu_id", "-1"}});
    learner->UpdateOneIter(0, p_dmat);
    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
  }
  {
    // with CPU algorithm: no GPU is configured.
    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"tree_method", "hist"}});
    learner->UpdateOneIter(0, p_dmat);
    ASSERT_EQ(learner->GetGenericParameter().gpu_id, -1);
  }
  {
    // with CPU algorithm, but `gpu_id` takes priority
    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"tree_method", "hist"},
                        Arg{"gpu_id", "0"}});
    learner->UpdateOneIter(0, p_dmat);
    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
  }
  {
    // With CPU algorithm but GPU Predictor, this is to simulate when
    // XGBoost is only used for prediction, so tree method is not
    // specified.
    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"tree_method", "hist"},
                        Arg{"predictor", "gpu_predictor"}});
    learner->UpdateOneIter(0, p_dmat);
    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
  }
}
329 #endif // defined(XGBOOST_USE_CUDA)
330
TEST(Learner, Seed) {
  // Extreme int64 seeds must round-trip unchanged through
  // SetParam -> Configure -> SaveConfig.
  auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix();
  std::unique_ptr<Learner> learner{Learner::Create({m})};

  // Reads back the seed string from a freshly saved configuration.
  auto saved_seed = [&learner]() {
    Json config{Object()};
    learner->SaveConfig(&config);
    return get<String>(config["learner"]["generic_param"]["seed"]);
  };

  auto seed = std::numeric_limits<int64_t>::max();
  learner->SetParam("seed", std::to_string(seed));
  learner->Configure();
  ASSERT_EQ(std::to_string(seed), saved_seed());

  seed = std::numeric_limits<int64_t>::min();
  learner->SetParam("seed", std::to_string(seed));
  learner->Configure();
  ASSERT_EQ(std::to_string(seed), saved_seed());
}
351
TEST(Learner, ConstantSeed) {
  // Configure() seeds the global RNG once; reconfiguring must not reseed it,
  // and reseeding with the default seed must replay the same sequence.
  auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true);
  std::unique_ptr<Learner> learner{Learner::Create({m})};
  learner->Configure();  // seed the global random

  auto& rng = common::GlobalRandom();
  std::uniform_real_distribution<float> dist;
  float const first_draw = dist(rng);

  learner->SetParam("", "");
  learner->Configure();  // check configure doesn't change the seed.
  float const second_draw = dist(rng);
  CHECK_NE(first_draw, second_draw);

  {
    rng.seed(GenericParameter::kDefaultSeed);
    // Fresh distribution so no buffered state from the draws above leaks in.
    std::uniform_real_distribution<float> fresh_dist;
    float const replayed = fresh_dist(rng);
    CHECK_EQ(first_draw, replayed);
  }
}
373
TEST(Learner, FeatureInfo) {
  // Feature names and types set on a learner must be readable back and must
  // survive a JSON model round-trip.
  //
  // Fix: removed the unused `c_names`/`c_types` vectors of `char const*`; the
  // `c_types` loop also had a copy-paste bug, filling it from `names` instead
  // of `types`.  Nothing below consumed either vector.
  size_t constexpr kCols = 10;
  auto m = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true);

  std::vector<std::string> names(kCols);
  for (size_t i = 0; i < kCols; ++i) {
    names[i] = ("f" + std::to_string(i));
  }

  // Mostly quantitative features with a few overridden type codes.
  std::vector<std::string> types(kCols, "q");
  types[8] = "f";
  types[0] = "int";
  types[3] = "i";
  types[7] = "i";

  std::vector<std::string> out_names;
  std::vector<std::string> out_types;

  Json model{Object()};
  {
    // Set and immediately read back, then save the model.
    std::unique_ptr<Learner> learner{Learner::Create({m})};
    learner->Configure();
    learner->SetFeatureNames(names);
    learner->GetFeatureNames(&out_names);

    learner->SetFeatureTypes(types);
    learner->GetFeatureTypes(&out_types);

    ASSERT_TRUE(std::equal(out_names.begin(), out_names.end(), names.begin()));
    ASSERT_TRUE(std::equal(out_types.begin(), out_types.end(), types.begin()));

    learner->SaveModel(&model);
  }

  {
    // Feature info must be restored from the serialized model.
    std::unique_ptr<Learner> learner{Learner::Create({m})};
    learner->LoadModel(model);

    learner->GetFeatureNames(&out_names);
    learner->GetFeatureTypes(&out_types);
    ASSERT_TRUE(std::equal(out_names.begin(), out_names.end(), names.begin()));
    ASSERT_TRUE(std::equal(out_types.begin(), out_types.end(), types.begin()));
  }
}
429 } // namespace xgboost
430