1 /*!
2  * Copyright 2017-2020 XGBoost contributors
3  */
4 #include <gtest/gtest.h>
5 #include <vector>
6 #include <thread>
7 #include "helpers.h"
8 #include <dmlc/filesystem.h>
9 
10 #include <xgboost/learner.h>
11 #include <xgboost/version_config.h>
12 #include "xgboost/json.h"
13 #include "../../src/common/io.h"
14 #include "../../src/common/random.h"
15 
16 namespace xgboost {
17 
// Smoke test: a learner is created from a small dense DMatrix and accepts a
// parameter list.  The version macros are additionally checked, at compile
// time, to be of integral type.
TEST(Learner, Basic) {
  using Arg = std::pair<std::string, std::string>;

  auto dmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix();
  std::unique_ptr<Learner> learner{Learner::Create({dmat})};
  learner->SetParams({Arg{"tree_method", "exact"}});

  // Bind the macros to locals so that `decltype` sees a plain object type.
  auto major = XGBOOST_VER_MAJOR;
  auto minor = XGBOOST_VER_MINOR;
  auto patch = XGBOOST_VER_PATCH;

  static_assert(std::is_integral<decltype(major)>::value,
                "Wrong major version type");
  static_assert(std::is_integral<decltype(minor)>::value,
                "Wrong minor version type");
  static_assert(std::is_integral<decltype(patch)>::value,
                "Wrong patch version type");
}
34 
// With `validate_parameters` enabled, the unknown parameter names must show
// up in the stderr output produced by Configure(), and a parameter name that
// contains whitespace must make Configure() throw dmlc::Error.
TEST(Learner, ParameterValidation) {
  ConsoleLogger::Configure({{"verbosity", "2"}});
  constexpr size_t kRows = 1;
  constexpr size_t kCols = 1;
  auto p_mat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();

  std::unique_ptr<Learner> learner{Learner::Create({p_mat})};
  learner->SetParam("validate_parameters", "1");
  learner->SetParam("Knock-Knock", "Who's-there?");
  learner->SetParam("Silence", "....");
  learner->SetParam("tree_method", "exact");

  // Only the output emitted while configuring is of interest.
  testing::internal::CaptureStderr();
  learner->Configure();
  std::string const captured = testing::internal::GetCapturedStderr();

  ASSERT_TRUE(captured.find(R"(Parameters: { "Knock-Knock", "Silence" })") !=
              std::string::npos);

  // A key with embedded whitespace.
  learner->SetParam("tree method", "exact");
  EXPECT_THROW(learner->Configure(), dmlc::Error);
}
57 
// Trains one iteration of a ranking objective with consistent group/weight
// meta info, then changes the group layout and expects the update to throw.
TEST(Learner, CheckGroup) {
  using Arg = std::pair<std::string, std::string>;
  size_t constexpr kNumGroups = 4;
  size_t constexpr kNumRows = 17;
  bst_feature_t constexpr kNumCols = 15;

  std::shared_ptr<DMatrix> p_mat{
      RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
  std::vector<bst_float> weight(kNumGroups);
  // The buffer is handed to SetInfo as DataType::kUInt32, so the element type
  // is unsigned 32-bit to match what is declared.
  std::vector<uint32_t> group(kNumGroups);
  // Group sizes sum to kNumRows: 2 + 3 + 7 + 5 == 17.
  group[0] = 2;
  group[1] = 3;
  group[2] = 7;
  group[3] = 5;
  std::vector<bst_float> labels(kNumRows);
  for (size_t i = 0; i < kNumRows; ++i) {
    labels[i] = i % 2;
  }

  p_mat->Info().SetInfo("weight", weight.data(), DataType::kFloat32, kNumGroups);
  p_mat->Info().SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
  p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);

  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
  learner->SetParams({Arg{"objective", "rank:pairwise"}});
  EXPECT_NO_THROW(learner->UpdateOneIter(0, p_mat));

  // Split the last group in two.  The sizes still sum to kNumRows
  // (2 + 3 + 7 + 4 + 1 == 17), but there are now kNumGroups + 1 groups while
  // only kNumGroups weights were set -- presumably the mismatch that triggers
  // the exception.
  group.resize(kNumGroups + 1);
  group[3] = 4;
  group[4] = 1;
  p_mat->Info().SetInfo("group", group.data(), DataType::kUInt32, kNumGroups + 1);
  EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
}
94 
// Loads a generated libsvm file through a cache-backed URI, checks the matrix
// is not a single column block, and runs one boosting iteration on it.
TEST(Learner, SLOW_CheckMultiBatch) {  // NOLINT
  // Create sufficiently large data to make two row pages
  dmlc::TemporaryDirectory tempdir;
  std::string const data_path = tempdir.path + "/big.libsvm";
  CreateBigTestData(data_path, 50000);

  // `<file>#<cache prefix>` URI.
  std::string const uri = data_path + "#" + data_path + ".cache";
  std::shared_ptr<DMatrix> dmat(xgboost::DMatrix::Load(uri, true, false, "auto"));
  EXPECT_FALSE(dmat->SingleColBlock());

  // Alternating 0/1 labels, one per row.
  size_t const n_rows = dmat->Info().num_row_;
  std::vector<bst_float> labels(n_rows);
  for (size_t row = 0; row < n_rows; ++row) {
    labels[row] = row % 2;
  }
  dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, n_rows);

  std::vector<std::shared_ptr<DMatrix>> cache{dmat};
  std::unique_ptr<Learner> learner{Learner::Create(cache)};
  learner->SetParams(Args{{"objective", "binary:logistic"}});
  learner->UpdateOneIter(0, dmat);
}
114 
// `eval_metric` entries must not appear among the configuration arguments,
// whether they are set one at a time or in a single SetParams() call.
TEST(Learner, Configuration) {
  std::string const emetric = "eval_metric";

  {
    // Parameters set individually.
    std::unique_ptr<Learner> learner{Learner::Create({nullptr})};
    learner->SetParam(emetric, "auc");
    learner->SetParam(emetric, "rmsle");
    learner->SetParam("foo", "bar");

    // eval_metric is not part of configuration
    auto config_args = learner->GetConfigurationArguments();
    ASSERT_EQ(config_args.size(), 1ul);
    ASSERT_EQ(config_args.find(emetric), config_args.cend());
    ASSERT_EQ(config_args.at("foo"), "bar");
  }

  {
    // Parameters set in bulk, with repeated metric keys.
    std::unique_ptr<Learner> learner{Learner::Create({nullptr})};
    learner->SetParams({{"foo", "bar"},
                        {emetric, "auc"},
                        {emetric, "entropy"},
                        {emetric, "KL"}});
    auto config_args = learner->GetConfigurationArguments();
    ASSERT_EQ(config_args.size(), 1ul);
    ASSERT_EQ(config_args.at("foo"), "bar");
  }
}
138 
// Saves a model as JSON, loads it back, saves again, and requires the two
// JSON documents to compare equal -- first for a learner that has only been
// configured, then for one that has been trained and carries an attribute.
TEST(Learner, JsonModelIO) {
  size_t constexpr kRows = 8;
  int32_t constexpr kIters = 4;

  std::shared_ptr<DMatrix> p_dmat{
      RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()};
  p_dmat->Info().labels_.Resize(kRows);
  CHECK_NE(p_dmat->Info().num_col_, 0);

  {
    // Configured-only learner, round-tripped through a file on disk.
    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
    learner->Configure();
    Json first_save{Object()};
    learner->SaveModel(&first_save);

    dmlc::TemporaryDirectory tmpdir;
    std::string const model_path = tmpdir.path + "/model.json";
    {
      // Scope the stream so the file is closed before it is read back.
      std::ofstream fout(model_path);
      fout << first_save;
    }

    auto raw = common::LoadSequentialFile(model_path);
    Json parsed = Json::Load(StringView{raw.c_str(), raw.size()});

    learner->LoadModel(parsed);
    learner->Configure();

    Json second_save{Object()};
    learner->SaveModel(&second_save);
    ASSERT_EQ(second_save, first_save);
  }

  {
    // Trained learner with one attribute, round-tripped in memory.
    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
    for (int32_t round = 0; round < kIters; ++round) {
      learner->UpdateOneIter(round, p_dmat);
    }
    learner->SetAttr("best_score", "15.2");

    Json first_save{Object()};
    learner->SaveModel(&first_save);

    learner->LoadModel(first_save);
    Json second_save{Object()};
    learner->Configure();
    learner->SaveModel(&second_save);

    auto const& attributes = first_save["learner"]["attributes"];
    ASSERT_TRUE(IsA<Object>(attributes));
    ASSERT_EQ(get<Object>(first_save["learner"]["attributes"]).size(), 1ul);
    ASSERT_EQ(first_save, second_save);
  }
}
192 
193 // Crashes the test runner if there are race condiditions.
194 //
195 // Build with additional cmake flags to enable thread sanitizer
196 // which definitely catches problems. Note that OpenMP needs to be
197 // disabled, otherwise thread sanitizer will also report false
198 // positives.
199 //
200 // ```
201 // -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=thread -DUSE_OPENMP=OFF
202 // ```
TEST(Learner,MultiThreadedPredict)203 TEST(Learner, MultiThreadedPredict) {
204   size_t constexpr kRows = 1000;
205   size_t constexpr kCols = 100;
206 
207   std::shared_ptr<DMatrix> p_dmat{
208       RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
209   p_dmat->Info().labels_.Resize(kRows);
210   CHECK_NE(p_dmat->Info().num_col_, 0);
211 
212   std::shared_ptr<DMatrix> p_data{
213       RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
214   CHECK_NE(p_data->Info().num_col_, 0);
215 
216   std::shared_ptr<Learner> learner{Learner::Create({p_dmat})};
217   learner->Configure();
218 
219   std::vector<std::thread> threads;
220   for (uint32_t thread_id = 0;
221        thread_id < 2 * std::thread::hardware_concurrency(); ++thread_id) {
222     threads.emplace_back([learner, p_data] {
223       size_t constexpr kIters = 10;
224       auto &entry = learner->GetThreadLocal().prediction_entry;
225       HostDeviceVector<float> predictions;
226       for (size_t iter = 0; iter < kIters; ++iter) {
227         learner->Predict(p_data, false, &entry.predictions, 0, 0);
228 
229         learner->Predict(p_data, false, &predictions, 0, 0, false, true);  // leaf
230         learner->Predict(p_data, false, &predictions, 0, 0, false, false, true);  // contribs
231       }
232     });
233   }
234   for (auto &thread : threads) {
235     thread.join();
236   }
237 }
238 
// Trains a few iterations, writes the model through a binary dmlc stream,
// loads it into a fresh learner and checks that the saved configuration still
// mentions the `rmsle` metric and contains no "WARNING" text.
TEST(Learner, BinaryModelIO) {
  size_t constexpr kRows = 8;
  int32_t constexpr kIters = 4;
  auto p_dmat = RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix();
  p_dmat->Info().labels_.Resize(kRows);

  std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
  learner->SetParam("eval_metric", "rmsle");
  learner->Configure();
  for (int32_t iter = 0; iter < kIters; ++iter) {
    learner->UpdateOneIter(iter, p_dmat);
  }
  dmlc::TemporaryDirectory tempdir;
  // The separator keeps the file inside the temporary directory; without it
  // the name is merely appended to the directory path and the file ends up
  // next to the directory instead of in it.
  std::string const fname = tempdir.path + "/binary_model_io.bin";
  {
    // Make sure the write is complete before loading.
    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
    learner->SaveModel(fo.get());
  }

  learner.reset(Learner::Create({p_dmat}));
  std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
  learner->LoadModel(fi.get());
  learner->Configure();
  Json config { Object() };
  learner->SaveConfig(&config);
  std::string config_str;
  Json::Dump(config, &config_str);
  ASSERT_NE(config_str.find("rmsle"), std::string::npos);
  ASSERT_EQ(config_str.find("WARNING"), std::string::npos);
}
270 
271 #if defined(XGBOOST_USE_CUDA)
272 // Tests for automatic GPU configuration.
TEST(Learner,GPUConfiguration)273 TEST(Learner, GPUConfiguration) {
274   using Arg = std::pair<std::string, std::string>;
275   size_t constexpr kRows = 10;
276   auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatrix();
277   std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
278   std::vector<bst_float> labels(kRows);
279   for (size_t i = 0; i < labels.size(); ++i) {
280     labels[i] = i;
281   }
282   p_dmat->Info().labels_.HostVector() = labels;
283   {
284     std::unique_ptr<Learner> learner {Learner::Create(mat)};
285     learner->SetParams({Arg{"booster", "gblinear"},
286                         Arg{"updater", "gpu_coord_descent"}});
287     learner->UpdateOneIter(0, p_dmat);
288     ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
289   }
290   {
291     std::unique_ptr<Learner> learner {Learner::Create(mat)};
292     learner->SetParams({Arg{"tree_method", "gpu_hist"}});
293     learner->UpdateOneIter(0, p_dmat);
294     ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
295   }
296   {
297     std::unique_ptr<Learner> learner {Learner::Create(mat)};
298     learner->SetParams({Arg{"tree_method", "gpu_hist"},
299                         Arg{"gpu_id", "-1"}});
300     learner->UpdateOneIter(0, p_dmat);
301     ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
302   }
303   {
304     // with CPU algorithm
305     std::unique_ptr<Learner> learner {Learner::Create(mat)};
306     learner->SetParams({Arg{"tree_method", "hist"}});
307     learner->UpdateOneIter(0, p_dmat);
308     ASSERT_EQ(learner->GetGenericParameter().gpu_id, -1);
309   }
310   {
311     // with CPU algorithm, but `gpu_id` takes priority
312     std::unique_ptr<Learner> learner {Learner::Create(mat)};
313     learner->SetParams({Arg{"tree_method", "hist"},
314                         Arg{"gpu_id", "0"}});
315     learner->UpdateOneIter(0, p_dmat);
316     ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
317   }
318   {
319     // With CPU algorithm but GPU Predictor, this is to simulate when
320     // XGBoost is only used for prediction, so tree method is not
321     // specified.
322     std::unique_ptr<Learner> learner {Learner::Create(mat)};
323     learner->SetParams({Arg{"tree_method", "hist"},
324                         Arg{"predictor", "gpu_predictor"}});
325     learner->UpdateOneIter(0, p_dmat);
326     ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
327   }
328 }
329 #endif  // defined(XGBOOST_USE_CUDA)
330 
// The extreme int64 values must survive being set as `seed` and read back
// from the saved configuration as the same decimal string.
TEST(Learner, Seed) {
  auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix();
  std::unique_ptr<Learner> learner{Learner::Create({m})};

  // The same learner and config object are reused for both values: largest
  // first, then smallest.
  Json config{Object()};
  for (int64_t seed : {std::numeric_limits<int64_t>::max(),
                       std::numeric_limits<int64_t>::min()}) {
    learner->SetParam("seed", std::to_string(seed));
    learner->Configure();
    learner->SaveConfig(&config);
    ASSERT_EQ(std::to_string(seed),
              get<String>(config["learner"]["generic_param"]["seed"]));
  }
}
351 
// Draws from the global random engine around a second Configure() call, then
// reseeds the engine with the default seed and expects the first draw to be
// reproduced.
TEST(Learner, ConstantSeed) {
  auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true);
  std::unique_ptr<Learner> learner{Learner::Create({m})};
  learner->Configure();  // seed the global random

  auto& engine = common::GlobalRandom();
  std::uniform_real_distribution<float> sampler;
  float const first_draw = sampler(engine);

  learner->SetParam("", "");
  learner->Configure();  // check configure doesn't change the seed.
  float const second_draw = sampler(engine);
  CHECK_NE(first_draw, second_draw);

  // Start over from the default seed with a fresh distribution object.
  engine.seed(GenericParameter::kDefaultSeed);
  std::uniform_real_distribution<float> fresh_sampler;
  float const replayed_draw = fresh_sampler(engine);
  CHECK_EQ(first_draw, replayed_draw);
}
373 
// Feature names and types set on a learner must be returned unchanged by the
// getters, and must still be returned by a second learner after a JSON model
// save/load round trip.
TEST(Learner, FeatureInfo) {
  size_t constexpr kCols = 10;
  auto m = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true);

  // Names are "f0" ... "f9".
  std::vector<std::string> names(kCols);
  for (size_t i = 0; i < kCols; ++i) {
    names[i] = "f" + std::to_string(i);
  }

  // Every type is "q" except for a few overridden entries.
  std::vector<std::string> types(kCols, "q");
  types[8] = "f";
  types[0] = "int";
  types[3] = "i";
  types[7] = "i";

  Json model{Object()};
  {
    std::unique_ptr<Learner> learner{Learner::Create({m})};
    learner->Configure();

    std::vector<std::string> out_names;
    learner->SetFeatureNames(names);
    learner->GetFeatureNames(&out_names);

    std::vector<std::string> out_types;
    learner->SetFeatureTypes(types);
    learner->GetFeatureTypes(&out_types);

    // Whole-container comparison: unlike a three-iterator std::equal, this
    // also fails when the getter returns too few (or no) entries.
    ASSERT_EQ(out_names, names);
    ASSERT_EQ(out_types, types);

    learner->SaveModel(&model);
  }

  {
    // A new learner sees the feature info only through the loaded model.
    std::unique_ptr<Learner> learner{Learner::Create({m})};
    learner->LoadModel(model);

    std::vector<std::string> out_names;
    std::vector<std::string> out_types;
    learner->GetFeatureNames(&out_names);
    learner->GetFeatureTypes(&out_types);
    ASSERT_EQ(out_names, names);
    ASSERT_EQ(out_types, types);
  }
}
429 }  // namespace xgboost
430