1 /*!
2  * Copyright (c) by Contributors 2019-2021
3  */
4 #include <gtest/gtest.h>
5 #include <dmlc/filesystem.h>
6 #include <fstream>
7 #include <map>
8 
9 #include "xgboost/json.h"
10 #include "xgboost/logging.h"
11 #include "xgboost/json_io.h"
12 #include "../helpers.h"
13 #include "../../../src/common/io.h"
14 #include "../../../src/common/charconv.h"
15 
16 namespace xgboost {
17 
/*!
 * \brief JSON text of a small gbtree model (one tree, nine nodes) used as a test fixture.
 *
 * The text begins and ends with a newline because the raw string literal opens and closes
 * on its own line.
 */
std::string GetModelStr() {
  return R"json(
{
  "model_parameter": {
    "base_score": "0.5",
    "num_class": "0",
    "num_feature": "10"
  },
  "train_parameter": {
    "debug_verbose": "0",
    "disable_default_eval_metric": "0",
    "dsplit": "auto",
    "nthread": "0",
    "seed": "0",
    "seed_per_iteration": "0",
    "test_flag": "",
    "tree_method": "gpu_hist"
  },
  "configuration": {
    "booster": "gbtree",
    "gpu_id": "0",
    "num_class": "0",
    "num_feature": "10",
    "objective": "reg:linear",
    "predictor": "gpu_predictor",
    "tree_method": "gpu_hist",
    "updater": "grow_gpu_hist"
  },
  "objective": "reg:linear",
  "booster": "gbtree",
  "gbm": {
    "GBTreeModelParam": {
      "num_feature": "10",
      "num_output_group": "1",
      "num_roots": "1",
      "size_leaf_vector": "0"
    },
    "trees": [{
        "TreeParam": {
          "num_feature": "10",
          "num_roots": "1",
          "size_leaf_vector": "0"
        },
        "num_nodes": "9",
        "nodes": [
          {
            "depth": 0,
            "gain": 31.8892,
            "hess": 10,
            "left": 1,
            "missing": 1,
            "nodeid": 0,
            "right": 2,
            "split_condition": 0.580717,
            "split_index": 2
          },
          {
            "depth": 1,
            "gain": 1.5625,
            "hess": 3,
            "left": 5,
            "missing": 5,
            "nodeid": 2,
            "right": 6,
            "split_condition": 0.160345,
            "split_index": 0
          },
          {
            "depth": 2,
            "gain": 0.25,
            "hess": 2,
            "left": 7,
            "missing": 7,
            "nodeid": 6,
            "right": 8,
            "split_condition": 0.62788,
            "split_index": 0
          },
          {
            "hess": 1,
            "leaf": 0.375,
            "nodeid": 8
          },
          {
            "hess": 1,
            "leaf": 0.075,
            "nodeid": 7
          },
          {
            "hess": 1,
            "leaf": -0.075,
            "nodeid": 5
          },
          {
            "depth": 3,
            "gain": 10.4866,
            "hess": 7,
            "left": 3,
            "missing": 3,
            "nodeid": 1,
            "right": 4,
            "split_condition": 0.238748,
            "split_index": 1
          },
          {
            "hess": 6,
            "leaf": 1.54286,
            "nodeid": 4
          },
          {
            "hess": 1,
            "leaf": 0.225,
            "nodeid": 3
          }
        ],
        "leaf_vector": []
      }],
    "tree_info": [0]
  }
}
)json";
}
141 
// Parses a nested object and verifies that both levels and the leaf string are readable.
TEST(Json, TestParseObject) {
  std::string str = R"obj({"TreeParam" : {"num_feature": "10"}})obj";
  auto json = Json::Load(StringView{str.c_str(), str.size()});
  // Loading without an exception is not sufficient; make sure the structure survived parsing.
  ASSERT_TRUE(IsA<Object>(json));
  ASSERT_TRUE(IsA<Object>(json["TreeParam"]));
  ASSERT_EQ(get<String>(json["TreeParam"]["num_feature"]), "10");
}
146 
// Parses stand-alone numeric documents: plain decimals, exponent notation, negative zero and
// numbers written with long mantissas.
TEST(Json, ParseNumber) {
  // Loads `text` as a complete JSON document.
  auto parse = [](std::string const& text) {
    return Json::Load(StringView{text.c_str(), text.size()});
  };

  {
    auto json = parse("31.8892");
    ASSERT_EQ(get<JsonNumber>(json), 31.8892f);
  }
  {
    auto json = parse("-31.8892");
    ASSERT_EQ(get<JsonNumber>(json), -31.8892f);
  }
  {
    auto json = parse("2e4");
    ASSERT_EQ(get<JsonNumber>(json), 2e4f);
  }
  {
    auto json = parse("2e-4");
    ASSERT_EQ(get<JsonNumber>(json), 2e-4f);
  }
  {
    auto json = parse("-2e-4");
    ASSERT_EQ(get<JsonNumber>(json), -2e-4f);
  }
  {
    auto json = parse("-0.0");
    // The sign bit is what distinguishes negative zero; `==` treats both zeros as equal.
    ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
    // Compare against a floating point zero; the integer literal `-0` is just `0`.
    ASSERT_EQ(get<JsonNumber>(json), -0.0f);
  }
  {
    auto json = parse("-5.37645816802978516e-01");
    ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
    // Larger than fast path limit.
    ASSERT_EQ(get<JsonNumber>(json), -5.37645816802978516e-01);
  }
  {
    auto json = parse("9.86623668670654297e+00");
    ASSERT_FALSE(std::signbit(get<JsonNumber>(json)));
    ASSERT_EQ(get<JsonNumber>(json), 9.86623668670654297e+00);
  }
}
193 
// Parses an array of node objects nested in an object, then a flat top-level array of numbers.
TEST(Json, ParseArray) {
  std::string str = R"json(
{
    "nodes": [
        {
	    "depth": 3,
	    "gain": 10.4866,
	    "hess": 7,
	    "left": 3,
	    "missing": 3,
	    "nodeid": 1,
	    "right": 4,
	    "split_condition": 0.238748,
	    "split_index": 1
        },
        {
	    "hess": 6,
	    "leaf": 1.54286,
	    "nodeid": 4
        },
        {
	    "hess": 1,
	    "leaf": 0.225,
	    "nodeid": 3
        }
    ]
}
)json";
  auto doc = Json::Load(StringView{str.c_str(), str.size()});
  Json nodes = doc["nodes"];
  std::vector<Json> arr = get<JsonArray>(nodes);
  ASSERT_EQ(arr.size(), 3ul);

  // The first node carries both integer and floating point members.
  Json first = arr[0];
  ASSERT_EQ(get<Integer>(first["depth"]), 3);
  ASSERT_NEAR(get<Number>(first["gain"]), 10.4866, kRtEps);

  {
    std::string flat =
        "[5.04713470458984375e+02,9.86623668670654297e+00,"
        "4.94847229003906250e+02,2.13924217224121094e+00,"
        "7.72699451446533203e+00,2.30380615234375000e+02,"
        "2.64466613769531250e+02]";
    auto parsed = Json::Load(StringView{flat.c_str(), flat.size()});

    // Expected values in the order they appear in the text above.
    double const expected[] = {5.04713470458984375e+02, 9.86623668670654297e+00,
                               4.94847229003906250e+02, 2.13924217224121094e+00,
                               7.72699451446533203e+00, 2.30380615234375000e+02,
                               2.64466613769531250e+02};
    auto const& vec = get<Array const>(parsed);
    std::size_t idx = 0;
    for (double want : expected) {
      ASSERT_EQ(get<Number const>(vec[idx]), want);
      ++idx;
    }
  }
}
247 
// Null values: dumping yields the literal `null`, and a parsed `null` member is reported as Null.
TEST(Json, Null) {
  Json json{JsonNull()};
  std::string dumped;
  Json::Dump(json, &dumped);
  ASSERT_EQ(dumped, "null");

  std::string const text{R"null({"key":  null })null"};
  json = Json::Load(StringView{text.c_str(), text.size()});
  ASSERT_TRUE(IsA<Null>(json["key"]));
}
259 
// Empty objects, with or without whitespace between the braces, must parse as Object values.
TEST(Json, EmptyObject) {
  std::string str = R"json(
{
  "rank": 1,
  "statistic": {

  }
}
)json";
  auto json = Json::Load(StringView{str.c_str(), str.size()});
  ASSERT_TRUE(IsA<Object>(json["statistic"]));

  // Same check with no whitespace at all inside the empty objects.
  str = R"json({"Config": {},"Model": {}})json"; // NOLINT
  json = Json::Load(StringView{str.c_str(), str.size()});
  ASSERT_TRUE(IsA<Object>(json["Model"]));
}
277 
// An empty array member must parse into an array holding zero elements.
TEST(Json, EmptyArray) {
  std::string str = R"json(
{
  "leaf_vector": []
}
)json";
  auto json = Json::Load(StringView{str.c_str(), str.size()});
  // Only the size is inspected, so bind by reference instead of copying the vector.
  auto const& arr = get<JsonArray>(json["leaf_vector"]);
  ASSERT_EQ(arr.size(), 0ul);
}
289 
// The literals `true` and `false` parse into the matching boolean values.
TEST(Json, Boolean) {
  std::string const text = R"json(
{
  "left_child": true,
  "right_child": false
}
)json";
  Json parsed{Json::Load(StringView{text.c_str(), text.size()})};
  ASSERT_TRUE(get<JsonBoolean>(parsed["left_child"]));
  ASSERT_FALSE(get<JsonBoolean>(parsed["right_child"]));
}
301 
// Loads the fixture model through an explicit JsonReader and walks two levels by key.
TEST(Json, Indexing) {
  auto const model = GetModelStr();
  JsonReader reader(StringView{model.c_str(), model.size()});
  Json root{Json::Load(&reader)};

  auto& base_score = root["model_parameter"]["base_score"];
  // Go through the underlying Value with an explicit cast instead of the `get` helper.
  std::string result = Cast<JsonString>(&base_score.GetValue())->GetString();

  ASSERT_EQ(result, "0.5");
}
312 
// Assigning values into object members: an empty array, a populated array and a moved string.
TEST(Json, AssigningObjects) {
  {
    // A default-constructed Json can be turned into an object and gain members by indexing.
    Json json;
    json = JsonObject();
    json["Okay"] = JsonArray();
    ASSERT_EQ(get<JsonArray>(json["Okay"]).size(), 0ul);
  }

  {
    // An array stored under a key can be read back with its element intact.
    Json json_objects { JsonObject() };
    std::vector<Json> arr_0 (1, Json(3.3f));
    json_objects["tree_parameters"] = JsonArray(arr_0);
    std::vector<Json> json_arr = get<JsonArray>(json_objects["tree_parameters"]);
    ASSERT_NEAR(get<JsonNumber>(json_arr[0]), 3.3f, kRtEps);
  }

  {
    Json json_object { JsonObject() };
    auto str = JsonString("1");
    auto& k = json_object["1"];
    k  = std::move(str);
    // Deliberately inspects the moved-from string to confirm its content was taken.
    ASSERT_TRUE(str.GetString().empty());  // NOLINT
    auto& m = json_object["1"];
    std::string value = get<JsonString>(m);
    ASSERT_EQ(value, "1");
    ASSERT_EQ(get<JsonString>(json_object["1"]), "1");
  }
}
342 
// An array can be replaced both by assigning to the Json and by assigning through `get<Array>`.
TEST(Json, AssigningArray) {
  Json json;
  json = JsonArray();

  // Replace the whole value with a two element vector.
  std::vector<Json> two_values{Json(Number(1.0f)), Json(Number(2.0f))};
  json = two_values;

  // Overwrite the stored vector in place through the reference returned by `get`.
  std::vector<Json> one_value{Json(Number(3.0f))};
  get<Array>(json) = one_value;

  std::vector<Json> stored = get<Array>(json);
  ASSERT_EQ(get<Number>(stored[0]), 3);
}
353 
// `get<Number>` exposes the stored value by reference; copies taken from it are independent.
TEST(Json, AssigningNumber) {
  {
    // Assign directly through the expression returned by `get`.
    Json json{Number(4.0f)};
    get<Number>(json) = 15;
    ASSERT_EQ(get<Number>(json), 15);
  }

  {
    // Assign through a named reference to the stored value.
    Json json{Number(4.0f)};
    Number::Float& stored = get<Number>(json);
    stored = 15;
    ASSERT_EQ(get<Number>(json), 15);
  }

  {
    // Writing to a copy must leave the stored value untouched.
    Json json{Number(4.0f)};
    double copy = get<Number>(json);
    ASSERT_EQ(copy, 4);
    copy = 15;  // NOLINT
    ASSERT_EQ(get<Number>(json), 4);
  }

  {
    // A NaN payload is still a Number.
    Json nan_value{Number(std::numeric_limits<float>::quiet_NaN())};
    ASSERT_TRUE(IsA<Number>(nan_value));
  }
}
384 
// `get<String>` exposes the stored string by reference; copies taken from it are independent.
TEST(Json, AssigningString) {
  {
    // Assign directly through the expression returned by `get`.
    Json json{String("str")};
    get<String>(json) = "modified";
    ASSERT_EQ(get<String>(json), "modified");
  }

  {
    // Assign through a named reference to the stored string.
    Json json{String("str")};
    std::string& stored = get<String>(json);
    stored = "modified";
    ASSERT_EQ(get<String>(json), "modified");
  }

  {
    // Writing to a copy must leave the stored string untouched.
    Json json{String("str")};
    std::string copy = get<String>(json);
    copy = "modified";
    ASSERT_EQ(get<String>(json), "str");
  }
}
409 
// Dumps the fixture model to a file, reads the file back and expects an equal document.
TEST(Json, LoadDump) {
  std::string ori_buffer = GetModelStr();
  Json origin {Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};

  dmlc::TemporaryDirectory tempdir;
  // Join with a separator so the file is created inside the temporary directory.  Without it
  // the file would be a sibling of the directory and would be left behind after the test.
  auto const path = tempdir.path + "/test_model_dump";

  std::string out;
  Json::Dump(origin, &out);

  {
    // Scope the stream so it is closed before the file is read back.
    std::ofstream fout(path);
    ASSERT_TRUE(fout);
    fout << out << std::flush;
  }

  std::string new_buffer = common::LoadSequentialFile(path);

  Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
  ASSERT_EQ(load_back, origin);
}
429 
// Malformed documents must raise dmlc::Error with a message describing the problem.
TEST(Json, Invalid) {
  // Tries to load `text`.  Returns true and stores the message when dmlc::Error is raised,
  // returns false when loading succeeds.
  auto fails_with = [](std::string const& text, std::string* msg) {
    try {
      Json load{Json::Load(StringView(text.c_str(), text.size()))};
    } catch (dmlc::Error const& e) {
      *msg = e.what();
      return true;
    }
    return false;
  };

  {
    // A lone closing brace.
    std::string msg;
    ASSERT_TRUE(fails_with("}", &msg));
    ASSERT_NE(msg.find("Unknown"), std::string::npos);
  }
  {
    // Unquoted key: the message must name the offending position.
    std::string msg;
    ASSERT_TRUE(fails_with(R"json({foo)json", &msg));
    ASSERT_NE(msg.find("position: 1"), std::string::npos);
  }
  {
    // Input ends right after the key.
    std::string msg;
    ASSERT_TRUE(fails_with(R"json({"foo")json", &msg));
    // EOF is printed as 255 on s390x
    ASSERT_TRUE(msg.find("EOF") != std::string::npos
                || msg.find("255") != std::string::npos);
  }
}
469 
// For now Json is quite ignorant about unicode.
// Escaped unicode sequences are expected to appear unchanged in the dumped text.
TEST(Json, CopyUnicode) {
  std::string const text = R"json(
{"m": ["\ud834\udd1e", "\u20ac", "\u0416", "\u00f6"]}
)json";
  Json loaded{Json::Load(StringView{text.c_str(), text.size()})};

  std::string dumped;
  Json::Dump(loaded, &dumped);

  // Look for the escaped euro sign sequence in the output.
  auto const pos = dumped.find("\\u20ac");
  ASSERT_NE(pos, std::string::npos);
}
482 
// Requesting a Number from a value of another kind must throw.
TEST(Json, WrongCasts) {
  {
    // String value.
    Json json{String{"str"}};
    ASSERT_ANY_THROW(get<Number>(json));
  }
  {
    // Array value, even when its only element is a number.
    std::vector<Json> items{Json{Number{1.0f}}};
    Json json{Array{std::move(items)}};
    ASSERT_ANY_THROW(get<Number>(json));
  }
  {
    // Object value.
    std::map<std::string, Json> members{{"key", Json{String{"value"}}}};
    Json json{Object{std::move(members)}};
    ASSERT_ANY_THROW(get<Number>(json));
  }
}
498 
// Integers 1, 10, 100 and 1000 must dump to the same text as `std::to_string`.
TEST(Json, Integer) {
  for (int64_t value = 1; value < 10000; value *= 10) {
    auto boxed = Json{Integer{value}};
    std::string dumped;
    Json::Dump(boxed, &dumped);
    ASSERT_EQ(dumped, std::to_string(value));
  }
}
507 
// Numbers written without a fraction must be stored as Integer.  If they were parsed as
// float, `get<Integer>()` would throw.
TEST(Json, IntVSFloat) {
  {
    std::string const text = R"json(
{
  "number": 123.4,
  "integer": 123
})json";

    Json obj = Json::Load(StringView{text.c_str(), text.size()});

    JsonNumber::Float as_float = get<Number>(obj["number"]);
    ASSERT_NEAR(as_float, 123.4f, kRtEps);

    JsonInteger::Int as_int = get<Integer>(obj["integer"]);
    ASSERT_EQ(as_int, 123);
  }

  {
    // The first element does not fit in a signed 32 bit integer.
    std::string const text = R"json(
{"data": [2503595760, false], "shape": [10]}
)json";
    Json obj = Json::Load(StringView{text.c_str(), text.size()});
    std::vector<Json> data = get<Array>(obj["data"]);
    JsonInteger::Int first = get<Integer>(data[0]);
    ASSERT_EQ(first, 2503595760);
  }
}
534 
// Samples float bit patterns across the uint32 range with random strides and checks that
// each value survives a Dump followed by a Load.
TEST(Json, RoundTrip) {
  uint32_t i = 0;
  SimpleLCG rng;
  SimpleRealUniformDistribution<float> dist(1.0f, 4096.0f);

  // An unsigned 32 bit value can never exceed its own maximum, so a bound check in the loop
  // header would always be true.  The loop ends when advancing `i` wraps around.
  while (true) {
    // Reinterpret the bit pattern as a float.
    float f;
    std::memcpy(&f, &i, sizeof(f));

    Json jf { f };
    std::string str;
    Json::Dump(jf, &str);
    auto loaded = Json::Load({str.c_str(), str.size()});
    if (XGBOOST_EXPECT(std::isnan(f), false)) {
      // NaN never compares equal to itself, so only check that the result is a NaN.
      ASSERT_TRUE(std::isnan(get<Number const>(loaded)));
    } else {
      ASSERT_EQ(get<Number const>(loaded), f);
    }

    uint32_t const previous = i;
    i += static_cast<uint32_t>(dist(&rng));
    if (i < previous) {
      // Wrapped past the maximum: the whole range has been walked.
      break;
    }
  }
}
561 
// Round-trips every 32 bit pattern, interpreted as a float, through Dump and Load.  Disabled
// by name; the loop carries an OpenMP parallel-for pragma.
TEST(Json, DISABLED_RoundTripExhaustive) {
  // Checks a single bit pattern.
  auto check_bits = [](uint32_t bits) {
    float value;
    std::memcpy(&value, &bits, sizeof(value));

    Json boxed{value};
    std::string text;
    Json::Dump(boxed, &text);
    auto loaded = Json::Load(StringView{text.c_str(), text.size()});
    if (XGBOOST_EXPECT(std::isnan(value), false)) {
      // NaN never compares equal to itself, so only check that the result is a NaN.
      EXPECT_TRUE(std::isnan(get<Number const>(loaded)));
    } else {
      EXPECT_EQ(get<Number const>(loaded), value);
    }
  };
  // Signed 64 bit counter so the inclusive upper bound (uint32 maximum) is reachable.
  int64_t const last_pattern = static_cast<int64_t>(std::numeric_limits<uint32_t>::max());
#pragma omp parallel for schedule(static)
  for (int64_t pattern = 0; pattern <= last_pattern; ++pattern) {
    check_bits(static_cast<uint32_t>(pattern));
  }
}
583 
// Streaming a StringView must write exactly the characters it refers to.
TEST(StringView, Basic) {
  StringView view{"This is a string."};
  std::stringstream stream;
  stream << view;

  std::string const printed = stream.str();
  ASSERT_EQ(view.size(), printed.size());
  ASSERT_TRUE(std::equal(printed.cbegin(), printed.cend(), view.cbegin()));
}
593 }  // namespace xgboost
594