1 /*!
2 * Copyright (c) by Contributors 2019-2021
3 */
4 #include <gtest/gtest.h>
5 #include <dmlc/filesystem.h>
6 #include <fstream>
7 #include <map>
8
9 #include "xgboost/json.h"
10 #include "xgboost/logging.h"
11 #include "xgboost/json_io.h"
12 #include "../helpers.h"
13 #include "../../../src/common/io.h"
14 #include "../../../src/common/charconv.h"
15
16 namespace xgboost {
17
/*!
 * \brief Fixture: JSON text describing a small gbtree model with a single tree.
 *
 * \return The JSON document as a string.
 */
std::string GetModelStr() {
  return R"json(
{
"model_parameter": {
"base_score": "0.5",
"num_class": "0",
"num_feature": "10"
},
"train_parameter": {
"debug_verbose": "0",
"disable_default_eval_metric": "0",
"dsplit": "auto",
"nthread": "0",
"seed": "0",
"seed_per_iteration": "0",
"test_flag": "",
"tree_method": "gpu_hist"
},
"configuration": {
"booster": "gbtree",
"gpu_id": "0",
"num_class": "0",
"num_feature": "10",
"objective": "reg:linear",
"predictor": "gpu_predictor",
"tree_method": "gpu_hist",
"updater": "grow_gpu_hist"
},
"objective": "reg:linear",
"booster": "gbtree",
"gbm": {
"GBTreeModelParam": {
"num_feature": "10",
"num_output_group": "1",
"num_roots": "1",
"size_leaf_vector": "0"
},
"trees": [{
"TreeParam": {
"num_feature": "10",
"num_roots": "1",
"size_leaf_vector": "0"
},
"num_nodes": "9",
"nodes": [
{
"depth": 0,
"gain": 31.8892,
"hess": 10,
"left": 1,
"missing": 1,
"nodeid": 0,
"right": 2,
"split_condition": 0.580717,
"split_index": 2
},
{
"depth": 1,
"gain": 1.5625,
"hess": 3,
"left": 5,
"missing": 5,
"nodeid": 2,
"right": 6,
"split_condition": 0.160345,
"split_index": 0
},
{
"depth": 2,
"gain": 0.25,
"hess": 2,
"left": 7,
"missing": 7,
"nodeid": 6,
"right": 8,
"split_condition": 0.62788,
"split_index": 0
},
{
"hess": 1,
"leaf": 0.375,
"nodeid": 8
},
{
"hess": 1,
"leaf": 0.075,
"nodeid": 7
},
{
"hess": 1,
"leaf": -0.075,
"nodeid": 5
},
{
"depth": 3,
"gain": 10.4866,
"hess": 7,
"left": 3,
"missing": 3,
"nodeid": 1,
"right": 4,
"split_condition": 0.238748,
"split_index": 1
},
{
"hess": 6,
"leaf": 1.54286,
"nodeid": 4
},
{
"hess": 1,
"leaf": 0.225,
"nodeid": 3
}
],
"leaf_vector": []
}],
"tree_info": [0]
}
}
)json";
}
141
// Parsing a nested object must yield objects whose members are reachable by key.
TEST(Json, TestParseObject) {
  std::string str = R"obj({"TreeParam" : {"num_feature": "10"}})obj";
  auto json = Json::Load(StringView{str.c_str(), str.size()});

  // Parsing without throwing is not enough; verify the resulting structure too.
  ASSERT_TRUE(IsA<Object>(json));
  auto& tree_param = json["TreeParam"];
  ASSERT_TRUE(IsA<Object>(tree_param));
  ASSERT_EQ(get<String>(tree_param["num_feature"]), "10");
}
146
// Number parsing: plain decimals, exponents, negative zero and long mantissas.
TEST(Json, ParseNumber) {
  // Shared loader so each case only states its input and expectation.
  auto parse = [](std::string const& text) {
    return Json::Load(StringView{text.c_str(), text.size()});
  };

  {
    std::string str = "31.8892";
    auto json = parse(str);
    ASSERT_EQ(get<JsonNumber>(json), 31.8892f);
  }
  {
    std::string str = "-31.8892";
    auto json = parse(str);
    ASSERT_EQ(get<JsonNumber>(json), -31.8892f);
  }
  {
    std::string str = "2e4";
    auto json = parse(str);
    ASSERT_EQ(get<JsonNumber>(json), 2e4f);
  }
  {
    std::string str = "2e-4";
    auto json = parse(str);
    ASSERT_EQ(get<JsonNumber>(json), 2e-4f);
  }
  {
    std::string str = "-2e-4";
    auto json = parse(str);
    ASSERT_EQ(get<JsonNumber>(json), -2e-4f);
  }
  {
    std::string str = "-0.0";
    auto json = parse(str);
    // The sign of zero is invisible to operator==, so it is checked separately.
    ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
    // Use a floating point negative zero; `-0` is just the integer literal 0.
    ASSERT_EQ(get<JsonNumber>(json), -0.0f);
  }
  {
    std::string str = "-5.37645816802978516e-01";
    auto json = parse(str);
    ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
    // Larger than fast path limit.
    ASSERT_EQ(get<JsonNumber>(json), -5.37645816802978516e-01);
  }
  {
    std::string str = "9.86623668670654297e+00";
    auto json = parse(str);
    ASSERT_FALSE(std::signbit(get<JsonNumber>(json)));
    ASSERT_EQ(get<JsonNumber>(json), 9.86623668670654297e+00);
  }
}
193
// Arrays nested in an object, and a top-level array of numbers.
TEST(Json, ParseArray) {
  std::string const tree_text = R"json(
{
"nodes": [
{
"depth": 3,
"gain": 10.4866,
"hess": 7,
"left": 3,
"missing": 3,
"nodeid": 1,
"right": 4,
"split_condition": 0.238748,
"split_index": 1
},
{
"hess": 6,
"leaf": 1.54286,
"nodeid": 4
},
{
"hess": 1,
"leaf": 0.225,
"nodeid": 3
}
]
}
)json";
  auto document = Json::Load(StringView{tree_text.c_str(), tree_text.size()});
  Json nodes = document["nodes"];
  std::vector<Json> node_list = get<JsonArray>(nodes);
  ASSERT_EQ(node_list.size(), 3ul);

  Json first = node_list[0];
  ASSERT_EQ(get<Integer>(first["depth"]), 3);
  ASSERT_NEAR(get<Number>(first["gain"]), 10.4866, kRtEps);

  {
    std::string const numbers_text =
        "[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+"
        "02,2.13924217224121094e+00,7.72699451446533203e+00,2."
        "30380615234375000e+02,2.64466613769531250e+02]";
    auto parsed = Json::Load(StringView{numbers_text.c_str(), numbers_text.size()});

    // Expected values, in the same order as they appear in the input.
    double const expected[] = {
        5.04713470458984375e+02, 9.86623668670654297e+00, 4.94847229003906250e+02,
        2.13924217224121094e+00, 7.72699451446533203e+00, 2.30380615234375000e+02,
        2.64466613769531250e+02};

    auto const& values = get<Array const>(parsed);
    for (std::size_t k = 0; k < sizeof(expected) / sizeof(expected[0]); ++k) {
      ASSERT_EQ(get<Number const>(values[k]), expected[k]);
    }
  }
}
247
// A null value dumps as `null`, and a `null` member loads back as a Null value.
TEST(Json, Null) {
  Json value{JsonNull()};

  std::string dumped;
  Json::Dump(value, &dumped);
  ASSERT_EQ(dumped, "null");

  std::string const input{R"null({"key": null })null"};
  value = Json::Load({input.c_str(), input.size()});
  ASSERT_TRUE(IsA<Null>(value["key"]));
}
259
// Empty objects, with or without whitespace between the braces, load as Object.
TEST(Json, EmptyObject) {
  std::string str = R"json(
{
"rank": 1,
"statistic": {

}
}
)json";
  auto json = Json::Load(StringView{str.c_str(), str.size()});
  ASSERT_TRUE(IsA<Object>(json["statistic"]));

  // Same check for empty objects written without any whitespace.
  str = R"json({"Config": {},"Model": {}})json";  // NOLINT
  json = Json::Load(StringView{str.c_str(), str.size()});
  ASSERT_TRUE(IsA<Object>(json["Model"]));
}
277
// An empty JSON array loads as an array with zero elements.
TEST(Json, EmptyArray) {
  std::string str = R"json(
{
"leaf_vector": []
}
)json";
  auto json = Json::Load(StringView{str.c_str(), str.size()});
  auto arr = get<JsonArray>(json["leaf_vector"]);
  ASSERT_EQ(arr.size(), 0ul);
}
289
// `true` and `false` literals load as boolean values.
TEST(Json, Boolean) {
  std::string const text = R"json(
{
"left_child": true,
"right_child": false
}
)json";
  Json parsed{Json::Load(StringView{text.c_str(), text.size()})};

  bool const left = get<JsonBoolean>(parsed["left_child"]);
  bool const right = get<JsonBoolean>(parsed["right_child"]);
  ASSERT_EQ(left, true);
  ASSERT_EQ(right, false);
}
301
// Nested members of the fixture model can be reached by chained key lookups.
TEST(Json, Indexing) {
  auto const model_text = GetModelStr();
  JsonReader reader(StringView{model_text.c_str(), model_text.size()});
  Json model{Json::Load(&reader)};

  auto& model_parameter = model["model_parameter"];
  auto& base_score = model_parameter["base_score"];

  // Go through the underlying value to read the string content.
  std::string const result = Cast<JsonString>(&base_score.GetValue())->GetString();
  ASSERT_EQ(result, "0.5");
}
312
// Assigning arrays and strings into the members of an object.
TEST(Json, AssigningObjects) {
  {
    // A default constructed Json can be turned into an object and then populated.
    Json json;
    json = JsonObject();
    json["Okay"] = JsonArray();
    ASSERT_EQ(get<JsonArray>(json["Okay"]).size(), 0ul);
  }

  {
    // An array assigned to a member keeps its elements.
    Json json_objects{JsonObject()};
    std::vector<Json> arr_0(1, Json(3.3f));
    json_objects["tree_parameters"] = JsonArray(arr_0);
    std::vector<Json> json_arr = get<JsonArray>(json_objects["tree_parameters"]);
    ASSERT_NEAR(get<JsonNumber>(json_arr[0]), 3.3f, kRtEps);
  }

  {
    // Move-assigning a string into a member transfers the content.
    Json json_object{JsonObject()};
    auto str = JsonString("1");
    auto& k = json_object["1"];
    k = std::move(str);
    // Deliberate read of the moved-from value: it is expected to be left empty.
    ASSERT_TRUE(str.GetString().empty());  // NOLINT
    auto& m = json_object["1"];
    std::string value = get<JsonString>(m);
    ASSERT_EQ(value, "1");
    ASSERT_EQ(get<JsonString>(json_object["1"]), "1");
  }
}
342
// The array held by a Json value can be replaced, both whole and through get<Array>.
TEST(Json, AssigningArray) {
  Json holder;
  holder = JsonArray();

  std::vector<Json> two_values{Json(Number(1.0f)), Json(Number(2.0f))};
  holder = two_values;

  // Overwrite the stored vector through the reference returned by get<Array>.
  std::vector<Json> one_value{Json(Number(3.0f))};
  get<Array>(holder) = one_value;

  std::vector<Json> stored = get<Array>(holder);
  ASSERT_EQ(get<Number>(stored[0]), 3);
}
353
// get<Number> returns a reference into the Json value; copies stay independent.
TEST(Json, AssigningNumber) {
  {
    // Assign directly through the returned reference.
    Json holder = Json{Number(4.0f)};
    get<Number>(holder) = 15;
    ASSERT_EQ(get<Number>(holder), 15);
  }

  {
    // Assign through a named reference.
    Json holder = Json{Number(4.0f)};
    Number::Float& stored = get<Number>(holder);
    stored = 15;
    ASSERT_EQ(get<Number>(holder), 15);
  }

  {
    // A copy of the value must not write back into the Json.
    Json holder = Json{Number(4.0f)};
    double copy = get<Number>(holder);
    ASSERT_EQ(copy, 4);
    copy = 15;  // NOLINT
    ASSERT_EQ(get<Number>(holder), 4);
  }

  {
    // NaN is still stored as a Number.
    Json holder{Number(std::numeric_limits<float>::quiet_NaN())};
    ASSERT_TRUE(IsA<Number>(holder));
  }
}
384
// get<String> returns a reference into the Json value; copies stay independent.
TEST(Json, AssigningString) {
  {
    // Assign directly through the returned reference.
    Json holder = Json{String("str")};
    get<String>(holder) = "modified";
    ASSERT_EQ(get<String>(holder), "modified");
  }

  {
    // Assign through a named reference.
    Json holder = Json{String("str")};
    std::string& stored = get<String>(holder);
    stored = "modified";
    ASSERT_EQ(get<String>(holder), "modified");
  }

  {
    // A copy of the string must not write back into the Json.
    Json holder = Json{String("str")};
    std::string copy = get<String>(holder);
    copy = "modified";
    ASSERT_EQ(get<String>(holder), "str");
  }
}
409
// Dump the fixture model to a file, read it back, and expect an equal document.
TEST(Json, LoadDump) {
  std::string ori_buffer = GetModelStr();
  Json origin{Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};

  dmlc::TemporaryDirectory tempdir;
  // Join with an explicit separator so the file is created inside the temporary
  // directory rather than beside it, where it would presumably not be cleaned up.
  std::string const path = tempdir.path + "/test_model_dump";

  std::string out;
  Json::Dump(origin, &out);

  {
    std::ofstream fout(path);
    ASSERT_TRUE(fout);
    fout << out;
    // Close before reading back so all content is on disk; close() sets failbit on error.
    fout.close();
    ASSERT_TRUE(fout);
  }

  std::string new_buffer = common::LoadSequentialFile(path);

  Json load_back{Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
  ASSERT_EQ(load_back, origin);
}
429
// Malformed documents must raise dmlc::Error with a recognisable message.
TEST(Json, Invalid) {
  // Try to load `input`. Returns true and stores the error text in `message`
  // when dmlc::Error is thrown, false when loading succeeds.
  auto fails_with = [](std::string const& input, std::string* message) {
    try {
      Json load{Json::Load(StringView(input.c_str(), input.size()))};
    } catch (dmlc::Error const& e) {
      *message = e.what();
      return true;
    }
    return false;
  };

  {
    std::string msg;
    bool const has_thrown = fails_with("}", &msg);
    ASSERT_TRUE(has_thrown);
    ASSERT_NE(msg.find("Unknown"), std::string::npos);
  }
  {
    std::string msg;
    bool const has_thrown = fails_with(R"json({foo)json", &msg);
    ASSERT_TRUE(has_thrown);
    ASSERT_NE(msg.find("position: 1"), std::string::npos);
  }
  {
    std::string msg;
    bool const has_thrown = fails_with(R"json({"foo")json", &msg);
    ASSERT_TRUE(has_thrown);
    bool const mentions_eof = msg.find("EOF") != std::string::npos ||
                              msg.find("255") != std::string::npos;  // EOF is printed as 255 on s390x
    ASSERT_TRUE(mentions_eof);
  }
}
469
470 // For now Json is quite ignorance about unicode.
TEST(Json,CopyUnicode)471 TEST(Json, CopyUnicode) {
472 std::string json_str = R"json(
473 {"m": ["\ud834\udd1e", "\u20ac", "\u0416", "\u00f6"]}
474 )json";
475 Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()})};
476
477 std::string dumped_string;
478 Json::Dump(loaded, &dumped_string);
479
480 ASSERT_NE(dumped_string.find("\\u20ac"), std::string::npos);
481 }
482
// get<Number> must throw when the Json holds a string, an array or an object.
TEST(Json, WrongCasts) {
  {
    Json holds_string = Json{String{"str"}};
    ASSERT_ANY_THROW(get<Number>(holds_string));
  }
  {
    std::vector<Json> elements{Json{Number{1.0f}}};
    Json holds_array = Json{Array{elements}};
    ASSERT_ANY_THROW(get<Number>(holds_array));
  }
  {
    std::map<std::string, Json> members{{"key", Json{String{"value"}}}};
    Json holds_object = Json{Object{members}};
    ASSERT_ANY_THROW(get<Number>(holds_object));
  }
}
498
// Integers 1, 10, 100 and 1000 dump to the same text as std::to_string.
TEST(Json, Integer) {
  int64_t power = 1;
  while (power < 10000) {
    auto const value = Json{Integer{power}};
    std::string dumped;
    Json::Dump(value, &dumped);
    ASSERT_EQ(dumped, std::to_string(power));
    power *= 10;
  }
}
507
// Integer literals must load as Integer: if one were parsed as a float,
// calling `get<Integer>()' on it would throw.
TEST(Json, IntVSFloat) {
  {
    std::string const text = R"json(
{
"number": 123.4,
"integer": 123
})json";

    Json document = Json::Load({text.c_str(), text.size()});

    JsonNumber::Float const as_float = get<Number>(document["number"]);
    ASSERT_NEAR(as_float, 123.4f, kRtEps);

    JsonInteger::Int const as_int = get<Integer>(document["integer"]);
    ASSERT_EQ(as_int, 123);
  }

  {
    // 2503595760 does not fit in 32 bits.
    std::string const text = R"json(
{"data": [2503595760, false], "shape": [10]}
)json";
    Json document = Json::Load({text.c_str(), text.size()});
    auto data = get<Array>(document["data"]);
    auto const first = get<Integer>(data[0]);
    ASSERT_EQ(first, 2503595760);
  }
}
534
// Dump then load floats sampled across the whole 32-bit pattern space and
// expect the same value back (or NaN for NaN).
TEST(Json, RoundTrip) {
  uint32_t i = 0;
  SimpleLCG rng;
  SimpleRealUniformDistribution<float> dist(1.0f, 4096.0f);

  // A `uint32_t <= max` condition is always true, so the loop is written as an
  // explicit infinite loop that stops when the counter wraps around.
  while (true) {
    // Reinterpret the bit pattern as a float.
    float f;
    std::memcpy(&f, &i, sizeof(f));

    Json jf{f};
    std::string str;
    Json::Dump(jf, &str);
    auto loaded = Json::Load({str.c_str(), str.size()});
    if (XGBOOST_EXPECT(std::isnan(f), false)) {
      // NaN never compares equal, so it needs its own check.
      ASSERT_TRUE(std::isnan(get<Number const>(loaded)));
    } else {
      ASSERT_EQ(get<Number const>(loaded), f);
    }

    // Advance by a stride drawn from `dist`; wrap-around means we are done.
    auto const previous = i;
    i += static_cast<uint32_t>(dist(&rng));
    if (i < previous) {
      break;
    }
  }
}
561
// Disabled by default: dump/load round trip for every 32-bit float pattern.
TEST(Json, DISABLED_RoundTripExhaustive) {
  // Round trip the float whose bit pattern is `bits`.
  auto check_pattern = [](uint32_t bits) {
    float f;
    std::memcpy(&f, &bits, sizeof(f));

    Json jf{f};
    std::string dumped;
    Json::Dump(jf, &dumped);
    auto loaded = Json::Load({dumped.c_str(), dumped.size()});
    if (XGBOOST_EXPECT(std::isnan(f), false)) {
      EXPECT_TRUE(std::isnan(get<Number const>(loaded)));
    } else {
      EXPECT_EQ(get<Number const>(loaded), f);
    }
  };

  // Signed 64-bit loop bounds so the counter can reach the uint32_t maximum.
  int64_t const uint32_max = static_cast<int64_t>(std::numeric_limits<uint32_t>::max());
#pragma omp parallel for schedule(static)
  for (int64_t bits = 0; bits <= uint32_max; ++bits) {
    check_pattern(static_cast<uint32_t>(bits));
  }
}
583
// Streaming a StringView writes exactly its characters.
TEST(StringView, Basic) {
  StringView view{"This is a string."};

  std::stringstream stream;
  stream << view;
  std::string const streamed = stream.str();

  ASSERT_EQ(view.size(), streamed.size());
  bool const same_chars = std::equal(streamed.cbegin(), streamed.cend(), view.cbegin());
  ASSERT_TRUE(same_chars);
}
593 } // namespace xgboost
594