1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <cmath>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 #include <memory>
24 #include <sstream>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28 #include <vector>
29
30 #include <gtest/gtest.h>
31
32 #include "arrow/array.h"
33 #include "arrow/ipc/json_simple.h"
34 #include "arrow/testing/gtest_util.h"
35 #include "arrow/type.h"
36 #include "arrow/type_traits.h"
37 #include "arrow/util/checked_cast.h"
38 #include "arrow/util/decimal.h"
39
40 #if defined(_MSC_VER)
41 // "warning C4307: '+': integral constant overflow"
42 #pragma warning(disable : 4307)
43 #endif
44
45 namespace arrow {
46 namespace ipc {
47 namespace internal {
48 namespace json {
49
50 using ::arrow::internal::checked_cast;
51 using ::arrow::internal::checked_pointer_cast;
52
// Add two signed integers without invoking undefined behaviour on overflow.
// The operands are converted to the matching unsigned type, summed there
// (unsigned arithmetic wraps), and the result is converted back to `Signed`.
template <typename Signed>
Signed SafeSignedAdd(Signed u, Signed v) {
  using UnsignedT = typename std::make_unsigned<Signed>::type;
  const UnsignedT lhs = static_cast<UnsignedT>(u);
  const UnsignedT rhs = static_cast<UnsignedT>(v);
  const UnsignedT wrapped_sum = static_cast<UnsignedT>(lhs + rhs);
  return static_cast<Signed>(wrapped_sum);
}
59
// 8-bit integers need dedicated overloads: streaming them directly would emit
// the corresponding ASCII character, so widen to a 16-bit integer first and
// print the decimal value instead.
void JSONArrayInternal(std::ostream* ss, int8_t value) {
  const int16_t widened = value;
  *ss << widened;
}
65
// Unsigned 8-bit overload: print the numeric value rather than a character.
void JSONArrayInternal(std::ostream* ss, uint8_t value) {
  const int16_t widened = value;
  *ss << widened;
}
69
// Generic single-value case: write `value` to the stream using its operator<<.
template <typename Value>
void JSONArrayInternal(std::ostream* ss, Value&& value) {
  std::ostream& out = *ss;
  out << value;
}
74
// Multi-value case: write the first value, a ", " separator, then recurse on
// the remaining values.
template <typename Head, typename... Rest>
void JSONArrayInternal(std::ostream* ss, Head&& value, Rest&&... tail) {
  JSONArrayInternal(ss, std::forward<Head>(value));
  (*ss) << ", ";
  JSONArrayInternal(ss, std::forward<Rest>(tail)...);
}
81
// Render the arguments as the text of a JSON array: "[a, b, c]".
// Each element is formatted by the JSONArrayInternal overloads.
template <typename... Args>
std::string JSONArray(Args&&... args) {
  std::ostringstream out;
  out << "[";
  JSONArrayInternal(&out, std::forward<Args>(args)...);
  out << "]";
  return out.str();
}
90
91 template <typename T, typename C_TYPE = typename T::c_type>
AssertJSONArray(const std::shared_ptr<DataType> & type,const std::string & json,const std::vector<C_TYPE> & values)92 void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& json,
93 const std::vector<C_TYPE>& values) {
94 std::shared_ptr<Array> actual, expected;
95
96 ASSERT_OK(ArrayFromJSON(type, json, &actual));
97 ASSERT_OK(actual->ValidateFull());
98 ArrayFromVector<T, C_TYPE>(type, values, &expected);
99 AssertArraysEqual(*expected, *actual);
100 }
101
102 template <typename T, typename C_TYPE = typename T::c_type>
AssertJSONArray(const std::shared_ptr<DataType> & type,const std::string & json,const std::vector<bool> & is_valid,const std::vector<C_TYPE> & values)103 void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& json,
104 const std::vector<bool>& is_valid,
105 const std::vector<C_TYPE>& values) {
106 std::shared_ptr<Array> actual, expected;
107
108 ASSERT_OK(ArrayFromJSON(type, json, &actual));
109 ASSERT_OK(actual->ValidateFull());
110 ArrayFromVector<T, C_TYPE>(type, is_valid, values, &expected);
111 AssertArraysEqual(*expected, *actual);
112 }
113
TEST(TestHelper, JSONArray) {
  // Mixed argument types; 8-bit integers must be rendered as numbers
  const std::string mixed =
      JSONArray(123, -4.5, static_cast<int8_t>(-12), static_cast<uint8_t>(34));
  ASSERT_EQ(mixed, "[123, -4.5, -12, 34]");

  // Extremes of the signed and unsigned 64-bit ranges
  const std::string extremes =
      JSONArray(9223372036854775807LL, 9223372036854775808ULL,
                -9223372036854775807LL - 1, 18446744073709551615ULL);
  ASSERT_EQ(extremes,
            "[9223372036854775807, 9223372036854775808, -9223372036854775808, "
            "18446744073709551615]");
}
125
TEST(TestHelper, SafeSignedAdd) {
  // Sums that do not fit in the result type wrap around instead of overflowing
  const int8_t min_plus_min = SafeSignedAdd<int8_t>(-128, -128);
  ASSERT_EQ(0, min_plus_min);

  const int8_t min_plus_almost_min = SafeSignedAdd<int8_t>(-128, -127);
  ASSERT_EQ(1, min_plus_almost_min);

  const int8_t one_plus_max8 = SafeSignedAdd<int8_t>(1, 127);
  ASSERT_EQ(-128, one_plus_max8);

  const int32_t one_plus_max32 = SafeSignedAdd<int32_t>(1, 2147483647);
  ASSERT_EQ(-2147483648LL, one_plus_max32);
}
132
// Type-parameterized fixture for the integer conversion tests below.
// It holds no state; the typed test cases are attached to it and run once per
// instantiated Arrow type `T`.
template <typename T>
class TestIntegers : public ::testing::Test {};

// Declare TestIntegers as a type-parameterized test suite.
TYPED_TEST_SUITE_P(TestIntegers);
137
// Valid JSON input must convert to the expected array for the type under test.
TYPED_TEST_P(TestIntegers, Basics) {
  using T = TypeParam;
  using c_type = typename T::c_type;

  std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();

  // Empty array, plain values, and a null slot
  AssertJSONArray<T>(type, "[]", {});
  AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
  AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true}, {4, 0, 5});

  // Test limits: the smallest and largest values of the C type must be accepted
  const auto min_val = std::numeric_limits<c_type>::min();
  const auto max_val = std::numeric_limits<c_type>::max();
  std::string json_string = JSONArray(0, 1, min_val);
  AssertJSONArray<T>(type, json_string, {0, 1, min_val});
  json_string = JSONArray(0, 1, max_val);
  AssertJSONArray<T>(type, json_string, {0, 1, max_val});
}
157
// Malformed documents, non-array top-level values and non-integer elements must
// all be rejected with an Invalid status.
TYPED_TEST_P(TestIntegers, Errors) {
  using T = TypeParam;

  const std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
  std::shared_ptr<Array> array;

  for (const char* bad_json : {"", "[", "0", "{}", "[0.0]", "[\"0\"]", "[[0]]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
172
// Values just outside the representable range of the type must be rejected.
TYPED_TEST_P(TestIntegers, OutOfBounds) {
  using T = TypeParam;
  using c_type = typename T::c_type;
  using limits = std::numeric_limits<c_type>;

  const std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
  std::shared_ptr<Array> array;

  // The 64-bit cases are spelled out as literals: one past their limits does
  // not fit in any 64-bit integer.
  if (type->id() == Type::UINT64) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]", &array));
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]", &array));
    return;
  }
  if (type->id() == Type::INT64) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[9223372036854775808]", &array));
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-9223372036854775809]", &array));
    return;
  }

  if (std::is_signed<c_type>::value) {
    // Narrower signed types: compute min - 1 and max + 1 in 64 bits
    const auto below_min = SafeSignedAdd<int64_t>(limits::min(), -1);
    const auto above_max = SafeSignedAdd<int64_t>(limits::max(), +1);
    auto json_text = JSONArray(below_min);
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_text, &array));
    json_text = JSONArray(above_max);
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_text, &array));
    return;
  }

  // Narrower unsigned types: max + 1 and any negative number are out of range
  const auto above_max = static_cast<uint64_t>(limits::max()) + 1;
  auto json_text = JSONArray(above_max);
  ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_text, &array));
  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]", &array));
}
200
// Register every TestIntegers test case with the type-parameterized suite.
REGISTER_TYPED_TEST_SUITE_P(TestIntegers, Basics, Errors, OutOfBounds);

// Instantiate the suite once per type under test.
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestIntegers, Int8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt16, TestIntegers, Int16Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt32, TestIntegers, Int32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt64, TestIntegers, Int64Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers, UInt8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
// NOTE(review): HalfFloatType is run through the integer tests as well,
// presumably because its c_type is an unsigned integer storage type -- confirm.
INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
212
// The null type accepts an empty array and arrays made only of JSON nulls.
TEST(TestNull, Basics) {
  std::shared_ptr<DataType> type = null();

  AssertJSONArray<NullType, std::nullptr_t>(type, "[]", {});
  AssertJSONArray<NullType, std::nullptr_t>(type, "[null, null]", {nullptr, nullptr});
}
220
// Any element other than a JSON null is rejected for the null type.
TEST(TestNull, Errors) {
  const std::shared_ptr<DataType> type = null();
  std::shared_ptr<Array> array;

  for (const char* bad_json : {"[[]]", "[0]", "[NaN]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
229
// Booleans can be given as JSON true/false or as the integer literals 0/1.
TEST(TestBoolean, Basics) {
  std::shared_ptr<DataType> type = boolean();

  AssertJSONArray<BooleanType, bool>(type, "[]", {});
  AssertJSONArray<BooleanType, bool>(type, "[false, true, false]", {false, true, false});
  AssertJSONArray<BooleanType, bool>(type, "[false, true, null]", {true, true, false},
                                     {false, true, false});
  // Supports integer literal casting
  AssertJSONArray<BooleanType, bool>(type, "[0, 1, 0]", {false, true, false});
  AssertJSONArray<BooleanType, bool>(type, "[0, 1, null]", {true, true, false},
                                     {false, true, false});
}
243
// Floating-point numbers and strings are not accepted as booleans.
TEST(TestBoolean, Errors) {
  const std::shared_ptr<DataType> type = boolean();
  std::shared_ptr<Array> array;

  for (const char* bad_json : {"[0.0]", "[\"true\"]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
251
// float32 conversion: plain numbers, signed zero, infinities, nulls and NaN.
TEST(TestFloat, Basics) {
  std::shared_ptr<DataType> type = float32();
  std::shared_ptr<Array> actual;

  AssertJSONArray<FloatType>(type, "[]", {});
  AssertJSONArray<FloatType>(type, "[1, 2.5, -3e4]", {1.0f, 2.5f, -3.0e4f});
  AssertJSONArray<FloatType>(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false},
                             {-0.0f, INFINITY, -INFINITY, 0.0f});

  // Check NaN separately as AssertArraysEqual simply memcmp's array contents
  // and NaNs can have many bit representations.
  ASSERT_OK(ArrayFromJSON(type, "[NaN]", &actual));
  ASSERT_OK(actual->ValidateFull());
  float value = checked_cast<FloatArray&>(*actual).Value(0);
  ASSERT_TRUE(std::isnan(value));
}
268
// A JSON boolean is not a valid float32 value.
TEST(TestFloat, Errors) {
  const auto float_type = float32();
  std::shared_ptr<Array> parsed;

  ASSERT_RAISES(Invalid, ArrayFromJSON(float_type, "[true]", &parsed));
}
275
// float64 conversion: plain numbers, signed zero, infinities, nulls and NaN.
TEST(TestDouble, Basics) {
  std::shared_ptr<DataType> type = float64();
  std::shared_ptr<Array> actual;

  AssertJSONArray<DoubleType>(type, "[]", {});
  AssertJSONArray<DoubleType>(type, "[1, 2.5, -3e4]", {1.0, 2.5, -3.0e4});
  AssertJSONArray<DoubleType>(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false},
                              {-0.0, INFINITY, -INFINITY, 0.0});

  // NaN is checked through the parsed value itself rather than by comparing
  // arrays.
  ASSERT_OK(ArrayFromJSON(type, "[NaN]", &actual));
  ASSERT_OK(actual->ValidateFull());
  double value = checked_cast<DoubleArray&>(*actual).Value(0);
  ASSERT_TRUE(std::isnan(value));
}
290
// A JSON boolean is not a valid float64 value.
TEST(TestDouble, Errors) {
  const auto double_type = float64();
  std::shared_ptr<Array> parsed;

  ASSERT_RAISES(Invalid, ArrayFromJSON(double_type, "[true]", &parsed));
}
297
// Conversion of JSON strings to the utf8 and binary types.
TEST(TestString, Basics) {
  // String type
  std::shared_ptr<DataType> type = utf8();

  AssertJSONArray<StringType, std::string>(type, "[]", {});
  AssertJSONArray<StringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
  AssertJSONArray<StringType, std::string>(type, "[\"\", null]", {true, false}, {"", ""});
  // NUL character in string (built piecewise so the embedded NUL is kept)
  std::string s = "some";
  s += '\x00';
  s += "char";
  AssertJSONArray<StringType, std::string>(type, "[\"\", \"some\\u0000char\"]", {"", s});
  // UTF8 sequence in string
  AssertJSONArray<StringType, std::string>(type, "[\"\xc3\xa9\"]", {"\xc3\xa9"});

  // Binary type
  type = binary();
  AssertJSONArray<BinaryType, std::string>(type, "[\"\", \"foo\", null]",
                                           {true, true, false}, {"", "foo", ""});
  // Arbitrary binary (non-UTF8) sequence in string
  s = "\xff\x9f";
  AssertJSONArray<BinaryType, std::string>(type, "[\"" + s + "\"]", {s});
  // Bytes < 0x20 can be represented as JSON unicode escapes
  s = '\x00';
  s += "\x1f";
  AssertJSONArray<BinaryType, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
}
326
// Basic conversion checks for the large_utf8 and large_binary types.
TEST(TestLargeString, Basics) {
  // Similar as TestString above, only testing the basics
  std::shared_ptr<DataType> type = large_utf8();

  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", null]", {true, false},
                                                {"", ""});

  // Large binary type
  type = large_binary();
  AssertJSONArray<LargeBinaryType, std::string>(type, "[\"\", \"foo\", null]",
                                                {true, true, false}, {"", "foo", ""});
}
341
// Date strings convert to counts relative to 1970-01-01 in the unit of the type.
TEST(TestTimestamp, Basics) {
  // Second resolution
  const auto second_type = timestamp(TimeUnit::SECOND);
  AssertJSONArray<TimestampType, int64_t>(
      second_type, R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])",
      {0, 951782400, 63730281600LL, -2203977600LL});

  // Nanosecond resolution
  const auto nano_type = timestamp(TimeUnit::NANO);
  AssertJSONArray<TimestampType, int64_t>(
      nano_type, R"(["1970-01-01","2000-02-29","1900-02-28"])",
      {0, 951782400000000000LL, -2203977600000000000LL});
}
354
// date32 and date64 values are given as plain integers, with nulls allowed.
TEST(TestDate, Basics) {
  const std::vector<bool> is_valid = {true, false, true};

  AssertJSONArray<Date32Type>(date32(), R"([5, null, 42])", is_valid, {5, 0, 42});
  AssertJSONArray<Date64Type>(date64(), R"([1, null, 9999999999999])", is_valid,
                              {1, 0, 9999999999999LL});
}
362
// time32 and time64 values are given as plain integers for each supported unit.
TEST(TestTime, Basics) {
  const std::vector<bool> is_valid = {true, false, true};

  // 32-bit time types
  for (const auto unit : {TimeUnit::SECOND, TimeUnit::MILLI}) {
    AssertJSONArray<Time32Type>(time32(unit), R"([5, null, 42])", is_valid, {5, 0, 42});
  }

  // 64-bit time types
  for (const auto unit : {TimeUnit::MICRO, TimeUnit::NANO}) {
    AssertJSONArray<Time64Type>(time64(unit), R"([1, null, 9999999999999])", is_valid,
                                {1, 0, 9999999999999LL});
  }
}
376
// The same integer input is checked against a duration type of every unit.
TEST(TestDuration, Basics) {
  const std::vector<bool> is_valid = {false, true, true};

  for (const auto unit :
       {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
    AssertJSONArray<DurationType>(duration(unit),
                                  R"([null, -7777777777777, 9999999999999])", is_valid,
                                  {0, -7777777777777LL, 9999999999999LL});
  }
}
395
// Month intervals are given as plain (possibly negative) integers or null.
TEST(TestMonthInterval, Basics) {
  const std::vector<bool> is_valid = {true, true, false};

  AssertJSONArray<MonthIntervalType>(month_interval(), R"([123, -456, null])", is_valid,
                                     {123, -456, 0});
}
401
// Day-time intervals are given as two-element arrays or null.
TEST(TestDayTimeInterval, Basics) {
  const std::vector<bool> is_valid = {true, false};

  AssertJSONArray<DayTimeIntervalType>(day_time_interval(), R"([[1, -600], null])",
                                       is_valid, {{1, -600}, {}});
}
407
// Numbers and nested arrays are not accepted as utf8 values.
TEST(TestString, Errors) {
  const std::shared_ptr<DataType> type = utf8();
  std::shared_ptr<Array> array;

  for (const char* bad_json : {"[0]", "[[]]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
415
// Conversion of JSON strings to a fixed_size_binary type of width 3.
TEST(TestFixedSizeBinary, Basics) {
  std::shared_ptr<DataType> type = fixed_size_binary(3);

  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[]", {});
  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[\"foo\", \"bar\"]",
                                                    {"foo", "bar"});
  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[null, \"foo\"]",
                                                    {false, true}, {"", "foo"});
  // Arbitrary binary (non-UTF8) sequence in string
  std::string s = "\xff\x9f\xcc";
  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[\"" + s + "\"]", {s});
}
429
// Non-string elements and strings of the wrong width are rejected.
TEST(TestFixedSizeBinary, Errors) {
  const std::shared_ptr<DataType> type = fixed_size_binary(3);
  std::shared_ptr<Array> array;

  for (const char* bad_json : {
           // Wrong element kind
           "[0]", "[[]]",
           // Invalid length
           "[\"\"]", "[\"abcd\"]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
440
// Decimal values given as strings must match arrays built with Decimal128Builder.
TEST(TestDecimal, Basics) {
  const std::shared_ptr<DataType> decimal_type = decimal(10, 4);
  std::shared_ptr<Array> parsed, reference;

  // Empty array
  ASSERT_OK(ArrayFromJSON(decimal_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    Decimal128Builder reference_builder(decimal_type);
    ASSERT_OK(reference_builder.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);

  // Positive and negative values
  ASSERT_OK(ArrayFromJSON(decimal_type, R"(["123.4567", "-78.9000"])", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    Decimal128Builder reference_builder(decimal_type);
    ASSERT_OK(reference_builder.Append(Decimal128(1234567)));
    ASSERT_OK(reference_builder.Append(Decimal128(-789000)));
    ASSERT_OK(reference_builder.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);

  // A value followed by a null
  ASSERT_OK(ArrayFromJSON(decimal_type, R"(["123.4567", null])", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    Decimal128Builder reference_builder(decimal_type);
    ASSERT_OK(reference_builder.Append(Decimal128(1234567)));
    ASSERT_OK(reference_builder.AppendNull());
    ASSERT_OK(reference_builder.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
473
// Non-string elements and strings whose scale differs from the type are rejected.
TEST(TestDecimal, Errors) {
  const std::shared_ptr<DataType> type = decimal(10, 4);
  std::shared_ptr<Array> array;

  for (const char* bad_json : {
           // JSON numbers instead of strings
           "[0]", "[12.3456]",
           // Bad scale
           "[\"12.345\"]", "[\"12.34560\"]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
484
// list<int64> conversion: empty input, nested values, null elements, null lists.
TEST(TestList, IntegerList) {
  auto pool = default_memory_pool();
  const std::shared_ptr<DataType> list_type = list(int64());
  std::shared_ptr<Array> parsed, reference, offsets, values;

  // No lists at all
  ASSERT_OK(ArrayFromJSON(list_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0}, &offsets);
  ArrayFromVector<Int64Type>({}, &values);
  ASSERT_OK_AND_ASSIGN(reference, ListArray::FromArrays(*offsets, *values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Lists of different lengths, including an empty one
  ASSERT_OK(ArrayFromJSON(list_type, "[[4, 5], [], [6]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0, 2, 2, 3}, &offsets);
  ArrayFromVector<Int64Type>({4, 5, 6}, &values);
  ASSERT_OK_AND_ASSIGN(reference, ListArray::FromArrays(*offsets, *values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Null elements inside the lists
  ASSERT_OK(ArrayFromJSON(list_type, "[[], [null], [6, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0, 0, 1, 3}, &offsets);
  const std::vector<bool> value_validity{false, true, false};
  ArrayFromVector<Int64Type>(value_validity, {0, 6, 0}, &values);
  ASSERT_OK_AND_ASSIGN(reference, ListArray::FromArrays(*offsets, *values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Null lists, built by hand with a ListBuilder
  ASSERT_OK(ArrayFromJSON(list_type, "[null, [], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(pool, list_type, &generic_builder));
    auto& outer = checked_cast<ListBuilder&>(*generic_builder);
    ASSERT_OK(outer.AppendNull());
    ASSERT_OK(outer.Append());
    ASSERT_OK(outer.AppendNull());
    ASSERT_OK(outer.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
525
// Non-list elements and invalid or out-of-range child values are rejected.
TEST(TestList, IntegerListErrors) {
  const std::shared_ptr<DataType> type = list(int64());
  std::shared_ptr<Array> array;

  for (const char* bad_json : {"[0]", "[[0.0]]", "[[9223372036854775808]]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
534
// list<null> conversion: empty input, lists of nulls, and null lists.
TEST(TestList, NullList) {
  auto pool = default_memory_pool();
  const std::shared_ptr<DataType> list_type = list(null());
  std::shared_ptr<Array> parsed, reference, offsets, values;

  // No lists at all
  ASSERT_OK(ArrayFromJSON(list_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0}, &offsets);
  values = std::make_shared<NullArray>(0);
  ASSERT_OK_AND_ASSIGN(reference, ListArray::FromArrays(*offsets, *values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Lists holding zero, one and two nulls
  ASSERT_OK(ArrayFromJSON(list_type, "[[], [null], [null, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0, 0, 1, 3}, &offsets);
  values = std::make_shared<NullArray>(3);
  ASSERT_OK_AND_ASSIGN(reference, ListArray::FromArrays(*offsets, *values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Null lists, built by hand with a ListBuilder
  ASSERT_OK(ArrayFromJSON(list_type, "[null, [], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(pool, list_type, &generic_builder));
    auto& outer = checked_cast<ListBuilder&>(*generic_builder);
    ASSERT_OK(outer.AppendNull());
    ASSERT_OK(outer.Append());
    ASSERT_OK(outer.AppendNull());
    ASSERT_OK(outer.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
567
// list<list<uint8>> conversion: nested values, empty lists at both levels, and
// nulls at every nesting level.
TEST(TestList, IntegerListList) {
  auto pool = default_memory_pool();
  std::shared_ptr<DataType> type = list(list(uint8()));
  std::shared_ptr<Array> offsets, values, nested, expected, actual;

  ASSERT_OK(ArrayFromJSON(type, "[[[4], [5, 6]], [[7, 8, 9]]]", &actual));
  ASSERT_OK(actual->ValidateFull());
  // Inner lists first, then the outer lists that group them
  ArrayFromVector<Int32Type>({0, 1, 3, 6}, &offsets);
  ArrayFromVector<UInt8Type>({4, 5, 6, 7, 8, 9}, &values);
  ASSERT_OK_AND_ASSIGN(nested, ListArray::FromArrays(*offsets, *values, pool));
  ArrayFromVector<Int32Type>({0, 2, 3}, &offsets);
  ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *nested, pool));
  ASSERT_EQ(actual->length(), 2);
  AssertArraysEqual(*expected, *actual);

  ASSERT_OK(ArrayFromJSON(type, "[[], [[]], [[4], [], [5, 6]], [[7, 8, 9]]]", &actual));
  ASSERT_OK(actual->ValidateFull());
  ArrayFromVector<Int32Type>({0, 0, 1, 1, 3, 6}, &offsets);
  ArrayFromVector<UInt8Type>({4, 5, 6, 7, 8, 9}, &values);
  ASSERT_OK_AND_ASSIGN(nested, ListArray::FromArrays(*offsets, *values, pool));
  ArrayFromVector<Int32Type>({0, 0, 1, 4, 5}, &offsets);
  ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *nested, pool));
  ASSERT_EQ(actual->length(), 4);
  AssertArraysEqual(*expected, *actual);

  ASSERT_OK(ArrayFromJSON(type, "[null, [null], [[null]]]", &actual));
  ASSERT_OK(actual->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> builder;
    ASSERT_OK(MakeBuilder(pool, type, &builder));
    auto& list_builder = checked_cast<ListBuilder&>(*builder);
    auto& child_builder = checked_cast<ListBuilder&>(*list_builder.value_builder());
    auto& value_builder = checked_cast<UInt8Builder&>(*child_builder.value_builder());
    // null
    ASSERT_OK(list_builder.AppendNull());
    // [null]
    ASSERT_OK(list_builder.Append());
    ASSERT_OK(child_builder.AppendNull());
    // [[null]]: the innermost list holds one null value
    ASSERT_OK(list_builder.Append());
    ASSERT_OK(child_builder.Append());
    ASSERT_OK(value_builder.AppendNull());
    ASSERT_OK(list_builder.Finish(&expected));
  }
  // Compare the hand-built array with the parsed one, as the cases above do
  AssertArraysEqual(*expected, *actual);
}
608
// large_list<int16> conversion with empty lists and null elements.
TEST(TestLargeList, Basics) {
  // Similar as TestList above, only testing the basics
  auto pool = default_memory_pool();
  const std::shared_ptr<DataType> list_type = large_list(int16());
  std::shared_ptr<Array> parsed, reference, offsets, values;

  ASSERT_OK(ArrayFromJSON(list_type, "[[], [null], [6, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());

  // The reference array is assembled from 64-bit offsets and the flattened values
  ArrayFromVector<Int64Type>({0, 0, 1, 3}, &offsets);
  const std::vector<bool> value_validity{false, true, false};
  ArrayFromVector<Int16Type>(value_validity, {0, 6, 0}, &values);
  ASSERT_OK_AND_ASSIGN(reference, LargeListArray::FromArrays(*offsets, *values, pool));
  AssertArraysEqual(*reference, *parsed);
}
623
// map<int16, int16> conversion: each map is a JSON array of [key, item] pairs.
TEST(TestMap, IntegerToInteger) {
  const auto map_type = map(int16(), int16());

  // A six-entry map, a null map, a map with some null items, and an empty map
  const char* json = R"(
    [
        [[0, 1], [1, 1], [2, 2], [3, 3], [4, 5], [5, 8]],
        null,
        [[0, null], [1, null], [2, 0], [3, 1], [4, null], [5, 2]],
        []
      ]
)";
  std::shared_ptr<Array> parsed;
  ASSERT_OK(ArrayFromJSON(map_type, json, &parsed));

  // Build the same array by hand
  std::unique_ptr<ArrayBuilder> generic_builder;
  ASSERT_OK(MakeBuilder(default_memory_pool(), map_type, &generic_builder));
  auto& maps = checked_cast<MapBuilder&>(*generic_builder);
  auto& keys = checked_cast<Int16Builder&>(*maps.key_builder());
  auto& items = checked_cast<Int16Builder&>(*maps.item_builder());

  ASSERT_OK(maps.Append());
  ASSERT_OK(keys.AppendValues({0, 1, 2, 3, 4, 5}));
  ASSERT_OK(items.AppendValues({1, 1, 2, 3, 5, 8}));
  ASSERT_OK(maps.AppendNull());
  ASSERT_OK(maps.Append());
  ASSERT_OK(keys.AppendValues({0, 1, 2, 3, 4, 5}));
  // The second argument flags which items are valid; -1 marks the null slots
  ASSERT_OK(items.AppendValues({-1, -1, 0, 1, -1, 2}, {0, 0, 1, 1, 0, 1}));
  ASSERT_OK(maps.Append());

  std::shared_ptr<Array> reference;
  ASSERT_OK(maps.Finish(&reference));

  ASSERT_ARRAYS_EQUAL(*parsed, *reference);
}
656
// map<utf8, int32> conversion, checked against a MapArray assembled from its
// offsets, keys, items and validity bitmap.
TEST(TestMap, StringToInteger) {
  const auto map_type = map(utf8(), int32());
  const char* json = R"(
    [
        [["joe", 0], ["mark", null]],
        null,
        [["cap", 8]],
        []
      ]
)";
  const auto parsed = ArrayFromJSON(map_type, json);

  // Reference pieces: four maps (the second one null) over three entries
  std::vector<int32_t> map_offsets = {0, 2, 2, 3, 3};
  const auto keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])");
  const auto items = ArrayFromJSON(int32(), "[0, null, 8]");
  ASSERT_OK_AND_ASSIGN(auto validity_bitmap, BitUtil::BytesToBits({1, 0, 1, 1}));
  const auto reference = std::make_shared<MapArray>(
      map_type, 4, Buffer::Wrap(map_offsets), keys, items, validity_bitmap, 1);

  ASSERT_ARRAYS_EQUAL(*parsed, *reference);
}
677
// Malformed map input must be rejected with an Invalid status.
TEST(TestMap, Errors) {
  const auto type = map(int16(), int16());
  std::shared_ptr<Array> array;

  for (const char* bad_json : {
           // list of pairs isn't an array
           "[0]",
           // pair isn't an array
           "[[0]]", "[[null]]",
           // pair with length != 2
           "[[[0]]]", "[[[0, 1, 2]]]",
           // null key
           "[[[null, 0]]]",
           // key or value fails to convert
           "[[[0.0, 0]]]", "[[[0, 0.0]]]"}) {
    SCOPED_TRACE(bad_json);  // identify the offending input on failure
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &array));
  }
}
696
// Conversion of a map whose keys are themselves maps (int16 -> int16) and whose
// items are lists of strings. The parsed array is compared with one built by
// hand through the nested builders.
TEST(TestMap, IntegerMapToStringList) {
  auto type = map(map(int16(), int16()), list(utf8()));
  std::shared_ptr<Array> expected, actual;

  // Two top-level elements: a map with three [key, item] entries, then null.
  const char* input = R"(
    [
      [
        [
          [],
          [null, "empty"]
        ],
        [
          [[0, 1]],
          null
        ],
        [
          [[0, 0], [1, 1]],
          ["bootstrapping tautology?", "lispy", null, "i can see eternity"]
        ]
      ],
      null
    ]
  )";
  ASSERT_OK(ArrayFromJSON(type, input, &actual));

  // Builders for each nesting level: outer map, key map (with its own key and
  // item builders), item list, and the strings inside the item list.
  std::unique_ptr<ArrayBuilder> builder;
  ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder));
  auto& map_builder = checked_cast<MapBuilder&>(*builder);
  auto& key_builder = checked_cast<MapBuilder&>(*map_builder.key_builder());
  auto& key_key_builder = checked_cast<Int16Builder&>(*key_builder.key_builder());
  auto& key_item_builder = checked_cast<Int16Builder&>(*key_builder.item_builder());
  auto& item_builder = checked_cast<ListBuilder&>(*map_builder.item_builder());
  auto& item_value_builder = checked_cast<StringBuilder&>(*item_builder.value_builder());

  // First entry: empty key map -> [null, "empty"]
  ASSERT_OK(map_builder.Append());
  ASSERT_OK(key_builder.Append());
  ASSERT_OK(item_builder.Append());
  ASSERT_OK(item_value_builder.AppendNull());
  ASSERT_OK(item_value_builder.Append("empty"));

  // Second entry: {0: 1} -> null
  ASSERT_OK(key_builder.Append());
  ASSERT_OK(item_builder.AppendNull());
  ASSERT_OK(key_key_builder.AppendValues({0}));
  ASSERT_OK(key_item_builder.AppendValues({1}));

  // Third entry: {0: 0, 1: 1} -> list of four strings, one of them null
  ASSERT_OK(key_builder.Append());
  ASSERT_OK(item_builder.Append());
  ASSERT_OK(key_key_builder.AppendValues({0, 1}));
  ASSERT_OK(key_item_builder.AppendValues({0, 1}));
  ASSERT_OK(item_value_builder.Append("bootstrapping tautology?"));
  ASSERT_OK(item_value_builder.Append("lispy"));
  ASSERT_OK(item_value_builder.AppendNull());
  ASSERT_OK(item_value_builder.Append("i can see eternity"));

  // Second top-level element: a null map
  ASSERT_OK(map_builder.AppendNull());

  ASSERT_OK(map_builder.Finish(&expected));
  ASSERT_ARRAYS_EQUAL(*actual, *expected);
}
756
// fixed_size_list<int64>[2] conversion: empty input, plain values, null
// elements, and null lists.
TEST(TestFixedSizeList, IntegerList) {
  auto pool = default_memory_pool();
  const auto list_type = fixed_size_list(int64(), 2);
  std::shared_ptr<Array> parsed, reference, values;

  // No lists at all
  ASSERT_OK(ArrayFromJSON(list_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int64Type>({}, &values);
  reference = std::make_shared<FixedSizeListArray>(list_type, 0, values);
  AssertArraysEqual(*reference, *parsed);

  // Three fully populated lists
  ASSERT_OK(ArrayFromJSON(list_type, "[[4, 5], [0, 0], [6, 7]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int64Type>({4, 5, 0, 0, 6, 7}, &values);
  reference = std::make_shared<FixedSizeListArray>(list_type, 3, values);
  AssertArraysEqual(*reference, *parsed);

  // Null elements inside the lists
  ASSERT_OK(ArrayFromJSON(list_type, "[[null, null], [0, null], [6, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  const std::vector<bool> value_validity{false, false, true, false, true, false};
  ArrayFromVector<Int64Type>(value_validity, {0, 0, 0, 0, 6, 0}, &values);
  reference = std::make_shared<FixedSizeListArray>(list_type, 3, values);
  AssertArraysEqual(*reference, *parsed);

  // Null lists, built by hand with a FixedSizeListBuilder
  ASSERT_OK(ArrayFromJSON(list_type, "[null, [null, null], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(pool, list_type, &generic_builder));
    auto& outer = checked_cast<FixedSizeListBuilder&>(*generic_builder);
    auto inner = checked_cast<Int64Builder*>(outer.value_builder());
    ASSERT_OK(outer.AppendNull());
    ASSERT_OK(outer.Append());
    ASSERT_OK(inner->AppendNull());
    ASSERT_OK(inner->AppendNull());
    ASSERT_OK(outer.AppendNull());
    ASSERT_OK(outer.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
797
// Each malformed input below must make ArrayFromJSON return Status::Invalid
// for the type fixed_size_list<int64>[2].
TEST(TestFixedSizeList, IntegerListErrors) {
  const std::shared_ptr<DataType> list_type = fixed_size_list(int64(), 2);
  const char* const rejected_inputs[] = {
      "[0]",                         // a bare integer where a list is expected
      "[[0.0, 1.0]]",                // floating-point literals as items
      "[[0]]",                       // one item given for a list of size 2
      "[[9223372036854775808, 0]]",  // 2^63, one past the int64 maximum
  };

  std::shared_ptr<Array> out;
  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(list_type, json, &out));
  }
}
807
// Parsing of fixed_size_list<null>[2] arrays from JSON: the empty array,
// lists made only of nulls, and null list slots next to a list of nulls.
TEST(TestFixedSizeList, NullList) {
  const std::shared_ptr<DataType> list_type = fixed_size_list(null(), 2);
  std::shared_ptr<Array> parsed, wanted, items;

  // No lists at all.
  ASSERT_OK(ArrayFromJSON(list_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  items = std::make_shared<NullArray>(0);
  wanted = std::make_shared<FixedSizeListArray>(list_type, 0, items);
  AssertArraysEqual(*wanted, *parsed);

  // Three valid lists, six null items in total.
  ASSERT_OK(ArrayFromJSON(list_type, "[[null, null], [null, null], [null, null]]",
                          &parsed));
  ASSERT_OK(parsed->ValidateFull());
  items = std::make_shared<NullArray>(6);
  wanted = std::make_shared<FixedSizeListArray>(list_type, 3, items);
  AssertArraysEqual(*wanted, *parsed);

  // Null list slots around one valid list; the expected array is assembled
  // through the builder API.
  ASSERT_OK(ArrayFromJSON(list_type, "[null, [null, null], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &generic_builder));
    auto* lists = checked_cast<FixedSizeListBuilder*>(generic_builder.get());
    auto* nulls = checked_cast<NullBuilder*>(lists->value_builder());

    ASSERT_OK(lists->AppendNull());
    ASSERT_OK(lists->Append());
    for (int slot = 0; slot < 2; ++slot) {
      ASSERT_OK(nulls->AppendNull());
    }
    ASSERT_OK(lists->AppendNull());
    ASSERT_OK(lists->Finish(&wanted));
  }
  AssertArraysEqual(*wanted, *parsed);
}
841
// Parsing of a two-level type: fixed_size_list[1] of fixed_size_list<uint8>[2].
// Covers fully valid input and nulls at the item, inner-list and outer-list
// levels.
TEST(TestFixedSizeList, IntegerListList) {
  const std::shared_ptr<DataType> inner_type = fixed_size_list(uint8(), 2);
  const std::shared_ptr<DataType> outer_type = fixed_size_list(inner_type, 1);
  std::shared_ptr<Array> parsed, wanted, inner_lists, items;

  // Everything valid: three outer lists, each wrapping one pair.
  ASSERT_OK(ArrayFromJSON(outer_type, "[[[1, 4]], [[2, 5]], [[3, 6]]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<UInt8Type>({1, 4, 2, 5, 3, 6}, &items);
  inner_lists = std::make_shared<FixedSizeListArray>(inner_type, 3, items);
  wanted = std::make_shared<FixedSizeListArray>(outer_type, 3, inner_lists);
  AssertArraysEqual(*wanted, *parsed);

  // Nulls at each nesting level; the expected array is assembled through the
  // builder API.
  ASSERT_OK(ArrayFromJSON(outer_type, "[[[1, null]], [null], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(default_memory_pool(), outer_type, &generic_builder));
    auto* outer = checked_cast<FixedSizeListBuilder*>(generic_builder.get());
    auto* inner = checked_cast<FixedSizeListBuilder*>(outer->value_builder());
    auto* bytes = checked_cast<UInt8Builder*>(inner->value_builder());

    // [[1, null]]
    ASSERT_OK(outer->Append());
    ASSERT_OK(inner->Append());
    ASSERT_OK(bytes->Append(1));
    ASSERT_OK(bytes->AppendNull());

    // [null]
    ASSERT_OK(outer->Append());
    ASSERT_OK(inner->AppendNull());

    // null
    ASSERT_OK(outer->AppendNull());

    ASSERT_OK(outer->Finish(&wanted));
  }
  AssertArraysEqual(*wanted, *parsed);
}
879
// Parsing of struct<a: int8, b: bool> arrays from JSON, using both the
// positional (array) notation and the object notation, with and without nulls.
TEST(TestStruct, SimpleStruct) {
  const std::shared_ptr<DataType> struct_type =
      struct_({field("a", int8()), field("b", boolean())});
  std::shared_ptr<Array> col_a, col_b, wanted, parsed;

  // Trivial: zero rows.
  ASSERT_OK(ArrayFromJSON(struct_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int8Type>({}, &col_a);
  ArrayFromVector<BooleanType, bool>({}, &col_b);
  wanted = std::make_shared<StructArray>(struct_type, 0, ArrayVector{col_a, col_b});
  AssertArraysEqual(*wanted, *parsed);

  // Non-empty: two fully valid rows.
  ArrayFromVector<Int8Type>({5, 6}, &col_a);
  ArrayFromVector<BooleanType, bool>({true, false}, &col_b);
  wanted = std::make_shared<StructArray>(struct_type, 2, ArrayVector{col_a, col_b});

  ASSERT_OK(ArrayFromJSON(struct_type, "[[5, true], [6, false]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  AssertArraysEqual(*wanted, *parsed);
  // Same rows in object notation; the second object lists "b" before "a".
  ASSERT_OK(ArrayFromJSON(struct_type,
                          R"([{"a": 5, "b": true}, {"b": false, "a": 6}])", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  AssertArraysEqual(*wanted, *parsed);

  // With nulls: one null row plus null members inside valid rows.
  const std::vector<bool> a_validity = {false, true, false, false};
  const std::vector<bool> b_validity = {false, false, true, false};
  ArrayFromVector<Int8Type>(a_validity, {0, 5, 6, 0}, &col_a);
  ArrayFromVector<BooleanType, bool>(b_validity, {false, true, false, false}, &col_b);
  std::shared_ptr<Buffer> row_validity;
  BitmapFromVector<bool>({false, true, true, true}, &row_validity);
  wanted = std::make_shared<StructArray>(struct_type, 4, ArrayVector{col_a, col_b},
                                         row_validity, 1);

  ASSERT_OK(ArrayFromJSON(struct_type,
                          "[null, [5, null], [null, false], [null, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  AssertArraysEqual(*wanted, *parsed);
  // When using object notation, null members can be omitted
  ASSERT_OK(ArrayFromJSON(struct_type,
                          R"([null, {"a": 5, "b": null}, {"b": false}, {}])", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  AssertArraysEqual(*wanted, *parsed);
}
931
// Parsing of struct<nested: struct<a: int8, b: bool>, c: float64> arrays from
// JSON (positional notation): empty, fully valid, and with nulls at both the
// outer and the nested level.
TEST(TestStruct, NestedStruct) {
  const std::shared_ptr<DataType> inner_type =
      struct_({field("a", int8()), field("b", boolean())});
  const std::shared_ptr<DataType> outer_type =
      struct_({field("nested", inner_type), field("c", float64())});
  std::shared_ptr<Array> col_a, col_b, col_c, col_nested, wanted, parsed;

  // Zero rows.
  ASSERT_OK(ArrayFromJSON(outer_type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int8Type>({}, &col_a);
  ArrayFromVector<BooleanType, bool>({}, &col_b);
  col_nested = std::make_shared<StructArray>(inner_type, 0, ArrayVector{col_a, col_b});
  ArrayFromVector<DoubleType>({}, &col_c);
  wanted =
      std::make_shared<StructArray>(outer_type, 0, ArrayVector{col_nested, col_c});
  AssertArraysEqual(*wanted, *parsed);

  // Two fully valid rows.
  ASSERT_OK(
      ArrayFromJSON(outer_type, "[[[5, true], 1.5], [[6, false], -3e2]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int8Type>({5, 6}, &col_a);
  ArrayFromVector<BooleanType, bool>({true, false}, &col_b);
  col_nested = std::make_shared<StructArray>(inner_type, 2, ArrayVector{col_a, col_b});
  ArrayFromVector<DoubleType>({1.5, -300.0}, &col_c);
  wanted =
      std::make_shared<StructArray>(outer_type, 2, ArrayVector{col_nested, col_c});
  AssertArraysEqual(*wanted, *parsed);

  // A null outer row, a row with a null "c", and a row with a null "nested".
  ASSERT_OK(
      ArrayFromJSON(outer_type, "[null, [[5, null], null], [null, -3e2]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  const std::vector<bool> a_validity = {false, true, false};
  const std::vector<bool> b_validity = {false, false, false};
  const std::vector<bool> c_validity = {false, false, true};
  ArrayFromVector<Int8Type>(a_validity, {0, 5, 0}, &col_a);
  ArrayFromVector<BooleanType, bool>(b_validity, {false, false, false}, &col_b);
  std::shared_ptr<Buffer> nested_validity;
  BitmapFromVector<bool>({false, true, false}, &nested_validity);
  col_nested = std::make_shared<StructArray>(inner_type, 3, ArrayVector{col_a, col_b},
                                             nested_validity, 2);
  ArrayFromVector<DoubleType>(c_validity, {0.0, 0.0, -300.0}, &col_c);
  std::shared_ptr<Buffer> outer_validity;
  BitmapFromVector<bool>({false, true, true}, &outer_validity);
  wanted = std::make_shared<StructArray>(outer_type, 3, ArrayVector{col_nested, col_c},
                                         outer_validity, 1);
  AssertArraysEqual(*wanted, *parsed);
}
976
// Each malformed input below must make ArrayFromJSON return Status::Invalid
// for the type struct<a: int8, b: bool>.
TEST(TestStruct, Errors) {
  const std::shared_ptr<DataType> struct_type =
      struct_({field("a", int8()), field("b", boolean())});
  const char* const rejected_inputs[] = {
      "[0, true]",                  // scalars where rows are expected
      "[[0]]",                      // one value given for two fields
      "[[0, true, 1]]",             // three values given for two fields
      "[[true, 0]]",                // values swapped relative to field types
      R"([{"b": 0, "a": true}])",   // object notation with swapped value types
      R"([{"c": 0}])",              // key "c" is not a field of the struct
  };

  std::shared_ptr<Array> out;
  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(struct_type, json, &out));
  }
}
990
// Parses a dense union<a: int8 = 4, b: bool = 8> from [type_code, value] pairs
// (and nulls) and compares it with an array built by UnionArray::MakeDense,
// including the offsets buffer and each child array.
TEST(TestDenseUnion, Basics) {
  const std::shared_ptr<DataType> union_type =
      union_({field("a", int8()), field("b", boolean())}, {4, 8}, UnionMode::DENSE);
  const auto parsed = checked_pointer_cast<UnionArray>(ArrayFromJSON(
      union_type, "[null, [4, 122], [8, true], [4, null], null, [8, false]]"));

  const auto want_type_ids = ArrayFromJSON(int8(), "[null, 4, 8, 4, null, 8]");
  const auto want_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 0, 1]");
  const auto want_a = ArrayFromJSON(int8(), "[122, null]");
  const auto want_b = ArrayFromJSON(boolean(), "[true, false]");

  ASSERT_OK_AND_ASSIGN(auto wanted,
                       UnionArray::MakeDense(*want_type_ids, *want_offsets,
                                             ArrayVector{want_a, want_b}, {"a", "b"},
                                             {4, 8}));

  ASSERT_ARRAYS_EQUAL(*wanted, *parsed);

  // ensure that the array is as dense as we expect
  const auto& want_offsets_buffer = want_offsets->data()->buffers[1];
  ASSERT_TRUE(parsed->value_offsets()->Equals(*want_offsets_buffer));
  ASSERT_ARRAYS_EQUAL(*want_a, *parsed->field(0));
  ASSERT_ARRAYS_EQUAL(*want_b, *parsed->field(1));
}
1015
// Parses a sparse union<a: int8 = 4, b: bool = 8> from [type_code, value]
// pairs (and a null) and compares it with an array built by
// UnionArray::MakeSparse.
TEST(TestSparseUnion, Basics) {
  const std::shared_ptr<DataType> union_type =
      union_({field("a", int8()), field("b", boolean())}, {4, 8}, UnionMode::SPARSE);
  const auto parsed =
      ArrayFromJSON(union_type, "[[4, 122], [8, true], [4, null], null, [8, false]]");

  const auto want_type_ids = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
  // In the expected sparse layout every child has one slot per union slot.
  const ArrayVector want_children = {
      ArrayFromJSON(int8(), "[122, null, null, null, null]"),
      ArrayFromJSON(boolean(), "[null, true, null, null, false]"),
  };

  ASSERT_OK_AND_ASSIGN(
      auto wanted,
      UnionArray::MakeSparse(*want_type_ids, want_children, {"a", "b"}, {4, 8}));

  ASSERT_ARRAYS_EQUAL(*wanted, *parsed);
}
1033
// Parses list<dense union<a: int8 = 4, b: bool = 8>> and compares it with a
// list assembled from MakeDense + ListArray::FromArrays, including the union
// offsets and each union child.
TEST(TestDenseUnion, ListOfUnion) {
  const std::shared_ptr<DataType> union_type =
      union_({field("a", int8()), field("b", boolean())}, {4, 8}, UnionMode::DENSE);
  const std::shared_ptr<DataType> list_type = list(union_type);
  const auto parsed = checked_pointer_cast<ListArray>(ArrayFromJSON(
      list_type, "[[[4, 122], [8, true]],[[4, null], null, [8, false]]]"));

  const auto want_type_ids = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
  const auto want_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]");
  const auto want_a = ArrayFromJSON(int8(), "[122, null]");
  const auto want_b = ArrayFromJSON(boolean(), "[true, false]");

  ASSERT_OK_AND_ASSIGN(auto want_values,
                       UnionArray::MakeDense(*want_type_ids, *want_offsets,
                                             ArrayVector{want_a, want_b}, {"a", "b"},
                                             {4, 8}));
  const auto want_list_offsets = ArrayFromJSON(int32(), "[0, 2, 5]");
  ASSERT_OK_AND_ASSIGN(auto wanted,
                       ListArray::FromArrays(*want_list_offsets, *want_values));

  ASSERT_ARRAYS_EQUAL(*wanted, *parsed);

  // ensure that the array is as dense as we expect
  const auto parsed_unions = checked_pointer_cast<UnionArray>(parsed->values());
  const auto want_unions = checked_pointer_cast<UnionArray>(want_values);
  ASSERT_TRUE(parsed_unions->value_offsets()->Equals(*want_unions->value_offsets()));
  ASSERT_ARRAYS_EQUAL(*want_a, *parsed_unions->field(0));
  ASSERT_ARRAYS_EQUAL(*want_b, *parsed_unions->field(1));
}
1068
// Parses list<sparse union<a: int8 = 4, b: bool = 8>> and compares it with a
// list assembled from MakeSparse + ListArray::FromArrays.
TEST(TestSparseUnion, ListOfUnion) {
  const std::shared_ptr<DataType> union_type =
      union_({field("a", int8()), field("b", boolean())}, {4, 8}, UnionMode::SPARSE);
  const std::shared_ptr<DataType> list_type = list(union_type);
  const auto parsed = ArrayFromJSON(
      list_type, "[[[4, 122], [8, true]],[[4, null], null, [8, false]]]");

  const auto want_type_ids = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
  const ArrayVector want_children = {
      ArrayFromJSON(int8(), "[122, null, null, null, null]"),
      ArrayFromJSON(boolean(), "[null, true, null, null, false]"),
  };

  ASSERT_OK_AND_ASSIGN(
      auto want_values,
      UnionArray::MakeSparse(*want_type_ids, want_children, {"a", "b"}, {4, 8}));
  const auto want_list_offsets = ArrayFromJSON(int32(), "[0, 2, 5]");
  ASSERT_OK_AND_ASSIGN(auto wanted,
                       ListArray::FromArrays(*want_list_offsets, *want_values));

  ASSERT_ARRAYS_EQUAL(*wanted, *parsed);
}
1093
// Parses a dense union with three struct-typed children (type codes 0, 23, 47)
// and compares it with an array built by UnionArray::MakeDense, including the
// offsets buffer and every child array. No input slot uses the third child
// ("q"), so its expected child array is empty.
TEST(TestDenseUnion, UnionOfStructs) {
  const auto ab_type = struct_({field("alpha", float64()), field("bravo", utf8())});
  const auto wtf_type = struct_({field("whiskey", int8()), field("tango", float64()),
                                 field("foxtrot", list(int8()))});
  const auto q_type = struct_({field("quebec", utf8())});
  const std::vector<std::shared_ptr<Field>> union_fields = {
      field("ab", ab_type), field("wtf", wtf_type), field("q", q_type)};
  const std::shared_ptr<DataType> union_type =
      union_(union_fields, {0, 23, 47}, UnionMode::DENSE);

  const auto parsed = checked_pointer_cast<UnionArray>(ArrayFromJSON(union_type, R"([
    [0, {"alpha": 0.0, "bravo": "charlie"}],
    [23, {"whiskey": 99}],
    [0, {"bravo": "mike"}],
    null,
    [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}]
  ])"));

  const auto want_type_ids = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]");
  const auto want_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]");
  const auto want_ab = ArrayFromJSON(union_fields[0]->type(), R"([
    {"alpha": 0.0, "bravo": "charlie"},
    {"bravo": "mike"}
  ])");
  const auto want_wtf = ArrayFromJSON(union_fields[1]->type(), R"([
    {"whiskey": 99},
    {"tango": 8.25, "foxtrot": [0, 2, 3]}
  ])");
  const auto want_q = ArrayFromJSON(union_fields[2]->type(), "[]");
  const ArrayVector want_children = {want_ab, want_wtf, want_q};

  ASSERT_OK_AND_ASSIGN(auto wanted,
                       UnionArray::MakeDense(*want_type_ids, *want_offsets,
                                             want_children, {"ab", "wtf", "q"},
                                             {0, 23, 47}));

  ASSERT_ARRAYS_EQUAL(*wanted, *parsed);

  // ensure that the array is as dense as we expect
  const auto& want_offsets_buffer = want_offsets->data()->buffers[1];
  ASSERT_TRUE(parsed->value_offsets()->Equals(*want_offsets_buffer));
  const auto& wanted_union = checked_cast<const UnionArray&>(*wanted);
  for (int child = 0; child < union_type->num_fields(); ++child) {
    ASSERT_ARRAYS_EQUAL(*wanted_union.field(child), *parsed->field(child));
  }
}
1135
// Parses a sparse union with three struct-typed children (type codes 0, 23,
// 47) and compares it with an array built by UnionArray::MakeSparse. Each
// expected child has five slots, one per union slot.
TEST(TestSparseUnion, UnionOfStructs) {
  const auto ab_type = struct_({field("alpha", float64()), field("bravo", utf8())});
  const auto wtf_type = struct_({field("whiskey", int8()), field("tango", float64()),
                                 field("foxtrot", list(int8()))});
  const auto q_type = struct_({field("quebec", utf8())});
  const std::vector<std::shared_ptr<Field>> union_fields = {
      field("ab", ab_type), field("wtf", wtf_type), field("q", q_type)};
  const std::shared_ptr<DataType> union_type =
      union_(union_fields, {0, 23, 47}, UnionMode::SPARSE);

  const auto parsed = ArrayFromJSON(union_type, R"([
    [0, {"alpha": 0.0, "bravo": "charlie"}],
    [23, {"whiskey": 99}],
    [0, {"bravo": "mike"}],
    null,
    [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}]
  ])");

  const auto want_type_ids = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]");
  const auto want_ab = ArrayFromJSON(union_fields[0]->type(), R"([
    {"alpha": 0.0, "bravo": "charlie"},
    null,
    {"bravo": "mike"},
    null,
    null
  ])");
  const auto want_wtf = ArrayFromJSON(union_fields[1]->type(), R"([
    null,
    {"whiskey": 99},
    null,
    null,
    {"tango": 8.25, "foxtrot": [0, 2, 3]}
  ])");
  const auto want_q =
      ArrayFromJSON(union_fields[2]->type(), "[null, null, null, null, null]");
  const ArrayVector want_children = {want_ab, want_wtf, want_q};

  ASSERT_OK_AND_ASSIGN(auto wanted,
                       UnionArray::MakeSparse(*want_type_ids, want_children,
                                              {"ab", "wtf", "q"}, {0, 23, 47}));

  ASSERT_ARRAYS_EQUAL(*wanted, *parsed);
}
1175
// Each malformed input below must make ArrayFromJSON return Status::Invalid
// for the dense union<a: int8 = 4, b: bool = 8>.
TEST(TestDenseUnion, Errors) {
  const std::shared_ptr<DataType> union_type =
      union_({field("a", int8()), field("b", boolean())}, {4, 8}, UnionMode::DENSE);
  const char* const rejected_inputs[] = {
      R"(["not a valid type_id"])",
      "[[0, 99]]",       // 0 is not one of {4, 8}
      R"([[4, ""]])",    // "" is not a valid int8()
      R"(["not a pair"])",
      "[[0]]",           // a single element instead of a pair
      "[[8, true, 1]]",  // three elements instead of a pair
  };

  std::shared_ptr<Array> out;
  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(union_type, json, &out));
  }
}
1192
// Each malformed input below must make ArrayFromJSON return Status::Invalid
// for the sparse union<a: int8 = 4, b: bool = 8>.
TEST(TestSparseUnion, Errors) {
  const std::shared_ptr<DataType> union_type =
      union_({field("a", int8()), field("b", boolean())}, {4, 8}, UnionMode::SPARSE);
  const char* const rejected_inputs[] = {
      R"(["not a valid type_id"])",
      "[[0, 99]]",       // type code 0 is not among the declared codes {4, 8}
      R"([[4, ""]])",    // a string value for the int8 child
      R"(["not a pair"])",
      "[[0]]",           // a single element instead of a pair
      "[[8, true, 1]]",  // three elements instead of a pair
  };

  std::shared_ptr<Array> out;
  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(union_type, json, &out));
  }
}
1207
// Builds a dictionary<int32, utf8> array from separate JSON strings for the
// indices and the dictionary values, and compares it with a DictionaryArray
// constructed directly from the equivalent index and dictionary arrays.
TEST(TestDictionary, Basics) {
  const std::shared_ptr<DataType> dict_type = dictionary(int32(), utf8());
  const auto parsed = DictArrayFromJSON(dict_type, "[null, 2, 1, 0]",
                                        R"(["whiskey", "tango", "foxtrot"])");

  const auto want_indices = ArrayFromJSON(int32(), "[null, 2, 1, 0]");
  const auto want_values = ArrayFromJSON(utf8(), R"(["whiskey", "tango", "foxtrot"])");
  const DictionaryArray wanted(dict_type, want_indices, want_values);

  ASSERT_ARRAYS_EQUAL(wanted, *parsed);
}
1219
// DictArrayFromJSON must return Status::Invalid when either the indices or the
// dictionary JSON does not match dictionary<int32, utf8>.
TEST(TestDictionary, Errors) {
  const std::shared_ptr<DataType> dict_type = dictionary(int32(), utf8());
  std::shared_ptr<Array> out;

  // A string where an int32 index is expected.
  ASSERT_RAISES(Invalid, DictArrayFromJSON(dict_type, R"(["not a valid index"])",
                                           R"([""])", &out));
  // dict value isn't string
  ASSERT_RAISES(Invalid, DictArrayFromJSON(dict_type, "[0, 1]", "[1]", &out));
}
1229
1230 } // namespace json
1231 } // namespace internal
1232 } // namespace ipc
1233 } // namespace arrow
1234