1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <cmath>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 #include <memory>
24 #include <sstream>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28 #include <vector>
29 
30 #include <gtest/gtest.h>
31 
32 #include "arrow/array.h"
33 #include "arrow/ipc/json_simple.h"
34 #include "arrow/testing/gtest_util.h"
35 #include "arrow/type.h"
36 #include "arrow/type_traits.h"
37 #include "arrow/util/checked_cast.h"
38 #include "arrow/util/decimal.h"
39 
40 #if defined(_MSC_VER)
41 // "warning C4307: '+': integral constant overflow"
42 #pragma warning(disable : 4307)
43 #endif
44 
45 namespace arrow {
46 namespace ipc {
47 namespace internal {
48 namespace json {
49 
50 using ::arrow::internal::checked_cast;
51 using ::arrow::internal::checked_pointer_cast;
52 
// Add two signed integers with wrap-around semantics.
//
// Signed overflow is undefined behaviour, so the sum is formed on the
// unsigned counterparts of the operands and converted back afterwards.
template <typename Signed>
Signed SafeSignedAdd(Signed u, Signed v) {
  using Unsigned = typename std::make_unsigned<Signed>::type;
  const auto lhs = static_cast<Unsigned>(u);
  const auto rhs = static_cast<Unsigned>(v);
  return static_cast<Signed>(lhs + rhs);
}
59 
// 8-bit integers are widened before being streamed: an ostream would
// otherwise print them as a character rather than as a decimal number.
void JSONArrayInternal(std::ostream* ss, int8_t value) {
  const int16_t widened = value;
  *ss << widened;
}

void JSONArrayInternal(std::ostream* ss, uint8_t value) {
  const int16_t widened = value;
  *ss << widened;
}

// Base case: stream a single value unchanged.
template <typename Value>
void JSONArrayInternal(std::ostream* ss, Value&& value) {
  (*ss) << value;
}

// Recursive case: emit the first value, a ", " separator, then the rest.
template <typename Value, typename... Tail>
void JSONArrayInternal(std::ostream* ss, Value&& value, Tail&&... tail) {
  std::ostream& out = *ss;
  JSONArrayInternal(&out, std::forward<Value>(value));
  out << ", ";
  JSONArrayInternal(&out, std::forward<Tail>(tail)...);
}
81 
// Render the arguments as the text of a JSON array: the values are
// separated by ", " and enclosed in square brackets.
template <typename... Args>
std::string JSONArray(Args&&... args) {
  std::stringstream buffer;
  std::ostream* out = &buffer;
  *out << "[";
  JSONArrayInternal(out, std::forward<Args>(args)...);
  *out << "]";
  return buffer.str();
}
90 
91 template <typename T, typename C_TYPE = typename T::c_type>
AssertJSONArray(const std::shared_ptr<DataType> & type,const std::string & json,const std::vector<C_TYPE> & values)92 void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& json,
93                      const std::vector<C_TYPE>& values) {
94   std::shared_ptr<Array> actual, expected;
95 
96   ASSERT_OK(ArrayFromJSON(type, json, &actual));
97   ASSERT_OK(actual->ValidateFull());
98   ArrayFromVector<T, C_TYPE>(type, values, &expected);
99   AssertArraysEqual(*expected, *actual);
100 }
101 
102 template <typename T, typename C_TYPE = typename T::c_type>
AssertJSONArray(const std::shared_ptr<DataType> & type,const std::string & json,const std::vector<bool> & is_valid,const std::vector<C_TYPE> & values)103 void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& json,
104                      const std::vector<bool>& is_valid,
105                      const std::vector<C_TYPE>& values) {
106   std::shared_ptr<Array> actual, expected;
107 
108   ASSERT_OK(ArrayFromJSON(type, json, &actual));
109   ASSERT_OK(actual->ValidateFull());
110   ArrayFromVector<T, C_TYPE>(type, is_valid, values, &expected);
111   AssertArraysEqual(*expected, *actual);
112 }
113 
TEST(TestHelper, JSONArray) {
  // Mixed argument types, including the 8-bit integers that need widening.
  const std::string mixed =
      JSONArray(123, -4.5, static_cast<int8_t>(-12), static_cast<uint8_t>(34));
  ASSERT_EQ(mixed, "[123, -4.5, -12, 34]");

  // Values at the extremes of the signed and unsigned 64-bit ranges.
  const std::string extremes =
      JSONArray(9223372036854775807LL, 9223372036854775808ULL,
                -9223372036854775807LL - 1, 18446744073709551615ULL);
  ASSERT_EQ(extremes,
            "[9223372036854775807, 9223372036854775808, -9223372036854775808, "
            "18446744073709551615]");
}
125 
TEST(TestHelper, SafeSignedAdd) {
  // 8-bit sums that leave the int8_t range wrap around.
  const int8_t min_plus_min = SafeSignedAdd<int8_t>(-128, -128);
  ASSERT_EQ(0, min_plus_min);
  const int8_t min_plus_next = SafeSignedAdd<int8_t>(-128, -127);
  ASSERT_EQ(1, min_plus_next);
  const int8_t past_max8 = SafeSignedAdd<int8_t>(1, 127);
  ASSERT_EQ(-128, past_max8);

  // Same for 32 bits: INT32_MAX + 1 wraps to INT32_MIN.
  const int32_t past_max32 = SafeSignedAdd<int32_t>(1, 2147483647);
  ASSERT_EQ(-2147483648LL, past_max32);
}
132 
// Fixture for the type-parameterized integer tests below; T is the Arrow
// data type class under test (the tests read T::c_type and TypeTraits<T>).
template <typename T>
class TestIntegers : public ::testing::Test {};

// Declare the type-parameterized suite; its test cases are registered and
// instantiated further down.
TYPED_TEST_SUITE_P(TestIntegers);
137 
// Empty, all-valid and null-containing arrays, plus the limits of the C type.
TYPED_TEST_P(TestIntegers, Basics) {
  using T = TypeParam;
  using c_type = typename T::c_type;

  std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();

  AssertJSONArray<T>(type, "[]", {});
  AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
  AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true}, {4, 0, 5});

  // Test limits: the JSON text is generated from the numeric limits so that
  // each instantiation checks its own minimum and maximum.
  const auto min_val = std::numeric_limits<c_type>::min();
  const auto max_val = std::numeric_limits<c_type>::max();
  std::string json_string = JSONArray(0, 1, min_val);
  AssertJSONArray<T>(type, json_string, {0, 1, min_val});
  json_string = JSONArray(0, 1, max_val);
  AssertJSONArray<T>(type, json_string, {0, 1, max_val});
}
157 
// Malformed documents, non-array documents and elements of the wrong JSON
// kind must all be rejected with an Invalid status.
TYPED_TEST_P(TestIntegers, Errors) {
  using T = TypeParam;

  const std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();
  std::shared_ptr<Array> out;

  for (const char* bad_json : {"", "[", "0", "{}", "[0.0]", "[\"0\"]", "[[0]]"}) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
172 
// Values one step outside the representable range must be rejected.
TYPED_TEST_P(TestIntegers, OutOfBounds) {
  using T = TypeParam;
  using c_type = typename T::c_type;
  using limits = std::numeric_limits<c_type>;

  std::shared_ptr<Array> out;
  const std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton();

  switch (type->id()) {
    case Type::UINT64:
      // The 64-bit cases use literal JSON text for the out-of-range values.
      ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]", &out));
      ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]", &out));
      break;
    case Type::INT64:
      ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[9223372036854775808]", &out));
      ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-9223372036854775809]", &out));
      break;
    default:
      if (std::is_signed<c_type>::value) {
        // Narrower signed types: compute min - 1 and max + 1 in 64 bits.
        const auto lower = SafeSignedAdd<int64_t>(limits::min(), -1);
        const auto upper = SafeSignedAdd<int64_t>(limits::max(), +1);
        const std::string below_range = JSONArray(lower);
        ASSERT_RAISES(Invalid, ArrayFromJSON(type, below_range, &out));
        const std::string above_range = JSONArray(upper);
        ASSERT_RAISES(Invalid, ArrayFromJSON(type, above_range, &out));
      } else {
        // Narrower unsigned types: max + 1 and any negative number.
        const auto upper = static_cast<uint64_t>(limits::max()) + 1;
        const std::string above_range = JSONArray(upper);
        ASSERT_RAISES(Invalid, ArrayFromJSON(type, above_range, &out));
        ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]", &out));
      }
      break;
  }
}
200 
// Register the test cases of the type-parameterized suite, then instantiate
// the suite once per concrete Arrow type.
REGISTER_TYPED_TEST_SUITE_P(TestIntegers, Basics, Errors, OutOfBounds);

INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestIntegers, Int8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt16, TestIntegers, Int16Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt32, TestIntegers, Int32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt64, TestIntegers, Int64Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers, UInt8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
// NOTE(review): HalfFloatType is run through the integer suite as well;
// presumably its c_type is an integer holding the raw bits -- confirm.
INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
212 
// Null arrays: an empty array and an array made only of nulls.
TEST(TestNull, Basics) {
  std::shared_ptr<DataType> type = null();

  AssertJSONArray<NullType, std::nullptr_t>(type, "[]", {});
  AssertJSONArray<NullType, std::nullptr_t>(type, "[null, null]", {nullptr, nullptr});
}
220 
// Anything other than a JSON null is invalid in a null-typed array.
TEST(TestNull, Errors) {
  const std::shared_ptr<DataType> type = null();
  std::shared_ptr<Array> out;

  for (const char* bad_json : {"[[]]", "[0]", "[NaN]"}) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
229 
// Boolean arrays written with JSON booleans or with 0/1 integer literals.
TEST(TestBoolean, Basics) {
  std::shared_ptr<DataType> type = boolean();

  AssertJSONArray<BooleanType, bool>(type, "[]", {});
  AssertJSONArray<BooleanType, bool>(type, "[false, true, false]", {false, true, false});
  AssertJSONArray<BooleanType, bool>(type, "[false, true, null]", {true, true, false},
                                     {false, true, false});
  // Supports integer literal casting
  AssertJSONArray<BooleanType, bool>(type, "[0, 1, 0]", {false, true, false});
  AssertJSONArray<BooleanType, bool>(type, "[0, 1, null]", {true, true, false},
                                     {false, true, false});
}
243 
// Floating-point numbers and strings are not accepted as booleans.
TEST(TestBoolean, Errors) {
  const std::shared_ptr<DataType> type = boolean();
  std::shared_ptr<Array> out;

  for (const char* bad_json : {"[0.0]", R"(["true"])"}) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
251 
// float32 arrays: plain numbers, signed zero, infinities, null and NaN.
TEST(TestFloat, Basics) {
  std::shared_ptr<DataType> type = float32();
  std::shared_ptr<Array> actual;

  AssertJSONArray<FloatType>(type, "[]", {});
  AssertJSONArray<FloatType>(type, "[1, 2.5, -3e4]", {1.0f, 2.5f, -3.0e4f});
  AssertJSONArray<FloatType>(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false},
                             {-0.0f, INFINITY, -INFINITY, 0.0f});

  // Check NaN separately as AssertArraysEqual simply memcmp's array contents
  // and NaNs can have many bit representations.
  ASSERT_OK(ArrayFromJSON(type, "[NaN]", &actual));
  ASSERT_OK(actual->ValidateFull());
  float value = checked_cast<FloatArray&>(*actual).Value(0);
  ASSERT_TRUE(std::isnan(value));
}
268 
// A JSON boolean is not accepted as a float32 value.
TEST(TestFloat, Errors) {
  const auto type = float32();
  std::shared_ptr<Array> out;

  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]", &out));
}
275 
// float64 arrays: plain numbers, signed zero, infinities, null and NaN.
TEST(TestDouble, Basics) {
  std::shared_ptr<DataType> type = float64();
  std::shared_ptr<Array> actual;

  AssertJSONArray<DoubleType>(type, "[]", {});
  AssertJSONArray<DoubleType>(type, "[1, 2.5, -3e4]", {1.0, 2.5, -3.0e4});
  AssertJSONArray<DoubleType>(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false},
                              {-0.0, INFINITY, -INFINITY, 0.0});

  // NaN is checked through std::isnan rather than by comparing arrays.
  ASSERT_OK(ArrayFromJSON(type, "[NaN]", &actual));
  ASSERT_OK(actual->ValidateFull());
  double value = checked_cast<DoubleArray&>(*actual).Value(0);
  ASSERT_TRUE(std::isnan(value));
}
290 
// A JSON boolean is not accepted as a float64 value.
TEST(TestDouble, Errors) {
  const auto type = float64();
  std::shared_ptr<Array> out;

  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]", &out));
}
297 
// utf8 and binary arrays, including embedded NULs, UTF-8 and raw bytes.
TEST(TestString, Basics) {
  // String type
  std::shared_ptr<DataType> type = utf8();

  AssertJSONArray<StringType, std::string>(type, "[]", {});
  AssertJSONArray<StringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
  AssertJSONArray<StringType, std::string>(type, "[\"\", null]", {true, false}, {"", ""});
  // NUL character in string
  std::string s = "some";
  s += '\x00';
  s += "char";
  AssertJSONArray<StringType, std::string>(type, "[\"\", \"some\\u0000char\"]", {"", s});
  // UTF8 sequence in string
  AssertJSONArray<StringType, std::string>(type, "[\"\xc3\xa9\"]", {"\xc3\xa9"});

  // Binary type
  type = binary();
  AssertJSONArray<BinaryType, std::string>(type, "[\"\", \"foo\", null]",
                                           {true, true, false}, {"", "foo", ""});
  // Arbitrary binary (non-UTF8) sequence in string
  s = "\xff\x9f";
  AssertJSONArray<BinaryType, std::string>(type, "[\"" + s + "\"]", {s});
  // Bytes < 0x20 can be represented as JSON unicode escapes
  s = '\x00';
  s += "\x1f";
  AssertJSONArray<BinaryType, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
}
326 
// large_utf8 and large_binary arrays.
TEST(TestLargeString, Basics) {
  // Similar as TestString above, only testing the basics
  std::shared_ptr<DataType> type = large_utf8();

  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", null]", {true, false},
                                                {"", ""});

  // Large binary type
  type = large_binary();
  AssertJSONArray<LargeBinaryType, std::string>(type, "[\"\", \"foo\", null]",
                                                {true, true, false}, {"", "foo", ""});
}
341 
// Date strings are expected to parse to a count of the timestamp's unit
// relative to 1970-01-01 (negative for earlier dates).
TEST(TestTimestamp, Basics) {
  const auto seconds_type = timestamp(TimeUnit::SECOND);
  AssertJSONArray<TimestampType, int64_t>(
      seconds_type, R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])",
      {0, 951782400, 63730281600LL, -2203977600LL});

  const auto nanos_type = timestamp(TimeUnit::NANO);
  AssertJSONArray<TimestampType, int64_t>(
      nanos_type, R"(["1970-01-01","2000-02-29","1900-02-28"])",
      {0, 951782400000000000LL, -2203977600000000000LL});
}
354 
// date32 and date64 arrays are written as plain integers in JSON.
TEST(TestDate, Basics) {
  const auto days_type = date32();
  AssertJSONArray<Date32Type>(days_type, R"([5, null, 42])", {true, false, true},
                              {5, 0, 42});

  const auto millis_type = date64();
  AssertJSONArray<Date64Type>(millis_type, R"([1, null, 9999999999999])",
                              {true, false, true}, {1, 0, 9999999999999LL});
}
362 
// time32 and time64 arrays are written as plain integers, whatever the unit.
TEST(TestTime, Basics) {
  for (const auto unit : {TimeUnit::SECOND, TimeUnit::MILLI}) {
    AssertJSONArray<Time32Type>(time32(unit), R"([5, null, 42])", {true, false, true},
                                {5, 0, 42});
  }

  for (const auto unit : {TimeUnit::MICRO, TimeUnit::NANO}) {
    AssertJSONArray<Time64Type>(time64(unit), R"([1, null, 9999999999999])",
                                {true, false, true}, {1, 0, 9999999999999LL});
  }
}
376 
// Duration arrays are written as plain integers; the same document is
// checked against every time unit.
TEST(TestDuration, Basics) {
  for (const auto unit :
       {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
    const auto type = duration(unit);
    AssertJSONArray<DurationType>(type, R"([null, -7777777777777, 9999999999999])",
                                  {false, true, true},
                                  {0, -7777777777777LL, 9999999999999LL});
  }
}
395 
// Month intervals are written as plain (possibly negative) integers.
TEST(TestMonthInterval, Basics) {
  const auto interval_type = month_interval();
  AssertJSONArray<MonthIntervalType>(interval_type, R"([123, -456, null])",
                                     {true, true, false}, {123, -456, 0});
}
401 
// Day-time intervals are written as two-element JSON arrays.
TEST(TestDayTimeInterval, Basics) {
  const auto interval_type = day_time_interval();
  AssertJSONArray<DayTimeIntervalType>(interval_type, R"([[1, -600], null])",
                                       {true, false}, {{1, -600}, {}});
}
407 
// Numbers and nested arrays are not accepted as strings.
TEST(TestString, Errors) {
  const std::shared_ptr<DataType> type = utf8();
  std::shared_ptr<Array> out;

  for (const char* bad_json : {"[0]", "[[]]"}) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
415 
// fixed_size_binary(3) arrays: empty, valid, null and non-UTF8 contents.
TEST(TestFixedSizeBinary, Basics) {
  std::shared_ptr<DataType> type = fixed_size_binary(3);

  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[]", {});
  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[\"foo\", \"bar\"]",
                                                    {"foo", "bar"});
  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[null, \"foo\"]",
                                                    {false, true}, {"", "foo"});
  // Arbitrary binary (non-UTF8) sequence in string
  std::string s = "\xff\x9f\xcc";
  AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[\"" + s + "\"]", {s});
}
429 
// Wrong element kinds and strings of the wrong length must be rejected.
TEST(TestFixedSizeBinary, Errors) {
  const std::shared_ptr<DataType> type = fixed_size_binary(3);
  std::shared_ptr<Array> out;

  const char* const bad_inputs[] = {
      "[0]",
      "[[]]",
      // Invalid length
      R"([""])",
      R"(["abcd"])",
  };
  for (const char* bad_json : bad_inputs) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
440 
// decimal(10, 4) arrays are written as JSON strings; each parsed array is
// compared against one assembled with a Decimal128Builder.
TEST(TestDecimal, Basics) {
  const std::shared_ptr<DataType> type = decimal(10, 4);
  std::shared_ptr<Array> reference, parsed;

  // Empty array
  ASSERT_OK(ArrayFromJSON(type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    Decimal128Builder empty_builder(type);
    ASSERT_OK(empty_builder.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);

  // Two valid values
  ASSERT_OK(ArrayFromJSON(type, R"(["123.4567", "-78.9000"])", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    Decimal128Builder values_builder(type);
    ASSERT_OK(values_builder.Append(Decimal128(1234567)));
    ASSERT_OK(values_builder.Append(Decimal128(-789000)));
    ASSERT_OK(values_builder.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);

  // A valid value followed by a null
  ASSERT_OK(ArrayFromJSON(type, R"(["123.4567", null])", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    Decimal128Builder nullable_builder(type);
    ASSERT_OK(nullable_builder.Append(Decimal128(1234567)));
    ASSERT_OK(nullable_builder.AppendNull());
    ASSERT_OK(nullable_builder.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
473 
// Bare numbers and strings whose scale differs from the type's are rejected.
TEST(TestDecimal, Errors) {
  const std::shared_ptr<DataType> type = decimal(10, 4);
  std::shared_ptr<Array> out;

  const char* const bad_inputs[] = {
      "[0]",
      "[12.3456]",
      // Bad scale
      R"(["12.345"])",
      R"(["12.34560"])",
  };
  for (const char* bad_json : bad_inputs) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
484 
// list(int64) arrays: empty, plain values, null values and null lists.
TEST(TestList, IntegerList) {
  auto* pool = default_memory_pool();
  const std::shared_ptr<DataType> type = list(int64());
  std::shared_ptr<Array> list_offsets, child_values, reference, parsed;

  // Empty array
  ASSERT_OK(ArrayFromJSON(type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0}, &list_offsets);
  ArrayFromVector<Int64Type>({}, &child_values);
  ASSERT_OK_AND_ASSIGN(reference,
                       ListArray::FromArrays(*list_offsets, *child_values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Lists of valid values, one of them empty
  ASSERT_OK(ArrayFromJSON(type, "[[4, 5], [], [6]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0, 2, 2, 3}, &list_offsets);
  ArrayFromVector<Int64Type>({4, 5, 6}, &child_values);
  ASSERT_OK_AND_ASSIGN(reference,
                       ListArray::FromArrays(*list_offsets, *child_values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Lists containing null values
  ASSERT_OK(ArrayFromJSON(type, "[[], [null], [6, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0, 0, 1, 3}, &list_offsets);
  const std::vector<bool> child_validity = {false, true, false};
  ArrayFromVector<Int64Type>(child_validity, {0, 6, 0}, &child_values);
  ASSERT_OK_AND_ASSIGN(reference,
                       ListArray::FromArrays(*list_offsets, *child_values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Null lists around an empty list
  ASSERT_OK(ArrayFromJSON(type, "[null, [], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(pool, type, &generic_builder));
    auto& lists = checked_cast<ListBuilder&>(*generic_builder);
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Append());
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
525 
// Non-list elements and invalid child values must be rejected.
TEST(TestList, IntegerListErrors) {
  const std::shared_ptr<DataType> type = list(int64());
  std::shared_ptr<Array> out;

  for (const char* bad_json : {"[0]", "[[0.0]]", "[[9223372036854775808]]"}) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
534 
// list(null) arrays: empty, lists of nulls and null lists.
TEST(TestList, NullList) {
  auto* pool = default_memory_pool();
  const std::shared_ptr<DataType> type = list(null());
  std::shared_ptr<Array> list_offsets, child_values, reference, parsed;

  // Empty array
  ASSERT_OK(ArrayFromJSON(type, "[]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0}, &list_offsets);
  child_values = std::make_shared<NullArray>(0);
  ASSERT_OK_AND_ASSIGN(reference,
                       ListArray::FromArrays(*list_offsets, *child_values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Lists holding zero, one and two nulls
  ASSERT_OK(ArrayFromJSON(type, "[[], [null], [null, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int32Type>({0, 0, 1, 3}, &list_offsets);
  child_values = std::make_shared<NullArray>(3);
  ASSERT_OK_AND_ASSIGN(reference,
                       ListArray::FromArrays(*list_offsets, *child_values, pool));
  AssertArraysEqual(*reference, *parsed);

  // Null lists around an empty list
  ASSERT_OK(ArrayFromJSON(type, "[null, [], null]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(pool, type, &generic_builder));
    auto& lists = checked_cast<ListBuilder&>(*generic_builder);
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Append());
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Finish(&reference));
  }
  AssertArraysEqual(*reference, *parsed);
}
567 
// list(list(uint8)) arrays: nested values, empty lists at both levels, and
// nulls at every nesting level.
TEST(TestList, IntegerListList) {
  auto pool = default_memory_pool();
  std::shared_ptr<DataType> type = list(list(uint8()));
  std::shared_ptr<Array> offsets, values, nested, expected, actual;

  ASSERT_OK(ArrayFromJSON(type, "[[[4], [5, 6]], [[7, 8, 9]]]", &actual));
  ASSERT_OK(actual->ValidateFull());
  ArrayFromVector<Int32Type>({0, 1, 3, 6}, &offsets);
  ArrayFromVector<UInt8Type>({4, 5, 6, 7, 8, 9}, &values);
  ASSERT_OK_AND_ASSIGN(nested, ListArray::FromArrays(*offsets, *values, pool));
  ArrayFromVector<Int32Type>({0, 2, 3}, &offsets);
  ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *nested, pool));
  ASSERT_EQ(actual->length(), 2);
  AssertArraysEqual(*expected, *actual);

  ASSERT_OK(ArrayFromJSON(type, "[[], [[]], [[4], [], [5, 6]], [[7, 8, 9]]]", &actual));
  ASSERT_OK(actual->ValidateFull());
  ArrayFromVector<Int32Type>({0, 0, 1, 1, 3, 6}, &offsets);
  ArrayFromVector<UInt8Type>({4, 5, 6, 7, 8, 9}, &values);
  ASSERT_OK_AND_ASSIGN(nested, ListArray::FromArrays(*offsets, *values, pool));
  ArrayFromVector<Int32Type>({0, 0, 1, 4, 5}, &offsets);
  ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *nested, pool));
  ASSERT_EQ(actual->length(), 4);
  AssertArraysEqual(*expected, *actual);

  ASSERT_OK(ArrayFromJSON(type, "[null, [null], [[null]]]", &actual));
  ASSERT_OK(actual->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> builder;
    ASSERT_OK(MakeBuilder(pool, type, &builder));
    auto& list_builder = checked_cast<ListBuilder&>(*builder);
    auto& child_builder = checked_cast<ListBuilder&>(*list_builder.value_builder());
    auto& value_builder = checked_cast<UInt8Builder&>(*child_builder.value_builder());
    // null
    ASSERT_OK(list_builder.AppendNull());
    // [null]
    ASSERT_OK(list_builder.Append());
    ASSERT_OK(child_builder.AppendNull());
    // [[null]]: the innermost null must be appended too, otherwise the
    // reference would hold [[]] instead.
    ASSERT_OK(list_builder.Append());
    ASSERT_OK(child_builder.Append());
    ASSERT_OK(value_builder.AppendNull());
    ASSERT_OK(list_builder.Finish(&expected));
  }
  ASSERT_EQ(actual->length(), 3);
  // Compare against the reference; building it without this check would
  // verify nothing about the parsed array.
  AssertArraysEqual(*expected, *actual);
}
608 
// large_list(int16) arrays; mirrors the list tests but with 64-bit offsets.
TEST(TestLargeList, Basics) {
  // Similar as TestList above, only testing the basics
  auto* pool = default_memory_pool();
  const std::shared_ptr<DataType> type = large_list(int16());
  std::shared_ptr<Array> list_offsets, child_values, reference, parsed;

  ASSERT_OK(ArrayFromJSON(type, "[[], [null], [6, null]]", &parsed));
  ASSERT_OK(parsed->ValidateFull());
  ArrayFromVector<Int64Type>({0, 0, 1, 3}, &list_offsets);
  const std::vector<bool> child_validity = {false, true, false};
  ArrayFromVector<Int16Type>(child_validity, {0, 6, 0}, &child_values);
  ASSERT_OK_AND_ASSIGN(reference,
                       LargeListArray::FromArrays(*list_offsets, *child_values, pool));
  AssertArraysEqual(*reference, *parsed);
}
623 
// map(int16, int16): each map is a JSON array of [key, item] pairs. The
// parsed array is compared with one assembled through a MapBuilder.
TEST(TestMap, IntegerToInteger) {
  const auto type = map(int16(), int16());
  std::shared_ptr<Array> reference, parsed;

  const char* input = R"(
[
    [[0, 1], [1, 1], [2, 2], [3, 3], [4, 5], [5, 8]],
    null,
    [[0, null], [1, null], [2, 0], [3, 1], [4, null], [5, 2]],
    []
  ]
)";
  ASSERT_OK(ArrayFromJSON(type, input, &parsed));

  std::unique_ptr<ArrayBuilder> generic_builder;
  ASSERT_OK(MakeBuilder(default_memory_pool(), type, &generic_builder));
  auto& maps = checked_cast<MapBuilder&>(*generic_builder);
  auto& keys = checked_cast<Int16Builder&>(*maps.key_builder());
  auto& items = checked_cast<Int16Builder&>(*maps.item_builder());

  // First map: six pairs, all items valid
  ASSERT_OK(maps.Append());
  ASSERT_OK(keys.AppendValues({0, 1, 2, 3, 4, 5}));
  ASSERT_OK(items.AppendValues({1, 1, 2, 3, 5, 8}));
  // Second map: null
  ASSERT_OK(maps.AppendNull());
  // Third map: six pairs, some items null
  ASSERT_OK(maps.Append());
  ASSERT_OK(keys.AppendValues({0, 1, 2, 3, 4, 5}));
  ASSERT_OK(items.AppendValues({-1, -1, 0, 1, -1, 2}, {0, 0, 1, 1, 0, 1}));
  // Fourth map: empty
  ASSERT_OK(maps.Append());
  ASSERT_OK(maps.Finish(&reference));

  ASSERT_ARRAYS_EQUAL(*parsed, *reference);
}
656 
// map(utf8, int32): the parsed array is compared with a MapArray constructed
// directly from offsets, key/item arrays and a validity bitmap.
TEST(TestMap, StringToInteger) {
  const auto type = map(utf8(), int32());
  const char* input = R"(
[
    [["joe", 0], ["mark", null]],
    null,
    [["cap", 8]],
    []
  ]
)";
  const auto parsed = ArrayFromJSON(type, input);

  std::vector<int32_t> map_offsets = {0, 2, 2, 3, 3};
  const auto keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])");
  const auto items = ArrayFromJSON(int32(), "[0, null, 8]");
  ASSERT_OK_AND_ASSIGN(auto validity, BitUtil::BytesToBits({1, 0, 1, 1}));
  const auto reference = std::make_shared<MapArray>(
      type, 4, Buffer::Wrap(map_offsets), keys, items, validity, 1);
  ASSERT_ARRAYS_EQUAL(*parsed, *reference);
}
677 
// Malformed map documents must be rejected with an Invalid status.
TEST(TestMap, Errors) {
  const auto type = map(int16(), int16());
  std::shared_ptr<Array> out;

  const char* const bad_inputs[] = {
      // list of pairs isn't an array
      "[0]",
      // pair isn't an array
      "[[0]]",
      "[[null]]",
      // pair with length != 2
      "[[[0]]]",
      "[[[0, 1, 2]]]",
      // null key
      "[[[null, 0]]]",
      // key or value fails to convert
      "[[[0.0, 0]]]",
      "[[[0, 0.0]]]",
  };
  for (const char* bad_json : bad_inputs) {
    ASSERT_RAISES(Invalid, ArrayFromJSON(type, bad_json, &out));
  }
}
696 
// Nested case: a map whose keys are themselves map(int16, int16) values and
// whose items are list(utf8) values. The parsed array is compared with one
// assembled by hand through the nested builders.
TEST(TestMap, IntegerMapToStringList) {
  auto type = map(map(int16(), int16()), list(utf8()));
  std::shared_ptr<Array> expected, actual;

  // Two top-level entries: a map with three [key, item] pairs, then a null.
  const char* input = R"(
[
    [
      [
        [],
        [null, "empty"]
      ],
      [
        [[0, 1]],
        null
      ],
      [
        [[0, 0], [1, 1]],
        ["bootstrapping tautology?", "lispy", null, "i can see eternity"]
      ]
    ],
    null
  ]
)";
  ASSERT_OK(ArrayFromJSON(type, input, &actual));

  // Fetch the builder of each nesting level: outer map -> key map (with its
  // own int16 key/item builders) and item list (with its string builder).
  std::unique_ptr<ArrayBuilder> builder;
  ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder));
  auto& map_builder = checked_cast<MapBuilder&>(*builder);
  auto& key_builder = checked_cast<MapBuilder&>(*map_builder.key_builder());
  auto& key_key_builder = checked_cast<Int16Builder&>(*key_builder.key_builder());
  auto& key_item_builder = checked_cast<Int16Builder&>(*key_builder.item_builder());
  auto& item_builder = checked_cast<ListBuilder&>(*map_builder.item_builder());
  auto& item_value_builder = checked_cast<StringBuilder&>(*item_builder.value_builder());

  // First pair: empty key map -> [null, "empty"]
  ASSERT_OK(map_builder.Append());
  ASSERT_OK(key_builder.Append());
  ASSERT_OK(item_builder.Append());
  ASSERT_OK(item_value_builder.AppendNull());
  ASSERT_OK(item_value_builder.Append("empty"));

  // Second pair: key map {0: 1} -> null item
  ASSERT_OK(key_builder.Append());
  ASSERT_OK(item_builder.AppendNull());
  ASSERT_OK(key_key_builder.AppendValues({0}));
  ASSERT_OK(key_item_builder.AppendValues({1}));

  // Third pair: key map {0: 0, 1: 1} -> list of four strings (one null)
  ASSERT_OK(key_builder.Append());
  ASSERT_OK(item_builder.Append());
  ASSERT_OK(key_key_builder.AppendValues({0, 1}));
  ASSERT_OK(key_item_builder.AppendValues({0, 1}));
  ASSERT_OK(item_value_builder.Append("bootstrapping tautology?"));
  ASSERT_OK(item_value_builder.Append("lispy"));
  ASSERT_OK(item_value_builder.AppendNull());
  ASSERT_OK(item_value_builder.Append("i can see eternity"));

  // Second top-level entry: null map
  ASSERT_OK(map_builder.AppendNull());

  ASSERT_OK(map_builder.Finish(&expected));
  ASSERT_ARRAYS_EQUAL(*actual, *expected);
}
756 
// Covers ArrayFromJSON for fixed_size_list(int64(), 2): empty input, plain
// values, null elements, and null list slots.
TEST(TestFixedSizeList, IntegerList) {
  const auto list_type = fixed_size_list(int64(), 2);
  std::shared_ptr<Array> flat, want, got;

  // No lists at all
  ASSERT_OK(ArrayFromJSON(list_type, "[]", &got));
  ASSERT_OK(got->ValidateFull());
  ArrayFromVector<Int64Type>({}, &flat);
  want = std::make_shared<FixedSizeListArray>(list_type, 0, flat);
  AssertArraysEqual(*want, *got);

  // Three fully populated lists
  ASSERT_OK(ArrayFromJSON(list_type, "[[4, 5], [0, 0], [6, 7]]", &got));
  ASSERT_OK(got->ValidateFull());
  ArrayFromVector<Int64Type>({4, 5, 0, 0, 6, 7}, &flat);
  want = std::make_shared<FixedSizeListArray>(list_type, 3, flat);
  AssertArraysEqual(*want, *got);

  // Lists containing null elements
  ASSERT_OK(ArrayFromJSON(list_type, "[[null, null], [0, null], [6, null]]", &got));
  ASSERT_OK(got->ValidateFull());
  const std::vector<bool> element_validity{false, false, true, false, true, false};
  ArrayFromVector<Int64Type>(element_validity, {0, 0, 0, 0, 6, 0}, &flat);
  want = std::make_shared<FixedSizeListArray>(list_type, 3, flat);
  AssertArraysEqual(*want, *got);

  // Null list slots around a list of nulls; the expectation comes from a builder
  ASSERT_OK(ArrayFromJSON(list_type, "[null, [null, null], null]", &got));
  ASSERT_OK(got->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &generic_builder));
    auto& lists = checked_cast<FixedSizeListBuilder&>(*generic_builder);
    auto& elements = checked_cast<Int64Builder&>(*lists.value_builder());
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Append());
    ASSERT_OK(elements.AppendNull());
    ASSERT_OK(elements.AppendNull());
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Finish(&want));
  }
  AssertArraysEqual(*want, *got);
}
797 
// Every input below must be rejected with Status::Invalid for
// fixed_size_list(int64(), 2).
TEST(TestFixedSizeList, IntegerListErrors) {
  const auto list_type = fixed_size_list(int64(), 2);
  std::shared_ptr<Array> unused;

  const char* const rejected_inputs[] = {
      // a scalar where a list is required
      "[0]",
      // floating-point literals for int64 elements
      "[[0.0, 1.0]]",
      // one element although the list size is 2
      "[[0]]",
      // 9223372036854775808 is 2^63, one more than the int64 maximum
      "[[9223372036854775808, 0]]",
  };

  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(list_type, json, &unused));
  }
}
807 
// Covers ArrayFromJSON for fixed_size_list(null(), 2), i.e. lists whose
// elements are of the null type.
TEST(TestFixedSizeList, NullList) {
  const auto list_type = fixed_size_list(null(), 2);
  std::shared_ptr<Array> flat, want, got;

  // No lists at all
  ASSERT_OK(ArrayFromJSON(list_type, "[]", &got));
  ASSERT_OK(got->ValidateFull());
  flat = std::make_shared<NullArray>(0);
  want = std::make_shared<FixedSizeListArray>(list_type, 0, flat);
  AssertArraysEqual(*want, *got);

  // Three lists, six null elements in total
  ASSERT_OK(
      ArrayFromJSON(list_type, "[[null, null], [null, null], [null, null]]", &got));
  ASSERT_OK(got->ValidateFull());
  flat = std::make_shared<NullArray>(6);
  want = std::make_shared<FixedSizeListArray>(list_type, 3, flat);
  AssertArraysEqual(*want, *got);

  // Null list slots around a list of nulls; the expectation comes from a builder
  ASSERT_OK(ArrayFromJSON(list_type, "[null, [null, null], null]", &got));
  ASSERT_OK(got->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &generic_builder));
    auto& lists = checked_cast<FixedSizeListBuilder&>(*generic_builder);
    auto& elements = checked_cast<NullBuilder&>(*lists.value_builder());
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Append());
    ASSERT_OK(elements.AppendNull());
    ASSERT_OK(elements.AppendNull());
    ASSERT_OK(lists.AppendNull());
    ASSERT_OK(lists.Finish(&want));
  }
  AssertArraysEqual(*want, *got);
}
841 
// Covers ArrayFromJSON for a fixed-size list (size 1) whose elements are
// fixed-size lists (size 2) of uint8.
TEST(TestFixedSizeList, IntegerListList) {
  const auto inner_type = fixed_size_list(uint8(), 2);
  const auto outer_type = fixed_size_list(inner_type, 1);
  std::shared_ptr<Array> flat, inner, want, got;

  // No nulls anywhere
  ASSERT_OK(ArrayFromJSON(outer_type, "[[[1, 4]], [[2, 5]], [[3, 6]]]", &got));
  ASSERT_OK(got->ValidateFull());
  ArrayFromVector<UInt8Type>({1, 4, 2, 5, 3, 6}, &flat);
  inner = std::make_shared<FixedSizeListArray>(inner_type, 3, flat);
  want = std::make_shared<FixedSizeListArray>(outer_type, 3, inner);
  AssertArraysEqual(*want, *got);

  // Nulls at the element, inner-list and outer-list level
  ASSERT_OK(ArrayFromJSON(outer_type, "[[[1, null]], [null], null]", &got));
  ASSERT_OK(got->ValidateFull());
  {
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(default_memory_pool(), outer_type, &generic_builder));
    auto& outer_lists = checked_cast<FixedSizeListBuilder&>(*generic_builder);
    auto& inner_lists =
        checked_cast<FixedSizeListBuilder&>(*outer_lists.value_builder());
    auto& elements = checked_cast<UInt8Builder&>(*inner_lists.value_builder());

    // [[1, null]]
    ASSERT_OK(outer_lists.Append());
    ASSERT_OK(inner_lists.Append());
    ASSERT_OK(elements.Append(1));
    ASSERT_OK(elements.AppendNull());

    // [null]
    ASSERT_OK(outer_lists.Append());
    ASSERT_OK(inner_lists.AppendNull());

    // null
    ASSERT_OK(outer_lists.AppendNull());

    ASSERT_OK(outer_lists.Finish(&want));
  }
  AssertArraysEqual(*want, *got);
}
879 
// Covers ArrayFromJSON for struct<a: int8, b: bool>, with rows written both
// positionally (JSON arrays) and by field name (JSON objects).
TEST(TestStruct, SimpleStruct) {
  const auto struct_type = struct_({field("a", int8()), field("b", boolean())});
  std::shared_ptr<Array> col_a, col_b, want, got;

  // Trivial
  ASSERT_OK(ArrayFromJSON(struct_type, "[]", &got));
  ASSERT_OK(got->ValidateFull());
  ArrayFromVector<Int8Type>({}, &col_a);
  ArrayFromVector<BooleanType, bool>({}, &col_b);
  want = std::make_shared<StructArray>(struct_type, 0, ArrayVector{col_a, col_b});
  AssertArraysEqual(*want, *got);

  // Non-empty
  ArrayFromVector<Int8Type>({5, 6}, &col_a);
  ArrayFromVector<BooleanType, bool>({true, false}, &col_b);
  want = std::make_shared<StructArray>(struct_type, 2, ArrayVector{col_a, col_b});

  ASSERT_OK(ArrayFromJSON(struct_type, "[[5, true], [6, false]]", &got));
  ASSERT_OK(got->ValidateFull());
  AssertArraysEqual(*want, *got);
  // Same rows in object notation; the second row lists "b" before "a"
  ASSERT_OK(ArrayFromJSON(struct_type,
                          R"([{"a": 5, "b": true}, {"b": false, "a": 6}])", &got));
  ASSERT_OK(got->ValidateFull());
  AssertArraysEqual(*want, *got);

  // With nulls
  const std::vector<bool> a_validity{false, true, false, false};
  const std::vector<bool> b_validity{false, false, true, false};
  ArrayFromVector<Int8Type>(a_validity, {0, 5, 6, 0}, &col_a);
  ArrayFromVector<BooleanType, bool>(b_validity, {false, true, false, false}, &col_b);
  std::shared_ptr<Buffer> row_validity;
  BitmapFromVector<bool>({false, true, true, true}, &row_validity);
  want = std::make_shared<StructArray>(struct_type, 4, ArrayVector{col_a, col_b},
                                       row_validity, 1);

  ASSERT_OK(ArrayFromJSON(struct_type,
                          "[null, [5, null], [null, false], [null, null]]", &got));
  ASSERT_OK(got->ValidateFull());
  AssertArraysEqual(*want, *got);
  // When using object notation, null members can be omitted
  ASSERT_OK(ArrayFromJSON(struct_type,
                          R"([null, {"a": 5, "b": null}, {"b": false}, {}])", &got));
  ASSERT_OK(got->ValidateFull());
  AssertArraysEqual(*want, *got);
}
931 
// Covers ArrayFromJSON for struct<nested: struct<a: int8, b: bool>, c: float64>.
TEST(TestStruct, NestedStruct) {
  const auto inner_type = struct_({field("a", int8()), field("b", boolean())});
  const auto outer_type =
      struct_({field("nested", inner_type), field("c", float64())});
  std::shared_ptr<Array> col_a, col_b, col_nested, col_c, want, got;
  std::shared_ptr<Buffer> validity;

  // Empty
  ASSERT_OK(ArrayFromJSON(outer_type, "[]", &got));
  ASSERT_OK(got->ValidateFull());
  ArrayFromVector<Int8Type>({}, &col_a);
  ArrayFromVector<BooleanType, bool>({}, &col_b);
  col_nested = std::make_shared<StructArray>(inner_type, 0, ArrayVector{col_a, col_b});
  ArrayFromVector<DoubleType>({}, &col_c);
  want = std::make_shared<StructArray>(outer_type, 0, ArrayVector{col_nested, col_c});
  AssertArraysEqual(*want, *got);

  // Two rows without nulls
  ASSERT_OK(
      ArrayFromJSON(outer_type, "[[[5, true], 1.5], [[6, false], -3e2]]", &got));
  ASSERT_OK(got->ValidateFull());
  ArrayFromVector<Int8Type>({5, 6}, &col_a);
  ArrayFromVector<BooleanType, bool>({true, false}, &col_b);
  col_nested = std::make_shared<StructArray>(inner_type, 2, ArrayVector{col_a, col_b});
  ArrayFromVector<DoubleType>({1.5, -300.0}, &col_c);
  want = std::make_shared<StructArray>(outer_type, 2, ArrayVector{col_nested, col_c});
  AssertArraysEqual(*want, *got);

  // Nulls at the outer-struct, nested-struct and leaf level
  ASSERT_OK(
      ArrayFromJSON(outer_type, "[null, [[5, null], null], [null, -3e2]]", &got));
  ASSERT_OK(got->ValidateFull());
  const std::vector<bool> a_validity{false, true, false};
  const std::vector<bool> b_validity{false, false, false};
  const std::vector<bool> c_validity{false, false, true};
  ArrayFromVector<Int8Type>(a_validity, {0, 5, 0}, &col_a);
  ArrayFromVector<BooleanType, bool>(b_validity, {false, false, false}, &col_b);
  BitmapFromVector<bool>({false, true, false}, &validity);
  col_nested = std::make_shared<StructArray>(inner_type, 3, ArrayVector{col_a, col_b},
                                             validity, 2);
  ArrayFromVector<DoubleType>(c_validity, {0.0, 0.0, -300.0}, &col_c);
  BitmapFromVector<bool>({false, true, true}, &validity);
  want = std::make_shared<StructArray>(outer_type, 3, ArrayVector{col_nested, col_c},
                                       validity, 1);
  AssertArraysEqual(*want, *got);
}
976 
// Every input below must be rejected with Status::Invalid for
// struct<a: int8, b: bool>.
TEST(TestStruct, Errors) {
  const auto struct_type = struct_({field("a", int8()), field("b", boolean())});
  std::shared_ptr<Array> unused;

  const char* const rejected_inputs[] = {
      // rows given as bare scalars
      "[0, true]",
      // one value for two fields
      "[[0]]",
      // three values for two fields
      "[[0, true, 1]]",
      // positional values in the opposite order to the field types
      "[[true, 0]]",
      // object notation with the value kinds swapped between "a" and "b"
      R"([{"b": 0, "a": true}])",
      // object notation with a key that is neither "a" nor "b"
      R"([{"c": 0}])",
  };

  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(struct_type, json, &unused));
  }
}
990 
// Parses a dense union (a: int8 with type code 4, b: bool with type code 8)
// from [type_code, value] pairs, then checks both array equality and the
// offsets / child arrays individually.
TEST(TestDenseUnion, Basics) {
  const std::vector<std::shared_ptr<Field>> members = {field("a", int8()),
                                                       field("b", boolean())};
  const auto union_type = union_(members, {4, 8}, UnionMode::DENSE);

  auto parsed = checked_pointer_cast<UnionArray>(ArrayFromJSON(
      union_type, "[null, [4, 122], [8, true], [4, null], null, [8, false]]"));

  auto want_type_ids = ArrayFromJSON(int8(), "[null, 4, 8, 4, null, 8]");
  auto want_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 0, 1]");
  auto want_a = ArrayFromJSON(int8(), "[122, null]");
  auto want_b = ArrayFromJSON(boolean(), "[true, false]");

  ASSERT_OK_AND_ASSIGN(auto want,
                       UnionArray::MakeDense(*want_type_ids, *want_offsets,
                                             {want_a, want_b}, {"a", "b"}, {4, 8}));

  ASSERT_ARRAYS_EQUAL(*want, *parsed);

  // ensure that the array is as dense as we expect
  const std::shared_ptr<Buffer> want_offsets_buffer = want_offsets->data()->buffers[1];
  ASSERT_TRUE(parsed->value_offsets()->Equals(*want_offsets_buffer));
  ASSERT_ARRAYS_EQUAL(*want_a, *parsed->field(0));
  ASSERT_ARRAYS_EQUAL(*want_b, *parsed->field(1));
}
1015 
// Parses a sparse union (a: int8 with type code 4, b: bool with type code 8)
// from [type_code, value] pairs and compares it with UnionArray::MakeSparse.
TEST(TestSparseUnion, Basics) {
  const std::vector<std::shared_ptr<Field>> members = {field("a", int8()),
                                                       field("b", boolean())};
  const auto union_type = union_(members, {4, 8}, UnionMode::SPARSE);

  auto parsed =
      ArrayFromJSON(union_type, "[[4, 122], [8, true], [4, null], null, [8, false]]");

  // Each child array has one slot per union slot
  auto want_type_ids = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
  auto want_a = ArrayFromJSON(int8(), "[122, null, null, null, null]");
  auto want_b = ArrayFromJSON(boolean(), "[null, true, null, null, false]");

  ASSERT_OK_AND_ASSIGN(
      auto want,
      UnionArray::MakeSparse(*want_type_ids, {want_a, want_b}, {"a", "b"}, {4, 8}));

  ASSERT_ARRAYS_EQUAL(*want, *parsed);
}
1033 
// Parses a list whose values are a dense union (a: int8 = 4, b: bool = 8) and
// checks the list as a whole as well as the union's offsets and children.
TEST(TestDenseUnion, ListOfUnion) {
  const std::vector<std::shared_ptr<Field>> members = {field("a", int8()),
                                                       field("b", boolean())};
  const auto union_type = union_(members, {4, 8}, UnionMode::DENSE);
  const auto list_type = list(union_type);

  // Two lists: the first with two union values, the second with three
  auto parsed = checked_pointer_cast<ListArray>(ArrayFromJSON(
      list_type, "[[[4, 122], [8, true]],[[4, null], null, [8, false]]]"));

  auto want_type_ids = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
  auto want_union_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]");
  auto want_a = ArrayFromJSON(int8(), "[122, null]");
  auto want_b = ArrayFromJSON(boolean(), "[true, false]");

  ASSERT_OK_AND_ASSIGN(auto want_values,
                       UnionArray::MakeDense(*want_type_ids, *want_union_offsets,
                                             {want_a, want_b}, {"a", "b"}, {4, 8}));
  auto want_list_offsets = ArrayFromJSON(int32(), "[0, 2, 5]");
  ASSERT_OK_AND_ASSIGN(auto want,
                       ListArray::FromArrays(*want_list_offsets, *want_values));

  ASSERT_ARRAYS_EQUAL(*want, *parsed);

  // ensure that the array is as dense as we expect
  auto parsed_values = checked_pointer_cast<UnionArray>(parsed->values());
  auto want_union = checked_pointer_cast<UnionArray>(want_values);
  ASSERT_TRUE(parsed_values->value_offsets()->Equals(*want_union->value_offsets()));
  ASSERT_ARRAYS_EQUAL(*want_a, *parsed_values->field(0));
  ASSERT_ARRAYS_EQUAL(*want_b, *parsed_values->field(1));
}
1068 
// Parses a list whose values are a sparse union (a: int8 = 4, b: bool = 8) and
// compares it with a list assembled from UnionArray::MakeSparse.
TEST(TestSparseUnion, ListOfUnion) {
  const std::vector<std::shared_ptr<Field>> members = {field("a", int8()),
                                                       field("b", boolean())};
  const auto union_type = union_(members, {4, 8}, UnionMode::SPARSE);
  const auto list_type = list(union_type);

  // Two lists: the first with two union values, the second with three
  auto parsed = ArrayFromJSON(
      list_type, "[[[4, 122], [8, true]],[[4, null], null, [8, false]]]");

  auto want_type_ids = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
  auto want_a = ArrayFromJSON(int8(), "[122, null, null, null, null]");
  auto want_b = ArrayFromJSON(boolean(), "[null, true, null, null, false]");

  ASSERT_OK_AND_ASSIGN(
      auto want_values,
      UnionArray::MakeSparse(*want_type_ids, {want_a, want_b}, {"a", "b"}, {4, 8}));
  auto want_list_offsets = ArrayFromJSON(int32(), "[0, 2, 5]");
  ASSERT_OK_AND_ASSIGN(auto want,
                       ListArray::FromArrays(*want_list_offsets, *want_values));

  ASSERT_ARRAYS_EQUAL(*want, *parsed);
}
1093 
// Parses a dense union whose three members are structs (type codes 0, 23, 47)
// and checks the result, its offsets, and every child array.
TEST(TestDenseUnion, UnionOfStructs) {
  const auto ab_type = struct_({field("alpha", float64()), field("bravo", utf8())});
  const auto wtf_type = struct_({field("whiskey", int8()), field("tango", float64()),
                                 field("foxtrot", list(int8()))});
  const auto q_type = struct_({field("quebec", utf8())});
  const std::vector<std::shared_ptr<Field>> members = {
      field("ab", ab_type), field("wtf", wtf_type), field("q", q_type)};
  const auto union_type = union_(members, {0, 23, 47}, UnionMode::DENSE);

  auto parsed = checked_pointer_cast<UnionArray>(ArrayFromJSON(union_type, R"([
    [0, {"alpha": 0.0, "bravo": "charlie"}],
    [23, {"whiskey": 99}],
    [0, {"bravo": "mike"}],
    null,
    [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}]
  ])"));

  auto want_type_ids = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]");
  auto want_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]");

  // One child array per member; "q" is never selected, so its child is empty
  auto want_ab = ArrayFromJSON(members[0]->type(), R"([
      {"alpha": 0.0, "bravo": "charlie"},
      {"bravo": "mike"}
    ])");
  auto want_wtf = ArrayFromJSON(members[1]->type(), R"([
      {"whiskey": 99},
      {"tango": 8.25, "foxtrot": [0, 2, 3]}
    ])");
  auto want_q = ArrayFromJSON(members[2]->type(), "[]");
  const ArrayVector want_children = {want_ab, want_wtf, want_q};

  ASSERT_OK_AND_ASSIGN(auto want,
                       UnionArray::MakeDense(*want_type_ids, *want_offsets,
                                             want_children, {"ab", "wtf", "q"},
                                             {0, 23, 47}));

  ASSERT_ARRAYS_EQUAL(*want, *parsed);

  // ensure that the array is as dense as we expect
  const std::shared_ptr<Buffer> want_offsets_buffer = want_offsets->data()->buffers[1];
  ASSERT_TRUE(parsed->value_offsets()->Equals(*want_offsets_buffer));
  const auto& want_union = checked_cast<const UnionArray&>(*want);
  const int member_count = union_type->num_fields();
  for (int child = 0; child < member_count; ++child) {
    ASSERT_ARRAYS_EQUAL(*want_union.field(child), *parsed->field(child));
  }
}
1135 
// Parses a sparse union whose three members are structs (type codes 0, 23, 47)
// and compares it with an array assembled through UnionArray::MakeSparse.
TEST(TestSparseUnion, UnionOfStructs) {
  const auto ab_type = struct_({field("alpha", float64()), field("bravo", utf8())});
  const auto wtf_type = struct_({field("whiskey", int8()), field("tango", float64()),
                                 field("foxtrot", list(int8()))});
  const auto q_type = struct_({field("quebec", utf8())});
  const std::vector<std::shared_ptr<Field>> members = {
      field("ab", ab_type), field("wtf", wtf_type), field("q", q_type)};
  const auto union_type = union_(members, {0, 23, 47}, UnionMode::SPARSE);

  auto parsed = ArrayFromJSON(union_type, R"([
    [0, {"alpha": 0.0, "bravo": "charlie"}],
    [23, {"whiskey": 99}],
    [0, {"bravo": "mike"}],
    null,
    [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}]
  ])");

  auto want_type_ids = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]");

  // Each child has five slots, null wherever another member (or null) is selected
  auto want_ab = ArrayFromJSON(members[0]->type(), R"([
      {"alpha": 0.0, "bravo": "charlie"},
      null,
      {"bravo": "mike"},
      null,
      null
    ])");
  auto want_wtf = ArrayFromJSON(members[1]->type(), R"([
      null,
      {"whiskey": 99},
      null,
      null,
      {"tango": 8.25, "foxtrot": [0, 2, 3]}
    ])");
  auto want_q = ArrayFromJSON(members[2]->type(), "[null, null, null, null, null]");
  const ArrayVector want_children = {want_ab, want_wtf, want_q};

  ASSERT_OK_AND_ASSIGN(auto want,
                       UnionArray::MakeSparse(*want_type_ids, want_children,
                                              {"ab", "wtf", "q"}, {0, 23, 47}));

  ASSERT_ARRAYS_EQUAL(*want, *parsed);
}
1175 
// Every input below must be rejected with Status::Invalid for a dense union
// with members a: int8 (type code 4) and b: bool (type code 8).
TEST(TestDenseUnion, Errors) {
  const std::vector<std::shared_ptr<Field>> members = {field("a", int8()),
                                                       field("b", boolean())};
  const auto union_type = union_(members, {4, 8}, UnionMode::DENSE);
  std::shared_ptr<Array> unused;

  const char* const rejected_inputs[] = {
      R"(["not a valid type_id"])",
      // 0 is not one of {4, 8}
      "[[0, 99]]",
      // "" is not a valid int8()
      R"([[4, ""]])",

      R"(["not a pair"])",
      // a single element instead of a pair
      "[[0]]",
      // three elements instead of a pair
      "[[8, true, 1]]",
  };

  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(union_type, json, &unused));
  }
}
1192 
// Every input below must be rejected with Status::Invalid for a sparse union
// with members a: int8 (type code 4) and b: bool (type code 8).
TEST(TestSparseUnion, Errors) {
  const std::vector<std::shared_ptr<Field>> members = {field("a", int8()),
                                                       field("b", boolean())};
  const auto union_type = union_(members, {4, 8}, UnionMode::SPARSE);
  std::shared_ptr<Array> unused;

  const char* const rejected_inputs[] = {
      R"(["not a valid type_id"])",
      // type code 0, while the union declares 4 and 8
      "[[0, 99]]",
      // string value paired with the int8 member's type code
      R"([[4, ""]])",

      R"(["not a pair"])",
      // a single element instead of a pair
      "[[0]]",
      // three elements instead of a pair
      "[[8, true, 1]]",
  };

  for (const char* json : rejected_inputs) {
    SCOPED_TRACE(json);
    ASSERT_RAISES(Invalid, ArrayFromJSON(union_type, json, &unused));
  }
}
1207 
// DictArrayFromJSON must produce the same array as a DictionaryArray built
// from separately parsed indices and dictionary values.
TEST(TestDictionary, Basics) {
  const char* const indices_json = "[null, 2, 1, 0]";
  const char* const dictionary_json = R"(["whiskey", "tango", "foxtrot"])";

  const auto dict_type = dictionary(int32(), utf8());
  auto parsed = DictArrayFromJSON(dict_type, indices_json, dictionary_json);

  auto want_indices = ArrayFromJSON(int32(), indices_json);
  auto want_dictionary = ArrayFromJSON(utf8(), dictionary_json);
  const DictionaryArray want(dict_type, want_indices, want_dictionary);

  ASSERT_ARRAYS_EQUAL(want, *parsed);
}
1219 
// Each (indices, dictionary) pair below must be rejected with Status::Invalid
// for dictionary(int32(), utf8()).
TEST(TestDictionary, Errors) {
  const auto dict_type = dictionary(int32(), utf8());
  std::shared_ptr<Array> unused;

  // first: indices JSON, second: dictionary JSON
  const std::pair<const char*, const char*> rejected_inputs[] = {
      // index given as a string
      {R"(["not a valid index"])", R"([""])"},
      // dict value isn't string
      {"[0, 1]", "[1]"},
  };

  for (const auto& input : rejected_inputs) {
    SCOPED_TRACE(input.first);
    ASSERT_RAISES(Invalid,
                  DictArrayFromJSON(dict_type, input.first, input.second, &unused));
  }
}
1229 
1230 }  // namespace json
1231 }  // namespace internal
1232 }  // namespace ipc
1233 }  // namespace arrow
1234