1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <string>
19
20 #include <gtest/gtest.h>
21
22 #include "arrow/array.h"
23 #include "arrow/extension_type.h"
24 #include "arrow/testing/gtest_util.h"
25 #include "arrow/testing/util.h"
26 #include "arrow/type.h"
27 #include "arrow/util/logging.h"
28
29 namespace arrow {
30
CheckView(const std::shared_ptr<Array> & input,const std::shared_ptr<DataType> & view_type,const std::shared_ptr<Array> & expected)31 void CheckView(const std::shared_ptr<Array>& input,
32 const std::shared_ptr<DataType>& view_type,
33 const std::shared_ptr<Array>& expected) {
34 ASSERT_OK_AND_ASSIGN(auto result, input->View(view_type));
35 ASSERT_OK(result->ValidateFull());
36 AssertArraysEqual(*expected, *result);
37 }
38
CheckView(const std::shared_ptr<Array> & input,const std::shared_ptr<Array> & expected_view)39 void CheckView(const std::shared_ptr<Array>& input,
40 const std::shared_ptr<Array>& expected_view) {
41 CheckView(input, expected_view->type(), expected_view);
42 }
43
CheckViewFails(const std::shared_ptr<Array> & input,const std::shared_ptr<DataType> & view_type)44 void CheckViewFails(const std::shared_ptr<Array>& input,
45 const std::shared_ptr<DataType>& view_type) {
46 ASSERT_RAISES(Invalid, input->View(view_type));
47 }
48
49 class IPv4Type : public ExtensionType {
50 public:
IPv4Type()51 IPv4Type() : ExtensionType(fixed_size_binary(4)) {}
52
extension_name() const53 std::string extension_name() const override { return "ipv4"; }
54
ExtensionEquals(const ExtensionType & other) const55 bool ExtensionEquals(const ExtensionType& other) const override {
56 return other.extension_name() == this->extension_name();
57 }
58
MakeArray(std::shared_ptr<ArrayData> data) const59 std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
60 DCHECK_EQ(data->type->id(), Type::EXTENSION);
61 DCHECK_EQ("ipv4", static_cast<const ExtensionType&>(*data->type).extension_name());
62 return std::make_shared<ExtensionArray>(data);
63 }
64
Deserialize(std::shared_ptr<DataType> storage_type,const std::string & serialized) const65 Result<std::shared_ptr<DataType>> Deserialize(
66 std::shared_ptr<DataType> storage_type,
67 const std::string& serialized) const override {
68 return Status::NotImplemented("IPv4Type::Deserialize");
69 }
70
Serialize() const71 std::string Serialize() const override { return ""; }
72 };
73
// Viewing a primitive array as its own type must give back an equal array.
TEST(TestArrayView, IdentityPrimitive) {
  struct IdentityCase {
    std::shared_ptr<DataType> type;
    const char* json;
  };
  const IdentityCase cases[] = {
      {int16(), "[0, -1, 42]"},
      {int16(), "[0, -1, 42, null]"},
      {boolean(), "[true, false, null]"},
  };

  for (const auto& c : cases) {
    auto arr = ArrayFromJSON(c.type, c.json);
    CheckView(arr, arr->type(), arr);
  }
}
82
// A null-typed array viewed as the null type is unchanged.
TEST(TestArrayView, IdentityNullType) {
  const auto nulls = ArrayFromJSON(null(), "[null, null, null]");
  CheckView(nulls, nulls->type(), nulls);
}
87
// Views between primitive types of identical width.
TEST(TestArrayView, PrimitiveAsPrimitive) {
  // int16 <-> uint16, in both directions
  {
    const auto signed_arr = ArrayFromJSON(int16(), "[0, -1, 42]");
    const auto unsigned_arr = ArrayFromJSON(uint16(), "[0, 65535, 42]");
    CheckView(signed_arr, unsigned_arr);
    CheckView(unsigned_arr, signed_arr);
  }

  // int32 -> float32 (the integers are the bit patterns of the floats)
  {
    const auto int_arr = ArrayFromJSON(int32(), "[0, 1069547520, -1071644672, null]");
    const auto float_arr = ArrayFromJSON(float32(), "[0.0, 1.5, -2.5, null]");
    CheckView(int_arr, float_arr);
  }

  // timestamp[s] <-> int64, in both directions
  {
    const auto ts_arr =
        ArrayFromJSON(timestamp(TimeUnit::SECOND),
                      R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])");
    const auto seconds_arr =
        ArrayFromJSON(int64(), "[0, 951782400, 63730281600, -2203977600]");
    CheckView(ts_arr, seconds_arr);
    CheckView(seconds_arr, ts_arr);
  }
}
104
// int32 <-> fixed_size_binary(4). The integer values depend on the byte
// order of the platform, hence the two variants of the JSON input.
TEST(TestArrayView, PrimitiveAsFixedSizeBinary) {
#if ARROW_LITTLE_ENDIAN
  const char* ints_json = "[2020568934, 2054316386, null]";
#else
  const char* ints_json = "[1718579064, 1650553466, null]";
#endif
  const auto ints = ArrayFromJSON(int32(), ints_json);
  const auto bytes = ArrayFromJSON(fixed_size_binary(4), R"(["foox", "barz", null])");
  CheckView(ints, bytes);
  CheckView(bytes, ints);
}
115
// utf8 <-> binary, in both directions.
TEST(TestArrayView, StringAsBinary) {
  const auto strings = ArrayFromJSON(utf8(), R"(["foox", "barz", null])");
  const auto binaries = ArrayFromJSON(binary(), R"(["foox", "barz", null])");
  CheckView(strings, binaries);
  CheckView(binaries, strings);
}
122
// An int16 array cannot be viewed as any of these target types.
TEST(TestArrayView, PrimitiveWrongSize) {
  const auto arr = ArrayFromJSON(int16(), "[0, -1, 42]");

  const std::shared_ptr<DataType> bad_types[] = {int8(), fixed_size_binary(3), null()};
  for (const auto& bad_type : bad_types) {
    CheckViewFails(arr, bad_type);
  }
}
129
// Struct <-> struct views where the fields have pairwise compatible layouts.
TEST(TestArrayView, StructAsStructSimple) {
  const auto src_type = struct_({field("a", int8()), field("b", int32())});
  const auto dst_type = struct_({field("c", uint8()), field("d", float32())});

  // No nulls
  {
    const auto src =
        ArrayFromJSON(src_type, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 0], [1, 1.5], [255, -2.5]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, 0], null, [-1, -1071644672]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 0], null, [255, -2.5]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With nested nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, null], null, [-1, -1071644672]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, null], null, [255, -2.5]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // Second field viewed as fixed_size_binary(4); the int32 input value depends
  // on the byte order of the platform.
  {
    const auto binary_dst_type =
        struct_({field("c", uint8()), field("d", fixed_size_binary(4))});
#if ARROW_LITTLE_ENDIAN
    const auto src = ArrayFromJSON(src_type, "[[0, null], null, [-1, 2020568934]]");
#else
    const auto src = ArrayFromJSON(src_type, "[[0, null], null, [-1, 1718579064]]");
#endif
    const auto dst =
        ArrayFromJSON(binary_dst_type, R"([[0, null], null, [255, "foox"]])");
    CheckView(src, dst);
    CheckView(dst, src);
  }
}
161
// Struct views where the first field of the target type is non-nullable.
TEST(TestArrayView, StructAsStructNonNullable) {
  const auto src_type = struct_({field("a", int8()), field("b", int32())});
  const auto dst_type =
      struct_({field("c", uint8(), /*nullable=*/false), field("d", float32())});

  // No nulls
  {
    const auto src =
        ArrayFromJSON(src_type, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 0], [1, 1.5], [255, -2.5]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With nested nulls, only in the nullable field
  {
    const auto src = ArrayFromJSON(src_type, "[[0, null], [-1, -1071644672]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, null], [255, -2.5]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // Nested null cannot be viewed as non-null field
  {
    const auto src = ArrayFromJSON(src_type, "[[0, null], [null, -1071644672]]");
    CheckViewFails(src, dst_type);
  }
}
181
// A struct<int8, int32> array cannot be viewed as any of these struct types.
TEST(TestArrayView, StructAsStructWrongLayout) {
  const auto src_type = struct_({field("a", int8()), field("b", int32())});
  const auto src =
      ArrayFromJSON(src_type, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");

  const std::shared_ptr<DataType> bad_types[] = {
      struct_({field("c", int16()), field("d", int32())}),
      struct_({field("c", int32()), field("d", int8())}),
      struct_({field("c", int8())}),
      struct_({field("c", fixed_size_binary(5))}),
  };
  for (const auto& bad_type : bad_types) {
    CheckViewFails(src, bad_type);
  }
}
195
// Struct views where some fields are of the null type.
TEST(TestArrayView, StructAsStructWithNullType) {
  const auto src_type = struct_({field("a", int8()), field("b", null())});

  // Same field arrangement, no top-level nulls
  {
    const auto dst_type = struct_({field("c", uint8()), field("d", null())});
    const auto src = ArrayFromJSON(src_type, "[[0, null], [1, null], [-1, null]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, null], [1, null], [255, null]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // The remaining cases all view the same input, which has nulls and nested
  // nulls.
  const auto src_with_nulls =
      ArrayFromJSON(src_type, "[null, [null, null], [-1, null]]");

  // With nulls and nested nulls
  {
    const auto dst_type = struct_({field("c", uint8()), field("d", null())});
    const auto dst = ArrayFromJSON(dst_type, "[null, [null, null], [255, null]]");
    CheckView(src_with_nulls, dst);
    CheckView(dst, src_with_nulls);
  }

  // Moving the null types around
  {
    const auto dst_type = struct_({field("c", null()), field("d", uint8())});
    const auto dst = ArrayFromJSON(dst_type, "[null, [null, null], [null, 255]]");
    CheckView(src_with_nulls, dst);
    CheckView(dst, src_with_nulls);
  }

  // Removing the null type
  {
    const auto dst_type = struct_({field("c", uint8())});
    const auto dst = ArrayFromJSON(dst_type, "[null, [null], [255]]");
    CheckView(src_with_nulls, dst);
    CheckView(dst, src_with_nulls);
  }
}
223
// Single-field struct <-> flat primitive array.
TEST(TestArrayView, StructAsFlat) {
  const auto struct_type = struct_({field("a", int16())});

  // No nulls
  {
    const auto nested = ArrayFromJSON(struct_type, "[[0], [1], [-1]]");
    const auto flat = ArrayFromJSON(uint16(), "[0, 1, 65535]");
    CheckView(nested, flat);
    CheckView(flat, nested);
  }

  // With nulls: only the flat -> struct direction is checked
  {
    const auto nested = ArrayFromJSON(struct_type, "[[0], null, [-1]]");
    const auto flat = ArrayFromJSON(uint16(), "[0, null, 65535]");
    //  CheckView(nested, flat);  // XXX currently fails
    CheckView(flat, nested);
  }

  // With nested nulls => fails
  {
    const auto nested = ArrayFromJSON(struct_type, "[[0], [null], [-1]]");
    CheckViewFails(nested, uint16());
  }
}
241
// Struct with null-typed fields around one int16 field <-> flat uint16 array.
TEST(TestArrayView, StructAsFlatWithNullType) {
  const auto struct_type =
      struct_({field("a", null()), field("b", int16()), field("c", null())});

  // No top-level nulls
  {
    const auto nested =
        ArrayFromJSON(struct_type, "[[null, 0, null], [null, -1, null]]");
    const auto flat = ArrayFromJSON(uint16(), "[0, 65535]");
    CheckView(nested, flat);
    CheckView(flat, nested);
  }

  // With nulls: only the flat -> struct direction is checked
  {
    const auto nested =
        ArrayFromJSON(struct_type, "[[null, 0, null], null, [null, -1, null]]");
    const auto flat = ArrayFromJSON(uint16(), "[0, null, 65535]");
    //  CheckView(nested, flat);  // XXX currently fails
    CheckView(flat, nested);
  }

  // With nested nulls => fails
  {
    const auto nested = ArrayFromJSON(struct_type, "[[null, null, null]]");
    CheckViewFails(nested, uint16());
  }
}
259
// Views between nested struct types.
TEST(TestArrayView, StructAsStructNested) {
  // Nesting tree shape need not be identical
  {
    const auto src_type =
        struct_({field("a", struct_({field("b", int8())})), field("d", int32())});
    const auto dst_type =
        struct_({field("a", uint8()), field("b", struct_({field("b", float32())}))});
    const auto src =
        ArrayFromJSON(src_type, "[[[0], 1069547520], [[-1], -1071644672]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, [1.5]], [255, [-2.5]]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With null types
  {
    const auto src_type =
        struct_({field("a", struct_({field("xx", null()), field("b", int8())})),
                 field("d", int32())});
    const auto dst_type =
        struct_({field("a", uint8()),
                 field("b", struct_({field("b", float32()), field("xx", null())}))});
    const auto src = ArrayFromJSON(
        src_type, "[[[null, 0], 1069547520], [[null, -1], -1071644672]]");
    const auto dst =
        ArrayFromJSON(dst_type, "[[0, [1.5, null]], [255, [-2.5, null]]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // XXX With nulls (currently fails)
}
281
// list<int16> <-> list<uint16>, in both directions.
TEST(TestArrayView, ListAsListSimple) {
  const auto src_type = list(int16());
  const auto dst_type = list(uint16());

  // No nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, -1], [], [42]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 65535], [], [42]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, -1], null, [42]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 65535], null, [42]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With nested nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, -1], null, [null, 42]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 65535], null, [null, 42]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }
}
300
// fixed_size_list<int16, 3> <-> fixed_size_list<uint16, 3>.
TEST(TestArrayView, FixedSizeListAsFixedSizeList) {
  const auto src_type = fixed_size_list(int16(), 3);
  const auto dst_type = fixed_size_list(uint16(), 3);

  // No nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, -1, 42], [5, 6, -16384]]");
    const auto dst = ArrayFromJSON(dst_type, "[[0, 65535, 42], [5, 6, 49152]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }

  // With nested nulls
  {
    const auto src = ArrayFromJSON(src_type, "[[0, -1, null], null, [5, 6, -16384]]");
    const auto dst =
        ArrayFromJSON(dst_type, "[[0, 65535, null], null, [5, 6, 49152]]");
    CheckView(src, dst);
    CheckView(dst, src);
  }
}
315
// fixed_size_list<int16, 3> -> flat uint16 array. Only this direction is
// checked.
TEST(TestArrayView, FixedSizeListAsFlat) {
  const auto list_type = fixed_size_list(int16(), 3);
  const auto nested = ArrayFromJSON(list_type, "[[0, -1, 42], [5, 6, -16384]]");
  const auto flat = ArrayFromJSON(uint16(), "[0, 65535, 42, 5, 6, 49152]");
  CheckView(nested, flat);
  //  CheckView(flat, nested);  // XXX currently fails

  // XXX With nulls (currently fails)
}
325
// Sparse union -> struct whose first field holds the type ids and whose
// remaining fields view the union children.
TEST(TestArrayView, SparseUnionAsStruct) {
  auto first_child = ArrayFromJSON(int16(), "[0, -1, 42]");
  auto second_child = ArrayFromJSON(int32(), "[0, 1069547520, -1071644672]");
  auto type_ids = ArrayFromJSON(int8(), "[0, 0, 1]");
  ASSERT_OK_AND_ASSIGN(auto union_arr,
                       UnionArray::MakeSparse(*type_ids, {first_child, second_child}));
  ASSERT_OK(union_arr->ValidateFull());

  const auto struct_type =
      struct_({field("a", int8()), field("b", uint16()), field("c", float32())});
  auto struct_arr =
      ArrayFromJSON(struct_type, "[[0, 0, 0], [0, 65535, 1.5], [1, 42, -2.5]]");
  CheckView(union_arr, struct_arr);
  CheckView(struct_arr, union_arr);

  // With nulls
  type_ids = ArrayFromJSON(int8(), "[null, 0, 1]");
  ASSERT_OK_AND_ASSIGN(union_arr,
                       UnionArray::MakeSparse(*type_ids, {first_child, second_child}));
  ASSERT_OK(union_arr->ValidateFull());
  struct_arr = ArrayFromJSON(struct_type, "[null, [0, 65535, 1.5], [1, 42, -2.5]]");
  CheckView(union_arr, struct_arr);
  //  CheckView(struct_arr, union_arr);  // XXX currently fails

  // With nested nulls
  first_child = ArrayFromJSON(int16(), "[0, -1, null]");
  second_child = ArrayFromJSON(int32(), "[0, null, -1071644672]");
  ASSERT_OK_AND_ASSIGN(union_arr,
                       UnionArray::MakeSparse(*type_ids, {first_child, second_child}));
  ASSERT_OK(union_arr->ValidateFull());
  struct_arr =
      ArrayFromJSON(struct_type, "[null, [0, 65535, null], [1, null, -2.5]]");
  CheckView(union_arr, struct_arr);
  //  CheckView(struct_arr, union_arr);  // XXX currently fails
}
355
// decimal(10, 4) -> fixed_size_binary(16) -> decimal(10, 4) must give back an
// array equal to the original.
TEST(TestArrayView, DecimalRoundTrip) {
  const auto decimal_type = decimal(10, 4);
  const auto binary_type = fixed_size_binary(16);
  const auto decimals =
      ArrayFromJSON(decimal_type, R"(["123.4567", "-78.9000", null])");

  ASSERT_OK_AND_ASSIGN(auto as_binary, decimals->View(binary_type));
  ASSERT_OK(as_binary->ValidateFull());

  ASSERT_OK_AND_ASSIGN(auto round_tripped, as_binary->View(decimal_type));
  ASSERT_OK(round_tripped->ValidateFull());

  AssertArraysEqual(*decimals, *round_tripped);
}
367
// Views between dictionary arrays: the index type stays the same while the
// dictionary values are viewed as another type.
TEST(TestArrayView, Dictionaries) {
  // ARROW-6049
  auto ty1 = dictionary(int8(), float32());
  auto ty2 = dictionary(int8(), int32());

  auto indices = ArrayFromJSON(int8(), "[0, 2, null, 1]");
  auto values = ArrayFromJSON(float32(), "[0.0, 1.5, -2.5]");

  ASSERT_OK_AND_ASSIGN(auto expected_dict, values->View(int32()));
  ASSERT_OK_AND_ASSIGN(auto arr, DictionaryArray::FromArrays(ty1, indices, values));
  ASSERT_OK_AND_ASSIGN(auto expected,
                       DictionaryArray::FromArrays(ty2, indices, expected_dict));

  CheckView(arr, expected);
  CheckView(expected, arr);

  // Incompatible index type
  auto ty3 = dictionary(int16(), int32());
  CheckViewFails(arr, ty3);

  // Incompatible index type and incompatible dictionary type
  auto ty4 = dictionary(int16(), float64());
  CheckViewFails(arr, ty4);

  // Incompatible dictionary type only: the index type matches the input, so
  // the failure can only come from the float32 -> float64 dictionary view.
  auto ty5 = dictionary(int8(), float64());
  CheckViewFails(arr, ty5);

  // Check dictionary-encoded child
  auto offsets = ArrayFromJSON(int32(), "[0, 2, 2, 4]");
  ASSERT_OK_AND_ASSIGN(auto list_arr, ListArray::FromArrays(*offsets, *arr));
  ASSERT_OK_AND_ASSIGN(auto expected_list_arr,
                       ListArray::FromArrays(*offsets, *expected));
  CheckView(list_arr, expected_list_arr);
  CheckView(expected_list_arr, list_arr);
}
400
// An IPv4Type extension array <-> uint32. The expected integer depends on the
// byte order of the platform.
TEST(TestArrayView, ExtensionType) {
  const auto ipv4_type = std::make_shared<IPv4Type>();

  // Build the storage array, then retag its data with the extension type.
  auto ext_data =
      ArrayFromJSON(ipv4_type->storage_type(), R"(["ABCD", null])")->data();
  ext_data->type = ipv4_type;
  const auto ext_arr = ipv4_type->MakeArray(ext_data);

#if ARROW_LITTLE_ENDIAN
  const char* ints_json = "[1145258561, null]";
#else
  const char* ints_json = "[1094861636, null]";
#endif
  const auto ints = ArrayFromJSON(uint32(), ints_json);
  CheckView(ext_arr, ints);
  CheckView(ints, ext_arr);
}
414
// Viewing a slice must equal the same slice of the view of the whole array.
TEST(TestArrayView, NonZeroOffset) {
  const auto ints = ArrayFromJSON(int16(), "[10, 11, 12, 13]");

  ASSERT_OK_AND_ASSIGN(auto whole_view, ints->View(fixed_size_binary(2)));

  const auto sliced_input = ints->Slice(1);
  const auto sliced_expected = whole_view->Slice(1);
  CheckView(sliced_input, sliced_expected);
}
421
// Views of a sliced list array whose child values array is itself a slice.
TEST(TestArrayView, NonZeroNestedOffset) {
  const auto int16_values = ArrayFromJSON(int16(), "[10, 11, 12, 13, 14]");
  const auto uint16_values = ArrayFromJSON(uint16(), "[10, 11, 12, 13, 14]");
  const auto offsets = ArrayFromJSON(int32(), "[0, 2, 3]");

  // Both list arrays are built over child values sliced at offset 2.
  ASSERT_OK_AND_ASSIGN(auto list_arr,
                       ListArray::FromArrays(*offsets, *int16_values->Slice(2)));
  ASSERT_OK_AND_ASSIGN(auto expected_list,
                       ListArray::FromArrays(*offsets, *uint16_values->Slice(2)));
  ASSERT_OK(list_arr->ValidateFull());
  CheckView(list_arr->Slice(1), expected_list->Slice(1));

  // Be extra paranoid about checking offsets
  ASSERT_OK_AND_ASSIGN(auto viewed, list_arr->Slice(1)->View(expected_list->type()));
  ASSERT_EQ(1, viewed->offset());
  ASSERT_EQ(2, static_cast<const ListArray&>(*viewed).values()->offset());
}
440
441 } // namespace arrow
442