1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <string>
19 
20 #include <gtest/gtest.h>
21 
22 #include "arrow/array.h"
23 #include "arrow/extension_type.h"
24 #include "arrow/testing/gtest_util.h"
25 #include "arrow/testing/util.h"
26 #include "arrow/type.h"
27 #include "arrow/util/logging.h"
28 
29 namespace arrow {
30 
CheckView(const std::shared_ptr<Array> & input,const std::shared_ptr<DataType> & view_type,const std::shared_ptr<Array> & expected)31 void CheckView(const std::shared_ptr<Array>& input,
32                const std::shared_ptr<DataType>& view_type,
33                const std::shared_ptr<Array>& expected) {
34   ASSERT_OK_AND_ASSIGN(auto result, input->View(view_type));
35   ASSERT_OK(result->ValidateFull());
36   AssertArraysEqual(*expected, *result);
37 }
38 
CheckView(const std::shared_ptr<Array> & input,const std::shared_ptr<Array> & expected_view)39 void CheckView(const std::shared_ptr<Array>& input,
40                const std::shared_ptr<Array>& expected_view) {
41   CheckView(input, expected_view->type(), expected_view);
42 }
43 
CheckViewFails(const std::shared_ptr<Array> & input,const std::shared_ptr<DataType> & view_type)44 void CheckViewFails(const std::shared_ptr<Array>& input,
45                     const std::shared_ptr<DataType>& view_type) {
46   ASSERT_RAISES(Invalid, input->View(view_type));
47 }
48 
49 class IPv4Type : public ExtensionType {
50  public:
IPv4Type()51   IPv4Type() : ExtensionType(fixed_size_binary(4)) {}
52 
extension_name() const53   std::string extension_name() const override { return "ipv4"; }
54 
ExtensionEquals(const ExtensionType & other) const55   bool ExtensionEquals(const ExtensionType& other) const override {
56     return other.extension_name() == this->extension_name();
57   }
58 
MakeArray(std::shared_ptr<ArrayData> data) const59   std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
60     DCHECK_EQ(data->type->id(), Type::EXTENSION);
61     DCHECK_EQ("ipv4", static_cast<const ExtensionType&>(*data->type).extension_name());
62     return std::make_shared<ExtensionArray>(data);
63   }
64 
Deserialize(std::shared_ptr<DataType> storage_type,const std::string & serialized) const65   Result<std::shared_ptr<DataType>> Deserialize(
66       std::shared_ptr<DataType> storage_type,
67       const std::string& serialized) const override {
68     return Status::NotImplemented("IPv4Type::Deserialize");
69   }
70 
Serialize() const71   std::string Serialize() const override { return ""; }
72 };
73 
// Viewing a primitive array as its own type must yield an equal array, with
// and without nulls.
TEST(TestArrayView, IdentityPrimitive) {
  auto ints = ArrayFromJSON(int16(), "[0, -1, 42]");
  CheckView(ints, ints->type(), ints);

  auto ints_with_null = ArrayFromJSON(int16(), "[0, -1, 42, null]");
  CheckView(ints_with_null, ints_with_null->type(), ints_with_null);

  auto bools = ArrayFromJSON(boolean(), "[true, false, null]");
  CheckView(bools, bools->type(), bools);
}
82 
// A null-typed array can be viewed as the null type itself.
TEST(TestArrayView, IdentityNullType) {
  auto nulls = ArrayFromJSON(null(), "[null, null, null]");
  CheckView(nulls, nulls->type(), nulls);
}
87 
// Views between primitive types; the expected values are what the same
// stored values look like when read as the other type.
TEST(TestArrayView, PrimitiveAsPrimitive) {
  // int16 <-> uint16
  auto signed16 = ArrayFromJSON(int16(), "[0, -1, 42]");
  auto unsigned16 = ArrayFromJSON(uint16(), "[0, 65535, 42]");
  CheckView(signed16, unsigned16);
  CheckView(unsigned16, signed16);

  // int32 -> float32 (only this direction is checked here)
  auto ints32 = ArrayFromJSON(int32(), "[0, 1069547520, -1071644672, null]");
  auto floats32 = ArrayFromJSON(float32(), "[0.0, 1.5, -2.5, null]");
  CheckView(ints32, floats32);

  // timestamp[s] <-> int64
  auto timestamps =
      ArrayFromJSON(timestamp(TimeUnit::SECOND),
                    R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])");
  auto seconds = ArrayFromJSON(int64(), "[0, 951782400, 63730281600, -2203977600]");
  CheckView(timestamps, seconds);
  CheckView(seconds, timestamps);
}
104 
// int32 <-> fixed_size_binary(4).  The integer literals depend on the
// platform byte order so that they match the bytes "foox" and "barz".
TEST(TestArrayView, PrimitiveAsFixedSizeBinary) {
#if ARROW_LITTLE_ENDIAN
  const char* ints_json = "[2020568934, 2054316386, null]";
#else
  const char* ints_json = "[1718579064, 1650553466, null]";
#endif
  auto ints = ArrayFromJSON(int32(), ints_json);
  auto binaries = ArrayFromJSON(fixed_size_binary(4), R"(["foox", "barz", null])");
  CheckView(ints, binaries);
  CheckView(binaries, ints);
}
115 
// utf8 and binary arrays holding the same values can be viewed as each other.
TEST(TestArrayView, StringAsBinary) {
  auto strings = ArrayFromJSON(utf8(), R"(["foox", "barz", null])");
  auto binaries = ArrayFromJSON(binary(), R"(["foox", "barz", null])");
  CheckView(strings, binaries);
  CheckView(binaries, strings);
}
122 
// An int16 array cannot be viewed as int8, fixed_size_binary(3) or null.
TEST(TestArrayView, PrimitiveWrongSize) {
  auto ints16 = ArrayFromJSON(int16(), "[0, -1, 42]");

  CheckViewFails(ints16, int8());
  CheckViewFails(ints16, fixed_size_binary(3));
  CheckViewFails(ints16, null());
}
129 
// Struct <-> struct views where each child is viewed as a same-width type.
TEST(TestArrayView, StructAsStructSimple) {
  auto src_type = struct_({field("a", int8()), field("b", int32())});
  auto dst_type = struct_({field("c", uint8()), field("d", float32())});

  // No nulls at all
  auto src = ArrayFromJSON(src_type, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
  auto dst = ArrayFromJSON(dst_type, "[[0, 0], [1, 1.5], [255, -2.5]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // A null struct slot
  src = ArrayFromJSON(src_type, "[[0, 0], null, [-1, -1071644672]]");
  dst = ArrayFromJSON(dst_type, "[[0, 0], null, [255, -2.5]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // A null struct slot plus a null child value
  src = ArrayFromJSON(src_type, "[[0, null], null, [-1, -1071644672]]");
  dst = ArrayFromJSON(dst_type, "[[0, null], null, [255, -2.5]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // Second child viewed as fixed_size_binary(4); the integer literal depends
  // on the platform byte order.
  dst_type = struct_({field("c", uint8()), field("d", fixed_size_binary(4))});
#if ARROW_LITTLE_ENDIAN
  src = ArrayFromJSON(src_type, "[[0, null], null, [-1, 2020568934]]");
#else
  src = ArrayFromJSON(src_type, "[[0, null], null, [-1, 1718579064]]");
#endif
  dst = ArrayFromJSON(dst_type, R"([[0, null], null, [255, "foox"]])");
  CheckView(src, dst);
  CheckView(dst, src);
}
161 
// Struct views where the first field of the target type is non-nullable.
TEST(TestArrayView, StructAsStructNonNullable) {
  auto nullable_type = struct_({field("a", int8()), field("b", int32())});
  auto strict_type =
      struct_({field("c", uint8(), /*nullable=*/false), field("d", float32())});

  auto src =
      ArrayFromJSON(nullable_type, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");
  auto dst = ArrayFromJSON(strict_type, "[[0, 0], [1, 1.5], [255, -2.5]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // Nulls in the child whose target field is nullable are accepted
  src = ArrayFromJSON(nullable_type, "[[0, null], [-1, -1071644672]]");
  dst = ArrayFromJSON(strict_type, "[[0, null], [255, -2.5]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // A null in the child whose target field is non-nullable is rejected
  src = ArrayFromJSON(nullable_type, "[[0, null], [null, -1071644672]]");
  CheckViewFails(src, strict_type);
}
181 
// A struct<int8, int32> array cannot be viewed as any of these struct types.
TEST(TestArrayView, StructAsStructWrongLayout) {
  auto src_type = struct_({field("a", int8()), field("b", int32())});
  auto src = ArrayFromJSON(src_type, "[[0, 0], [1, 1069547520], [-1, -1071644672]]");

  // First child has a different width
  auto bad_type = struct_({field("c", int16()), field("d", int32())});
  CheckViewFails(src, bad_type);

  // Child widths swapped
  bad_type = struct_({field("c", int32()), field("d", int8())});
  CheckViewFails(src, bad_type);

  // Only one child in the target
  bad_type = struct_({field("c", int8())});
  CheckViewFails(src, bad_type);

  // Single 5-byte child in the target
  bad_type = struct_({field("c", fixed_size_binary(5))});
  CheckViewFails(src, bad_type);
}
195 
// Struct views where null-typed children are kept, moved or dropped.
TEST(TestArrayView, StructAsStructWithNullType) {
  auto src_type = struct_({field("a", int8()), field("b", null())});
  auto dst_type = struct_({field("c", uint8()), field("d", null())});

  auto src = ArrayFromJSON(src_type, "[[0, null], [1, null], [-1, null]]");
  auto dst = ArrayFromJSON(dst_type, "[[0, null], [1, null], [255, null]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // Null struct slots and null child values
  src = ArrayFromJSON(src_type, "[null, [null, null], [-1, null]]");
  dst = ArrayFromJSON(dst_type, "[null, [null, null], [255, null]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // Null-typed child placed first in the target (same source array)
  dst_type = struct_({field("c", null()), field("d", uint8())});
  dst = ArrayFromJSON(dst_type, "[null, [null, null], [null, 255]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // Null-typed child absent from the target (same source array)
  dst_type = struct_({field("c", uint8())});
  dst = ArrayFromJSON(dst_type, "[null, [null], [255]]");
  CheckView(src, dst);
  CheckView(dst, src);
}
223 
// Views between a single-child struct and a flat primitive array.
TEST(TestArrayView, StructAsFlat) {
  auto struct_type = struct_({field("a", int16())});

  auto nested = ArrayFromJSON(struct_type, "[[0], [1], [-1]]");
  auto flat = ArrayFromJSON(uint16(), "[0, 1, 65535]");
  CheckView(nested, flat);
  CheckView(flat, nested);

  // Null struct slots: only the flat -> struct direction is checked
  nested = ArrayFromJSON(struct_type, "[[0], null, [-1]]");
  flat = ArrayFromJSON(uint16(), "[0, null, 65535]");
  //   CheckView(nested, flat);  // XXX currently fails
  CheckView(flat, nested);

  // A null inside the child cannot be viewed as flat
  nested = ArrayFromJSON(struct_type, "[[0], [null], [-1]]");
  CheckViewFails(nested, uint16());
}
241 
// Views between a flat primitive array and a struct that has an int16 child
// surrounded by null-typed children.
TEST(TestArrayView, StructAsFlatWithNullType) {
  auto struct_type =
      struct_({field("a", null()), field("b", int16()), field("c", null())});

  auto nested = ArrayFromJSON(struct_type, "[[null, 0, null], [null, -1, null]]");
  auto flat = ArrayFromJSON(uint16(), "[0, 65535]");
  CheckView(nested, flat);
  CheckView(flat, nested);

  // Null struct slots: only the flat -> struct direction is checked
  nested = ArrayFromJSON(struct_type, "[[null, 0, null], null, [null, -1, null]]");
  flat = ArrayFromJSON(uint16(), "[0, null, 65535]");
  //   CheckView(nested, flat);  // XXX currently fails
  CheckView(flat, nested);

  // A null inside the int16 child cannot be viewed as flat
  nested = ArrayFromJSON(struct_type, "[[null, null, null]]");
  CheckViewFails(nested, uint16());
}
259 
// Views between nested struct types whose nesting differs.
TEST(TestArrayView, StructAsStructNested) {
  // The nested struct sits in the first field of the source type and in the
  // second field of the target type.
  auto src_type =
      struct_({field("a", struct_({field("b", int8())})), field("d", int32())});
  auto dst_type =
      struct_({field("a", uint8()), field("b", struct_({field("b", float32())}))});
  auto src = ArrayFromJSON(src_type, "[[[0], 1069547520], [[-1], -1071644672]]");
  auto dst = ArrayFromJSON(dst_type, "[[0, [1.5]], [255, [-2.5]]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // Same, with a null-typed child added to each nested struct
  src_type = struct_({field("a", struct_({field("xx", null()), field("b", int8())})),
                      field("d", int32())});
  dst_type = struct_({field("a", uint8()),
                      field("b", struct_({field("b", float32()), field("xx", null())}))});
  src = ArrayFromJSON(src_type, "[[[null, 0], 1069547520], [[null, -1], -1071644672]]");
  dst = ArrayFromJSON(dst_type, "[[0, [1.5, null]], [255, [-2.5, null]]]");
  CheckView(src, dst);
  CheckView(dst, src);

  // XXX With nulls (currently fails)
}
281 
// list<int16> <-> list<uint16> views.
TEST(TestArrayView, ListAsListSimple) {
  auto signed_lists = ArrayFromJSON(list(int16()), "[[0, -1], [], [42]]");
  auto unsigned_lists = ArrayFromJSON(list(uint16()), "[[0, 65535], [], [42]]");
  CheckView(signed_lists, unsigned_lists);
  CheckView(unsigned_lists, signed_lists);

  // A null list slot
  signed_lists = ArrayFromJSON(list(int16()), "[[0, -1], null, [42]]");
  unsigned_lists = ArrayFromJSON(list(uint16()), "[[0, 65535], null, [42]]");
  CheckView(signed_lists, unsigned_lists);
  CheckView(unsigned_lists, signed_lists);

  // A null list slot plus a null list element
  signed_lists = ArrayFromJSON(list(int16()), "[[0, -1], null, [null, 42]]");
  unsigned_lists = ArrayFromJSON(list(uint16()), "[[0, 65535], null, [null, 42]]");
  CheckView(signed_lists, unsigned_lists);
  CheckView(unsigned_lists, signed_lists);
}
300 
// fixed_size_list<int16>[3] <-> fixed_size_list<uint16>[3] views.
TEST(TestArrayView, FixedSizeListAsFixedSizeList) {
  auto signed_type = fixed_size_list(int16(), 3);
  auto unsigned_type = fixed_size_list(uint16(), 3);

  auto signed_lists = ArrayFromJSON(signed_type, "[[0, -1, 42], [5, 6, -16384]]");
  auto unsigned_lists = ArrayFromJSON(unsigned_type, "[[0, 65535, 42], [5, 6, 49152]]");
  CheckView(signed_lists, unsigned_lists);
  CheckView(unsigned_lists, signed_lists);

  // A null list slot plus a null list element
  signed_lists = ArrayFromJSON(signed_type, "[[0, -1, null], null, [5, 6, -16384]]");
  unsigned_lists =
      ArrayFromJSON(unsigned_type, "[[0, 65535, null], null, [5, 6, 49152]]");
  CheckView(signed_lists, unsigned_lists);
  CheckView(unsigned_lists, signed_lists);
}
315 
// A fixed-size list array viewed as a flat array of its elements.  Only the
// list -> flat direction is checked.
TEST(TestArrayView, FixedSizeListAsFlat) {
  auto list_type = fixed_size_list(int16(), 3);
  auto lists = ArrayFromJSON(list_type, "[[0, -1, 42], [5, 6, -16384]]");
  auto flat = ArrayFromJSON(uint16(), "[0, 65535, 42, 5, 6, 49152]");
  CheckView(lists, flat);
  // CheckView(flat, lists);  // XXX currently fails

  // XXX With nulls (currently fails)
}
325 
// A sparse union viewed as a struct of (type ids, child 1, child 2).
TEST(TestArrayView, SparseUnionAsStruct) {
  auto values16 = ArrayFromJSON(int16(), "[0, -1, 42]");
  auto values32 = ArrayFromJSON(int32(), "[0, 1069547520, -1071644672]");
  auto type_ids = ArrayFromJSON(int8(), "[0, 0, 1]");
  ASSERT_OK_AND_ASSIGN(auto union_arr,
                       UnionArray::MakeSparse(*type_ids, {values16, values32}));
  ASSERT_OK(union_arr->ValidateFull());

  auto struct_type =
      struct_({field("a", int8()), field("b", uint16()), field("c", float32())});
  auto struct_arr =
      ArrayFromJSON(struct_type, "[[0, 0, 0], [0, 65535, 1.5], [1, 42, -2.5]]");
  CheckView(union_arr, struct_arr);
  CheckView(struct_arr, union_arr);

  // Null union slot: only the union -> struct direction is checked
  type_ids = ArrayFromJSON(int8(), "[null, 0, 1]");
  ASSERT_OK_AND_ASSIGN(union_arr,
                       UnionArray::MakeSparse(*type_ids, {values16, values32}));
  ASSERT_OK(union_arr->ValidateFull());
  struct_arr = ArrayFromJSON(struct_type, "[null, [0, 65535, 1.5], [1, 42, -2.5]]");
  CheckView(union_arr, struct_arr);
  //   CheckView(struct_arr, union_arr);  // XXX currently fails

  // Nulls inside the children as well (type ids unchanged from above)
  values16 = ArrayFromJSON(int16(), "[0, -1, null]");
  values32 = ArrayFromJSON(int32(), "[0, null, -1071644672]");
  ASSERT_OK_AND_ASSIGN(union_arr,
                       UnionArray::MakeSparse(*type_ids, {values16, values32}));
  ASSERT_OK(union_arr->ValidateFull());
  struct_arr = ArrayFromJSON(struct_type, "[null, [0, 65535, null], [1, null, -2.5]]");
  CheckView(union_arr, struct_arr);
  //   CheckView(struct_arr, union_arr);  // XXX currently fails
}
355 
// decimal(10, 4) -> fixed_size_binary(16) -> decimal(10, 4) must give back
// an array equal to the original.
TEST(TestArrayView, DecimalRoundTrip) {
  auto decimal_type = decimal(10, 4);
  auto binary_type = fixed_size_binary(16);
  auto decimals = ArrayFromJSON(decimal_type, R"(["123.4567", "-78.9000", null])");

  ASSERT_OK_AND_ASSIGN(auto as_binary, decimals->View(binary_type));
  ASSERT_OK(as_binary->ValidateFull());

  ASSERT_OK_AND_ASSIGN(auto round_tripped, as_binary->View(decimal_type));
  ASSERT_OK(round_tripped->ValidateFull());

  AssertArraysEqual(*decimals, *round_tripped);
}
367 
// Views between dictionary arrays (ARROW-6049): the indices are shared and
// the dictionary values are themselves viewed as the target value type.
TEST(TestArrayView, Dictionaries) {
  // ARROW-6049
  auto ty1 = dictionary(int8(), float32());
  auto ty2 = dictionary(int8(), int32());

  auto indices = ArrayFromJSON(int8(), "[0, 2, null, 1]");
  auto values = ArrayFromJSON(float32(), "[0.0, 1.5, -2.5]");

  ASSERT_OK_AND_ASSIGN(auto expected_dict, values->View(int32()));
  ASSERT_OK_AND_ASSIGN(auto arr, DictionaryArray::FromArrays(ty1, indices, values));
  ASSERT_OK_AND_ASSIGN(auto expected,
                       DictionaryArray::FromArrays(ty2, indices, expected_dict));

  CheckView(arr, expected);
  CheckView(expected, arr);

  // Incompatible index type (value type is one that was viewable above)
  auto ty3 = dictionary(int16(), int32());
  CheckViewFails(arr, ty3);

  // Incompatible dictionary type.  The index type is kept at int8 so that
  // the value type mismatch is the only possible reason for the failure.
  auto ty4 = dictionary(int8(), float64());
  CheckViewFails(arr, ty4);

  // Incompatible index type and dictionary type at once
  auto ty5 = dictionary(int16(), float64());
  CheckViewFails(arr, ty5);

  // Check dictionary-encoded child
  auto offsets = ArrayFromJSON(int32(), "[0, 2, 2, 4]");
  ASSERT_OK_AND_ASSIGN(auto list_arr, ListArray::FromArrays(*offsets, *arr));
  ASSERT_OK_AND_ASSIGN(auto expected_list_arr,
                       ListArray::FromArrays(*offsets, *expected));
  CheckView(list_arr, expected_list_arr);
  CheckView(expected_list_arr, list_arr);
}
400 
// An IPv4Type extension array (fixed_size_binary(4) storage) <-> uint32.
// The expected integer depends on the platform byte order.
TEST(TestArrayView, ExtensionType) {
  auto ipv4_type = std::make_shared<IPv4Type>();

  // Build the storage array, then retag its data with the extension type
  auto ext_data = ArrayFromJSON(ipv4_type->storage_type(), R"(["ABCD", null])")->data();
  ext_data->type = ipv4_type;
  auto ext_arr = ipv4_type->MakeArray(ext_data);

#if ARROW_LITTLE_ENDIAN
  auto as_uint32 = ArrayFromJSON(uint32(), "[1145258561, null]");
#else
  auto as_uint32 = ArrayFromJSON(uint32(), "[1094861636, null]");
#endif
  CheckView(ext_arr, as_uint32);
  CheckView(as_uint32, ext_arr);
}
414 
// Viewing a slice must equal the same slice of the viewed full array.
TEST(TestArrayView, NonZeroOffset) {
  auto ints = ArrayFromJSON(int16(), "[10, 11, 12, 13]");
  ASSERT_OK_AND_ASSIGN(auto full_view, ints->View(fixed_size_binary(2)));

  auto sliced_ints = ints->Slice(1);
  auto sliced_view = full_view->Slice(1);
  CheckView(sliced_ints, sliced_view);
}
421 
// Viewing a sliced list array whose values array is itself a slice.
TEST(TestArrayView, NonZeroNestedOffset) {
  auto signed_values = ArrayFromJSON(int16(), "[10, 11, 12, 13, 14]");
  auto unsigned_values = ArrayFromJSON(uint16(), "[10, 11, 12, 13, 14]");
  auto offsets = ArrayFromJSON(int32(), "[0, 2, 3]");

  // Both list arrays are built on values sliced at offset 2
  ASSERT_OK_AND_ASSIGN(auto src_lists,
                       ListArray::FromArrays(*offsets, *signed_values->Slice(2)));
  ASSERT_OK_AND_ASSIGN(auto dst_lists,
                       ListArray::FromArrays(*offsets, *unsigned_values->Slice(2)));
  ASSERT_OK(src_lists->ValidateFull());
  CheckView(src_lists->Slice(1), dst_lists->Slice(1));

  // Be extra paranoid about checking offsets: the view must report the list
  // offset (1) and the values offset (2).
  ASSERT_OK_AND_ASSIGN(auto viewed, src_lists->Slice(1)->View(dst_lists->type()));
  ASSERT_EQ(1, viewed->offset());
  const auto& viewed_lists = static_cast<const ListArray&>(*viewed);
  ASSERT_EQ(2, viewed_lists.values()->offset());
}
440 
441 }  // namespace arrow
442