1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <cstdint>
19 #include <memory>
20 #include <vector>
21 
22 #include <gtest/gtest.h>
23 
24 #include "arrow/chunked_array.h"
25 #include "arrow/status.h"
26 #include "arrow/testing/gtest_common.h"
27 #include "arrow/testing/gtest_util.h"
28 #include "arrow/testing/random.h"
29 #include "arrow/type.h"
30 #include "arrow/util/endian.h"
31 #include "arrow/util/key_value_metadata.h"
32 
33 namespace arrow {
34 
35 class TestChunkedArray : public TestBase {
36  protected:
Construct()37   virtual void Construct() {
38     one_ = std::make_shared<ChunkedArray>(arrays_one_);
39     if (!arrays_another_.empty()) {
40       another_ = std::make_shared<ChunkedArray>(arrays_another_);
41     }
42   }
43 
44   ArrayVector arrays_one_;
45   ArrayVector arrays_another_;
46 
47   std::shared_ptr<ChunkedArray> one_;
48   std::shared_ptr<ChunkedArray> another_;
49 };
50 
// Exercises ChunkedArray::Make with and without an explicit type, and with
// chunks whose types do or do not agree.
TEST_F(TestChunkedArray, Make) {
  // Zero chunks and no explicit type is expected to be rejected
  ASSERT_RAISES(Invalid, ChunkedArray::Make({}));

  // Zero chunks together with an explicit type is expected to succeed
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<ChunkedArray> result,
                       ChunkedArray::Make({}, int64()));
  AssertTypeEqual(*int64(), *result->type());
  ASSERT_EQ(result->num_chunks(), 0);

  const std::shared_ptr<Array> int8_chunk = ArrayFromJSON(int8(), "[0, 1, 2]");
  const std::shared_ptr<Array> int16_chunk = ArrayFromJSON(int16(), "[3, 4, 5]");

  // Omitting the type and passing the matching type must yield equal results
  ASSERT_OK_AND_ASSIGN(result, ChunkedArray::Make({int8_chunk, int8_chunk}));
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<ChunkedArray> typed_result,
                       ChunkedArray::Make({int8_chunk, int8_chunk}, int8()));
  AssertChunkedEqual(*result, *typed_result);

  // Chunks of differing types, or a chunk that disagrees with the explicit
  // type, are expected to be rejected
  ASSERT_RAISES(Invalid, ChunkedArray::Make({int8_chunk, int16_chunk}));
  ASSERT_RAISES(Invalid, ChunkedArray::Make({int8_chunk}, int16()));
}
69 
// Two ChunkedArrays built from the same single chunk compare equal through
// both the shared_ptr and the reference overloads of Equals; comparing
// against nullptr yields false.
TEST_F(TestChunkedArray, BasicEquals) {
  const std::vector<bool> is_valid(100, true);
  const std::vector<int32_t> values(100, 1);
  std::shared_ptr<Array> chunk;
  ArrayFromVector<Int32Type, int32_t>(is_valid, values, &chunk);

  // Both sides share the very same chunk
  arrays_one_.push_back(chunk);
  arrays_another_.push_back(chunk);
  Construct();

  ASSERT_TRUE(one_->Equals(one_));
  ASSERT_FALSE(one_->Equals(nullptr));
  ASSERT_TRUE(one_->Equals(another_));
  ASSERT_TRUE(one_->Equals(*another_));
}
84 
// ChunkedArrays holding the same values but with different value types
// (int32 vs. int64) must not compare equal.
TEST_F(TestChunkedArray, EqualsDifferingTypes) {
  const std::vector<bool> is_valid(100, true);
  const std::vector<int32_t> values32(100, 1);
  const std::vector<int64_t> values64(100, 1);

  std::shared_ptr<Array> int32_chunk;
  ArrayFromVector<Int32Type, int32_t>(is_valid, values32, &int32_chunk);
  arrays_one_.push_back(int32_chunk);

  std::shared_ptr<Array> int64_chunk;
  ArrayFromVector<Int64Type, int64_t>(is_valid, values64, &int64_chunk);
  arrays_another_.push_back(int64_chunk);

  Construct();
  ASSERT_FALSE(one_->Equals(another_));
  ASSERT_FALSE(one_->Equals(*another_));
}
99 
// ChunkedArrays of different total length are unequal; once the lengths
// match they are equal even though the chunk layouts differ.
TEST_F(TestChunkedArray, EqualsDifferingLengths) {
  const std::vector<bool> is_valid100(100, true);
  const std::vector<bool> is_valid101(101, true);
  const std::vector<int32_t> values100(100, 1);
  const std::vector<int32_t> values101(101, 1);

  std::shared_ptr<Array> chunk100;
  ArrayFromVector<Int32Type, int32_t>(is_valid100, values100, &chunk100);
  arrays_one_.push_back(chunk100);

  std::shared_ptr<Array> chunk101;
  ArrayFromVector<Int32Type, int32_t>(is_valid101, values101, &chunk101);
  arrays_another_.push_back(chunk101);

  // 100 elements on one side vs. 101 on the other
  Construct();
  ASSERT_FALSE(one_->Equals(another_));
  ASSERT_FALSE(one_->Equals(*another_));

  // Append one more element to the first side: chunks of 100 + 1 against a
  // single chunk of 101
  const std::vector<bool> is_valid1(1, true);
  const std::vector<int32_t> values1(1, 1);
  std::shared_ptr<Array> chunk1;
  ArrayFromVector<Int32Type, int32_t>(is_valid1, values1, &chunk1);
  arrays_one_.push_back(chunk1);

  Construct();
  ASSERT_TRUE(one_->Equals(another_));
  ASSERT_TRUE(one_->Equals(*another_));
}
124 
// Key-value metadata attached to the list item field is expected not to
// affect ChunkedArray equality.
TEST_F(TestChunkedArray, EqualsDifferingMetadata) {
  // Same list<int32> layout; only the second type's item field has metadata
  auto plain_type = list(field("item", int32()));
  auto item_metadata = key_value_metadata({"foo"}, {"bar"});
  auto annotated_type = list(field("item", int32(), true, item_metadata));

  ArrayVector plain_chunks = {ArrayFromJSON(plain_type, "[[]]")};
  ArrayVector annotated_chunks = {ArrayFromJSON(annotated_type, "[[]]")};

  ChunkedArray plain(plain_chunks);
  ChunkedArray annotated(annotated_chunks);
  ASSERT_TRUE(plain.Equals(annotated));
}
138 
// Slicing across chunk boundaries, composing slices, and producing or
// re-slicing empty slices.
TEST_F(TestChunkedArray, SliceEquals) {
  // Three chunks of 100, 50 and 50 elements: 200 elements in total
  arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  Construct();

  // [125, 175) starts inside the second chunk and ends inside the third
  std::shared_ptr<ChunkedArray> direct = one_->Slice(125, 50);
  ASSERT_EQ(direct->length(), 50);
  AssertChunkedEqual(*one_->Slice(125, 50), *direct);

  // Offsets 75 + 25 + 25 add up to the same window
  std::shared_ptr<ChunkedArray> from_75 = one_->Slice(75);
  std::shared_ptr<ChunkedArray> from_100 = from_75->Slice(25);
  std::shared_ptr<ChunkedArray> composed = from_100->Slice(25, 50);
  ASSERT_EQ(composed->length(), 50);
  AssertChunkedEqual(*direct, *composed);

  // Making empty slices of a ChunkedArray
  std::shared_ptr<ChunkedArray> past_end = one_->Slice(one_->length(), 99);
  ASSERT_EQ(past_end->length(), 0);
  ASSERT_EQ(past_end->num_chunks(), 1);
  ASSERT_TRUE(past_end->type()->Equals(one_->type()));

  std::shared_ptr<ChunkedArray> zero_length = one_->Slice(10, 0);
  ASSERT_EQ(zero_length->length(), 0);
  ASSERT_EQ(zero_length->num_chunks(), 1);
  ASSERT_TRUE(zero_length->type()->Equals(one_->type()));

  // Slicing an empty ChunkedArray
  std::shared_ptr<ChunkedArray> resliced = zero_length->Slice(0, 10);
  ASSERT_EQ(resliced->length(), 0);
  ASSERT_EQ(resliced->num_chunks(), 1);
  ASSERT_TRUE(resliced->type()->Equals(one_->type()));
}
170 
// Regression coverage for a ChunkedArray that has no chunks at all.
TEST_F(TestChunkedArray, ZeroChunksIssues) {
  ArrayVector no_arrays = {};
  auto chunkless = std::make_shared<ChunkedArray>(no_arrays, int8());

  // ARROW-8911, assert that slicing is a no-op when there are zero-chunks
  auto zero_length_slice = chunkless->Slice(0, 0);
  auto nonzero_length_slice = chunkless->Slice(0, 5);
  AssertChunkedEqual(*chunkless, *zero_length_slice);
  AssertChunkedEqual(*chunkless, *nonzero_length_slice);
}
181 
// ValidateFull succeeds for chunkless and homogeneous ChunkedArrays and is
// expected to fail once a chunk of a different type is mixed in.
TEST_F(TestChunkedArray, Validate) {
  // Valid if empty
  ArrayVector no_arrays = {};
  auto chunkless = std::make_shared<ChunkedArray>(no_arrays, utf8());
  ASSERT_OK(chunkless->ValidateFull());

  random::RandomArrayGenerator rng(0);

  // One int32 chunk
  arrays_one_.push_back(rng.Int32(50, 0, 100, 0.1));
  Construct();
  ASSERT_OK(one_->ValidateFull());

  // Two int32 chunks
  arrays_one_.push_back(rng.Int32(50, 0, 100, 0.1));
  Construct();
  ASSERT_OK(one_->ValidateFull());

  // A string chunk after the int32 chunks
  arrays_one_.push_back(rng.String(50, 0, 10, 0.1));
  Construct();
  ASSERT_RAISES(Invalid, one_->ValidateFull());
}
201 
// PrintArrayDiff reports a length mismatch, naming the first argument's
// length as "expected" and the second's as "actual".
TEST_F(TestChunkedArray, PrintDiff) {
  random::RandomArrayGenerator rng(0);
  arrays_one_.push_back(rng.Int32(50, 0, 100, 0.1));
  Construct();

  // Dropping the first 25 of 50 elements leaves a 25-element array
  auto shorter = one_->Slice(25);

  ASSERT_OK_AND_ASSIGN(auto full_vs_shorter, PrintArrayDiff(*one_, *shorter));
  ASSERT_EQ(*full_vs_shorter, "Expected length 50 but was actually 25");

  ASSERT_OK_AND_ASSIGN(auto shorter_vs_full, PrintArrayDiff(*shorter, *one_));
  ASSERT_EQ(*shorter_vs_full, "Expected length 25 but was actually 50");
}
214 
// ChunkedArray::View reinterprets int32 chunks as fixed_size_binary(4),
// both for populated chunks and for a ChunkedArray with no chunks.
TEST_F(TestChunkedArray, View) {
  auto in_ty = int32();
  auto out_ty = fixed_size_binary(4);

  // The integers are chosen so that their in-memory bytes spell "foox" and
  // "barz" on the host's byte order.
#if ARROW_LITTLE_ENDIAN
  const char* ints_with_null = "[2020568934, 2054316386, null]";
  const char* ints_without_null = "[2020568934, 2054316386]";
#else
  const char* ints_with_null = "[1718579064, 1650553466, null]";
  const char* ints_without_null = "[1718579064, 1650553466]";
#endif
  auto input_with_null = ArrayFromJSON(in_ty, ints_with_null);
  auto input_without_null = ArrayFromJSON(in_ty, ints_without_null);
  auto viewed_with_null = ArrayFromJSON(out_ty, R"(["foox", "barz", null])");
  auto viewed_without_null = ArrayFromJSON(out_ty, R"(["foox", "barz"])");

  ArrayVector input_chunks = {input_with_null, input_without_null};
  ArrayVector viewed_chunks = {viewed_with_null, viewed_without_null};
  auto input = std::make_shared<ChunkedArray>(input_chunks);
  auto expected = std::make_shared<ChunkedArray>(viewed_chunks);

  ASSERT_OK_AND_ASSIGN(auto result, input->View(out_ty));
  AssertChunkedEqual(*expected, *result);

  // Zero length
  ArrayVector no_arrays = {};
  auto empty_input = std::make_shared<ChunkedArray>(no_arrays, in_ty);
  auto empty_expected = std::make_shared<ChunkedArray>(no_arrays, out_ty);
  ASSERT_OK_AND_ASSIGN(auto empty_result, empty_input->View(out_ty));
  AssertChunkedEqual(*empty_expected, *empty_result);
}
243 
244 }  // namespace arrow
245