1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <cstdint>
19 #include <memory>
20 #include <vector>
21
22 #include <gtest/gtest.h>
23
24 #include "arrow/chunked_array.h"
25 #include "arrow/status.h"
26 #include "arrow/testing/gtest_common.h"
27 #include "arrow/testing/gtest_util.h"
28 #include "arrow/testing/random.h"
29 #include "arrow/type.h"
30 #include "arrow/util/endian.h"
31 #include "arrow/util/key_value_metadata.h"
32
33 namespace arrow {
34
35 class TestChunkedArray : public TestBase {
36 protected:
Construct()37 virtual void Construct() {
38 one_ = std::make_shared<ChunkedArray>(arrays_one_);
39 if (!arrays_another_.empty()) {
40 another_ = std::make_shared<ChunkedArray>(arrays_another_);
41 }
42 }
43
44 ArrayVector arrays_one_;
45 ArrayVector arrays_another_;
46
47 std::shared_ptr<ChunkedArray> one_;
48 std::shared_ptr<ChunkedArray> another_;
49 };
50
// Exercises ChunkedArray::Make with and without an explicit type.
TEST_F(TestChunkedArray, Make) {
  // Zero chunks and no explicit type is rejected.
  ASSERT_RAISES(Invalid, ChunkedArray::Make({}));

  // Zero chunks with an explicit type yields an empty array of that type.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<ChunkedArray> result,
                       ChunkedArray::Make({}, int64()));
  AssertTypeEqual(*int64(), *result->type());
  ASSERT_EQ(result->num_chunks(), 0);

  auto int8_chunk = ArrayFromJSON(int8(), "[0, 1, 2]");
  auto int16_chunk = ArrayFromJSON(int16(), "[3, 4, 5]");

  // Omitting the type and passing the matching type give equal results.
  ArrayVector uniform_chunks = {int8_chunk, int8_chunk};
  ASSERT_OK_AND_ASSIGN(result, ChunkedArray::Make(uniform_chunks));
  ASSERT_OK_AND_ASSIGN(auto typed_result, ChunkedArray::Make(uniform_chunks, int8()));
  AssertChunkedEqual(*result, *typed_result);

  // Chunks of differing types, or a type that disagrees with the chunks,
  // are both rejected.
  ArrayVector mixed_chunks = {int8_chunk, int16_chunk};
  ASSERT_RAISES(Invalid, ChunkedArray::Make(mixed_chunks));
  ArrayVector single_chunk = {int8_chunk};
  ASSERT_RAISES(Invalid, ChunkedArray::Make(single_chunk, int16()));
}
69
// Two ChunkedArrays wrapping the same single chunk compare equal through both
// the shared_ptr and the reference overloads of Equals.
TEST_F(TestChunkedArray, BasicEquals) {
  std::vector<bool> is_valid(100, true);
  std::vector<int32_t> values(100, 1);
  std::shared_ptr<Array> chunk;
  ArrayFromVector<Int32Type, int32_t>(is_valid, values, &chunk);

  // The very same chunk backs both arrays.
  arrays_one_.push_back(chunk);
  arrays_another_.push_back(chunk);
  Construct();

  ASSERT_TRUE(one_->Equals(one_));
  ASSERT_FALSE(one_->Equals(nullptr));
  ASSERT_TRUE(one_->Equals(another_));
  ASSERT_TRUE(one_->Equals(*another_));
}
84
// An int32 ChunkedArray and an int64 ChunkedArray holding the same logical
// values must not compare equal.
TEST_F(TestChunkedArray, EqualsDifferingTypes) {
  std::vector<bool> is_valid(100, true);
  std::vector<int32_t> values32(100, 1);
  std::vector<int64_t> values64(100, 1);

  std::shared_ptr<Array> int32_chunk;
  ArrayFromVector<Int32Type, int32_t>(is_valid, values32, &int32_chunk);
  arrays_one_.push_back(int32_chunk);

  std::shared_ptr<Array> int64_chunk;
  ArrayFromVector<Int64Type, int64_t>(is_valid, values64, &int64_chunk);
  arrays_another_.push_back(int64_chunk);

  Construct();
  ASSERT_FALSE(one_->Equals(another_));
  ASSERT_FALSE(one_->Equals(*another_));
}
99
// Equality depends on total length, not on how the values are chunked.
TEST_F(TestChunkedArray, EqualsDifferingLengths) {
  std::vector<bool> is_valid100(100, true);
  std::vector<bool> is_valid101(101, true);
  std::vector<int32_t> values100(100, 1);
  std::vector<int32_t> values101(101, 1);

  std::shared_ptr<Array> chunk100;
  ArrayFromVector<Int32Type, int32_t>(is_valid100, values100, &chunk100);
  arrays_one_.push_back(chunk100);

  std::shared_ptr<Array> chunk101;
  ArrayFromVector<Int32Type, int32_t>(is_valid101, values101, &chunk101);
  arrays_another_.push_back(chunk101);

  // 100 values versus 101 values: not equal.
  Construct();
  ASSERT_FALSE(one_->Equals(another_));
  ASSERT_FALSE(one_->Equals(*another_));

  // Appending a one-element chunk brings one_ to 101 values split as
  // [100, 1], which now equals another_'s single chunk of 101.
  std::vector<bool> is_valid1(1, true);
  std::vector<int32_t> values1(1, 1);
  std::shared_ptr<Array> chunk1;
  ArrayFromVector<Int32Type, int32_t>(is_valid1, values1, &chunk1);
  arrays_one_.push_back(chunk1);

  Construct();
  ASSERT_TRUE(one_->Equals(another_));
  ASSERT_TRUE(one_->Equals(*another_));
}
124
// List-typed ChunkedArrays whose item fields differ only by key/value
// metadata are expected to compare equal.
TEST_F(TestChunkedArray, EqualsDifferingMetadata) {
  auto plain_type = list(field("item", int32()));

  auto item_metadata = key_value_metadata({"foo"}, {"bar"});
  auto annotated_type = list(field("item", int32(), true, item_metadata));

  ArrayVector plain_chunks = {ArrayFromJSON(plain_type, "[[]]")};
  ArrayVector annotated_chunks = {ArrayFromJSON(annotated_type, "[[]]")};

  ChunkedArray plain(plain_chunks);
  ChunkedArray annotated(annotated_chunks);
  ASSERT_TRUE(plain.Equals(annotated));
}
138
// Checks slicing across chunk boundaries, chained slicing, and empty slices.
TEST_F(TestChunkedArray, SliceEquals) {
  // Three chunks of 100, 50 and 50 values: 200 values in total.
  arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  Construct();

  std::shared_ptr<ChunkedArray> direct = one_->Slice(125, 50);
  ASSERT_EQ(direct->length(), 50);
  AssertChunkedEqual(*one_->Slice(125, 50), *direct);

  // Offsets 75 + 25 + 25 add up to the same starting position of 125.
  std::shared_ptr<ChunkedArray> from_75 = one_->Slice(75);
  std::shared_ptr<ChunkedArray> from_100 = from_75->Slice(25);
  std::shared_ptr<ChunkedArray> chained = from_100->Slice(25, 50);
  ASSERT_EQ(chained->length(), 50);
  AssertChunkedEqual(*direct, *chained);

  // Making empty slices of a ChunkedArray: one starting at the very end...
  std::shared_ptr<ChunkedArray> past_end = one_->Slice(one_->length(), 99);
  ASSERT_EQ(past_end->length(), 0);
  ASSERT_EQ(past_end->num_chunks(), 1);
  ASSERT_TRUE(past_end->type()->Equals(one_->type()));

  // ...and one with an explicit length of zero.
  std::shared_ptr<ChunkedArray> zero_length = one_->Slice(10, 0);
  ASSERT_EQ(zero_length->length(), 0);
  ASSERT_EQ(zero_length->num_chunks(), 1);
  ASSERT_TRUE(zero_length->type()->Equals(one_->type()));

  // Slicing an empty ChunkedArray
  std::shared_ptr<ChunkedArray> resliced = zero_length->Slice(0, 10);
  ASSERT_EQ(resliced->length(), 0);
  ASSERT_EQ(resliced->num_chunks(), 1);
  ASSERT_TRUE(resliced->type()->Equals(one_->type()));
}
170
// Regression coverage for ChunkedArrays that contain no chunks at all.
TEST_F(TestChunkedArray, ZeroChunksIssues) {
  ArrayVector no_arrays = {};
  auto chunkless = std::make_shared<ChunkedArray>(no_arrays, int8());

  // ARROW-8911, assert that slicing is a no-op when there are zero-chunks
  auto empty_window = chunkless->Slice(0, 0);
  auto oversized_window = chunkless->Slice(0, 5);
  AssertChunkedEqual(*chunkless, *empty_window);
  AssertChunkedEqual(*chunkless, *oversized_window);
}
181
// ValidateFull accepts empty and homogeneous ChunkedArrays and rejects one
// whose chunks have mixed types.
TEST_F(TestChunkedArray, Validate) {
  // Valid if empty
  ArrayVector no_arrays = {};
  auto chunkless = std::make_shared<ChunkedArray>(no_arrays, utf8());
  ASSERT_OK(chunkless->ValidateFull());

  random::RandomArrayGenerator rng(0);

  // A single int32 chunk.
  arrays_one_.push_back(rng.Int32(50, 0, 100, 0.1));
  Construct();
  ASSERT_OK(one_->ValidateFull());

  // Two int32 chunks.
  arrays_one_.push_back(rng.Int32(50, 0, 100, 0.1));
  Construct();
  ASSERT_OK(one_->ValidateFull());

  // Adding a string chunk after the int32 chunks must fail validation.
  arrays_one_.push_back(rng.String(50, 0, 10, 0.1));
  Construct();
  ASSERT_RAISES(Invalid, one_->ValidateFull());
}
201
// PrintArrayDiff reports a length mismatch, naming the first argument's
// length as the expected one.
TEST_F(TestChunkedArray, PrintDiff) {
  random::RandomArrayGenerator rng(0);
  arrays_one_.push_back(rng.Int32(50, 0, 100, 0.1));
  Construct();

  // Dropping the first 25 of 50 values leaves a 25-value array.
  auto tail = one_->Slice(25);

  ASSERT_OK_AND_ASSIGN(auto forward_diff, PrintArrayDiff(*one_, *tail));
  ASSERT_EQ(*forward_diff, "Expected length 50 but was actually 25");

  ASSERT_OK_AND_ASSIGN(auto reverse_diff, PrintArrayDiff(*tail, *one_));
  ASSERT_EQ(*reverse_diff, "Expected length 25 but was actually 50");
}
214
// Viewing an int32 ChunkedArray as fixed_size_binary(4) reinterprets each
// chunk, including when there are no chunks.
TEST_F(TestChunkedArray, View) {
  auto source_type = int32();
  auto view_type = fixed_size_binary(4);

  // The integers are picked per byte order so that their four bytes match
  // the expected strings "foox" and "barz".
#if ARROW_LITTLE_ENDIAN
  auto ints_with_null = ArrayFromJSON(source_type, "[2020568934, 2054316386, null]");
  auto ints_no_null = ArrayFromJSON(source_type, "[2020568934, 2054316386]");
#else
  auto ints_with_null = ArrayFromJSON(source_type, "[1718579064, 1650553466, null]");
  auto ints_no_null = ArrayFromJSON(source_type, "[1718579064, 1650553466]");
#endif
  auto bytes_with_null = ArrayFromJSON(view_type, R"(["foox", "barz", null])");
  auto bytes_no_null = ArrayFromJSON(view_type, R"(["foox", "barz"])");

  {
    ArrayVector source_chunks = {ints_with_null, ints_no_null};
    ArrayVector expected_chunks = {bytes_with_null, bytes_no_null};
    auto source = std::make_shared<ChunkedArray>(source_chunks);
    auto expected = std::make_shared<ChunkedArray>(expected_chunks);

    ASSERT_OK_AND_ASSIGN(auto viewed, source->View(view_type));
    AssertChunkedEqual(*expected, *viewed);
  }

  // Zero length
  {
    ArrayVector no_arrays = {};
    auto source = std::make_shared<ChunkedArray>(no_arrays, source_type);
    auto expected = std::make_shared<ChunkedArray>(no_arrays, view_type);

    ASSERT_OK_AND_ASSIGN(auto viewed, source->View(view_type));
    AssertChunkedEqual(*expected, *viewed);
  }
}
243
244 } // namespace arrow
245