1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <cstdint>
19 #include <cstdio>
20 #include <functional>
21 #include <memory>
22 #include <string>
23 #include <vector>
24
25 #include <gtest/gtest.h>
26
27 #include "arrow/array.h"
28 #include "arrow/buffer.h"
29 #include "arrow/extension_type.h"
30 #include "arrow/memory_pool.h"
31 #include "arrow/status.h"
32 #include "arrow/table.h"
33 #include "arrow/testing/extension_type.h"
34 #include "arrow/testing/gtest_common.h"
35 #include "arrow/testing/gtest_util.h"
36 #include "arrow/testing/random.h"
37 #include "arrow/type.h"
38 #include "arrow/type_fwd.h"
39 #include "arrow/type_traits.h"
40 #include "arrow/util/checked_cast.h"
41 #include "arrow/util/decimal.h"
42
43 #include "arrow/compute/api_vector.h"
44 #include "arrow/compute/cast.h"
45 #include "arrow/compute/kernel.h"
46 #include "arrow/compute/test_util.h"
47
48 namespace arrow {
49 namespace compute {
50
51 using internal::checked_cast;
52
// A two-byte sequence that is not valid UTF-8 (both bytes are continuation
// bytes with no preceding lead byte). Used as a payload to exercise UTF-8
// validation when casting binary data to strings.
static constexpr const char* kInvalidUtf8 = "\xa0\xa1";

// The integer and floating-point types iterated over by tests that should
// hold for every numeric input type.
static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
    uint8(), int8(),   uint16(), int16(),   uint32(),
    int32(), uint64(), int64(),  float32(), float64()};
58
AssertBufferSame(const Array & left,const Array & right,int buffer_index)59 static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
60 ASSERT_EQ(left.data()->buffers[buffer_index].get(),
61 right.data()->buffers[buffer_index].get());
62 }
63
64 class TestCast : public TestBase {
65 public:
CheckPass(const Array & input,const Array & expected,const std::shared_ptr<DataType> & out_type,const CastOptions & options)66 void CheckPass(const Array& input, const Array& expected,
67 const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
68 ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, Cast(input, out_type, options));
69 ASSERT_OK(result->ValidateFull());
70 AssertArraysEqual(expected, *result, /*verbose=*/true);
71 }
72
73 template <typename InType, typename I_TYPE>
CheckFails(const std::shared_ptr<DataType> & in_type,const std::vector<I_TYPE> & in_values,const std::vector<bool> & is_valid,const std::shared_ptr<DataType> & out_type,const CastOptions & options)74 void CheckFails(const std::shared_ptr<DataType>& in_type,
75 const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
76 const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
77 std::shared_ptr<Array> input;
78 if (is_valid.size() > 0) {
79 ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
80 } else {
81 ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
82 }
83 ASSERT_RAISES(Invalid, Cast(*input, out_type, options));
84 }
85
CheckZeroCopy(const Array & input,const std::shared_ptr<DataType> & out_type)86 void CheckZeroCopy(const Array& input, const std::shared_ptr<DataType>& out_type) {
87 ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, Cast(input, out_type));
88 ASSERT_OK(result->ValidateFull());
89 ASSERT_EQ(input.data()->buffers.size(), result->data()->buffers.size());
90 for (size_t i = 0; i < input.data()->buffers.size(); ++i) {
91 AssertBufferSame(input, *result, static_cast<int>(i));
92 }
93 }
94
95 template <typename InType, typename I_TYPE, typename OutType, typename O_TYPE>
CheckCase(const std::shared_ptr<DataType> & in_type,const std::vector<I_TYPE> & in_values,const std::vector<bool> & is_valid,const std::shared_ptr<DataType> & out_type,const std::vector<O_TYPE> & out_values,const CastOptions & options)96 void CheckCase(const std::shared_ptr<DataType>& in_type,
97 const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
98 const std::shared_ptr<DataType>& out_type,
99 const std::vector<O_TYPE>& out_values, const CastOptions& options) {
100 ASSERT_EQ(in_values.size(), out_values.size());
101 std::shared_ptr<Array> input, expected;
102 if (is_valid.size() > 0) {
103 ASSERT_EQ(is_valid.size(), out_values.size());
104 ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
105 ArrayFromVector<OutType, O_TYPE>(out_type, is_valid, out_values, &expected);
106 } else {
107 ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
108 ArrayFromVector<OutType, O_TYPE>(out_type, out_values, &expected);
109 }
110 CheckPass(*input, *expected, out_type, options);
111
112 // Check a sliced variant
113 if (input->length() > 1) {
114 CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
115 }
116 }
117
CheckCaseJSON(const std::shared_ptr<DataType> & in_type,const std::shared_ptr<DataType> & out_type,const std::string & in_json,const std::string & expected_json,const CastOptions & options=CastOptions ())118 void CheckCaseJSON(const std::shared_ptr<DataType>& in_type,
119 const std::shared_ptr<DataType>& out_type,
120 const std::string& in_json, const std::string& expected_json,
121 const CastOptions& options = CastOptions()) {
122 std::shared_ptr<Array> input = ArrayFromJSON(in_type, in_json);
123 std::shared_ptr<Array> expected = ArrayFromJSON(out_type, expected_json);
124 ASSERT_EQ(input->length(), expected->length());
125 CheckPass(*input, *expected, out_type, options);
126
127 // Check a sliced variant
128 if (input->length() > 1) {
129 CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
130 }
131 }
132
133 template <typename SourceType, typename DestType>
TestCastBinaryToString()134 void TestCastBinaryToString() {
135 CastOptions options;
136 auto src_type = TypeTraits<SourceType>::type_singleton();
137 auto dest_type = TypeTraits<DestType>::type_singleton();
138
139 // All valid except the last one
140 std::vector<bool> all = {1, 1, 1, 1, 1};
141 std::vector<bool> valid = {1, 1, 1, 1, 0};
142 std::vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8};
143
144 std::shared_ptr<Array> array;
145
146 // Should accept when invalid but null.
147 ArrayFromVector<SourceType, std::string>(src_type, valid, strings, &array);
148 CheckZeroCopy(*array, dest_type);
149
150 // Should refuse due to invalid utf8 payload
151 CheckFails<SourceType, std::string>(src_type, strings, all, dest_type, options);
152
153 // Should accept due to option override
154 options.allow_invalid_utf8 = true;
155 CheckCase<SourceType, std::string, DestType, std::string>(
156 src_type, strings, all, dest_type, strings, options);
157 }
158
159 template <typename DestType>
TestCastNumberToString()160 void TestCastNumberToString() {
161 auto dest_type = TypeTraits<DestType>::type_singleton();
162
163 CheckCaseJSON(int8(), dest_type, "[0, 1, 127, -128, null]",
164 R"(["0", "1", "127", "-128", null])");
165 CheckCaseJSON(uint8(), dest_type, "[0, 1, 255, null]", R"(["0", "1", "255", null])");
166 CheckCaseJSON(int16(), dest_type, "[0, 1, 32767, -32768, null]",
167 R"(["0", "1", "32767", "-32768", null])");
168 CheckCaseJSON(uint16(), dest_type, "[0, 1, 65535, null]",
169 R"(["0", "1", "65535", null])");
170 CheckCaseJSON(int32(), dest_type, "[0, 1, 2147483647, -2147483648, null]",
171 R"(["0", "1", "2147483647", "-2147483648", null])");
172 CheckCaseJSON(uint32(), dest_type, "[0, 1, 4294967295, null]",
173 R"(["0", "1", "4294967295", null])");
174 CheckCaseJSON(int64(), dest_type,
175 "[0, 1, 9223372036854775807, -9223372036854775808, null]",
176 R"(["0", "1", "9223372036854775807", "-9223372036854775808", null])");
177 CheckCaseJSON(uint64(), dest_type, "[0, 1, 18446744073709551615, null]",
178 R"(["0", "1", "18446744073709551615", null])");
179
180 CheckCaseJSON(float32(), dest_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]",
181 R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])");
182 CheckCaseJSON(float64(), dest_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]",
183 R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])");
184 }
185
186 template <typename DestType>
TestCastBooleanToString()187 void TestCastBooleanToString() {
188 auto dest_type = TypeTraits<DestType>::type_singleton();
189
190 CheckCaseJSON(boolean(), dest_type, "[true, true, false, null]",
191 R"(["true", "true", "false", null])");
192 }
193
194 template <typename SourceType>
TestCastStringToNumber()195 void TestCastStringToNumber() {
196 CastOptions options;
197 auto src_type = TypeTraits<SourceType>::type_singleton();
198
199 std::vector<bool> is_valid = {true, false, true, true, true};
200
201 // string to int
202 std::vector<std::string> v_int = {"0", "1", "127", "-1", "0"};
203 std::vector<int8_t> e_int8 = {0, 1, 127, -1, 0};
204 std::vector<int16_t> e_int16 = {0, 1, 127, -1, 0};
205 std::vector<int32_t> e_int32 = {0, 1, 127, -1, 0};
206 std::vector<int64_t> e_int64 = {0, 1, 127, -1, 0};
207 CheckCase<SourceType, std::string, Int8Type, int8_t>(src_type, v_int, is_valid,
208 int8(), e_int8, options);
209 CheckCase<SourceType, std::string, Int16Type, int16_t>(src_type, v_int, is_valid,
210 int16(), e_int16, options);
211 CheckCase<SourceType, std::string, Int32Type, int32_t>(src_type, v_int, is_valid,
212 int32(), e_int32, options);
213 CheckCase<SourceType, std::string, Int64Type, int64_t>(src_type, v_int, is_valid,
214 int64(), e_int64, options);
215
216 v_int = {"2147483647", "0", "-2147483648", "0", "0"};
217 e_int32 = {2147483647, 0, -2147483648LL, 0, 0};
218 CheckCase<SourceType, std::string, Int32Type, int32_t>(src_type, v_int, is_valid,
219 int32(), e_int32, options);
220 v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"};
221 e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0};
222 CheckCase<SourceType, std::string, Int64Type, int64_t>(src_type, v_int, is_valid,
223 int64(), e_int64, options);
224
225 // string to uint
226 std::vector<std::string> v_uint = {"0", "1", "127", "255", "0"};
227 std::vector<uint8_t> e_uint8 = {0, 1, 127, 255, 0};
228 std::vector<uint16_t> e_uint16 = {0, 1, 127, 255, 0};
229 std::vector<uint32_t> e_uint32 = {0, 1, 127, 255, 0};
230 std::vector<uint64_t> e_uint64 = {0, 1, 127, 255, 0};
231 CheckCase<SourceType, std::string, UInt8Type, uint8_t>(src_type, v_uint, is_valid,
232 uint8(), e_uint8, options);
233 CheckCase<SourceType, std::string, UInt16Type, uint16_t>(src_type, v_uint, is_valid,
234 uint16(), e_uint16, options);
235 CheckCase<SourceType, std::string, UInt32Type, uint32_t>(src_type, v_uint, is_valid,
236 uint32(), e_uint32, options);
237 CheckCase<SourceType, std::string, UInt64Type, uint64_t>(src_type, v_uint, is_valid,
238 uint64(), e_uint64, options);
239
240 v_uint = {"4294967295", "0", "0", "0", "0"};
241 e_uint32 = {4294967295, 0, 0, 0, 0};
242 CheckCase<SourceType, std::string, UInt32Type, uint32_t>(src_type, v_uint, is_valid,
243 uint32(), e_uint32, options);
244 v_uint = {"18446744073709551615", "0", "0", "0", "0"};
245 e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0};
246 CheckCase<SourceType, std::string, UInt64Type, uint64_t>(src_type, v_uint, is_valid,
247 uint64(), e_uint64, options);
248
249 // string to float
250 std::vector<std::string> v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"};
251 std::vector<float> e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f};
252 std::vector<double> e_double = {0.1, 1.2, 127.3, 200.4, 0.5};
253 CheckCase<SourceType, std::string, FloatType, float>(src_type, v_float, is_valid,
254 float32(), e_float, options);
255 CheckCase<SourceType, std::string, DoubleType, double>(src_type, v_float, is_valid,
256 float64(), e_double, options);
257
258 #if !defined(_WIN32) || defined(NDEBUG)
259 // Test that casting is locale-independent
260 {
261 // French locale uses the comma as decimal point
262 LocaleGuard locale_guard("fr_FR.UTF-8");
263 CheckCase<SourceType, std::string, FloatType, float>(src_type, v_float, is_valid,
264 float32(), e_float, options);
265 CheckCase<SourceType, std::string, DoubleType, double>(
266 src_type, v_float, is_valid, float64(), e_double, options);
267 }
268 #endif
269 }
270
271 template <typename SourceType>
TestCastStringToTimestamp()272 void TestCastStringToTimestamp() {
273 CastOptions options;
274 auto src_type = TypeTraits<SourceType>::type_singleton();
275
276 std::vector<bool> is_valid = {true, false, true};
277 std::vector<std::string> strings = {"1970-01-01", "xxx", "2000-02-29"};
278
279 auto type = timestamp(TimeUnit::SECOND);
280 std::vector<int64_t> e = {0, 0, 951782400};
281 CheckCase<SourceType, std::string, TimestampType, int64_t>(
282 src_type, strings, is_valid, type, e, options);
283
284 type = timestamp(TimeUnit::MICRO);
285 e = {0, 0, 951782400000000LL};
286 CheckCase<SourceType, std::string, TimestampType, int64_t>(
287 src_type, strings, is_valid, type, e, options);
288
289 // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
290 }
291 };
292
// Casting an array to its own type must hand back the input's buffers
// rather than copies of them.
TEST_F(TestCast, SameTypeZeroCopy) {
  std::shared_ptr<Array> input = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]");
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> casted, Cast(*input, int32()));

  for (int buffer_index : {0, 1}) {
    AssertBufferSame(*input, *casted, buffer_index);
  }
}
300
// Casting a chunked array that has no chunks yields a chunked array of the
// target type that also has no chunks.
TEST_F(TestCast, ZeroChunks) {
  auto empty_int32 = std::make_shared<ChunkedArray>(ArrayVector{}, int32());
  ASSERT_OK_AND_ASSIGN(Datum casted, Cast(empty_int32, utf8()));

  ASSERT_EQ(casted.kind(), Datum::CHUNKED_ARRAY);
  const ChunkedArray expected({}, utf8());
  AssertChunkedEqual(*casted.chunked_array(), expected);
}
308
// Casting boolean to int32 maps true to 1 and false to 0.
TEST_F(TestCast, FromBoolean) {
  CastOptions options;

  constexpr size_t kLength = 20;

  // Every slot is valid except index 3.
  std::vector<bool> is_valid(kLength, true);
  is_valid[3] = false;

  // Every index with remainder 1 modulo 3 holds false, all others hold true.
  std::vector<bool> inputs(kLength);
  std::vector<int32_t> expected(kLength);
  for (size_t i = 0; i < kLength; ++i) {
    const bool value = (i % 3 != 1);
    inputs[i] = value;
    expected[i] = value ? 1 : 0;
  }

  CheckCase<BooleanType, bool, Int32Type, int32_t>(boolean(), inputs, is_valid, int32(),
                                                   expected, options);
}
327
// Casting any numeric type to boolean maps zero to false and every non-zero
// value (including negative ones) to true; nulls stay null.
TEST_F(TestCast, ToBoolean) {
  // Iterate by const reference: copying a shared_ptr per iteration only adds
  // needless reference-count traffic.
  for (const auto& type : kNumericTypes) {
    CheckCaseJSON(type, boolean(), "[0, null, 127, 1, 0]",
                  "[false, null, true, true, false]");
  }

  // Check negative numbers
  CheckCaseJSON(int8(), boolean(), "[0, null, 127, -1, 0]",
                "[false, null, true, true, false]");
  CheckCaseJSON(float64(), boolean(), "[0, null, 127, -1, 0]",
                "[false, null, true, true, false]");
}
341
// Widening casts to integer types succeed even when integer overflow is
// disallowed.
TEST_F(TestCast, ToIntUpcast) {
  CastOptions options;
  options.allow_int_overflow = false;

  // The second slot is null in every case.
  std::vector<bool> is_valid = {true, false, true, true, true};

  // int8 -> int32
  std::vector<int8_t> int8_in = {0, 1, 127, -1, 0};
  std::vector<int32_t> int32_out = {0, 1, 127, -1, 0};
  CheckCase<Int8Type, int8_t, Int32Type, int32_t>(int8(), int8_in, is_valid, int32(),
                                                  int32_out, options);

  // bool -> int8
  std::vector<bool> bool_in = {false, true, false, true, true};
  std::vector<int8_t> int8_out = {0, 1, 0, 1, 1};
  CheckCase<BooleanType, bool, Int8Type, int8_t>(boolean(), bool_in, is_valid, int8(),
                                                 int8_out, options);

  // uint8 -> int16: every uint8 value is representable
  std::vector<uint8_t> uint8_in = {0, 100, 200, 255, 0};
  std::vector<int16_t> int16_out = {0, 100, 200, 255, 0};
  CheckCase<UInt8Type, uint8_t, Int16Type, int16_t>(uint8(), uint8_in, is_valid, int16(),
                                                    int16_out, options);
}
366
// An out-of-range value stored in a null slot must not make a
// overflow-checked cast fail.
TEST_F(TestCast, OverflowInNullSlot) {
  CastOptions options;
  options.allow_int_overflow = false;

  std::vector<bool> is_valid = {true, false, true, true, true};

  // 70000 does not fit in int16, but it sits in the null slot (index 1).
  std::vector<int32_t> raw_values = {0, 70000, 2000, 1000, 0};
  std::vector<int16_t> expected_values = {0, 0, 2000, 1000, 0};

  std::shared_ptr<Array> expected;
  ArrayFromVector<Int16Type, int16_t>(int16(), is_valid, expected_values, &expected);

  // Build the input directly over the raw values, reusing the validity
  // bitmap of the expected array.
  auto values_buffer = Buffer::Wrap(raw_values.data(), raw_values.size());
  Int32Array input(5, values_buffer, expected->null_bitmap(), -1);

  CheckPass(input, *expected, int16(), options);
}
384
// With integer overflow disallowed, narrowing casts succeed only when every
// non-null value fits in the destination type.
TEST_F(TestCast, ToIntDowncastSafe) {
  CastOptions options;
  options.allow_int_overflow = false;

  // The second slot is null in every case.
  std::vector<bool> is_valid = {true, false, true, true, true};

  // int16 -> uint8, all values in range
  std::vector<int16_t> i16_fits = {0, 100, 200, 1, 2};
  std::vector<uint8_t> u8_expected = {0, 100, 200, 1, 2};
  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), i16_fits, is_valid, uint8(),
                                                    u8_expected, options);

  // int16 -> uint8, 256 is too large
  std::vector<int16_t> i16_too_large = {0, 100, 256, 0, 0};
  CheckFails<Int16Type>(int16(), i16_too_large, is_valid, uint8(), options);

  // int16 -> uint8, -1 is below zero
  std::vector<int16_t> i16_negative = {0, 100, -1, 0, 0};
  CheckFails<Int16Type>(int16(), i16_negative, is_valid, uint8(), options);

  // int32 -> int16, all values in range
  std::vector<int32_t> i32_fits = {0, 1000, 2000, 1, 2};
  std::vector<int16_t> i16_expected = {0, 1000, 2000, 1, 2};
  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), i32_fits, is_valid, int16(),
                                                    i16_expected, options);

  // int32 -> int16, 70000 is too large
  std::vector<int32_t> i32_too_large = {0, 1000, 2000, 70000, 0};
  CheckFails<Int32Type>(int32(), i32_too_large, is_valid, int16(), options);

  // int32 -> int16, -70000 is too small
  std::vector<int32_t> i32_too_small = {0, 1000, 2000, -70000, 0};
  CheckFails<Int32Type>(int32(), i32_too_small, is_valid, int16(), options);

  // int32 -> uint8 with the same out-of-range values
  std::vector<int32_t> i32_for_u8 = {0, 1000, 2000, -70000, 0};
  CheckFails<Int32Type>(int32(), i32_for_u8, is_valid, uint8(), options);
}
422
// Returns a vector holding static_cast<O>(x) for every element x of `v`, in
// the same order. No range checking is performed on the conversion.
template <typename O, typename I>
std::vector<O> UnsafeVectorCast(const std::vector<I>& v) {
  std::vector<O> converted;
  converted.reserve(v.size());
  for (const I& element : v) {
    converted.push_back(static_cast<O>(element));
  }
  return converted;
}
432
// Signed-to-unsigned casts fail on out-of-range values when overflow is
// disallowed, and wrap like static_cast when it is allowed.
TEST_F(TestCast, IntegerSignedToUnsigned) {
  CastOptions options;
  options.allow_int_overflow = false;

  std::vector<bool> is_valid = {true, false, true, true, true};

  std::vector<int32_t> with_negatives = {INT32_MIN, 100, -1, UINT16_MAX, INT32_MAX};
  // No negative non-null value here: the failure comes from values that are
  // too large for uint16 rather than below zero.
  std::vector<int32_t> too_large = {0, -11, 0, UINT16_MAX + 1, INT32_MAX};

  // Destination of the same width
  CheckFails<Int32Type>(int32(), with_negatives, is_valid, uint32(), options);
  // Wider destination
  CheckFails<Int32Type>(int32(), with_negatives, is_valid, uint64(), options);
  // Narrower destination
  CheckFails<Int32Type>(int32(), with_negatives, is_valid, uint16(), options);
  CheckFails<Int32Type>(int32(), too_large, is_valid, uint16(), options);

  // With overflow allowed, the results match an element-wise static_cast.
  options.allow_int_overflow = true;

  CheckCase<Int32Type, int32_t, UInt32Type, uint32_t>(
      int32(), with_negatives, is_valid, uint32(),
      UnsafeVectorCast<uint32_t, int32_t>(with_negatives), options);
  CheckCase<Int32Type, int32_t, UInt64Type, uint64_t>(
      int32(), with_negatives, is_valid, uint64(),
      UnsafeVectorCast<uint64_t, int32_t>(with_negatives), options);
  CheckCase<Int32Type, int32_t, UInt16Type, uint16_t>(
      int32(), with_negatives, is_valid, uint16(),
      UnsafeVectorCast<uint16_t, int32_t>(with_negatives), options);
  CheckCase<Int32Type, int32_t, UInt16Type, uint16_t>(
      int32(), too_large, is_valid, uint16(),
      UnsafeVectorCast<uint16_t, int32_t>(too_large), options);
}
463
// Unsigned-to-signed casts fail on out-of-range values when overflow is
// disallowed, and wrap like static_cast when it is allowed.
TEST_F(TestCast, IntegerUnsignedToSigned) {
  CastOptions options;
  options.allow_int_overflow = false;

  std::vector<bool> is_valid = {true, true, true};

  std::vector<uint32_t> up_to_u32_max = {0, INT16_MAX + 1, UINT32_MAX};
  std::vector<uint32_t> just_past_i16 = {0, INT16_MAX + 1, 2};

  // Destination of the same width
  CheckFails<UInt32Type>(uint32(), up_to_u32_max, is_valid, int32(), options);
  // Narrower destination
  CheckFails<UInt32Type>(uint32(), up_to_u32_max, is_valid, int16(), options);
  CheckFails<UInt32Type>(uint32(), just_past_i16, is_valid, int16(), options);

  // With overflow allowed, the results match an element-wise static_cast.
  options.allow_int_overflow = true;

  CheckCase<UInt32Type, uint32_t, Int32Type, int32_t>(
      uint32(), up_to_u32_max, is_valid, int32(),
      UnsafeVectorCast<int32_t, uint32_t>(up_to_u32_max), options);
  CheckCase<UInt32Type, uint32_t, Int64Type, int64_t>(
      uint32(), up_to_u32_max, is_valid, int64(),
      UnsafeVectorCast<int64_t, uint32_t>(up_to_u32_max), options);
  CheckCase<UInt32Type, uint32_t, Int16Type, int16_t>(
      uint32(), up_to_u32_max, is_valid, int16(),
      UnsafeVectorCast<int16_t, uint32_t>(up_to_u32_max), options);
  CheckCase<UInt32Type, uint32_t, Int16Type, int16_t>(
      uint32(), just_past_i16, is_valid, int16(),
      UnsafeVectorCast<int16_t, uint32_t>(just_past_i16), options);
}
489
// With integer overflow allowed, narrowing casts always succeed and
// out-of-range values wrap around.
TEST_F(TestCast, ToIntDowncastUnsafe) {
  CastOptions options;
  options.allow_int_overflow = true;

  // The second slot is null in every case.
  std::vector<bool> is_valid = {true, false, true, true, true};

  // int16 -> uint8, all values in range
  std::vector<int16_t> i16_fits = {0, 100, 200, 1, 2};
  std::vector<uint8_t> u8_fits = {0, 100, 200, 1, 2};
  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), i16_fits, is_valid, uint8(),
                                                    u8_fits, options);

  // int16 -> uint8, 256 wraps to 0
  std::vector<int16_t> i16_too_large = {0, 100, 256, 0, 0};
  std::vector<uint8_t> u8_wrapped_high = {0, 100, 0, 0, 0};
  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), i16_too_large, is_valid,
                                                    uint8(), u8_wrapped_high, options);

  // int16 -> uint8, -1 wraps to 255
  std::vector<int16_t> i16_negative = {0, 100, -1, 0, 0};
  std::vector<uint8_t> u8_wrapped_low = {0, 100, 255, 0, 0};
  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), i16_negative, is_valid,
                                                    uint8(), u8_wrapped_low, options);

  // int32 -> int16, all values in range
  std::vector<int32_t> i32_fits = {0, 1000, 2000, 1, 2};
  std::vector<int16_t> i16_same = {0, 1000, 2000, 1, 2};
  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), i32_fits, is_valid, int16(),
                                                    i16_same, options);

  // int32 -> int16, 70000 wraps to 4464
  // TODO(wesm): do we want to allow this? we could set to null
  std::vector<int32_t> i32_too_large = {0, 1000, 2000, 70000, 0};
  std::vector<int16_t> i16_wrapped_high = {0, 1000, 2000, 4464, 0};
  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), i32_too_large, is_valid,
                                                    int16(), i16_wrapped_high, options);

  // int32 -> int16, -70000 wraps to -4464
  // TODO(wesm): do we want to allow this? we could set overflow to null
  std::vector<int32_t> i32_too_small = {0, 1000, 2000, -70000, 0};
  std::vector<int16_t> i16_wrapped_low = {0, 1000, 2000, -4464, 0};
  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), i32_too_small, is_valid,
                                                    int16(), i16_wrapped_low, options);
}
534
// Float-to-integer casts: whole numbers always convert; fractional values
// convert only when allow_float_truncate is set.
TEST_F(TestCast, FloatingPointToInt) {
  // Safe options, i.e. allow_float_truncate == false
  auto options = CastOptions::Safe();

  // Each scenario runs first with a null in the second slot, then with every
  // slot valid.
  const std::vector<std::vector<bool>> validity_masks = {
      {true, false, true, true, true}, {true, true, true, true, true}};

  // float32 -> int32, whole numbers only
  std::vector<float> whole_f32 = {1.0, 0, 0.0, -1.0, 5.0};
  std::vector<int32_t> whole_f32_as_i32 = {1, 0, 0, -1, 5};
  for (const auto& mask : validity_masks) {
    CheckCase<FloatType, float, Int32Type, int32_t>(float32(), whole_f32, mask, int32(),
                                                    whole_f32_as_i32, options);
  }

  // float64 -> int32, whole numbers only
  std::vector<double> whole_f64_a = {1.0, 0, 0.0, -1.0, 5.0};
  std::vector<int32_t> whole_f64_as_i32 = {1, 0, 0, -1, 5};
  for (const auto& mask : validity_masks) {
    CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), whole_f64_a, mask,
                                                      int32(), whole_f64_as_i32, options);
  }

  // float64 -> int64, whole numbers only
  std::vector<double> whole_f64_b = {1.0, 0, 0.0, -1.0, 5.0};
  std::vector<int64_t> whole_f64_as_i64 = {1, 0, 0, -1, 5};
  for (const auto& mask : validity_masks) {
    CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), whole_f64_b, mask,
                                                      int64(), whole_f64_as_i64, options);
  }

  // float64 -> int32, fractional values
  std::vector<double> frac_for_i32 = {1.5, 0, 0.5, -1.5, 5.5};
  std::vector<int32_t> truncated_i32 = {1, 0, 0, -1, 5};

  options.allow_float_truncate = false;
  for (const auto& mask : validity_masks) {
    CheckFails<DoubleType>(float64(), frac_for_i32, mask, int32(), options);
  }

  options.allow_float_truncate = true;
  for (const auto& mask : validity_masks) {
    CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), frac_for_i32, mask,
                                                      int32(), truncated_i32, options);
  }

  // float64 -> int64, fractional values
  std::vector<double> frac_for_i64 = {1.5, 0, 0.5, -1.5, 5.5};
  std::vector<int64_t> truncated_i64 = {1, 0, 0, -1, 5};

  options.allow_float_truncate = false;
  for (const auto& mask : validity_masks) {
    CheckFails<DoubleType>(float64(), frac_for_i64, mask, int64(), options);
  }

  options.allow_float_truncate = true;
  for (const auto& mask : validity_masks) {
    CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), frac_for_i64, mask,
                                                      int64(), truncated_i64, options);
  }
}
594
595 #if ARROW_BITNESS >= 64
// Safe int64-to-float casts reject values near the int64 limits, unless
// every such value sits in a null slot.
TEST_F(TestCast, IntToFloatingPoint) {
  auto options = CastOptions::Safe();

  std::vector<bool> none_null = {true, true, true, true, true};
  std::vector<bool> all_null = {false, false, false, false, false};

  std::vector<int64_t> extremes = {INT64_MIN, INT64_MIN + 1, 0, INT64_MAX - 1, INT64_MAX};
  CheckFails<Int64Type>(int64(), extremes, none_null, float32(), options);

  // The values are not safely convertible, but all of them are null.
  const std::vector<double> as_double = UnsafeVectorCast<double, int64_t>(extremes);
  CheckCase<Int64Type, int64_t, DoubleType, double>(int64(), extremes, all_null,
                                                    float64(), as_double, options);
}
610 #endif
611
// Decimal128-to-int64 casts under every combination of allow_int_overflow
// and allow_decimal_truncate. Each scenario is run on a two-element
// all-valid input and on a three-element input whose last slot is null.
TEST_F(TestCast, DecimalToInt) {
  CastOptions options;
  std::vector<bool> is_valid2 = {true, true};
  std::vector<bool> is_valid3 = {true, true, false};

  const std::shared_ptr<DataType> dec_type = decimal(38, 10);

  // ---- Neither overflow nor truncation: succeeds under all options ----
  std::vector<Decimal128> exact2 = {Decimal128("02.0000000000"),
                                    Decimal128("-11.0000000000")};
  std::vector<Decimal128> exact3 = {Decimal128("02.0000000000"),
                                    Decimal128("-11.0000000000"),
                                    Decimal128("-12.0000000000")};
  std::vector<int64_t> exact2_out = {2, -11};
  std::vector<int64_t> exact3_out = {2, -11, 0};

  for (bool allow_int_overflow : {false, true}) {
    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_int_overflow = allow_int_overflow;
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
          dec_type, exact2, is_valid2, int64(), exact2_out, options);
      CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
          dec_type, exact3, is_valid3, int64(), exact3_out, options);
    }
  }

  // ---- Truncation only: needs allow_decimal_truncate ----
  std::vector<Decimal128> frac2 = {Decimal128("02.1000000000"),
                                   Decimal128("-11.0000004500")};
  std::vector<Decimal128> frac3 = {Decimal128("02.1000000000"),
                                   Decimal128("-11.0000004500"),
                                   Decimal128("-12.0000004500")};
  std::vector<int64_t> frac2_out = {2, -11};
  std::vector<int64_t> frac3_out = {2, -11, 0};

  for (bool allow_int_overflow : {false, true}) {
    options.allow_int_overflow = allow_int_overflow;

    options.allow_decimal_truncate = true;
    CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
        dec_type, frac2, is_valid2, int64(), frac2_out, options);
    CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
        dec_type, frac3, is_valid3, int64(), frac3_out, options);

    options.allow_decimal_truncate = false;
    CheckFails<Decimal128Type>(dec_type, frac2, is_valid2, int64(), options);
    CheckFails<Decimal128Type>(dec_type, frac3, is_valid3, int64(), options);
  }

  // ---- Overflow only: needs allow_int_overflow ----
  std::vector<Decimal128> big2 = {Decimal128("12345678901234567890000.0000000000"),
                                  Decimal128("99999999999999999999999.0000000000")};
  std::vector<Decimal128> big3 = {Decimal128("12345678901234567890000.0000000000"),
                                  Decimal128("99999999999999999999999.0000000000"),
                                  Decimal128("99999999999999999999999.0000000000")};
  // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
  std::vector<int64_t> big2_out = {4807115922877858896, 200376420520689663};
  std::vector<int64_t> big3_out = {4807115922877858896, 200376420520689663, -2};

  for (bool allow_decimal_truncate : {false, true}) {
    options.allow_decimal_truncate = allow_decimal_truncate;

    options.allow_int_overflow = true;
    CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
        dec_type, big2, is_valid2, int64(), big2_out, options);
    CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
        dec_type, big3, is_valid3, int64(), big3_out, options);

    options.allow_int_overflow = false;
    CheckFails<Decimal128Type>(dec_type, big2, is_valid2, int64(), options);
    CheckFails<Decimal128Type>(dec_type, big3, is_valid3, int64(), options);
  }

  // ---- Overflow and truncation: needs both options ----
  std::vector<Decimal128> big_frac2 = {Decimal128("12345678901234567890000.0045345000"),
                                       Decimal128("99999999999999999999999.0000005430")};
  std::vector<Decimal128> big_frac3 = {Decimal128("12345678901234567890000.0005345340"),
                                       Decimal128("99999999999999999999999.0000344300"),
                                       Decimal128("99999999999999999999999.0004354000")};
  // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
  std::vector<int64_t> big_frac2_out = {4807115922877858896, 200376420520689663};
  std::vector<int64_t> big_frac3_out = {4807115922877858896, 200376420520689663, -2};

  for (bool allow_int_overflow : {false, true}) {
    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_int_overflow = allow_int_overflow;
      options.allow_decimal_truncate = allow_decimal_truncate;
      if (!allow_int_overflow || !allow_decimal_truncate) {
        CheckFails<Decimal128Type>(dec_type, big_frac2, is_valid2, int64(), options);
        CheckFails<Decimal128Type>(dec_type, big_frac3, is_valid3, int64(), options);
      } else {
        CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
            dec_type, big_frac2, is_valid2, int64(), big_frac2_out, options);
        CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
            dec_type, big_frac3, is_valid3, int64(), big_frac3_out, options);
      }
    }
  }

  // ---- Negative scale ----
  // Runs with whatever option values the last loop iteration left behind.
  std::vector<Decimal128> neg_scale = {Decimal128("1234567890000."),
                                       Decimal128("-120000.")};
  for (Decimal128& value : neg_scale) {
    value = value.Rescale(0, -4).ValueOrDie();
  }
  std::vector<int64_t> neg_scale_out = {1234567890000, -120000};
  CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
      decimal(38, -4), neg_scale, is_valid2, int64(), neg_scale_out, options);
}
713
// Exercises decimal128 -> decimal128 casts between decimal(38, 10) and
// decimal(28, 0), with CastOptions::allow_decimal_truncate both set and unset.
TEST_F(TestCast, DecimalToDecimal) {
  using DecimalVector = std::vector<Decimal128>;

  CastOptions options;

  const std::shared_ptr<DataType> scaled_type = decimal(38, 10);
  const std::shared_ptr<DataType> integral_type = decimal(28, 0);

  const std::vector<bool> both_valid = {true, true};
  const std::vector<bool> last_null = {true, true, false};

  // Thin wrapper so the cases below do not repeat the CheckCase template arguments.
  auto CheckDecimalCast = [this](const CastOptions& cast_options,
                                 const std::shared_ptr<DataType>& from_type,
                                 const DecimalVector& from_values,
                                 const std::vector<bool>& validity,
                                 const std::shared_ptr<DataType>& to_type,
                                 const DecimalVector& to_values) {
    CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
        from_type, from_values, validity, to_type, to_values, cast_options);
  };

  // Inputs whose fractional digits are all zero: expected to pass in both
  // directions regardless of the truncation flag.
  const DecimalVector whole_scaled2 = {Decimal128("02.0000000000"),
                                       Decimal128("30.0000000000")};
  const DecimalVector whole_integral2 = {Decimal128("02."), Decimal128("30.")};
  const DecimalVector whole_scaled3 = {Decimal128("02.0000000000"),
                                       Decimal128("30.0000000000"),
                                       Decimal128("30.0000000000")};
  // The last slot is marked null by `last_null`; its payload differs from the input.
  const DecimalVector whole_integral3 = {Decimal128("02."), Decimal128("30."),
                                         Decimal128("-1.")};

  for (const bool truncate_allowed : {false, true}) {
    options.allow_decimal_truncate = truncate_allowed;
    CheckDecimalCast(options, scaled_type, whole_scaled2, both_valid, integral_type,
                     whole_integral2);
    CheckDecimalCast(options, scaled_type, whole_scaled3, last_null, integral_type,
                     whole_integral3);
    // ... and the reverse direction
    CheckDecimalCast(options, integral_type, whole_integral2, both_valid, scaled_type,
                     whole_scaled2);
    CheckDecimalCast(options, integral_type, whole_integral3, last_null, scaled_type,
                     whole_scaled3);
  }

  // Inputs carrying non-zero fractional digits.
  const DecimalVector frac_scaled2 = {Decimal128("-02.1234567890"),
                                      Decimal128("30.1234567890")};
  const DecimalVector frac_integral2 = {Decimal128("-02."), Decimal128("30.")};
  const DecimalVector frac_roundtrip2 = {Decimal128("-02.0000000000"),
                                         Decimal128("30.0000000000")};
  const DecimalVector frac_scaled3 = {Decimal128("-02.1234567890"),
                                      Decimal128("30.1234567890"),
                                      Decimal128("30.1234567890")};
  const DecimalVector frac_integral3 = {Decimal128("-02."), Decimal128("30."),
                                        Decimal128("-70.")};
  const DecimalVector frac_roundtrip3 = {Decimal128("-02.0000000000"),
                                         Decimal128("30.0000000000"),
                                         Decimal128("80.0000000000")};

  // With truncation allowed the fractional part is dropped ...
  options.allow_decimal_truncate = true;
  CheckDecimalCast(options, scaled_type, frac_scaled2, both_valid, integral_type,
                   frac_integral2);
  CheckDecimalCast(options, scaled_type, frac_scaled3, last_null, integral_type,
                   frac_integral3);
  // ... and casting back yields zero fractional digits.
  CheckDecimalCast(options, integral_type, frac_integral2, both_valid, scaled_type,
                   frac_roundtrip2);
  CheckDecimalCast(options, integral_type, frac_integral3, last_null, scaled_type,
                   frac_roundtrip3);

  // With truncation disallowed the scale-reducing cast must fail ...
  options.allow_decimal_truncate = false;
  CheckFails<Decimal128Type>(scaled_type, frac_scaled2, both_valid, integral_type,
                             options);
  CheckFails<Decimal128Type>(scaled_type, frac_scaled3, last_null, integral_type,
                             options);
  // ... while the scale-increasing direction still succeeds.
  CheckDecimalCast(options, integral_type, frac_integral2, both_valid, scaled_type,
                   frac_roundtrip2);
  CheckDecimalCast(options, integral_type, frac_integral3, last_null, scaled_type,
                   frac_roundtrip3);
}
776
// Timestamp -> timestamp casts across time units: multiplying promotions,
// zero-copy identity/int64 casts, dividing casts with and without
// allow_time_truncate, and multiplication overflow detection.
TEST_F(TestCast, TimestampToTimestamp) {
  using Values = std::vector<int64_t>;

  CastOptions options;
  const std::vector<bool> is_valid = {true, false, true, true, true};

  auto CheckTimestampCast = [this, &is_valid](const CastOptions& cast_options,
                                              TimeUnit::type from_unit,
                                              TimeUnit::type to_unit,
                                              const Values& input,
                                              const Values& expected) {
    CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
        timestamp(from_unit), input, is_valid, timestamp(to_unit), expected,
        cast_options);
  };

  // One (source unit, target unit, values) row of a table-driven check.
  struct UnitCase {
    TimeUnit::type from_unit;
    TimeUnit::type to_unit;
    const Values* values;
  };

  // Multiply promotions: the same base input is expected to be scaled by
  // 10^3, 10^6 or 10^9 depending on the distance between the units.
  const Values base = {0, 100, 200, 1, 2};
  const Values base_x1e3 = {0, 100000, 200000, 1000, 2000};
  const Values base_x1e6 = {0, 100000000L, 200000000L, 1000000, 2000000};
  const Values base_x1e9 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};

  const UnitCase promotions[] = {
      {TimeUnit::SECOND, TimeUnit::MILLI, &base_x1e3},
      {TimeUnit::SECOND, TimeUnit::MICRO, &base_x1e6},
      {TimeUnit::SECOND, TimeUnit::NANO, &base_x1e9},
      {TimeUnit::MILLI, TimeUnit::MICRO, &base_x1e3},
      {TimeUnit::MILLI, TimeUnit::NANO, &base_x1e6},
      {TimeUnit::MICRO, TimeUnit::NANO, &base_x1e3},
  };
  for (const UnitCase& promotion : promotions) {
    CheckTimestampCast(options, promotion.from_unit, promotion.to_unit, base,
                       *promotion.values);
  }

  // Zero copy
  const Values zero_copy_values = {0, 70000, 2000, 1000, 0};
  std::shared_ptr<Array> arr;
  ArrayFromVector<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), is_valid,
                                          zero_copy_values, &arr);
  CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND));

  // ARROW-1773, cast to integer
  CheckZeroCopy(*arr, int64());

  // Divide, truncate: inputs that are not exact multiples of the divisor.
  const Values frac_1e3 = {0, 100123, 200456, 1123, 2456};
  const Values frac_1e6 = {0, 100123000, 200456000, 1123000, 2456000};
  const Values frac_1e9 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};

  const UnitCase lossy_casts[] = {
      {TimeUnit::MILLI, TimeUnit::SECOND, &frac_1e3},
      {TimeUnit::MICRO, TimeUnit::MILLI, &frac_1e3},
      {TimeUnit::NANO, TimeUnit::MICRO, &frac_1e3},
      {TimeUnit::MICRO, TimeUnit::SECOND, &frac_1e6},
      {TimeUnit::NANO, TimeUnit::MILLI, &frac_1e6},
      {TimeUnit::NANO, TimeUnit::SECOND, &frac_1e9},
  };

  // With truncation allowed every lossy cast yields the base values.
  options.allow_time_truncate = true;
  for (const UnitCase& lossy : lossy_casts) {
    CheckTimestampCast(options, lossy.from_unit, lossy.to_unit, *lossy.values, base);
  }

  // Disallow truncate, failures
  options.allow_time_truncate = false;
  for (const UnitCase& lossy : lossy_casts) {
    CheckFails<TimestampType>(timestamp(lossy.from_unit), *lossy.values, is_valid,
                              timestamp(lossy.to_unit), options);
  }

  // Multiply overflow

  // 1000-01-01, 1800-01-01 , 2000-01-01, 2300-01-01, 3000-01-01
  const Values far_dates = {-30610224000, -5364662400, 946684800, 10413792000,
                            32503680000};

  options.allow_time_overflow = false;
  CheckFails<TimestampType>(timestamp(TimeUnit::SECOND), far_dates, is_valid,
                            timestamp(TimeUnit::NANO), options);
}
869
// Timestamp -> date32/date64 casts from every time unit: exact conversions,
// failures when truncation is disallowed, and a check that null slots are
// exempt from the truncation test.
TEST_F(TestCast, TimestampToDate32_Date64) {
  CastOptions options;

  const std::vector<bool> is_valid = {true, true, false};

  // Input values paired with the timestamp unit they are expressed in.
  struct UnitSample {
    TimeUnit::type unit;
    std::vector<int64_t> values;
  };

  // 2000-01-01, 2000-01-02, null
  const std::vector<UnitSample> exact_samples = {
      {TimeUnit::NANO, {946684800000000000, 946771200000000000, 0}},
      {TimeUnit::MICRO, {946684800000000, 946771200000000, 0}},
      {TimeUnit::MILLI, {946684800000, 946771200000, 0}},
      {TimeUnit::SECOND, {946684800, 946771200, 0}},
  };
  const std::vector<int64_t> expected_date64 = {946684800000, 946771200000, 0};
  const std::vector<int32_t> expected_date32 = {10957, 10958, 0};

  // Simple conversions
  for (const UnitSample& sample : exact_samples) {
    CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
        timestamp(sample.unit), sample.values, is_valid, date64(), expected_date64,
        options);
  }
  for (const UnitSample& sample : exact_samples) {
    CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
        timestamp(sample.unit), sample.values, is_valid, date32(), expected_date32,
        options);
  }

  // Disallow truncate, failures: each valid value is one unit past the values above.
  const std::vector<UnitSample> inexact_samples = {
      {TimeUnit::NANO, {946684800000000001, 946771200000000001, 0}},
      {TimeUnit::MICRO, {946684800000001, 946771200000001, 0}},
      {TimeUnit::MILLI, {946684800001, 946771200001, 0}},
      {TimeUnit::SECOND, {946684801, 946771201, 0}},
  };

  options.allow_time_truncate = false;
  for (const UnitSample& sample : inexact_samples) {
    CheckFails<TimestampType>(timestamp(sample.unit), sample.values, is_valid, date64(),
                              options);
  }
  for (const UnitSample& sample : inexact_samples) {
    CheckFails<TimestampType>(timestamp(sample.unit), sample.values, is_valid, date32(),
                              options);
  }

  // Make sure that nulls are excluded from the truncation checks: only the
  // null slot holds a value that differs from the exact samples above.
  const std::vector<int64_t> seconds_with_null_payload = {946684800, 946771200, 1};
  CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
      timestamp(TimeUnit::SECOND), seconds_with_null_payload, is_valid, date64(),
      expected_date64, options);
  CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
      timestamp(TimeUnit::SECOND), seconds_with_null_payload, is_valid, date32(),
      expected_date32, options);
}
933
// Casts between time32/time64 types of different units: multiplying
// promotions, zero-copy identity and integer casts, dividing casts with
// allow_time_truncate enabled, and the matching failures when it is disabled.
TEST_F(TestCast, TimeToCompatible) {
  CastOptions options;

  // Slot 1 is null in every array built below.
  std::vector<bool> is_valid = {true, false, true, true, true};

  // Multiply promotions
  // seconds -> milliseconds (x 10^3), both time32
  std::vector<int32_t> v1 = {0, 100, 200, 1, 2};
  std::vector<int32_t> e1 = {0, 100000, 200000, 1000, 2000};
  CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
      time32(TimeUnit::SECOND), v1, is_valid, time32(TimeUnit::MILLI), e1, options);

  // seconds -> microseconds (x 10^6), time32 -> time64
  std::vector<int32_t> v2 = {0, 100, 200, 1, 2};
  std::vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
      time32(TimeUnit::SECOND), v2, is_valid, time64(TimeUnit::MICRO), e2, options);

  // seconds -> nanoseconds (x 10^9), time32 -> time64
  std::vector<int32_t> v3 = {0, 100, 200, 1, 2};
  std::vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
      time32(TimeUnit::SECOND), v3, is_valid, time64(TimeUnit::NANO), e3, options);

  // milliseconds -> microseconds (x 10^3), time32 -> time64
  std::vector<int32_t> v4 = {0, 100, 200, 1, 2};
  std::vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
      time32(TimeUnit::MILLI), v4, is_valid, time64(TimeUnit::MICRO), e4, options);

  // milliseconds -> nanoseconds (x 10^6), time32 -> time64
  std::vector<int32_t> v5 = {0, 100, 200, 1, 2};
  std::vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
      time32(TimeUnit::MILLI), v5, is_valid, time64(TimeUnit::NANO), e5, options);

  // microseconds -> nanoseconds (x 10^3), both time64
  std::vector<int64_t> v6 = {0, 100, 200, 1, 2};
  std::vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
  CheckCase<Time64Type, int64_t, Time64Type, int64_t>(
      time64(TimeUnit::MICRO), v6, is_valid, time64(TimeUnit::NANO), e6, options);

  // Zero copy
  std::vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
  std::shared_ptr<Array> arr;
  ArrayFromVector<Time64Type, int64_t>(time64(TimeUnit::MICRO), is_valid, v7, &arr);
  CheckZeroCopy(*arr, time64(TimeUnit::MICRO));

  // ARROW-1773: cast to int64
  CheckZeroCopy(*arr, int64());

  std::vector<int32_t> v7_2 = {0, 70000, 2000, 1000, 0};
  ArrayFromVector<Time32Type, int32_t>(time32(TimeUnit::SECOND), is_valid, v7_2, &arr);
  CheckZeroCopy(*arr, time32(TimeUnit::SECOND));

  // ARROW-1773: cast to int32
  CheckZeroCopy(*arr, int32());

  // Divide, truncate
  std::vector<int32_t> v8 = {0, 100123, 200456, 1123, 2456};
  std::vector<int32_t> e8 = {0, 100, 200, 1, 2};

  options.allow_time_truncate = true;
  CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
      time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), e8, options);
  // NOTE(review): the next two cases feed int32_t vectors to Time64Type arrays;
  // presumably the array-building helper widens the values -- confirm.
  CheckCase<Time64Type, int32_t, Time32Type, int32_t>(
      time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), e8, options);
  CheckCase<Time64Type, int32_t, Time64Type, int32_t>(
      time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), e8, options);

  // Division by 10^6
  std::vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
  std::vector<int32_t> e9 = {0, 100, 200, 1, 2};
  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
      time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), e9, options);
  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
      time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), e9, options);

  // Division by 10^9
  std::vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
  std::vector<int32_t> e10 = {0, 100, 200, 1, 2};
  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
      time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), e10, options);

  // Disallow truncate, failures
  // The same six dividing casts as above are now expected to fail.

  options.allow_time_truncate = false;
  CheckFails<Time32Type>(time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND),
                         options);
  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI),
                         options);
  CheckFails<Time64Type>(time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO),
                         options);
  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND),
                         options);
  CheckFails<Time64Type>(time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI),
                         options);
  CheckFails<Time64Type>(time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND),
                         options);
}
1026
// Casts between date32 and date64: multiplying promotion, zero-copy identity
// and integer casts, and the dividing cast with and without allow_time_truncate.
TEST_F(TestCast, DateToCompatible) {
  CastOptions options;

  const std::vector<bool> is_valid = {true, false, true, true, true};

  // Scale factor between the date32 and date64 values used in this test
  // (86400000 = 24 * 60 * 60 * 1000).
  constexpr int64_t kScale = 86400000;

  // Multiply promotion
  const std::vector<int32_t> day_counts = {0, 100, 200, 1, 2};
  const std::vector<int64_t> scaled_days = {0, 100 * kScale, 200 * kScale, kScale,
                                            2 * kScale};
  CheckCase<Date32Type, int32_t, Date64Type, int64_t>(date32(), day_counts, is_valid,
                                                      date64(), scaled_days, options);

  // Zero copy
  const std::vector<int32_t> raw32 = {0, 70000, 2000, 1000, 0};
  const std::vector<int64_t> raw64 = {0, 70000, 2000, 1000, 0};
  std::shared_ptr<Array> arr;

  ArrayFromVector<Date32Type, int32_t>(date32(), is_valid, raw32, &arr);
  CheckZeroCopy(*arr, date32());
  // ARROW-1773: zero copy cast to integer
  CheckZeroCopy(*arr, int32());

  ArrayFromVector<Date64Type, int64_t>(date64(), is_valid, raw64, &arr);
  CheckZeroCopy(*arr, date64());
  // ARROW-1773: zero copy cast to integer
  CheckZeroCopy(*arr, int64());

  // Divide, truncate: date64 values that are not exact multiples of kScale.
  const std::vector<int64_t> inexact = {0, 100 * kScale + 123, 200 * kScale + 456,
                                        kScale + 123, 2 * kScale + 456};
  const std::vector<int32_t> truncated = {0, 100, 200, 1, 2};

  options.allow_time_truncate = true;
  CheckCase<Date64Type, int64_t, Date32Type, int32_t>(date64(), inexact, is_valid,
                                                      date32(), truncated, options);

  // Disallow truncate, failures
  options.allow_time_truncate = false;
  CheckFails<Date64Type>(date64(), inexact, is_valid, date32(), options);
}
1068
// Duration -> duration casts across time units: multiplying promotions,
// zero-copy identity/int64 casts, dividing casts with and without
// allow_time_truncate, and multiplication overflow detection.
TEST_F(TestCast, DurationToCompatible) {
  using Values = std::vector<int64_t>;

  CastOptions options;
  const std::vector<bool> is_valid = {true, false, true, true, true};

  auto CheckDurationCast = [this, &is_valid](const CastOptions& cast_options,
                                             TimeUnit::type from_unit,
                                             TimeUnit::type to_unit, const Values& input,
                                             const Values& expected) {
    CheckCase<DurationType, int64_t, DurationType, int64_t>(
        duration(from_unit), input, is_valid, duration(to_unit), expected, cast_options);
  };

  // One (source unit, target unit, values) row of a table-driven check.
  struct UnitCase {
    TimeUnit::type from_unit;
    TimeUnit::type to_unit;
    const Values* values;
  };

  // Multiply promotions: the same base input is expected to be scaled by
  // 10^3, 10^6 or 10^9 depending on the distance between the units.
  const Values base = {0, 100, 200, 1, 2};
  const Values base_x1e3 = {0, 100000, 200000, 1000, 2000};
  const Values base_x1e6 = {0, 100000000L, 200000000L, 1000000, 2000000};
  const Values base_x1e9 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};

  const UnitCase promotions[] = {
      {TimeUnit::SECOND, TimeUnit::MILLI, &base_x1e3},
      {TimeUnit::SECOND, TimeUnit::MICRO, &base_x1e6},
      {TimeUnit::SECOND, TimeUnit::NANO, &base_x1e9},
      {TimeUnit::MILLI, TimeUnit::MICRO, &base_x1e3},
      {TimeUnit::MILLI, TimeUnit::NANO, &base_x1e6},
      {TimeUnit::MICRO, TimeUnit::NANO, &base_x1e3},
  };
  for (const UnitCase& promotion : promotions) {
    CheckDurationCast(options, promotion.from_unit, promotion.to_unit, base,
                      *promotion.values);
  }

  // Zero copy
  const Values zero_copy_values = {0, 70000, 2000, 1000, 0};
  std::shared_ptr<Array> arr;
  ArrayFromVector<DurationType, int64_t>(duration(TimeUnit::SECOND), is_valid,
                                         zero_copy_values, &arr);
  CheckZeroCopy(*arr, duration(TimeUnit::SECOND));
  CheckZeroCopy(*arr, int64());

  // Divide, truncate: inputs that are not exact multiples of the divisor.
  const Values frac_1e3 = {0, 100123, 200456, 1123, 2456};
  const Values frac_1e6 = {0, 100123000, 200456000, 1123000, 2456000};
  const Values frac_1e9 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};

  const UnitCase lossy_casts[] = {
      {TimeUnit::MILLI, TimeUnit::SECOND, &frac_1e3},
      {TimeUnit::MICRO, TimeUnit::MILLI, &frac_1e3},
      {TimeUnit::NANO, TimeUnit::MICRO, &frac_1e3},
      {TimeUnit::MICRO, TimeUnit::SECOND, &frac_1e6},
      {TimeUnit::NANO, TimeUnit::MILLI, &frac_1e6},
      {TimeUnit::NANO, TimeUnit::SECOND, &frac_1e9},
  };

  // With truncation allowed every lossy cast yields the base values.
  options.allow_time_truncate = true;
  for (const UnitCase& lossy : lossy_casts) {
    CheckDurationCast(options, lossy.from_unit, lossy.to_unit, *lossy.values, base);
  }

  // Disallow truncate, failures
  options.allow_time_truncate = false;
  for (const UnitCase& lossy : lossy_casts) {
    CheckFails<DurationType>(duration(lossy.from_unit), *lossy.values, is_valid,
                             duration(lossy.to_unit), options);
  }

  // Multiply overflow

  // 10^10 seconds multiplied by 10^9 does not fit in a signed 64-bit integer.
  const Values huge_seconds = {10000000000, 1, 2, 3, 10000000000};

  options.allow_time_overflow = false;
  CheckFails<DurationType>(duration(TimeUnit::SECOND), huge_seconds, is_valid,
                           duration(TimeUnit::NANO), options);
}
1157
// Casts int16, float32 and boolean arrays to float64.
TEST_F(TestCast, ToDouble) {
  CastOptions options;
  const std::vector<bool> is_valid = {true, false, true, true, true};

  // The int16 and float inputs hold the same numbers, so they share one
  // expected output.
  const std::vector<double> expected_numbers = {0, 100, 200, 1, 2};

  // int16 to double
  const std::vector<int16_t> int16_input = {0, 100, 200, 1, 2};
  CheckCase<Int16Type, int16_t, DoubleType, double>(int16(), int16_input, is_valid,
                                                    float64(), expected_numbers, options);

  // float to double
  const std::vector<float> float_input = {0, 100, 200, 1, 2};
  CheckCase<FloatType, float, DoubleType, double>(float32(), float_input, is_valid,
                                                  float64(), expected_numbers, options);

  // bool to double
  const std::vector<bool> bool_input = {true, true, false, false, true};
  const std::vector<double> expected_flags = {1, 1, 0, 0, 1};
  CheckCase<BooleanType, bool, DoubleType, double>(boolean(), bool_input, is_valid,
                                                   float64(), expected_flags, options);
}
1180
// Casting a two-chunk int16 ChunkedArray to int64 must yield a Datum of kind
// CHUNKED_ARRAY whose contents equal the expected int64 chunks.
TEST_F(TestCast, ChunkedArray) {
  std::vector<int16_t> values1 = {0, 1, 2};
  std::vector<int16_t> values2 = {3, 4, 5};

  auto type = int16();
  auto out_type = int64();

  auto a1 = _MakeArray<Int16Type, int16_t>(type, values1, {});
  auto a2 = _MakeArray<Int16Type, int16_t>(type, values2, {});

  ArrayVector arrays = {a1, a2};
  auto carr = std::make_shared<ChunkedArray>(arrays);

  CastOptions options;

  ASSERT_OK_AND_ASSIGN(Datum out, Cast(carr, out_type, options));
  ASSERT_EQ(Datum::CHUNKED_ARRAY, out.kind());

  auto out_carr = out.chunked_array();

  std::vector<int64_t> ex_values1 = {0, 1, 2};
  std::vector<int64_t> ex_values2 = {3, 4, 5};
  auto a3 = _MakeArray<Int64Type, int64_t>(out_type, ex_values1, {});
  auto a4 = _MakeArray<Int64Type, int64_t>(out_type, ex_values2, {});

  ArrayVector ex_arrays = {a3, a4};
  auto ex_carr = std::make_shared<ChunkedArray>(ex_arrays);

  // Compare through the handle extracted above rather than unwrapping the
  // Datum a second time (the local was previously left unused).
  ASSERT_TRUE(out_carr->Equals(*ex_carr));
}
1211
// Casting an int32 array to list(utf8) is expected to raise NotImplemented.
TEST_F(TestCast, UnsupportedTarget) {
  const std::vector<bool> validity = {true, false, true, true, true};
  const std::vector<int32_t> numbers = {0, 1, 2, 3, 4};

  std::shared_ptr<Array> int32_array;
  ArrayFromVector<Int32Type, int32_t>(int32(), validity, numbers, &int32_array);

  ASSERT_RAISES(NotImplemented, Cast(*int32_array, list(utf8())));
}
1221
// Integer arrays cast to same-width date/time types are checked with
// CheckZeroCopy: int32 -> time32/date32 and int64 -> time64/date64/timestamp/duration.
TEST_F(TestCast, DateTimeZeroCopy) {
  using TypeList = std::vector<std::shared_ptr<DataType>>;

  const std::vector<bool> is_valid = {true, false, true, true, true};
  std::shared_ptr<Array> arr;

  // 32-bit targets
  const std::vector<int32_t> values32 = {0, 70000, 2000, 1000, 0};
  ArrayFromVector<Int32Type, int32_t>(int32(), is_valid, values32, &arr);
  for (const auto& target : TypeList{time32(TimeUnit::SECOND), date32()}) {
    CheckZeroCopy(*arr, target);
  }

  // 64-bit targets
  const std::vector<int64_t> values64 = {0, 70000, 2000, 1000, 0};
  ArrayFromVector<Int64Type, int64_t>(int64(), is_valid, values64, &arr);
  for (const auto& target : TypeList{time64(TimeUnit::MICRO), date64(),
                                     timestamp(TimeUnit::NANO),
                                     duration(TimeUnit::MILLI)}) {
    CheckZeroCopy(*arr, target);
  }
}
1240
// String -> boolean casts accept mixed-case "true"/"false" spellings and the
// digits "0"/"1"; the word form is also checked for large_utf8 input.
TEST_F(TestCast, StringToBoolean) {
  CastOptions options;

  const std::vector<bool> is_valid = {true, false, true, true, true};
  const std::vector<bool> expected = {false, true, true, true, false};

  const std::vector<std::string> words = {"False", "true", "true", "True", "false"};
  const std::vector<std::string> digits = {"0", "1", "1", "1", "0"};

  for (const auto* input : {&words, &digits}) {
    CheckCase<StringType, std::string, BooleanType, bool>(utf8(), *input, is_valid,
                                                          boolean(), expected, options);
  }

  // Same with LargeStringType
  CheckCase<LargeStringType, std::string, BooleanType, bool>(
      large_utf8(), words, is_valid, boolean(), expected, options);
}
1258
// Strings that are not an accepted boolean spelling must fail to cast.
TEST_F(TestCast, StringToBooleanErrors) {
  CastOptions options;
  const std::vector<bool> is_valid = {true};

  const std::vector<std::string> trailing_space = {"false "};
  const std::vector<std::string> single_letter = {"T"};

  CheckFails<StringType, std::string>(utf8(), trailing_space, is_valid, boolean(),
                                      options);
  CheckFails<StringType, std::string>(utf8(), single_letter, is_valid, boolean(),
                                      options);
  CheckFails<LargeStringType, std::string>(large_utf8(), single_letter, is_valid,
                                           boolean(), options);
}
1269
// Delegates to the shared TestCastStringToNumber helper, instantiated for StringType.
TEST_F(TestCast, StringToNumber) {
  TestCastStringToNumber<StringType>();
}
1271
// Delegates to the shared TestCastStringToNumber helper, instantiated for
// LargeStringType.
TEST_F(TestCast, LargeStringToNumber) {
  TestCastStringToNumber<LargeStringType>();
}
1273
// Strings that cannot be cast to the requested numeric type must fail:
// non-numeric text, trailing garbage, out-of-range values and fractional
// input for integer targets.
TEST_F(TestCast, StringToNumberErrors) {
  CastOptions options;
  const std::vector<bool> is_valid = {true};

  // int8: the range boundaries are exceeded by "128" and "-129".
  const std::vector<std::string> bad_int8 = {"z", "12 z", "128", "-129", "0.5"};
  for (const std::string& text : bad_int8) {
    CheckFails<StringType, std::string>(utf8(), {text}, is_valid, int8(), options);
  }

  // uint8: one value above the maximum and one negative value.
  const std::vector<std::string> bad_uint8 = {"256", "-1"};
  for (const std::string& text : bad_uint8) {
    CheckFails<StringType, std::string>(utf8(), {text}, is_valid, uint8(), options);
  }

  // float32: non-numeric text.
  const std::vector<std::string> bad_float32 = {"z"};
  CheckFails<StringType, std::string>(utf8(), bad_float32, is_valid, float32(), options);
}
1290
// Delegates to the shared TestCastStringToTimestamp helper, instantiated for
// StringType.
TEST_F(TestCast, StringToTimestamp) {
  TestCastStringToTimestamp<StringType>();
}
1292
// Delegates to the shared TestCastStringToTimestamp helper, instantiated for
// LargeStringType.
TEST_F(TestCast, LargeStringToTimestamp) {
  TestCastStringToTimestamp<LargeStringType>();
}
1294
// An empty string and a non-date string must fail to cast to a timestamp of
// any unit.
TEST_F(TestCast, StringToTimestampErrors) {
  CastOptions options;
  const std::vector<bool> is_valid = {true};

  const std::vector<std::string> unparseable = {"", "xxx"};

  for (auto unit : {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
    const auto target_type = timestamp(unit);
    for (const std::string& text : unparseable) {
      CheckFails<StringType, std::string>(utf8(), {text}, is_valid, target_type, options);
    }
  }
}
1306
// Delegates to the shared TestCastBinaryToString helper for the
// BinaryType -> StringType pair.
TEST_F(TestCast, BinaryToString) {
  TestCastBinaryToString<BinaryType, StringType>();
}
1308
// Delegates to the shared TestCastBinaryToString helper for the
// LargeBinaryType -> LargeStringType pair.
TEST_F(TestCast, LargeBinaryToLargeString) {
  using SourceType = LargeBinaryType;
  using TargetType = LargeStringType;
  TestCastBinaryToString<SourceType, TargetType>();
}
1312
// Delegates to the shared TestCastNumberToString helper, instantiated for StringType.
TEST_F(TestCast, NumberToString) {
  TestCastNumberToString<StringType>();
}
1314
// Delegates to the shared TestCastNumberToString helper, instantiated for
// LargeStringType.
TEST_F(TestCast, NumberToLargeString) {
  TestCastNumberToString<LargeStringType>();
}
1316
// Delegates to the shared TestCastBooleanToString helper, instantiated for StringType.
TEST_F(TestCast, BooleanToString) {
  TestCastBooleanToString<StringType>();
}
1318
// Delegates to the shared TestCastBooleanToString helper, instantiated for
// LargeStringType.
TEST_F(TestCast, BooleanToLargeString) {
  TestCastBooleanToString<LargeStringType>();
}
1320
// Casting a list array to a primitive (non-list) type is expected to raise
// NotImplemented.
TEST_F(TestCast, ListToPrimitive) {
  const auto int8_lists = ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]");
  ASSERT_RAISES(NotImplemented, Cast(*int8_lists, uint8()));

  const auto binary_lists =
      ArrayFromJSON(list(binary()), R"([["1", "2"], ["3", "4"]])");
  ASSERT_RAISES(NotImplemented, Cast(*binary_lists, utf8()));
}
1328
// Casts between list<int32>, list<int64> and list<float64> arrays that share
// the same offsets and hold element-wise-cast copies of the same random values.
TEST_F(TestCast, ListToList) {
  CastOptions options;

  // Offsets array with one null entry (index 4).
  const std::vector<int32_t> raw_offsets = {0, 1, 2, 5, 7, 7, 8, 10};
  const std::vector<bool> offsets_validity = {true, true,  true, true,
                                              false, true, true, true};
  std::shared_ptr<Array> offsets;
  ArrayFromVector<Int32Type, int32_t>(offsets_validity, raw_offsets, &offsets);

  // int32 child values and the list built on top of them.
  std::shared_ptr<Array> int32_values =
      TestBase::MakeRandomArray<typename TypeTraits<Int32Type>::ArrayType>(10, 2);
  ASSERT_OK_AND_ASSIGN(auto int32_lists,
                       ListArray::FromArrays(*offsets, *int32_values, pool_));

  // The same child values cast to int64.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> int64_values,
                       Cast(*int32_values, int64(), options));
  ASSERT_OK_AND_ASSIGN(auto int64_lists,
                       ListArray::FromArrays(*offsets, *int64_values, pool_));

  // The same child values cast to float64.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> float64_values,
                       Cast(*int32_values, float64(), options));
  ASSERT_OK_AND_ASSIGN(auto float64_lists,
                       ListArray::FromArrays(*offsets, *float64_values, pool_));

  // Integer sources, default options.
  CheckPass(*int32_lists, *int64_lists, int64_lists->type(), options);
  CheckPass(*int32_lists, *float64_lists, float64_lists->type(), options);
  CheckPass(*int64_lists, *int32_lists, int32_lists->type(), options);
  CheckPass(*int64_lists, *float64_lists, float64_lists->type(), options);

  // Floating-point source, with float truncation enabled.
  options.allow_float_truncate = true;
  CheckPass(*float64_lists, *int32_lists, int32_lists->type(), options);
  CheckPass(*float64_lists, *int64_lists, int64_lists->type(), options);
}
1361
// Like ListToList above, only testing the basics: large_list<int32> <->
// large_list<float64> over shared 64-bit offsets.
TEST_F(TestCast, LargeListToLargeList) {
  CastOptions options;

  // Offsets array with one null entry (index 4).
  const std::vector<int64_t> raw_offsets = {0, 1, 2, 5, 7, 7, 8, 10};
  const std::vector<bool> offsets_validity = {true, true,  true, true,
                                              false, true, true, true};
  std::shared_ptr<Array> offsets;
  ArrayFromVector<Int64Type, int64_t>(offsets_validity, raw_offsets, &offsets);

  std::shared_ptr<Array> int32_values =
      TestBase::MakeRandomArray<typename TypeTraits<Int32Type>::ArrayType>(10, 2);
  ASSERT_OK_AND_ASSIGN(auto int32_lists,
                       LargeListArray::FromArrays(*offsets, *int32_values, pool_));

  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> float64_values,
                       Cast(*int32_values, float64(), options));
  ASSERT_OK_AND_ASSIGN(auto float64_lists,
                       LargeListArray::FromArrays(*offsets, *float64_values, pool_));

  CheckPass(*int32_lists, *float64_lists, float64_lists->type(), options);

  // The reverse direction is checked with float truncation enabled.
  options.allow_float_truncate = true;
  CheckPass(*float64_lists, *int32_lists, int32_lists->type(), options);
}
1386
// ARROW-4102: casting an array to its own type is checked with CheckZeroCopy
// for null, boolean, numeric, binary-like, list, temporal and dictionary types.
TEST_F(TestCast, IdentityCasts) {
  auto CheckIdentityCast = [this](const std::shared_ptr<DataType>& type,
                                  const std::string& json) {
    const auto array = ArrayFromJSON(type, json);
    CheckZeroCopy(*array, type);
  };

  // A type together with the JSON text used to build the array under test.
  struct JsonCase {
    std::shared_ptr<DataType> type;
    const char* json;
  };

  const std::vector<JsonCase> leading_cases = {
      {null(), "[null, null, null]"},
      {boolean(), "[false, true, null, false]"},
  };
  for (const JsonCase& entry : leading_cases) {
    CheckIdentityCast(entry.type, entry.json);
  }

  for (const auto& numeric_type : kNumericTypes) {
    CheckIdentityCast(numeric_type, "[1, 2, null, 4]");
  }

  const std::vector<JsonCase> trailing_cases = {
      {binary(), "[\"foo\", \"bar\"]"},
      {utf8(), "[\"foo\", \"bar\"]"},
      {fixed_size_binary(3), "[\"foo\", \"bar\"]"},
      {list(int8()), "[[1, 2], [null], [], [3]]"},
      {time32(TimeUnit::MILLI), "[1, 2, 3, 4]"},
      {time64(TimeUnit::MICRO), "[1, 2, 3, 4]"},
      {date32(), "[1, 2, 3, 4]"},
      {date64(), "[86400000, 0]"},
      {timestamp(TimeUnit::SECOND), "[1, 2, 3, 4]"},
  };
  for (const JsonCase& entry : trailing_cases) {
    CheckIdentityCast(entry.type, entry.json);
  }

  // Dictionary arrays cannot go through the JSON helper above, so build one by hand.
  {
    auto values = ArrayFromJSON(int8(), "[1, 2, 3]");
    auto dictionary_type = dictionary(int8(), values->type());
    auto indices = ArrayFromJSON(int8(), "[0, 1, 2, 0, null, 2]");
    auto dictionary_array =
        std::make_shared<DictionaryArray>(dictionary_type, indices, values);
    CheckZeroCopy(*dictionary_array, dictionary_type);
  }
}
1422
// ARROW-4766: 0-length arrays should not segfault. Casts zero-length arrays
// backed by null buffers between boolean and every numeric type.
TEST_F(TestCast, EmptyCasts) {
  auto CheckEmptyCast = [this](const std::shared_ptr<DataType>& from,
                               const std::shared_ptr<DataType>& to) {
    CastOptions options;

    // Python creates array with nullptr instead of 0-length (valid) buffers.
    auto data = ArrayData::Make(from, /* length */ 0, /* buffers */ {nullptr, nullptr});
    auto input = MakeArray(data);
    auto expected = ArrayFromJSON(to, "[]");
    // Pass the options declared above instead of a second default-constructed
    // temporary (the local was previously unused).
    CheckPass(*input, *expected, to, options);
  };

  // Iterate by reference to avoid copying each shared_ptr.
  for (const auto& numeric : kNumericTypes) {
    CheckEmptyCast(boolean(), numeric);
    CheckEmptyCast(numeric, boolean());
  }
}
1441
1442 // ----------------------------------------------------------------------
1443 // Test casting from NullType
1444
1445 template <typename TestType>
1446 class TestNullCast : public TestCast {};
1447
1448 typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
1449 UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
1450 Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
1451 TestTypes;
1452
1453 TYPED_TEST_SUITE(TestNullCast, TestTypes);
1454
TYPED_TEST(TestNullCast, FromNull) {
  // Null casts to everything: the result must validate, carry the requested
  // type, keep the input length and consist entirely of nulls.
  constexpr int kLength = 10;

  // Hack to get a DataType including for parametric types: make an empty
  // random array of the target array class and take its type.
  using TargetArray = typename TypeTraits<TypeParam>::ArrayType;
  std::shared_ptr<DataType> target_type =
      TestBase::MakeRandomArray<TargetArray>(0, 0)->type();

  NullArray nulls(kLength);

  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> casted, Cast(nulls, target_type));
  ASSERT_OK(casted->ValidateFull());

  ASSERT_TRUE(casted->type()->Equals(*target_type));
  ASSERT_EQ(kLength, casted->length());
  ASSERT_EQ(kLength, casted->null_count());
}
1472
1473 // ----------------------------------------------------------------------
1474 // Test casting from DictionaryType
1475
1476 template <typename TestType>
1477 class TestDictionaryCast : public TestCast {};
1478
1479 typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
1480 UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
1481 Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
1482 TestTypes;
1483
1484 TYPED_TEST_SUITE(TestDictionaryCast, TestTypes);
1485
TYPED_TEST(TestDictionaryCast, Basic) {
  // Dictionary-encode a random array, then check that casting the encoded
  // array to the plain value type gives back the original array.
  using PlainArrayType = typename TypeTraits<TypeParam>::ArrayType;
  std::shared_ptr<Array> plain = TestBase::MakeRandomArray<PlainArrayType>(10, 2);

  ASSERT_OK_AND_ASSIGN(Datum dict_datum, DictionaryEncode(plain->data()));
  ASSERT_EQ(dict_datum.array()->type->id(), Type::DICTIONARY);

  // Default cast options are sufficient here.
  CastOptions default_options;
  const auto dict_array = MakeArray(dict_datum.array());
  this->CheckPass(*dict_array, *plain, plain->type(), default_options);
}
1497
TYPED_TEST(TestDictionaryCast, NoNulls) {
  // Test with a nullptr bitmap buffer (ARROW-3208)
  const bool is_null_type = (TypeParam::type_id == Type::NA);
  if (is_null_type) {
    // Skip, but gtest doesn't support skipping :-/
    return;
  }

  using PlainArrayType = typename TypeTraits<TypeParam>::ArrayType;
  std::shared_ptr<Array> plain = TestBase::MakeRandomArray<PlainArrayType>(10, 0);
  ASSERT_EQ(plain->null_count(), 0);

  // Dict-encode the plain array
  ASSERT_OK_AND_ASSIGN(Datum dict_datum, DictionaryEncode(plain->data()));

  // Make a new dict array with nullptr bitmap buffer: copy the encoded
  // ArrayData, drop buffer 0 and record that there are no nulls.
  auto stripped = dict_datum.array()->Copy();
  stripped->buffers[0] = nullptr;
  stripped->null_count = 0;
  std::shared_ptr<Array> dict_without_bitmap =
      std::make_shared<DictionaryArray>(stripped);
  ASSERT_OK(dict_without_bitmap->ValidateFull());

  // Casting back to the plain type must reproduce the original values.
  CastOptions default_options;
  this->CheckPass(*dict_without_bitmap, *plain, plain->type(), default_options);
}
1522
1523 // TODO: See how this might cause problems post-refactor
TYPED_TEST(TestDictionaryCast,DISABLED_OutTypeError)1524 TYPED_TEST(TestDictionaryCast, DISABLED_OutTypeError) {
1525 // ARROW-7077: unsupported out type should return an error
1526 std::shared_ptr<Array> plain_array =
1527 TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(0, 0);
1528 auto in_type = dictionary(int32(), plain_array->type());
1529
1530 auto out_type = (plain_array->type()->id() == Type::INT8) ? binary() : int8();
1531 // Test an output type that's not part of TestTypes.
1532 out_type = list(in_type);
1533 ASSERT_RAISES(NotImplemented, GetCastFunction(out_type));
1534 }
1535
SmallintArrayFromJSON(const std::string & json_data)1536 std::shared_ptr<Array> SmallintArrayFromJSON(const std::string& json_data) {
1537 auto arr = ArrayFromJSON(int16(), json_data);
1538 auto ext_data = arr->data()->Copy();
1539 ext_data->type = smallint();
1540 return MakeArray(ext_data);
1541 }
1542
TEST_F(TestCast, ExtensionTypeToIntDowncast) {
  // Casts arrays of the smallint extension type to plain integer types.
  // The extension type is registered at the start of the test and
  // unregistered again at the end.
  auto smallint = std::make_shared<SmallintType>();
  ASSERT_OK(RegisterExtensionType(smallint));

  CastOptions options;
  options.allow_int_overflow = false;

  // Smallint(int16) to int16
  auto v0 = SmallintArrayFromJSON("[0, 100, 200, 1, 2]");
  CheckZeroCopy(*v0, int16());

  // Smallint(int16) to uint8, no overflow/underrun
  auto v1 = SmallintArrayFromJSON("[0, 100, 200, 1, 2]");
  auto e1 = ArrayFromJSON(uint8(), "[0, 100, 200, 1, 2]");
  CheckPass(*v1, *e1, uint8(), options);

  // Smallint(int16) to uint8, with overflow
  auto v2 = SmallintArrayFromJSON("[0, null, 256, 1, 3]");
  auto e2 = ArrayFromJSON(uint8(), "[0, null, 0, 1, 3]");
  // allow overflow: 256 is expected to wrap to 0
  options.allow_int_overflow = true;
  CheckPass(*v2, *e2, uint8(), options);
  // disallow overflow: the same input must be rejected
  options.allow_int_overflow = false;
  ASSERT_RAISES(Invalid, Cast(*v2, uint8(), options));

  // Smallint(int16) to uint8, with underflow
  auto v3 = SmallintArrayFromJSON("[0, null, -1, 1, 0]");
  auto e3 = ArrayFromJSON(uint8(), "[0, null, 255, 1, 0]");
  // allow overflow: -1 is expected to wrap to 255
  options.allow_int_overflow = true;
  CheckPass(*v3, *e3, uint8(), options);
  // disallow overflow: the same input must be rejected
  options.allow_int_overflow = false;
  ASSERT_RAISES(Invalid, Cast(*v3, uint8(), options));

  ASSERT_OK(UnregisterExtensionType("smallint"));
}
1584
1585 } // namespace compute
1586 } // namespace arrow
1587