1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <cstdint>
19 #include <cstdio>
20 #include <functional>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 #include <gtest/gtest.h>
26 
27 #include "arrow/array.h"
28 #include "arrow/buffer.h"
29 #include "arrow/extension_type.h"
30 #include "arrow/memory_pool.h"
31 #include "arrow/status.h"
32 #include "arrow/table.h"
33 #include "arrow/testing/extension_type.h"
34 #include "arrow/testing/gtest_common.h"
35 #include "arrow/testing/gtest_util.h"
36 #include "arrow/testing/random.h"
37 #include "arrow/type.h"
38 #include "arrow/type_fwd.h"
39 #include "arrow/type_traits.h"
40 #include "arrow/util/checked_cast.h"
41 #include "arrow/util/decimal.h"
42 
43 #include "arrow/compute/api_vector.h"
44 #include "arrow/compute/cast.h"
45 #include "arrow/compute/kernel.h"
46 #include "arrow/compute/test_util.h"
47 
48 namespace arrow {
49 namespace compute {
50 
51 using internal::checked_cast;
52 
53 static constexpr const char* kInvalidUtf8 = "\xa0\xa1";
54 
55 static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
56     uint8(), int8(),   uint16(), int16(),   uint32(),
57     int32(), uint64(), int64(),  float32(), float64()};
58 
AssertBufferSame(const Array & left,const Array & right,int buffer_index)59 static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
60   ASSERT_EQ(left.data()->buffers[buffer_index].get(),
61             right.data()->buffers[buffer_index].get());
62 }
63 
64 class TestCast : public TestBase {
65  public:
CheckPass(const Array & input,const Array & expected,const std::shared_ptr<DataType> & out_type,const CastOptions & options)66   void CheckPass(const Array& input, const Array& expected,
67                  const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
68     ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, Cast(input, out_type, options));
69     ASSERT_OK(result->ValidateFull());
70     AssertArraysEqual(expected, *result, /*verbose=*/true);
71   }
72 
73   template <typename InType, typename I_TYPE>
CheckFails(const std::shared_ptr<DataType> & in_type,const std::vector<I_TYPE> & in_values,const std::vector<bool> & is_valid,const std::shared_ptr<DataType> & out_type,const CastOptions & options)74   void CheckFails(const std::shared_ptr<DataType>& in_type,
75                   const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
76                   const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
77     std::shared_ptr<Array> input;
78     if (is_valid.size() > 0) {
79       ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
80     } else {
81       ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
82     }
83     ASSERT_RAISES(Invalid, Cast(*input, out_type, options));
84   }
85 
CheckZeroCopy(const Array & input,const std::shared_ptr<DataType> & out_type)86   void CheckZeroCopy(const Array& input, const std::shared_ptr<DataType>& out_type) {
87     ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, Cast(input, out_type));
88     ASSERT_OK(result->ValidateFull());
89     ASSERT_EQ(input.data()->buffers.size(), result->data()->buffers.size());
90     for (size_t i = 0; i < input.data()->buffers.size(); ++i) {
91       AssertBufferSame(input, *result, static_cast<int>(i));
92     }
93   }
94 
95   template <typename InType, typename I_TYPE, typename OutType, typename O_TYPE>
CheckCase(const std::shared_ptr<DataType> & in_type,const std::vector<I_TYPE> & in_values,const std::vector<bool> & is_valid,const std::shared_ptr<DataType> & out_type,const std::vector<O_TYPE> & out_values,const CastOptions & options)96   void CheckCase(const std::shared_ptr<DataType>& in_type,
97                  const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
98                  const std::shared_ptr<DataType>& out_type,
99                  const std::vector<O_TYPE>& out_values, const CastOptions& options) {
100     ASSERT_EQ(in_values.size(), out_values.size());
101     std::shared_ptr<Array> input, expected;
102     if (is_valid.size() > 0) {
103       ASSERT_EQ(is_valid.size(), out_values.size());
104       ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
105       ArrayFromVector<OutType, O_TYPE>(out_type, is_valid, out_values, &expected);
106     } else {
107       ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
108       ArrayFromVector<OutType, O_TYPE>(out_type, out_values, &expected);
109     }
110     CheckPass(*input, *expected, out_type, options);
111 
112     // Check a sliced variant
113     if (input->length() > 1) {
114       CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
115     }
116   }
117 
CheckCaseJSON(const std::shared_ptr<DataType> & in_type,const std::shared_ptr<DataType> & out_type,const std::string & in_json,const std::string & expected_json,const CastOptions & options=CastOptions ())118   void CheckCaseJSON(const std::shared_ptr<DataType>& in_type,
119                      const std::shared_ptr<DataType>& out_type,
120                      const std::string& in_json, const std::string& expected_json,
121                      const CastOptions& options = CastOptions()) {
122     std::shared_ptr<Array> input = ArrayFromJSON(in_type, in_json);
123     std::shared_ptr<Array> expected = ArrayFromJSON(out_type, expected_json);
124     ASSERT_EQ(input->length(), expected->length());
125     CheckPass(*input, *expected, out_type, options);
126 
127     // Check a sliced variant
128     if (input->length() > 1) {
129       CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
130     }
131   }
132 
133   template <typename SourceType, typename DestType>
TestCastBinaryToString()134   void TestCastBinaryToString() {
135     CastOptions options;
136     auto src_type = TypeTraits<SourceType>::type_singleton();
137     auto dest_type = TypeTraits<DestType>::type_singleton();
138 
139     // All valid except the last one
140     std::vector<bool> all = {1, 1, 1, 1, 1};
141     std::vector<bool> valid = {1, 1, 1, 1, 0};
142     std::vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8};
143 
144     std::shared_ptr<Array> array;
145 
146     // Should accept when invalid but null.
147     ArrayFromVector<SourceType, std::string>(src_type, valid, strings, &array);
148     CheckZeroCopy(*array, dest_type);
149 
150     // Should refuse due to invalid utf8 payload
151     CheckFails<SourceType, std::string>(src_type, strings, all, dest_type, options);
152 
153     // Should accept due to option override
154     options.allow_invalid_utf8 = true;
155     CheckCase<SourceType, std::string, DestType, std::string>(
156         src_type, strings, all, dest_type, strings, options);
157   }
158 
159   template <typename DestType>
TestCastNumberToString()160   void TestCastNumberToString() {
161     auto dest_type = TypeTraits<DestType>::type_singleton();
162 
163     CheckCaseJSON(int8(), dest_type, "[0, 1, 127, -128, null]",
164                   R"(["0", "1", "127", "-128", null])");
165     CheckCaseJSON(uint8(), dest_type, "[0, 1, 255, null]", R"(["0", "1", "255", null])");
166     CheckCaseJSON(int16(), dest_type, "[0, 1, 32767, -32768, null]",
167                   R"(["0", "1", "32767", "-32768", null])");
168     CheckCaseJSON(uint16(), dest_type, "[0, 1, 65535, null]",
169                   R"(["0", "1", "65535", null])");
170     CheckCaseJSON(int32(), dest_type, "[0, 1, 2147483647, -2147483648, null]",
171                   R"(["0", "1", "2147483647", "-2147483648", null])");
172     CheckCaseJSON(uint32(), dest_type, "[0, 1, 4294967295, null]",
173                   R"(["0", "1", "4294967295", null])");
174     CheckCaseJSON(int64(), dest_type,
175                   "[0, 1, 9223372036854775807, -9223372036854775808, null]",
176                   R"(["0", "1", "9223372036854775807", "-9223372036854775808", null])");
177     CheckCaseJSON(uint64(), dest_type, "[0, 1, 18446744073709551615, null]",
178                   R"(["0", "1", "18446744073709551615", null])");
179 
180     CheckCaseJSON(float32(), dest_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]",
181                   R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])");
182     CheckCaseJSON(float64(), dest_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]",
183                   R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])");
184   }
185 
186   template <typename DestType>
TestCastBooleanToString()187   void TestCastBooleanToString() {
188     auto dest_type = TypeTraits<DestType>::type_singleton();
189 
190     CheckCaseJSON(boolean(), dest_type, "[true, true, false, null]",
191                   R"(["true", "true", "false", null])");
192   }
193 
194   template <typename SourceType>
TestCastStringToNumber()195   void TestCastStringToNumber() {
196     CastOptions options;
197     auto src_type = TypeTraits<SourceType>::type_singleton();
198 
199     std::vector<bool> is_valid = {true, false, true, true, true};
200 
201     // string to int
202     std::vector<std::string> v_int = {"0", "1", "127", "-1", "0"};
203     std::vector<int8_t> e_int8 = {0, 1, 127, -1, 0};
204     std::vector<int16_t> e_int16 = {0, 1, 127, -1, 0};
205     std::vector<int32_t> e_int32 = {0, 1, 127, -1, 0};
206     std::vector<int64_t> e_int64 = {0, 1, 127, -1, 0};
207     CheckCase<SourceType, std::string, Int8Type, int8_t>(src_type, v_int, is_valid,
208                                                          int8(), e_int8, options);
209     CheckCase<SourceType, std::string, Int16Type, int16_t>(src_type, v_int, is_valid,
210                                                            int16(), e_int16, options);
211     CheckCase<SourceType, std::string, Int32Type, int32_t>(src_type, v_int, is_valid,
212                                                            int32(), e_int32, options);
213     CheckCase<SourceType, std::string, Int64Type, int64_t>(src_type, v_int, is_valid,
214                                                            int64(), e_int64, options);
215 
216     v_int = {"2147483647", "0", "-2147483648", "0", "0"};
217     e_int32 = {2147483647, 0, -2147483648LL, 0, 0};
218     CheckCase<SourceType, std::string, Int32Type, int32_t>(src_type, v_int, is_valid,
219                                                            int32(), e_int32, options);
220     v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"};
221     e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0};
222     CheckCase<SourceType, std::string, Int64Type, int64_t>(src_type, v_int, is_valid,
223                                                            int64(), e_int64, options);
224 
225     // string to uint
226     std::vector<std::string> v_uint = {"0", "1", "127", "255", "0"};
227     std::vector<uint8_t> e_uint8 = {0, 1, 127, 255, 0};
228     std::vector<uint16_t> e_uint16 = {0, 1, 127, 255, 0};
229     std::vector<uint32_t> e_uint32 = {0, 1, 127, 255, 0};
230     std::vector<uint64_t> e_uint64 = {0, 1, 127, 255, 0};
231     CheckCase<SourceType, std::string, UInt8Type, uint8_t>(src_type, v_uint, is_valid,
232                                                            uint8(), e_uint8, options);
233     CheckCase<SourceType, std::string, UInt16Type, uint16_t>(src_type, v_uint, is_valid,
234                                                              uint16(), e_uint16, options);
235     CheckCase<SourceType, std::string, UInt32Type, uint32_t>(src_type, v_uint, is_valid,
236                                                              uint32(), e_uint32, options);
237     CheckCase<SourceType, std::string, UInt64Type, uint64_t>(src_type, v_uint, is_valid,
238                                                              uint64(), e_uint64, options);
239 
240     v_uint = {"4294967295", "0", "0", "0", "0"};
241     e_uint32 = {4294967295, 0, 0, 0, 0};
242     CheckCase<SourceType, std::string, UInt32Type, uint32_t>(src_type, v_uint, is_valid,
243                                                              uint32(), e_uint32, options);
244     v_uint = {"18446744073709551615", "0", "0", "0", "0"};
245     e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0};
246     CheckCase<SourceType, std::string, UInt64Type, uint64_t>(src_type, v_uint, is_valid,
247                                                              uint64(), e_uint64, options);
248 
249     // string to float
250     std::vector<std::string> v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"};
251     std::vector<float> e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f};
252     std::vector<double> e_double = {0.1, 1.2, 127.3, 200.4, 0.5};
253     CheckCase<SourceType, std::string, FloatType, float>(src_type, v_float, is_valid,
254                                                          float32(), e_float, options);
255     CheckCase<SourceType, std::string, DoubleType, double>(src_type, v_float, is_valid,
256                                                            float64(), e_double, options);
257 
258 #if !defined(_WIN32) || defined(NDEBUG)
259     // Test that casting is locale-independent
260     {
261       // French locale uses the comma as decimal point
262       LocaleGuard locale_guard("fr_FR.UTF-8");
263       CheckCase<SourceType, std::string, FloatType, float>(src_type, v_float, is_valid,
264                                                            float32(), e_float, options);
265       CheckCase<SourceType, std::string, DoubleType, double>(
266           src_type, v_float, is_valid, float64(), e_double, options);
267     }
268 #endif
269   }
270 
271   template <typename SourceType>
TestCastStringToTimestamp()272   void TestCastStringToTimestamp() {
273     CastOptions options;
274     auto src_type = TypeTraits<SourceType>::type_singleton();
275 
276     std::vector<bool> is_valid = {true, false, true};
277     std::vector<std::string> strings = {"1970-01-01", "xxx", "2000-02-29"};
278 
279     auto type = timestamp(TimeUnit::SECOND);
280     std::vector<int64_t> e = {0, 0, 951782400};
281     CheckCase<SourceType, std::string, TimestampType, int64_t>(
282         src_type, strings, is_valid, type, e, options);
283 
284     type = timestamp(TimeUnit::MICRO);
285     e = {0, 0, 951782400000000LL};
286     CheckCase<SourceType, std::string, TimestampType, int64_t>(
287         src_type, strings, is_valid, type, e, options);
288 
289     // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
290   }
291 };
292 
// Casting an array to its own type must hand back the input's buffers.
TEST_F(TestCast, SameTypeZeroCopy) {
  std::shared_ptr<Array> input = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]");
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> casted, Cast(*input, int32()));

  // Compare the buffers at positions 0 and 1 by identity.
  for (int buffer_index : {0, 1}) {
    AssertBufferSame(*input, *casted, buffer_index);
  }
}
300 
// Casting a chunked array that has no chunks yields an equally empty chunked
// array of the target type.
TEST_F(TestCast, ZeroChunks) {
  auto empty_int32 = std::make_shared<ChunkedArray>(ArrayVector{}, int32());
  ASSERT_OK_AND_ASSIGN(Datum casted, Cast(empty_int32, utf8()));

  ASSERT_EQ(casted.kind(), Datum::CHUNKED_ARRAY);

  const ChunkedArray expected({}, utf8());
  AssertChunkedEqual(*casted.chunked_array(), expected);
}
308 
// boolean -> int32: true becomes 1 and false becomes 0, with one null slot.
TEST_F(TestCast, FromBoolean) {
  CastOptions options;

  constexpr size_t kLength = 20;
  std::vector<bool> is_valid(kLength, true);
  is_valid[3] = false;

  // Start from all-true / all-one, then clear every slot whose index is
  // congruent to 1 modulo 3 (1, 4, 7, ...).
  std::vector<bool> bools(kLength, true);
  std::vector<int32_t> expected_ints(kLength, 1);
  for (size_t i = 1; i < kLength; i += 3) {
    bools[i] = false;
    expected_ints[i] = 0;
  }

  CheckCase<BooleanType, bool, Int32Type, int32_t>(boolean(), bools, is_valid, int32(),
                                                   expected_ints, options);
}
327 
// Every numeric type casts to boolean as "zero is false, anything else is true";
// nulls stay null. CheckCaseJSON is called with its default CastOptions.
TEST_F(TestCast, ToBoolean) {
  // Iterate by const reference so no shared_ptr is copied per iteration.
  for (const auto& type : kNumericTypes) {
    CheckCaseJSON(type, boolean(), "[0, null, 127, 1, 0]",
                  "[false, null, true, true, false]");
  }

  // Check negative numbers
  CheckCaseJSON(int8(), boolean(), "[0, null, 127, -1, 0]",
                "[false, null, true, true, false]");
  CheckCaseJSON(float64(), boolean(), "[0, null, 127, -1, 0]",
                "[false, null, true, true, false]");
}
341 
// Casts into a strictly wider integer type succeed with overflow checking on.
// Slot 1 is null in every scenario.
TEST_F(TestCast, ToIntUpcast) {
  CastOptions options;
  options.allow_int_overflow = false;

  const std::vector<bool> is_valid = {true, false, true, true, true};

  {
    // int8 to int32
    const std::vector<int8_t> values = {0, 1, 127, -1, 0};
    const std::vector<int32_t> expected = {0, 1, 127, -1, 0};
    CheckCase<Int8Type, int8_t, Int32Type, int32_t>(int8(), values, is_valid, int32(),
                                                    expected, options);
  }

  {
    // bool to int8
    const std::vector<bool> values = {false, true, false, true, true};
    const std::vector<int8_t> expected = {0, 1, 0, 1, 1};
    CheckCase<BooleanType, bool, Int8Type, int8_t>(boolean(), values, is_valid, int8(),
                                                   expected, options);
  }

  {
    // uint8 to int16, no overflow/underrun
    const std::vector<uint8_t> values = {0, 100, 200, 255, 0};
    const std::vector<int16_t> expected = {0, 100, 200, 255, 0};
    CheckCase<UInt8Type, uint8_t, Int16Type, int16_t>(uint8(), values, is_valid, int16(),
                                                      expected, options);
  }
}
366 
// A value that does not fit the target type must not fail a checked cast when
// it sits in a slot that is marked null.
TEST_F(TestCast, OverflowInNullSlot) {
  CastOptions options;
  options.allow_int_overflow = false;

  const std::vector<bool> is_valid = {true, false, true, true, true};

  // 70000 is out of range for int16, but slot 1 is null.
  std::vector<int32_t> raw_values = {0, 70000, 2000, 1000, 0};
  const std::vector<int16_t> expected_values = {0, 0, 2000, 1000, 0};

  std::shared_ptr<Array> expected;
  ArrayFromVector<Int16Type, int16_t>(int16(), is_valid, expected_values, &expected);

  // Build the int32 input by hand from the raw values, reusing the expected
  // array's null bitmap so the out-of-range value stays in place under a null.
  auto values_buffer = Buffer::Wrap(raw_values.data(), raw_values.size());
  Int32Array input(5, values_buffer, expected->null_bitmap(), -1);

  CheckPass(input, *expected, int16(), options);
}
384 
// Narrowing integer casts with overflow checking on: values that fit are
// converted, anything out of range makes the cast fail. Slot 1 is always null.
TEST_F(TestCast, ToIntDowncastSafe) {
  CastOptions options;
  options.allow_int_overflow = false;

  const std::vector<bool> is_valid = {true, false, true, true, true};

  {
    // int16 to uint8, no overflow/underrun
    const std::vector<int16_t> values = {0, 100, 200, 1, 2};
    const std::vector<uint8_t> expected = {0, 100, 200, 1, 2};
    CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), values, is_valid, uint8(),
                                                      expected, options);
  }

  {
    // int16 to uint8, with overflow
    const std::vector<int16_t> values = {0, 100, 256, 0, 0};
    CheckFails<Int16Type>(int16(), values, is_valid, uint8(), options);
  }

  {
    // int16 to uint8, underflow
    const std::vector<int16_t> values = {0, 100, -1, 0, 0};
    CheckFails<Int16Type>(int16(), values, is_valid, uint8(), options);
  }

  {
    // int32 to int16, no overflow
    const std::vector<int32_t> values = {0, 1000, 2000, 1, 2};
    const std::vector<int16_t> expected = {0, 1000, 2000, 1, 2};
    CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), values, is_valid, int16(),
                                                      expected, options);
  }

  {
    // int32 to int16, overflow
    const std::vector<int32_t> values = {0, 1000, 2000, 70000, 0};
    CheckFails<Int32Type>(int32(), values, is_valid, int16(), options);
  }

  {
    // int32 to int16, underflow
    const std::vector<int32_t> values = {0, 1000, 2000, -70000, 0};
    CheckFails<Int32Type>(int32(), values, is_valid, int16(), options);
  }

  {
    // int32 to uint8, with the same negative out-of-range value
    const std::vector<int32_t> values = {0, 1000, 2000, -70000, 0};
    CheckFails<Int32Type>(int32(), values, is_valid, uint8(), options);
  }
}
422 
// Returns a vector holding static_cast<O>(x) for every x in `v`, in order.
// No range checking is performed: out-of-range values convert exactly the way
// static_cast<O> converts them.
template <typename O, typename I>
std::vector<O> UnsafeVectorCast(const std::vector<I>& v) {
  std::vector<O> converted;
  converted.reserve(v.size());
  for (const auto& value : v) {
    converted.push_back(static_cast<O>(value));
  }
  return converted;
}
432 
// Signed -> unsigned casts: rejected while overflow checking is on, and equal
// to a plain static_cast once overflow is allowed. Slot 1 is null.
TEST_F(TestCast, IntegerSignedToUnsigned) {
  CastOptions options;
  options.allow_int_overflow = false;

  const std::vector<bool> is_valid = {true, false, true, true, true};

  // Contains valid negative values (slots 0 and 2).
  const std::vector<int32_t> with_negatives = {INT32_MIN, 100, -1, UINT16_MAX,
                                               INT32_MAX};
  // The only negative value is in the null slot, so a failure here comes from
  // overflow (instead of underflow).
  const std::vector<int32_t> too_large = {0, -11, 0, UINT16_MAX + 1, INT32_MAX};

  // Same width
  CheckFails<Int32Type>(int32(), with_negatives, is_valid, uint32(), options);
  // Wider
  CheckFails<Int32Type>(int32(), with_negatives, is_valid, uint64(), options);
  // Narrower
  CheckFails<Int32Type>(int32(), with_negatives, is_valid, uint16(), options);
  CheckFails<Int32Type>(int32(), too_large, is_valid, uint16(), options);

  options.allow_int_overflow = true;

  // With overflow allowed, the expected output is whatever static_cast gives.
  const auto as_uint32 = UnsafeVectorCast<uint32_t, int32_t>(with_negatives);
  const auto as_uint64 = UnsafeVectorCast<uint64_t, int32_t>(with_negatives);
  const auto as_uint16 = UnsafeVectorCast<uint16_t, int32_t>(with_negatives);
  const auto too_large_as_uint16 = UnsafeVectorCast<uint16_t, int32_t>(too_large);

  CheckCase<Int32Type, int32_t, UInt32Type, uint32_t>(int32(), with_negatives, is_valid,
                                                      uint32(), as_uint32, options);
  CheckCase<Int32Type, int32_t, UInt64Type, uint64_t>(int32(), with_negatives, is_valid,
                                                      uint64(), as_uint64, options);
  CheckCase<Int32Type, int32_t, UInt16Type, uint16_t>(int32(), with_negatives, is_valid,
                                                      uint16(), as_uint16, options);
  CheckCase<Int32Type, int32_t, UInt16Type, uint16_t>(
      int32(), too_large, is_valid, uint16(), too_large_as_uint16, options);
}
463 
// Unsigned -> signed casts: rejected while overflow checking is on when a value
// does not fit, and equal to a plain static_cast once overflow is allowed.
TEST_F(TestCast, IntegerUnsignedToSigned) {
  CastOptions options;
  options.allow_int_overflow = false;

  const std::vector<bool> is_valid = {true, true, true};

  // UINT32_MAX fits neither int32 nor int16.
  const std::vector<uint32_t> beyond_int32 = {0, INT16_MAX + 1, UINT32_MAX};
  // INT16_MAX + 1 fits int32 but not int16.
  const std::vector<uint32_t> beyond_int16 = {0, INT16_MAX + 1, 2};

  // Same width
  CheckFails<UInt32Type>(uint32(), beyond_int32, is_valid, int32(), options);
  // Narrower
  CheckFails<UInt32Type>(uint32(), beyond_int32, is_valid, int16(), options);
  CheckFails<UInt32Type>(uint32(), beyond_int16, is_valid, int16(), options);

  options.allow_int_overflow = true;

  // With overflow allowed, the expected output is whatever static_cast gives.
  const auto as_int32 = UnsafeVectorCast<int32_t, uint32_t>(beyond_int32);
  const auto as_int64 = UnsafeVectorCast<int64_t, uint32_t>(beyond_int32);
  const auto as_int16 = UnsafeVectorCast<int16_t, uint32_t>(beyond_int32);
  const auto beyond_int16_as_int16 = UnsafeVectorCast<int16_t, uint32_t>(beyond_int16);

  CheckCase<UInt32Type, uint32_t, Int32Type, int32_t>(uint32(), beyond_int32, is_valid,
                                                      int32(), as_int32, options);
  CheckCase<UInt32Type, uint32_t, Int64Type, int64_t>(uint32(), beyond_int32, is_valid,
                                                      int64(), as_int64, options);
  CheckCase<UInt32Type, uint32_t, Int16Type, int16_t>(uint32(), beyond_int32, is_valid,
                                                      int16(), as_int16, options);
  CheckCase<UInt32Type, uint32_t, Int16Type, int16_t>(
      uint32(), beyond_int16, is_valid, int16(), beyond_int16_as_int16, options);
}
489 
// Narrowing integer casts with overflow allowed: out-of-range values wrap
// around instead of failing. Slot 1 is null in every scenario.
TEST_F(TestCast, ToIntDowncastUnsafe) {
  CastOptions options;
  options.allow_int_overflow = true;

  const std::vector<bool> is_valid = {true, false, true, true, true};

  {
    // int16 to uint8, no overflow/underrun
    const std::vector<int16_t> values = {0, 100, 200, 1, 2};
    const std::vector<uint8_t> expected = {0, 100, 200, 1, 2};
    CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), values, is_valid, uint8(),
                                                      expected, options);
  }

  {
    // int16 to uint8, with overflow: 256 wraps to 0
    const std::vector<int16_t> values = {0, 100, 256, 0, 0};
    const std::vector<uint8_t> expected = {0, 100, 0, 0, 0};
    CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), values, is_valid, uint8(),
                                                      expected, options);
  }

  {
    // int16 to uint8, underflow: -1 wraps to 255
    const std::vector<int16_t> values = {0, 100, -1, 0, 0};
    const std::vector<uint8_t> expected = {0, 100, 255, 0, 0};
    CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), values, is_valid, uint8(),
                                                      expected, options);
  }

  {
    // int32 to int16, no overflow
    const std::vector<int32_t> values = {0, 1000, 2000, 1, 2};
    const std::vector<int16_t> expected = {0, 1000, 2000, 1, 2};
    CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), values, is_valid, int16(),
                                                      expected, options);
  }

  {
    // int32 to int16, overflow: 70000 wraps to 4464
    // TODO(wesm): do we want to allow this? we could set to null
    const std::vector<int32_t> values = {0, 1000, 2000, 70000, 0};
    const std::vector<int16_t> expected = {0, 1000, 2000, 4464, 0};
    CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), values, is_valid, int16(),
                                                      expected, options);
  }

  {
    // int32 to int16, underflow: -70000 wraps to -4464
    // TODO(wesm): do we want to allow this? we could set overflow to null
    const std::vector<int32_t> values = {0, 1000, 2000, -70000, 0};
    const std::vector<int16_t> expected = {0, 1000, 2000, -4464, 0};
    CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), values, is_valid, int16(),
                                                      expected, options);
  }
}
534 
TEST_F(TestCast,FloatingPointToInt)535 TEST_F(TestCast, FloatingPointToInt) {
536   // which means allow_float_truncate == false
537   auto options = CastOptions::Safe();
538 
539   std::vector<bool> is_valid = {true, false, true, true, true};
540   std::vector<bool> all_valid = {true, true, true, true, true};
541 
542   // float32 to int32 no truncation
543   std::vector<float> v1 = {1.0, 0, 0.0, -1.0, 5.0};
544   std::vector<int32_t> e1 = {1, 0, 0, -1, 5};
545   CheckCase<FloatType, float, Int32Type, int32_t>(float32(), v1, is_valid, int32(), e1,
546                                                   options);
547   CheckCase<FloatType, float, Int32Type, int32_t>(float32(), v1, all_valid, int32(), e1,
548                                                   options);
549 
550   // float64 to int32 no truncation
551   std::vector<double> v2 = {1.0, 0, 0.0, -1.0, 5.0};
552   std::vector<int32_t> e2 = {1, 0, 0, -1, 5};
553   CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v2, is_valid, int32(), e2,
554                                                     options);
555   CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v2, all_valid, int32(), e2,
556                                                     options);
557 
558   // float64 to int64 no truncation
559   std::vector<double> v3 = {1.0, 0, 0.0, -1.0, 5.0};
560   std::vector<int64_t> e3 = {1, 0, 0, -1, 5};
561   CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v3, is_valid, int64(), e3,
562                                                     options);
563   CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v3, all_valid, int64(), e3,
564                                                     options);
565 
566   // float64 to int32 truncate
567   std::vector<double> v4 = {1.5, 0, 0.5, -1.5, 5.5};
568   std::vector<int32_t> e4 = {1, 0, 0, -1, 5};
569 
570   options.allow_float_truncate = false;
571   CheckFails<DoubleType>(float64(), v4, is_valid, int32(), options);
572   CheckFails<DoubleType>(float64(), v4, all_valid, int32(), options);
573 
574   options.allow_float_truncate = true;
575   CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v4, is_valid, int32(), e4,
576                                                     options);
577   CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v4, all_valid, int32(), e4,
578                                                     options);
579 
580   // float64 to int64 truncate
581   std::vector<double> v5 = {1.5, 0, 0.5, -1.5, 5.5};
582   std::vector<int64_t> e5 = {1, 0, 0, -1, 5};
583 
584   options.allow_float_truncate = false;
585   CheckFails<DoubleType>(float64(), v5, is_valid, int64(), options);
586   CheckFails<DoubleType>(float64(), v5, all_valid, int64(), options);
587 
588   options.allow_float_truncate = true;
589   CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v5, is_valid, int64(), e5,
590                                                     options);
591   CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v5, all_valid, int64(), e5,
592                                                     options);
593 }
594 
595 #if ARROW_BITNESS >= 64
// int64 -> floating point under safe options, using values at both ends of the
// int64 range.
TEST_F(TestCast, IntToFloatingPoint) {
  auto options = CastOptions::Safe();

  const std::vector<bool> all_valid(5, true);
  const std::vector<bool> all_invalid(5, false);

  const std::vector<int64_t> extremes = {INT64_MIN, INT64_MIN + 1, 0, INT64_MAX - 1,
                                         INT64_MAX};

  // With every slot valid, the safe cast to float32 is rejected.
  CheckFails<Int64Type>(int64(), extremes, all_valid, float32(), options);

  // While it's not safe to convert, all values are null.
  const auto as_double = UnsafeVectorCast<double, int64_t>(extremes);
  CheckCase<Int64Type, int64_t, DoubleType, double>(int64(), extremes, all_invalid,
                                                    float64(), as_double, options);
}
610 #endif
611 
TEST_F(TestCast,DecimalToInt)612 TEST_F(TestCast, DecimalToInt) {
613   CastOptions options;
614   std::vector<bool> is_valid2 = {true, true};
615   std::vector<bool> is_valid3 = {true, true, false};
616 
617   // no overflow no truncation
618   std::vector<Decimal128> v12 = {Decimal128("02.0000000000"),
619                                  Decimal128("-11.0000000000")};
620   std::vector<Decimal128> v13 = {Decimal128("02.0000000000"),
621                                  Decimal128("-11.0000000000"),
622                                  Decimal128("-12.0000000000")};
623   std::vector<int64_t> e12 = {2, -11};
624   std::vector<int64_t> e13 = {2, -11, 0};
625 
626   for (bool allow_int_overflow : {false, true}) {
627     for (bool allow_decimal_truncate : {false, true}) {
628       options.allow_int_overflow = allow_int_overflow;
629       options.allow_decimal_truncate = allow_decimal_truncate;
630       CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
631           decimal(38, 10), v12, is_valid2, int64(), e12, options);
632       CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
633           decimal(38, 10), v13, is_valid3, int64(), e13, options);
634     }
635   }
636 
637   // truncation, no overflow
638   std::vector<Decimal128> v22 = {Decimal128("02.1000000000"),
639                                  Decimal128("-11.0000004500")};
640   std::vector<Decimal128> v23 = {Decimal128("02.1000000000"),
641                                  Decimal128("-11.0000004500"),
642                                  Decimal128("-12.0000004500")};
643   std::vector<int64_t> e22 = {2, -11};
644   std::vector<int64_t> e23 = {2, -11, 0};
645 
646   for (bool allow_int_overflow : {false, true}) {
647     options.allow_int_overflow = allow_int_overflow;
648     options.allow_decimal_truncate = true;
649     CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
650         decimal(38, 10), v22, is_valid2, int64(), e22, options);
651     CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
652         decimal(38, 10), v23, is_valid3, int64(), e23, options);
653     options.allow_decimal_truncate = false;
654     CheckFails<Decimal128Type>(decimal(38, 10), v22, is_valid2, int64(), options);
655     CheckFails<Decimal128Type>(decimal(38, 10), v23, is_valid3, int64(), options);
656   }
657 
658   // overflow, no truncation
659   std::vector<Decimal128> v32 = {Decimal128("12345678901234567890000.0000000000"),
660                                  Decimal128("99999999999999999999999.0000000000")};
661   std::vector<Decimal128> v33 = {Decimal128("12345678901234567890000.0000000000"),
662                                  Decimal128("99999999999999999999999.0000000000"),
663                                  Decimal128("99999999999999999999999.0000000000")};
664   // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
665   std::vector<int64_t> e32 = {4807115922877858896, 200376420520689663};
666   std::vector<int64_t> e33 = {4807115922877858896, 200376420520689663, -2};
667 
668   for (bool allow_decimal_truncate : {false, true}) {
669     options.allow_decimal_truncate = allow_decimal_truncate;
670     options.allow_int_overflow = true;
671     CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
672         decimal(38, 10), v32, is_valid2, int64(), e32, options);
673     CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
674         decimal(38, 10), v33, is_valid3, int64(), e33, options);
675     options.allow_int_overflow = false;
676     CheckFails<Decimal128Type>(decimal(38, 10), v32, is_valid2, int64(), options);
677     CheckFails<Decimal128Type>(decimal(38, 10), v33, is_valid3, int64(), options);
678   }
679 
680   // overflow, truncation
681   std::vector<Decimal128> v42 = {Decimal128("12345678901234567890000.0045345000"),
682                                  Decimal128("99999999999999999999999.0000005430")};
683   std::vector<Decimal128> v43 = {Decimal128("12345678901234567890000.0005345340"),
684                                  Decimal128("99999999999999999999999.0000344300"),
685                                  Decimal128("99999999999999999999999.0004354000")};
686   // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
687   std::vector<int64_t> e42 = {4807115922877858896, 200376420520689663};
688   std::vector<int64_t> e43 = {4807115922877858896, 200376420520689663, -2};
689 
690   for (bool allow_int_overflow : {false, true}) {
691     for (bool allow_decimal_truncate : {false, true}) {
692       options.allow_int_overflow = allow_int_overflow;
693       options.allow_decimal_truncate = allow_decimal_truncate;
694       if (options.allow_int_overflow && options.allow_decimal_truncate) {
695         CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
696             decimal(38, 10), v42, is_valid2, int64(), e42, options);
697         CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
698             decimal(38, 10), v43, is_valid3, int64(), e43, options);
699       } else {
700         CheckFails<Decimal128Type>(decimal(38, 10), v42, is_valid2, int64(), options);
701         CheckFails<Decimal128Type>(decimal(38, 10), v43, is_valid3, int64(), options);
702       }
703     }
704   }
705 
706   // negative scale
707   std::vector<Decimal128> v5 = {Decimal128("1234567890000."), Decimal128("-120000.")};
708   for (int i = 0; i < 2; i++) v5[i] = v5[i].Rescale(0, -4).ValueOrDie();
709   std::vector<int64_t> e5 = {1234567890000, -120000};
710   CheckCase<Decimal128Type, Decimal128, Int64Type, int64_t>(
711       decimal(38, -4), v5, is_valid2, int64(), e5, options);
712 }
713 
TEST_F(TestCast,DecimalToDecimal)714 TEST_F(TestCast, DecimalToDecimal) {
715   CastOptions options;
716 
717   std::vector<bool> is_valid2 = {true, true};
718   std::vector<bool> is_valid3 = {true, true, false};
719 
720   // simple cases decimal
721 
722   std::vector<Decimal128> v12 = {Decimal128("02.0000000000"),
723                                  Decimal128("30.0000000000")};
724   std::vector<Decimal128> e12 = {Decimal128("02."), Decimal128("30.")};
725   std::vector<Decimal128> v13 = {Decimal128("02.0000000000"), Decimal128("30.0000000000"),
726                                  Decimal128("30.0000000000")};
727   std::vector<Decimal128> e13 = {Decimal128("02."), Decimal128("30."), Decimal128("-1.")};
728 
729   for (bool allow_decimal_truncate : {false, true}) {
730     options.allow_decimal_truncate = allow_decimal_truncate;
731     CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
732         decimal(38, 10), v12, is_valid2, decimal(28, 0), e12, options);
733     CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
734         decimal(38, 10), v13, is_valid3, decimal(28, 0), e13, options);
735     // and back
736     CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
737         decimal(28, 0), e12, is_valid2, decimal(38, 10), v12, options);
738     CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
739         decimal(28, 0), e13, is_valid3, decimal(38, 10), v13, options);
740   }
741 
742   std::vector<Decimal128> v22 = {Decimal128("-02.1234567890"),
743                                  Decimal128("30.1234567890")};
744   std::vector<Decimal128> e22 = {Decimal128("-02."), Decimal128("30.")};
745   std::vector<Decimal128> f22 = {Decimal128("-02.0000000000"),
746                                  Decimal128("30.0000000000")};
747   std::vector<Decimal128> v23 = {Decimal128("-02.1234567890"),
748                                  Decimal128("30.1234567890"),
749                                  Decimal128("30.1234567890")};
750   std::vector<Decimal128> e23 = {Decimal128("-02."), Decimal128("30."),
751                                  Decimal128("-70.")};
752   std::vector<Decimal128> f23 = {Decimal128("-02.0000000000"),
753                                  Decimal128("30.0000000000"),
754                                  Decimal128("80.0000000000")};
755 
756   options.allow_decimal_truncate = true;
757   CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
758       decimal(38, 10), v22, is_valid2, decimal(28, 0), e22, options);
759   CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
760       decimal(38, 10), v23, is_valid3, decimal(28, 0), e23, options);
761   // and back
762   CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
763       decimal(28, 0), e22, is_valid2, decimal(38, 10), f22, options);
764   CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
765       decimal(28, 0), e23, is_valid3, decimal(38, 10), f23, options);
766 
767   options.allow_decimal_truncate = false;
768   CheckFails<Decimal128Type>(decimal(38, 10), v22, is_valid2, decimal(28, 0), options);
769   CheckFails<Decimal128Type>(decimal(38, 10), v23, is_valid3, decimal(28, 0), options);
770   // back case is ok
771   CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
772       decimal(28, 0), e22, is_valid2, decimal(38, 10), f22, options);
773   CheckCase<Decimal128Type, Decimal128, Decimal128Type, Decimal128>(
774       decimal(28, 0), e23, is_valid3, decimal(38, 10), f23, options);
775 }
776 
TEST_F(TestCast,TimestampToTimestamp)777 TEST_F(TestCast, TimestampToTimestamp) {
778   CastOptions options;
779 
780   auto CheckTimestampCast =
781       [this](const CastOptions& options, TimeUnit::type from_unit, TimeUnit::type to_unit,
782              const std::vector<int64_t>& from_values,
783              const std::vector<int64_t>& to_values, const std::vector<bool>& is_valid) {
784         CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
785             timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values,
786             options);
787       };
788 
789   std::vector<bool> is_valid = {true, false, true, true, true};
790 
791   // Multiply promotions
792   std::vector<int64_t> v1 = {0, 100, 200, 1, 2};
793   std::vector<int64_t> e1 = {0, 100000, 200000, 1000, 2000};
794   CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid);
795 
796   std::vector<int64_t> v2 = {0, 100, 200, 1, 2};
797   std::vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
798   CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MICRO, v2, e2, is_valid);
799 
800   std::vector<int64_t> v3 = {0, 100, 200, 1, 2};
801   std::vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
802   CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::NANO, v3, e3, is_valid);
803 
804   std::vector<int64_t> v4 = {0, 100, 200, 1, 2};
805   std::vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
806   CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::MICRO, v4, e4, is_valid);
807 
808   std::vector<int64_t> v5 = {0, 100, 200, 1, 2};
809   std::vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
810   CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::NANO, v5, e5, is_valid);
811 
812   std::vector<int64_t> v6 = {0, 100, 200, 1, 2};
813   std::vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
814   CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::NANO, v6, e6, is_valid);
815 
816   // Zero copy
817   std::vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
818   std::shared_ptr<Array> arr;
819   ArrayFromVector<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), is_valid, v7,
820                                           &arr);
821   CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND));
822 
823   // ARROW-1773, cast to integer
824   CheckZeroCopy(*arr, int64());
825 
826   // Divide, truncate
827   std::vector<int64_t> v8 = {0, 100123, 200456, 1123, 2456};
828   std::vector<int64_t> e8 = {0, 100, 200, 1, 2};
829 
830   options.allow_time_truncate = true;
831   CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::SECOND, v8, e8, is_valid);
832   CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::MILLI, v8, e8, is_valid);
833   CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MICRO, v8, e8, is_valid);
834 
835   std::vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
836   std::vector<int64_t> e9 = {0, 100, 200, 1, 2};
837   CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::SECOND, v9, e9, is_valid);
838   CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MILLI, v9, e9, is_valid);
839 
840   std::vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
841   std::vector<int64_t> e10 = {0, 100, 200, 1, 2};
842   CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::SECOND, v10, e10, is_valid);
843 
844   // Disallow truncate, failures
845   options.allow_time_truncate = false;
846   CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v8, is_valid,
847                             timestamp(TimeUnit::SECOND), options);
848   CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v8, is_valid,
849                             timestamp(TimeUnit::MILLI), options);
850   CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v8, is_valid,
851                             timestamp(TimeUnit::MICRO), options);
852   CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v9, is_valid,
853                             timestamp(TimeUnit::SECOND), options);
854   CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v9, is_valid,
855                             timestamp(TimeUnit::MILLI), options);
856   CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v10, is_valid,
857                             timestamp(TimeUnit::SECOND), options);
858 
859   // Multiply overflow
860 
861   // 1000-01-01, 1800-01-01 , 2000-01-01, 2300-01-01, 3000-01-01
862   std::vector<int64_t> v11 = {-30610224000, -5364662400, 946684800, 10413792000,
863                               32503680000};
864 
865   options.allow_time_overflow = false;
866   CheckFails<TimestampType>(timestamp(TimeUnit::SECOND), v11, is_valid,
867                             timestamp(TimeUnit::NANO), options);
868 }
869 
TEST_F(TestCast,TimestampToDate32_Date64)870 TEST_F(TestCast, TimestampToDate32_Date64) {
871   CastOptions options;
872 
873   std::vector<bool> is_valid = {true, true, false};
874 
875   // 2000-01-01, 2000-01-02, null
876   std::vector<int64_t> v_nano = {946684800000000000, 946771200000000000, 0};
877   std::vector<int64_t> v_micro = {946684800000000, 946771200000000, 0};
878   std::vector<int64_t> v_milli = {946684800000, 946771200000, 0};
879   std::vector<int64_t> v_second = {946684800, 946771200, 0};
880   std::vector<int32_t> v_day = {10957, 10958, 0};
881 
882   // Simple conversions
883   CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
884       timestamp(TimeUnit::NANO), v_nano, is_valid, date64(), v_milli, options);
885   CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
886       timestamp(TimeUnit::MICRO), v_micro, is_valid, date64(), v_milli, options);
887   CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
888       timestamp(TimeUnit::MILLI), v_milli, is_valid, date64(), v_milli, options);
889   CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
890       timestamp(TimeUnit::SECOND), v_second, is_valid, date64(), v_milli, options);
891 
892   CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
893       timestamp(TimeUnit::NANO), v_nano, is_valid, date32(), v_day, options);
894   CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
895       timestamp(TimeUnit::MICRO), v_micro, is_valid, date32(), v_day, options);
896   CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
897       timestamp(TimeUnit::MILLI), v_milli, is_valid, date32(), v_day, options);
898   CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
899       timestamp(TimeUnit::SECOND), v_second, is_valid, date32(), v_day, options);
900 
901   // Disallow truncate, failures
902   std::vector<int64_t> v_nano_fail = {946684800000000001, 946771200000000001, 0};
903   std::vector<int64_t> v_micro_fail = {946684800000001, 946771200000001, 0};
904   std::vector<int64_t> v_milli_fail = {946684800001, 946771200001, 0};
905   std::vector<int64_t> v_second_fail = {946684801, 946771201, 0};
906 
907   options.allow_time_truncate = false;
908   CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date64(),
909                             options);
910   CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date64(),
911                             options);
912   CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date64(),
913                             options);
914   CheckFails<TimestampType>(timestamp(TimeUnit::SECOND), v_second_fail, is_valid,
915                             date64(), options);
916 
917   CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date32(),
918                             options);
919   CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date32(),
920                             options);
921   CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date32(),
922                             options);
923   CheckFails<TimestampType>(timestamp(TimeUnit::SECOND), v_second_fail, is_valid,
924                             date32(), options);
925 
926   // Make sure that nulls are excluded from the truncation checks
927   std::vector<int64_t> v_second_nofail = {946684800, 946771200, 1};
928   CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
929       timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date64(), v_milli, options);
930   CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
931       timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date32(), v_day, options);
932 }
933 
TEST_F(TestCast,TimeToCompatible)934 TEST_F(TestCast, TimeToCompatible) {
935   CastOptions options;
936 
937   std::vector<bool> is_valid = {true, false, true, true, true};
938 
939   // Multiply promotions
940   std::vector<int32_t> v1 = {0, 100, 200, 1, 2};
941   std::vector<int32_t> e1 = {0, 100000, 200000, 1000, 2000};
942   CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
943       time32(TimeUnit::SECOND), v1, is_valid, time32(TimeUnit::MILLI), e1, options);
944 
945   std::vector<int32_t> v2 = {0, 100, 200, 1, 2};
946   std::vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
947   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
948       time32(TimeUnit::SECOND), v2, is_valid, time64(TimeUnit::MICRO), e2, options);
949 
950   std::vector<int32_t> v3 = {0, 100, 200, 1, 2};
951   std::vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
952   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
953       time32(TimeUnit::SECOND), v3, is_valid, time64(TimeUnit::NANO), e3, options);
954 
955   std::vector<int32_t> v4 = {0, 100, 200, 1, 2};
956   std::vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
957   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
958       time32(TimeUnit::MILLI), v4, is_valid, time64(TimeUnit::MICRO), e4, options);
959 
960   std::vector<int32_t> v5 = {0, 100, 200, 1, 2};
961   std::vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
962   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
963       time32(TimeUnit::MILLI), v5, is_valid, time64(TimeUnit::NANO), e5, options);
964 
965   std::vector<int64_t> v6 = {0, 100, 200, 1, 2};
966   std::vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
967   CheckCase<Time64Type, int64_t, Time64Type, int64_t>(
968       time64(TimeUnit::MICRO), v6, is_valid, time64(TimeUnit::NANO), e6, options);
969 
970   // Zero copy
971   std::vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
972   std::shared_ptr<Array> arr;
973   ArrayFromVector<Time64Type, int64_t>(time64(TimeUnit::MICRO), is_valid, v7, &arr);
974   CheckZeroCopy(*arr, time64(TimeUnit::MICRO));
975 
976   // ARROW-1773: cast to int64
977   CheckZeroCopy(*arr, int64());
978 
979   std::vector<int32_t> v7_2 = {0, 70000, 2000, 1000, 0};
980   ArrayFromVector<Time32Type, int32_t>(time32(TimeUnit::SECOND), is_valid, v7_2, &arr);
981   CheckZeroCopy(*arr, time32(TimeUnit::SECOND));
982 
983   // ARROW-1773: cast to int64
984   CheckZeroCopy(*arr, int32());
985 
986   // Divide, truncate
987   std::vector<int32_t> v8 = {0, 100123, 200456, 1123, 2456};
988   std::vector<int32_t> e8 = {0, 100, 200, 1, 2};
989 
990   options.allow_time_truncate = true;
991   CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
992       time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), e8, options);
993   CheckCase<Time64Type, int32_t, Time32Type, int32_t>(
994       time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), e8, options);
995   CheckCase<Time64Type, int32_t, Time64Type, int32_t>(
996       time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), e8, options);
997 
998   std::vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
999   std::vector<int32_t> e9 = {0, 100, 200, 1, 2};
1000   CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
1001       time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), e9, options);
1002   CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
1003       time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), e9, options);
1004 
1005   std::vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
1006   std::vector<int32_t> e10 = {0, 100, 200, 1, 2};
1007   CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
1008       time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), e10, options);
1009 
1010   // Disallow truncate, failures
1011 
1012   options.allow_time_truncate = false;
1013   CheckFails<Time32Type>(time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND),
1014                          options);
1015   CheckFails<Time64Type>(time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI),
1016                          options);
1017   CheckFails<Time64Type>(time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO),
1018                          options);
1019   CheckFails<Time64Type>(time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND),
1020                          options);
1021   CheckFails<Time64Type>(time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI),
1022                          options);
1023   CheckFails<Time64Type>(time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND),
1024                          options);
1025 }
1026 
TEST_F(TestCast, DateToCompatible) {
  // date32 <-> date64 casts: widening (multiply), zero-copy, and narrowing
  // (divide) with and without allow_time_truncate.
  CastOptions options;

  const std::vector<bool> validity = {true, false, true, true, true};

  // Scale factor between the date32 inputs and the date64 expectations below
  // (86,400,000 = 24 * 60 * 60 * 1000).
  constexpr int64_t kScale = 86400000;

  // date32 -> date64 multiplies by the scale factor
  const std::vector<int32_t> day_values = {0, 100, 200, 1, 2};
  const std::vector<int64_t> scaled_values = {0, 100 * kScale, 200 * kScale, kScale,
                                              2 * kScale};
  CheckCase<Date32Type, int32_t, Date64Type, int64_t>(date32(), day_values, validity,
                                                      date64(), scaled_values, options);

  // Same-type casts and casts to the matching integer width are zero-copy
  // (ARROW-1773 for the integer targets).
  const std::vector<int32_t> raw32 = {0, 70000, 2000, 1000, 0};
  const std::vector<int64_t> raw64 = {0, 70000, 2000, 1000, 0};
  std::shared_ptr<Array> date_array;

  ArrayFromVector<Date32Type, int32_t>(date32(), validity, raw32, &date_array);
  CheckZeroCopy(*date_array, date32());
  CheckZeroCopy(*date_array, int32());

  ArrayFromVector<Date64Type, int64_t>(date64(), validity, raw64, &date_array);
  CheckZeroCopy(*date_array, date64());
  CheckZeroCopy(*date_array, int64());

  // date64 -> date32 with a remainder: succeeds only when truncation is allowed
  const std::vector<int64_t> inexact_values = {0, 100 * kScale + 123, 200 * kScale + 456,
                                               kScale + 123, 2 * kScale + 456};
  const std::vector<int32_t> truncated_days = {0, 100, 200, 1, 2};

  options.allow_time_truncate = true;
  CheckCase<Date64Type, int64_t, Date32Type, int32_t>(date64(), inexact_values, validity,
                                                      date32(), truncated_days, options);

  options.allow_time_truncate = false;
  CheckFails<Date64Type>(date64(), inexact_values, validity, date32(), options);
}
1068 
TEST_F(TestCast,DurationToCompatible)1069 TEST_F(TestCast, DurationToCompatible) {
1070   CastOptions options;
1071 
1072   auto CheckDurationCast =
1073       [this](const CastOptions& options, TimeUnit::type from_unit, TimeUnit::type to_unit,
1074              const std::vector<int64_t>& from_values,
1075              const std::vector<int64_t>& to_values, const std::vector<bool>& is_valid) {
1076         CheckCase<DurationType, int64_t, DurationType, int64_t>(
1077             duration(from_unit), from_values, is_valid, duration(to_unit), to_values,
1078             options);
1079       };
1080 
1081   std::vector<bool> is_valid = {true, false, true, true, true};
1082 
1083   // Multiply promotions
1084   std::vector<int64_t> v1 = {0, 100, 200, 1, 2};
1085   std::vector<int64_t> e1 = {0, 100000, 200000, 1000, 2000};
1086   CheckDurationCast(options, TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid);
1087 
1088   std::vector<int64_t> v2 = {0, 100, 200, 1, 2};
1089   std::vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
1090   CheckDurationCast(options, TimeUnit::SECOND, TimeUnit::MICRO, v2, e2, is_valid);
1091 
1092   std::vector<int64_t> v3 = {0, 100, 200, 1, 2};
1093   std::vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
1094   CheckDurationCast(options, TimeUnit::SECOND, TimeUnit::NANO, v3, e3, is_valid);
1095 
1096   std::vector<int64_t> v4 = {0, 100, 200, 1, 2};
1097   std::vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
1098   CheckDurationCast(options, TimeUnit::MILLI, TimeUnit::MICRO, v4, e4, is_valid);
1099 
1100   std::vector<int64_t> v5 = {0, 100, 200, 1, 2};
1101   std::vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
1102   CheckDurationCast(options, TimeUnit::MILLI, TimeUnit::NANO, v5, e5, is_valid);
1103 
1104   std::vector<int64_t> v6 = {0, 100, 200, 1, 2};
1105   std::vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
1106   CheckDurationCast(options, TimeUnit::MICRO, TimeUnit::NANO, v6, e6, is_valid);
1107 
1108   // Zero copy
1109   std::vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
1110   std::shared_ptr<Array> arr;
1111   ArrayFromVector<DurationType, int64_t>(duration(TimeUnit::SECOND), is_valid, v7, &arr);
1112   CheckZeroCopy(*arr, duration(TimeUnit::SECOND));
1113   CheckZeroCopy(*arr, int64());
1114 
1115   // Divide, truncate
1116   std::vector<int64_t> v8 = {0, 100123, 200456, 1123, 2456};
1117   std::vector<int64_t> e8 = {0, 100, 200, 1, 2};
1118 
1119   options.allow_time_truncate = true;
1120   CheckDurationCast(options, TimeUnit::MILLI, TimeUnit::SECOND, v8, e8, is_valid);
1121   CheckDurationCast(options, TimeUnit::MICRO, TimeUnit::MILLI, v8, e8, is_valid);
1122   CheckDurationCast(options, TimeUnit::NANO, TimeUnit::MICRO, v8, e8, is_valid);
1123 
1124   std::vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
1125   std::vector<int64_t> e9 = {0, 100, 200, 1, 2};
1126   CheckDurationCast(options, TimeUnit::MICRO, TimeUnit::SECOND, v9, e9, is_valid);
1127   CheckDurationCast(options, TimeUnit::NANO, TimeUnit::MILLI, v9, e9, is_valid);
1128 
1129   std::vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
1130   std::vector<int64_t> e10 = {0, 100, 200, 1, 2};
1131   CheckDurationCast(options, TimeUnit::NANO, TimeUnit::SECOND, v10, e10, is_valid);
1132 
1133   // Disallow truncate, failures
1134   options.allow_time_truncate = false;
1135   CheckFails<DurationType>(duration(TimeUnit::MILLI), v8, is_valid,
1136                            duration(TimeUnit::SECOND), options);
1137   CheckFails<DurationType>(duration(TimeUnit::MICRO), v8, is_valid,
1138                            duration(TimeUnit::MILLI), options);
1139   CheckFails<DurationType>(duration(TimeUnit::NANO), v8, is_valid,
1140                            duration(TimeUnit::MICRO), options);
1141   CheckFails<DurationType>(duration(TimeUnit::MICRO), v9, is_valid,
1142                            duration(TimeUnit::SECOND), options);
1143   CheckFails<DurationType>(duration(TimeUnit::NANO), v9, is_valid,
1144                            duration(TimeUnit::MILLI), options);
1145   CheckFails<DurationType>(duration(TimeUnit::NANO), v10, is_valid,
1146                            duration(TimeUnit::SECOND), options);
1147 
1148   // Multiply overflow
1149 
1150   // 1000-01-01, 1800-01-01 , 2000-01-01, 2300-01-01, 3000-01-01
1151   std::vector<int64_t> v11 = {10000000000, 1, 2, 3, 10000000000};
1152 
1153   options.allow_time_overflow = false;
1154   CheckFails<DurationType>(duration(TimeUnit::SECOND), v11, is_valid,
1155                            duration(TimeUnit::NANO), options);
1156 }
1157 
TEST_F(TestCast, ToDouble) {
  // Casts int16, float32 and boolean inputs to float64.
  CastOptions options;
  const std::vector<bool> validity = {true, false, true, true, true};

  // Both numeric inputs hold the same values and share one expectation.
  const std::vector<double> numeric_expected = {0, 100, 200, 1, 2};

  // int16 -> double
  const std::vector<int16_t> int16_input = {0, 100, 200, 1, 2};
  CheckCase<Int16Type, int16_t, DoubleType, double>(int16(), int16_input, validity,
                                                    float64(), numeric_expected, options);

  // float -> double
  const std::vector<float> float_input = {0, 100, 200, 1, 2};
  CheckCase<FloatType, float, DoubleType, double>(float32(), float_input, validity,
                                                  float64(), numeric_expected, options);

  // bool -> double: true becomes 1, false becomes 0
  const std::vector<bool> bool_input = {true, true, false, false, true};
  const std::vector<double> bool_expected = {1, 1, 0, 0, 1};
  CheckCase<BooleanType, bool, DoubleType, double>(boolean(), bool_input, validity,
                                                   float64(), bool_expected, options);
}
1180 
TEST_F(TestCast, ChunkedArray) {
  // Casting a two-chunk int16 ChunkedArray to int64 must produce a
  // ChunkedArray datum that equals the expected two-chunk int64 array.
  std::vector<int16_t> values1 = {0, 1, 2};
  std::vector<int16_t> values2 = {3, 4, 5};

  auto type = int16();
  auto out_type = int64();

  auto a1 = _MakeArray<Int16Type, int16_t>(type, values1, {});
  auto a2 = _MakeArray<Int16Type, int16_t>(type, values2, {});

  ArrayVector arrays = {a1, a2};
  auto carr = std::make_shared<ChunkedArray>(arrays);

  CastOptions options;

  ASSERT_OK_AND_ASSIGN(Datum out, Cast(carr, out_type, options));
  // The result kind is verified before the chunked array is extracted.
  ASSERT_EQ(Datum::CHUNKED_ARRAY, out.kind());

  auto out_carr = out.chunked_array();

  std::vector<int64_t> ex_values1 = {0, 1, 2};
  std::vector<int64_t> ex_values2 = {3, 4, 5};
  auto a3 = _MakeArray<Int64Type, int64_t>(out_type, ex_values1, {});
  auto a4 = _MakeArray<Int64Type, int64_t>(out_type, ex_values2, {});

  ArrayVector ex_arrays = {a3, a4};
  auto ex_carr = std::make_shared<ChunkedArray>(ex_arrays);

  // Compare through the handle extracted above instead of fetching it again.
  ASSERT_TRUE(out_carr->Equals(*ex_carr));
}
1211 
TEST_F(TestCast, UnsupportedTarget) {
  // Casting an int32 array to list<utf8> must report NotImplemented.
  const std::vector<int32_t> values = {0, 1, 2, 3, 4};
  const std::vector<bool> validity = {true, false, true, true, true};

  std::shared_ptr<Array> int_array;
  ArrayFromVector<Int32Type, int32_t>(int32(), validity, values, &int_array);

  const auto unsupported_type = list(utf8());
  ASSERT_RAISES(NotImplemented, Cast(*int_array, unsupported_type));
}
1221 
TEST_F(TestCast, DateTimeZeroCopy) {
  // Integer arrays cast to date/time types of the same bit width are checked
  // with CheckZeroCopy.
  const std::vector<bool> validity = {true, false, true, true, true};
  std::shared_ptr<Array> int_array;

  // 32-bit integers -> 32-bit temporal types
  const std::vector<int32_t> values32 = {0, 70000, 2000, 1000, 0};
  ArrayFromVector<Int32Type, int32_t>(int32(), validity, values32, &int_array);

  const std::vector<std::shared_ptr<DataType>> targets32 = {time32(TimeUnit::SECOND),
                                                            date32()};
  for (const auto& target : targets32) {
    CheckZeroCopy(*int_array, target);
  }

  // 64-bit integers -> 64-bit temporal types
  const std::vector<int64_t> values64 = {0, 70000, 2000, 1000, 0};
  ArrayFromVector<Int64Type, int64_t>(int64(), validity, values64, &int_array);

  const std::vector<std::shared_ptr<DataType>> targets64 = {
      time64(TimeUnit::MICRO), date64(), timestamp(TimeUnit::NANO),
      duration(TimeUnit::MILLI)};
  for (const auto& target : targets64) {
    CheckZeroCopy(*int_array, target);
  }
}
1240 
TEST_F(TestCast, StringToBoolean) {
  // Parses boolean values from their word ("true"/"False"/...) and digit
  // ("0"/"1") spellings.
  CastOptions options;

  const std::vector<bool> validity = {true, false, true, true, true};
  const std::vector<bool> expected = {false, true, true, true, false};

  const std::vector<std::string> words = {"False", "true", "true", "True", "false"};
  const std::vector<std::string> digits = {"0", "1", "1", "1", "0"};

  // utf8 input
  CheckCase<StringType, std::string, BooleanType, bool>(utf8(), words, validity,
                                                        boolean(), expected, options);
  CheckCase<StringType, std::string, BooleanType, bool>(utf8(), digits, validity,
                                                        boolean(), expected, options);

  // large_utf8 input (word spellings only)
  CheckCase<LargeStringType, std::string, BooleanType, bool>(
      large_utf8(), words, validity, boolean(), expected, options);
}
1258 
TEST_F(TestCast, StringToBooleanErrors) {
  // Strings that the string -> boolean cast is expected to reject.
  CastOptions options;

  const std::vector<bool> validity = {true};

  const std::vector<std::string> trailing_space = {"false "};
  const std::vector<std::string> single_letter = {"T"};

  CheckFails<StringType, std::string>(utf8(), trailing_space, validity, boolean(),
                                      options);
  CheckFails<StringType, std::string>(utf8(), single_letter, validity, boolean(),
                                      options);
  CheckFails<LargeStringType, std::string>(large_utf8(), single_letter, validity,
                                           boolean(), options);
}
1269 
TEST_F(TestCast, StringToNumber) {
  // Runs the TestCastStringToNumber helper with StringType as the source type.
  using SourceType = StringType;
  TestCastStringToNumber<SourceType>();
}
1271 
TEST_F(TestCast, LargeStringToNumber) {
  // Runs the TestCastStringToNumber helper with LargeStringType as the source
  // type.
  using SourceType = LargeStringType;
  TestCastStringToNumber<SourceType>();
}
1273 
TEST_F(TestCast, StringToNumberErrors) {
  // Strings that the string -> number cast is expected to reject, grouped by
  // target type.
  CastOptions options;

  const std::vector<bool> validity = {true};

  // int8: non-numeric text, trailing garbage, values outside [-128, 127], and
  // a fractional value
  const std::vector<std::string> bad_int8 = {"z", "12 z", "128", "-129", "0.5"};
  for (const std::string& text : bad_int8) {
    CheckFails<StringType, std::string>(utf8(), {text}, validity, int8(), options);
  }

  // uint8: values outside [0, 255]
  const std::vector<std::string> bad_uint8 = {"256", "-1"};
  for (const std::string& text : bad_uint8) {
    CheckFails<StringType, std::string>(utf8(), {text}, validity, uint8(), options);
  }

  // float32: non-numeric text
  const std::vector<std::string> bad_float32 = {"z"};
  for (const std::string& text : bad_float32) {
    CheckFails<StringType, std::string>(utf8(), {text}, validity, float32(), options);
  }
}
1290 
// Runs the shared string-to-timestamp checks instantiated for StringType.
TEST_F(TestCast, StringToTimestamp) {
  TestCastStringToTimestamp<StringType>();
}
1292 
// Runs the shared string-to-timestamp checks instantiated for LargeStringType.
TEST_F(TestCast, LargeStringToTimestamp) {
  TestCastStringToTimestamp<LargeStringType>();
}
1294 
// For every timestamp unit, an empty string and a non-date string are both
// expected to be rejected.
TEST_F(TestCast, StringToTimestampErrors) {
  CastOptions cast_options;

  std::vector<bool> validity = {true};

  const auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO,
                      TimeUnit::NANO};
  for (auto unit : units) {
    auto target_type = timestamp(unit);
    CheckFails<StringType, std::string>(utf8(), {""}, validity, target_type,
                                        cast_options);
    CheckFails<StringType, std::string>(utf8(), {"xxx"}, validity, target_type,
                                        cast_options);
  }
}
1306 
// Runs the shared binary-to-string checks for BinaryType -> StringType.
TEST_F(TestCast, BinaryToString) {
  TestCastBinaryToString<BinaryType, StringType>();
}
1308 
// Runs the shared binary-to-string checks for LargeBinaryType -> LargeStringType.
TEST_F(TestCast, LargeBinaryToLargeString) {
  using SourceType = LargeBinaryType;
  using DestType = LargeStringType;
  TestCastBinaryToString<SourceType, DestType>();
}
1312 
// Runs the shared number-to-string checks instantiated for StringType.
TEST_F(TestCast, NumberToString) {
  TestCastNumberToString<StringType>();
}
1314 
// Runs the shared number-to-string checks instantiated for LargeStringType.
TEST_F(TestCast, NumberToLargeString) {
  TestCastNumberToString<LargeStringType>();
}
1316 
// Runs the shared boolean-to-string checks instantiated for StringType.
TEST_F(TestCast, BooleanToString) {
  TestCastBooleanToString<StringType>();
}
1318 
// Runs the shared boolean-to-string checks instantiated for LargeStringType.
TEST_F(TestCast, BooleanToLargeString) {
  TestCastBooleanToString<LargeStringType>();
}
1320 
// Casting a list array directly to a primitive / string type is expected to
// raise NotImplemented.
TEST_F(TestCast, ListToPrimitive) {
  auto int8_lists = ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]");
  auto binary_lists =
      ArrayFromJSON(list(binary()), "[[\"1\", \"2\"], [\"3\", \"4\"]]");

  // list<int8> -> uint8
  ASSERT_RAISES(NotImplemented, Cast(*int8_lists, uint8()));
  // list<binary> -> utf8
  ASSERT_RAISES(NotImplemented, Cast(*binary_lists, utf8()));
}
1328 
// Builds int32, int64 and float64 list arrays over the same offsets (the
// int64/float64 children are casts of the int32 child) and checks that the
// list arrays cast into one another.
TEST_F(TestCast, ListToList) {
  using Int32ArrayType = TypeTraits<Int32Type>::ArrayType;

  CastOptions cast_options;
  std::shared_ptr<Array> list_offsets;

  // Offsets shared by all three list arrays; the entry at index 4 is flagged
  // invalid.
  std::vector<int32_t> raw_offsets = {0, 1, 2, 5, 7, 7, 8, 10};
  std::vector<bool> offsets_validity = {true, true, true,  true,
                                        false, true, true, true};
  ArrayFromVector<Int32Type, int32_t>(offsets_validity, raw_offsets, &list_offsets);

  // int32 child values and the list built on top of them.
  std::shared_ptr<Array> int32_values = TestBase::MakeRandomArray<Int32ArrayType>(10, 2);
  ASSERT_OK_AND_ASSIGN(auto int32_lists,
                       ListArray::FromArrays(*list_offsets, *int32_values, pool_));

  // int64 child values, derived from the int32 ones.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> int64_values,
                       Cast(*int32_values, int64(), cast_options));
  ASSERT_OK_AND_ASSIGN(auto int64_lists,
                       ListArray::FromArrays(*list_offsets, *int64_values, pool_));

  // float64 child values, derived from the int32 ones.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> float64_values,
                       Cast(*int32_values, float64(), cast_options));
  ASSERT_OK_AND_ASSIGN(auto float64_lists,
                       ListArray::FromArrays(*list_offsets, *float64_values, pool_));

  // Casts checked with default options.
  CheckPass(*int32_lists, *int64_lists, int64_lists->type(), cast_options);
  CheckPass(*int32_lists, *float64_lists, float64_lists->type(), cast_options);
  CheckPass(*int64_lists, *int32_lists, int32_lists->type(), cast_options);
  CheckPass(*int64_lists, *float64_lists, float64_lists->type(), cast_options);

  // Float -> integer casts are checked with allow_float_truncate enabled.
  cast_options.allow_float_truncate = true;
  CheckPass(*float64_lists, *int32_lists, int32_lists->type(), cast_options);
  CheckPass(*float64_lists, *int64_lists, int64_lists->type(), cast_options);
}
1361 
// Like ListToList above, only testing the basics: int32 <-> float64 child
// casts through LargeListArray with 64-bit offsets.
TEST_F(TestCast, LargeListToLargeList) {
  using Int32ArrayType = TypeTraits<Int32Type>::ArrayType;

  CastOptions cast_options;
  std::shared_ptr<Array> list_offsets;

  // Offsets shared by both list arrays; the entry at index 4 is flagged
  // invalid.
  std::vector<int64_t> raw_offsets = {0, 1, 2, 5, 7, 7, 8, 10};
  std::vector<bool> offsets_validity = {true, true, true,  true,
                                        false, true, true, true};
  ArrayFromVector<Int64Type, int64_t>(offsets_validity, raw_offsets, &list_offsets);

  // int32 child values and the large list built on top of them.
  std::shared_ptr<Array> int32_values = TestBase::MakeRandomArray<Int32ArrayType>(10, 2);
  ASSERT_OK_AND_ASSIGN(auto int32_lists,
                       LargeListArray::FromArrays(*list_offsets, *int32_values, pool_));

  // float64 child values, derived from the int32 ones.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> float64_values,
                       Cast(*int32_values, float64(), cast_options));
  ASSERT_OK_AND_ASSIGN(auto float64_lists,
                       LargeListArray::FromArrays(*list_offsets, *float64_values, pool_));

  CheckPass(*int32_lists, *float64_lists, float64_lists->type(), cast_options);

  // The float -> integer direction is checked with allow_float_truncate enabled.
  cast_options.allow_float_truncate = true;
  CheckPass(*float64_lists, *int32_lists, int32_lists->type(), cast_options);
}
1386 
// ARROW-4102: casting an array to its own type is checked with CheckZeroCopy
// for a range of types (null, boolean, numeric, binary-like, list, temporal
// and dictionary).
TEST_F(TestCast, IdentityCasts) {
  // Builds an array of `type` from `json` and checks the cast to that same
  // type. The type is taken by const reference to avoid a shared_ptr copy per
  // call.
  auto CheckIdentityCast = [this](const std::shared_ptr<DataType>& type,
                                  const std::string& json) {
    auto arr = ArrayFromJSON(type, json);
    CheckZeroCopy(*arr, type);
  };

  CheckIdentityCast(null(), "[null, null, null]");
  CheckIdentityCast(boolean(), "[false, true, null, false]");

  // Iterate by const reference: kNumericTypes holds shared_ptrs, and copying
  // each one would only add refcount traffic.
  for (const auto& type : kNumericTypes) {
    CheckIdentityCast(type, "[1, 2, null, 4]");
  }
  CheckIdentityCast(binary(), "[\"foo\", \"bar\"]");
  CheckIdentityCast(utf8(), "[\"foo\", \"bar\"]");
  CheckIdentityCast(fixed_size_binary(3), "[\"foo\", \"bar\"]");

  CheckIdentityCast(list(int8()), "[[1, 2], [null], [], [3]]");

  CheckIdentityCast(time32(TimeUnit::MILLI), "[1, 2, 3, 4]");
  CheckIdentityCast(time64(TimeUnit::MICRO), "[1, 2, 3, 4]");
  CheckIdentityCast(date32(), "[1, 2, 3, 4]");
  CheckIdentityCast(date64(), "[86400000, 0]");
  CheckIdentityCast(timestamp(TimeUnit::SECOND), "[1, 2, 3, 4]");

  {
    // Dictionary arrays cannot go through the JSON helper above, so the array
    // is assembled by hand from int8 indices and int8 dictionary values.
    auto dict_values = ArrayFromJSON(int8(), "[1, 2, 3]");
    auto dict_type = dictionary(int8(), dict_values->type());
    auto dict_indices = ArrayFromJSON(int8(), "[0, 1, 2, 0, null, 2]");
    auto dict_array =
        std::make_shared<DictionaryArray>(dict_type, dict_indices, dict_values);
    CheckZeroCopy(*dict_array, dict_type);
  }
}
1422 
// ARROW-4766: 0-length arrays should not segfault.
// Checks boolean <-> numeric casts on empty arrays whose buffers are nullptr.
TEST_F(TestCast, EmptyCasts) {
  // Builds a zero-length array of type `from` backed by two nullptr buffers
  // and checks that casting it to `to` yields an empty array of that type.
  // Default-constructed CastOptions are used for the cast.
  auto CheckEmptyCast = [this](const std::shared_ptr<DataType>& from,
                               const std::shared_ptr<DataType>& to) {
    // Python creates array with nullptr instead of 0-length (valid) buffers.
    auto data = ArrayData::Make(from, /* length */ 0, /* buffers */ {nullptr, nullptr});
    auto input = MakeArray(data);
    auto expected = ArrayFromJSON(to, "[]");
    CheckPass(*input, *expected, to, CastOptions{});
  };

  // Iterate by const reference to avoid copying each shared_ptr.
  for (const auto& numeric : kNumericTypes) {
    CheckEmptyCast(boolean(), numeric);
    CheckEmptyCast(numeric, boolean());
  }
}
1441 
1442 // ----------------------------------------------------------------------
1443 // Test casting from NullType
1444 
1445 template <typename TestType>
1446 class TestNullCast : public TestCast {};
1447 
1448 typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
1449                          UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
1450                          Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
1451     TestTypes;
1452 
1453 TYPED_TEST_SUITE(TestNullCast, TestTypes);
1454 
// Null casts to everything: a NullArray cast to the type under test must
// validate, carry the requested type and consist only of nulls.
TYPED_TEST(TestNullCast, FromNull) {
  using TargetArrayType = typename TypeTraits<TypeParam>::ArrayType;

  constexpr int kLength = 10;

  // Hack to get a DataType including for parametric types: build an empty
  // array of the target array class and borrow its type.
  std::shared_ptr<DataType> target_type =
      TestBase::MakeRandomArray<TargetArrayType>(0, 0)->type();

  NullArray nulls(kLength);

  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> casted, Cast(nulls, target_type));
  ASSERT_OK(casted->ValidateFull());

  ASSERT_TRUE(casted->type()->Equals(*target_type));
  ASSERT_EQ(kLength, casted->length());
  ASSERT_EQ(kLength, casted->null_count());
}
1472 
1473 // ----------------------------------------------------------------------
1474 // Test casting from DictionaryType
1475 
1476 template <typename TestType>
1477 class TestDictionaryCast : public TestCast {};
1478 
1479 typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
1480                          UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
1481                          Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
1482     TestTypes;
1483 
1484 TYPED_TEST_SUITE(TestDictionaryCast, TestTypes);
1485 
// Dictionary-encodes a random plain array and checks that casting the encoded
// array back to the plain type reproduces the original.
TYPED_TEST(TestDictionaryCast, Basic) {
  using PlainArrayType = typename TypeTraits<TypeParam>::ArrayType;

  CastOptions cast_options;
  std::shared_ptr<Array> plain = TestBase::MakeRandomArray<PlainArrayType>(10, 2);

  ASSERT_OK_AND_ASSIGN(Datum dict_datum, DictionaryEncode(plain->data()));
  ASSERT_EQ(dict_datum.array()->type->id(), Type::DICTIONARY);

  auto dict_array = MakeArray(dict_datum.array());
  this->CheckPass(*dict_array, *plain, plain->type(), cast_options);
}
1497 
// Test with a nullptr bitmap buffer (ARROW-3208): the dictionary array's
// first buffer is replaced by nullptr before it is cast back to plain.
TYPED_TEST(TestDictionaryCast, NoNulls) {
  using PlainArrayType = typename TypeTraits<TypeParam>::ArrayType;

  if (TypeParam::type_id == Type::NA) {
    // Skip, but gtest doesn't support skipping :-/
    return;
  }

  CastOptions cast_options;
  std::shared_ptr<Array> plain = TestBase::MakeRandomArray<PlainArrayType>(10, 0);
  ASSERT_EQ(plain->null_count(), 0);

  // Dict-encode the plain array
  ASSERT_OK_AND_ASSIGN(Datum dict_datum, DictionaryEncode(plain->data()));

  // Make a new dict array with nullptr bitmap buffer
  auto stripped = dict_datum.array()->Copy();
  stripped->buffers[0] = nullptr;
  stripped->null_count = 0;
  std::shared_ptr<Array> dict_array = std::make_shared<DictionaryArray>(stripped);
  ASSERT_OK(dict_array->ValidateFull());

  this->CheckPass(*dict_array, *plain, plain->type(), cast_options);
}
1522 
1523 // TODO: See how this might cause problems post-refactor
// ARROW-7077: unsupported out type should return an error.
// Currently disabled via the DISABLED_ name prefix.
TYPED_TEST(TestDictionaryCast, DISABLED_OutTypeError) {
  // An empty array is built only to obtain the value type for the dictionary.
  std::shared_ptr<Array> plain_array =
      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(0, 0);
  auto in_type = dictionary(int32(), plain_array->type());

  // Test an output type that's not part of TestTypes. An earlier
  // binary()/int8() initial value was overwritten before use and has been
  // dropped as a dead store.
  auto out_type = list(in_type);
  ASSERT_RAISES(NotImplemented, GetCastFunction(out_type));
}
1535 
SmallintArrayFromJSON(const std::string & json_data)1536 std::shared_ptr<Array> SmallintArrayFromJSON(const std::string& json_data) {
1537   auto arr = ArrayFromJSON(int16(), json_data);
1538   auto ext_data = arr->data()->Copy();
1539   ext_data->type = smallint();
1540   return MakeArray(ext_data);
1541 }
1542 
// Casts arrays of the Smallint extension type (int16 storage) to int16 and
// uint8, covering the in-range, overflow and underflow cases with
// allow_int_overflow both enabled and disabled.
TEST_F(TestCast, ExtensionTypeToIntDowncast) {
  // The extension type is registered for the duration of this test and
  // unregistered again on the last line.
  auto smallint = std::make_shared<SmallintType>();
  ASSERT_OK(RegisterExtensionType(smallint));

  CastOptions options;
  options.allow_int_overflow = false;

  // Smallint(int16) to int16
  auto v0 = SmallintArrayFromJSON("[0, 100, 200, 1, 2]");
  CheckZeroCopy(*v0, int16());

  // Smallint(int16) to uint8, no overflow/underrun
  auto v1 = SmallintArrayFromJSON("[0, 100, 200, 1, 2]");
  auto e1 = ArrayFromJSON(uint8(), "[0, 100, 200, 1, 2]");
  CheckPass(*v1, *e1, uint8(), options);

  // Smallint(int16) to uint8, with overflow
  auto v2 = SmallintArrayFromJSON("[0, null, 256, 1, 3]");
  auto e2 = ArrayFromJSON(uint8(), "[0, null, 0, 1, 3]");
  // allow overflow
  options.allow_int_overflow = true;
  CheckPass(*v2, *e2, uint8(), options);
  // disallow overflow
  options.allow_int_overflow = false;
  ASSERT_RAISES(Invalid, Cast(*v2, uint8(), options));

  // Smallint(int16) to uint8, with underflow
  auto v3 = SmallintArrayFromJSON("[0, null, -1, 1, 0]");
  auto e3 = ArrayFromJSON(uint8(), "[0, null, 255, 1, 0]");
  // allow overflow
  options.allow_int_overflow = true;
  CheckPass(*v3, *e3, uint8(), options);
  // disallow overflow
  options.allow_int_overflow = false;
  ASSERT_RAISES(Invalid, Cast(*v3, uint8(), options));

  // NOTE(review): a failing ASSERT_* above returns early and would skip this
  // unregistration - confirm whether that matters for other tests.
  ASSERT_OK(UnregisterExtensionType("smallint"));
}
1584 
1585 }  // namespace compute
1586 }  // namespace arrow
1587