1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <cstdint>
19 #include <cstdio>
20 #include <functional>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 #include <gmock/gmock.h>
26 #include <gtest/gtest.h>
27 
28 #include "arrow/array.h"
29 #include "arrow/array/builder_decimal.h"
30 #include "arrow/buffer.h"
31 #include "arrow/chunked_array.h"
32 #include "arrow/extension_type.h"
33 #include "arrow/status.h"
34 #include "arrow/testing/extension_type.h"
35 #include "arrow/testing/gtest_common.h"
36 #include "arrow/testing/gtest_util.h"
37 #include "arrow/testing/random.h"
38 #include "arrow/type.h"
39 #include "arrow/type_fwd.h"
40 #include "arrow/type_traits.h"
41 #include "arrow/util/bitmap.h"
42 #include "arrow/util/checked_cast.h"
43 #include "arrow/util/decimal.h"
44 
45 #include "arrow/compute/api_vector.h"
46 #include "arrow/compute/cast.h"
47 #include "arrow/compute/kernel.h"
48 #include "arrow/compute/kernels/codegen_internal.h"
49 #include "arrow/compute/kernels/test_util.h"
50 
51 namespace arrow {
52 
53 using internal::checked_cast;
54 using internal::checked_pointer_cast;
55 
56 namespace compute {
57 
InvalidUtf8(std::shared_ptr<DataType> type)58 static std::shared_ptr<Array> InvalidUtf8(std::shared_ptr<DataType> type) {
59   return ArrayFromJSON(type,
60                        "["
61                        R"(
62                        "Hi",
63                        "olá mundo",
64                        "你好世界",
65                        "",
66                        )"
67                        "\"\xa0\xa1\""
68                        "]");
69 }
70 
FixedSizeInvalidUtf8(std::shared_ptr<DataType> type)71 static std::shared_ptr<Array> FixedSizeInvalidUtf8(std::shared_ptr<DataType> type) {
72   if (type->id() == Type::FIXED_SIZE_BINARY) {
73     // Assume a particular width for testing
74     EXPECT_EQ(3, checked_cast<const FixedSizeBinaryType&>(*type).byte_width());
75   }
76   return ArrayFromJSON(type,
77                        "["
78                        R"(
79                        "Hi!",
80                        "lá",
81                        "",
82                        "   ",
83                        )"
84                        "\"\xa0\xa1\xa2\""
85                        "]");
86 }
87 
88 static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
89     uint8(), int8(),   uint16(), int16(),   uint32(),
90     int32(), uint64(), int64(),  float32(), float64()};
91 
92 static std::vector<std::shared_ptr<DataType>> kIntegerTypes = {
93     int8(), uint8(), int16(), uint16(), int32(), uint32(), int64(), uint64()};
94 
95 static std::vector<std::shared_ptr<DataType>> kDictionaryIndexTypes = kIntegerTypes;
96 
97 static std::vector<std::shared_ptr<DataType>> kBaseBinaryTypes = {
98     binary(), utf8(), large_binary(), large_utf8()};
99 
AssertBufferSame(const Array & left,const Array & right,int buffer_index)100 static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
101   ASSERT_EQ(left.data()->buffers[buffer_index].get(),
102             right.data()->buffers[buffer_index].get());
103 }
104 
// Runs the "cast" function on `input` through CheckScalarUnary and compares
// against `expected`. The target type is always taken from `expected`,
// replacing whatever `options.to_type` held on entry.
static void CheckCast(std::shared_ptr<Array> input, std::shared_ptr<Array> expected,
                      CastOptions options = CastOptions{}) {
  const std::string function_name = "cast";
  options.to_type = expected->type();
  CheckScalarUnary(function_name, input, expected, &options);
}
110 
CheckCastFails(std::shared_ptr<Array> input,CastOptions options)111 static void CheckCastFails(std::shared_ptr<Array> input, CastOptions options) {
112   ASSERT_RAISES(Invalid, Cast(input, options))
113       << "\n  to_type:   " << options.to_type->ToString()
114       << "\n  from_type: " << input->type()->ToString()
115       << "\n  input:     " << input->ToString();
116 
117   // For the scalars, check that at least one of the input fails (since many
118   // of the tests contains a mix of passing and failing values). In some
119   // cases we will want to check more precisely
120   int64_t num_failing = 0;
121   for (int64_t i = 0; i < input->length(); ++i) {
122     ASSERT_OK_AND_ASSIGN(auto scalar, input->GetScalar(i));
123     num_failing += static_cast<int>(Cast(scalar, options).status().IsInvalid());
124   }
125   ASSERT_GT(num_failing, 0);
126 }
127 
CheckCastZeroCopy(std::shared_ptr<Array> input,std::shared_ptr<DataType> to_type,CastOptions options=CastOptions::Safe ())128 static void CheckCastZeroCopy(std::shared_ptr<Array> input,
129                               std::shared_ptr<DataType> to_type,
130                               CastOptions options = CastOptions::Safe()) {
131   ASSERT_OK_AND_ASSIGN(auto converted, Cast(*input, to_type, options));
132   ValidateOutput(*converted);
133 
134   ASSERT_EQ(input->data()->buffers.size(), converted->data()->buffers.size());
135   for (size_t i = 0; i < input->data()->buffers.size(); ++i) {
136     AssertBufferSame(*input, *converted, static_cast<int>(i));
137   }
138 }
139 
MaskArrayWithNullsAt(std::shared_ptr<Array> input,std::vector<int> indices_to_mask)140 static std::shared_ptr<Array> MaskArrayWithNullsAt(std::shared_ptr<Array> input,
141                                                    std::vector<int> indices_to_mask) {
142   auto masked = input->data()->Copy();
143   masked->buffers[0] = *AllocateEmptyBitmap(input->length());
144   masked->null_count = kUnknownNullCount;
145 
146   using arrow::internal::Bitmap;
147   Bitmap is_valid(masked->buffers[0], 0, input->length());
148   if (auto original = input->null_bitmap()) {
149     is_valid.CopyFrom(Bitmap(original, input->offset(), input->length()));
150   } else {
151     is_valid.SetBitsTo(true);
152   }
153 
154   for (int i : indices_to_mask) {
155     is_valid.SetBitTo(i, false);
156   }
157   return MakeArray(masked);
158 }
159 
TEST(Cast,CanCast)160 TEST(Cast, CanCast) {
161   auto ExpectCanCast = [](std::shared_ptr<DataType> from,
162                           std::vector<std::shared_ptr<DataType>> to_set,
163                           bool expected = true) {
164     for (auto to : to_set) {
165       EXPECT_EQ(CanCast(*from, *to), expected) << "  from: " << from->ToString() << "\n"
166                                                << "    to: " << to->ToString();
167     }
168   };
169 
170   auto ExpectCannotCast = [ExpectCanCast](std::shared_ptr<DataType> from,
171                                           std::vector<std::shared_ptr<DataType>> to_set) {
172     ExpectCanCast(from, to_set, /*expected=*/false);
173   };
174 
175   ExpectCanCast(null(), {boolean()});
176   ExpectCanCast(null(), kNumericTypes);
177   ExpectCanCast(null(), kBaseBinaryTypes);
178   ExpectCanCast(
179       null(), {date32(), date64(), time32(TimeUnit::MILLI), timestamp(TimeUnit::SECOND)});
180   ExpectCanCast(dictionary(uint16(), null()), {null()});
181 
182   ExpectCanCast(boolean(), {boolean()});
183   ExpectCanCast(boolean(), kNumericTypes);
184   ExpectCanCast(boolean(), {utf8(), large_utf8()});
185   ExpectCanCast(dictionary(int32(), boolean()), {boolean()});
186 
187   ExpectCannotCast(boolean(), {null()});
188   ExpectCannotCast(boolean(), {binary(), large_binary()});
189   ExpectCannotCast(boolean(), {date32(), date64(), time32(TimeUnit::MILLI),
190                                timestamp(TimeUnit::SECOND)});
191 
192   for (auto from_numeric : kNumericTypes) {
193     ExpectCanCast(from_numeric, {boolean()});
194     ExpectCanCast(from_numeric, kNumericTypes);
195     ExpectCanCast(from_numeric, {utf8(), large_utf8()});
196     ExpectCanCast(dictionary(int32(), from_numeric), {from_numeric});
197 
198     ExpectCannotCast(from_numeric, {null()});
199   }
200 
201   for (auto from_base_binary : kBaseBinaryTypes) {
202     ExpectCanCast(from_base_binary, {boolean()});
203     ExpectCanCast(from_base_binary, kNumericTypes);
204     ExpectCanCast(from_base_binary, kBaseBinaryTypes);
205     ExpectCanCast(dictionary(int64(), from_base_binary), {from_base_binary});
206 
207     // any cast which is valid for the dictionary is valid for the DictionaryArray
208     ExpectCanCast(dictionary(uint32(), from_base_binary), kBaseBinaryTypes);
209     ExpectCanCast(dictionary(int16(), from_base_binary), kNumericTypes);
210 
211     ExpectCannotCast(from_base_binary, {null()});
212   }
213 
214   ExpectCanCast(utf8(), {timestamp(TimeUnit::MILLI)});
215   ExpectCanCast(large_utf8(), {timestamp(TimeUnit::NANO)});
216   ExpectCannotCast(timestamp(TimeUnit::MICRO),
217                    {binary(), large_binary()});  // no formatting supported
218 
219   ExpectCanCast(fixed_size_binary(3),
220                 {binary(), utf8(), large_binary(), large_utf8(), fixed_size_binary(3)});
221   // Doesn't fail since a kernel exists (but it will return an error when executed)
222   // ExpectCannotCast(fixed_size_binary(3), {fixed_size_binary(5)});
223 
224   ExtensionTypeGuard smallint_guard(smallint());
225   ExpectCanCast(smallint(), {int16()});  // cast storage
226   ExpectCanCast(smallint(),
227                 kNumericTypes);  // any cast which is valid for storage is supported
228   ExpectCannotCast(null(), {smallint()});  // FIXME missing common cast from null
229 
230   ExpectCanCast(date32(), {utf8(), large_utf8()});
231   ExpectCanCast(date64(), {utf8(), large_utf8()});
232   ExpectCanCast(timestamp(TimeUnit::NANO), {utf8(), large_utf8()});
233   ExpectCanCast(timestamp(TimeUnit::MICRO), {utf8(), large_utf8()});
234   ExpectCanCast(time32(TimeUnit::MILLI), {utf8(), large_utf8()});
235   ExpectCanCast(time64(TimeUnit::NANO), {utf8(), large_utf8()});
236 }
237 
// Casting an int32 array to int32 must hand back the original buffers.
TEST(Cast, SameTypeZeroCopy) {
  std::shared_ptr<Array> original = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]");
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> casted, Cast(*original, int32()));

  // Buffer 0 and buffer 1 must both be shared with the input
  for (int buffer_index : {0, 1}) {
    AssertBufferSame(*original, *casted, buffer_index);
  }
}
245 
// Casting a chunked array without any chunk yields an empty chunked array of
// the target type.
TEST(Cast, ZeroChunks) {
  auto empty_int32 = std::make_shared<ChunkedArray>(ArrayVector{}, int32());
  ASSERT_OK_AND_ASSIGN(Datum casted, Cast(empty_int32, utf8()));

  ASSERT_EQ(casted.kind(), Datum::CHUNKED_ARRAY);

  const ChunkedArray empty_utf8(ArrayVector{}, utf8());
  AssertChunkedEqual(*casted.chunked_array(), empty_utf8);
}
253 
// Calling the "cast" function without passing any options is expected to
// raise Invalid.
TEST(Cast, CastDoesNotProvideDefaultOptions) {
  std::shared_ptr<Array> values = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]");
  ASSERT_RAISES(Invalid, CallFunction("cast", {values}));
}
258 
// Booleans cast to int32 as 0/1; the same JSON text describes both sides.
TEST(Cast, FromBoolean) {
  const std::string json = "[1, 0, null, 1, 0, 1, 1, null, 0, 0, 1]";
  auto as_boolean = ArrayFromJSON(boolean(), json);
  auto as_int32 = ArrayFromJSON(int32(), json);
  CheckCast(as_boolean, as_int32);
}
263 
// Numeric values cast to boolean: zero becomes false, anything else true.
TEST(Cast, ToBoolean) {
  const auto expected = ArrayFromJSON(boolean(), "[false, null, true, true, false]");

  for (const auto& numeric_type : kNumericTypes) {
    CheckCast(ArrayFromJSON(numeric_type, "[0, null, 127, 1, 0]"), expected);
  }

  // Check negative numbers
  for (const auto& signed_type : {int8(), float64()}) {
    CheckCast(ArrayFromJSON(signed_type, "[0, null, 127, -1, 0]"), expected);
  }
}
276 
// Widening integer casts keep every value (and null) unchanged.
TEST(Cast, ToIntUpcast) {
  // int8 to int32
  CheckCast(ArrayFromJSON(int8(), "[0, null, 127, -1, 0]"),
            ArrayFromJSON(int32(), "[0, null, 127, -1, 0]"));

  // uint8 to int16, no overflow/underrun
  CheckCast(ArrayFromJSON(uint8(), "[0, 100, 200, 255, 0]"),
            ArrayFromJSON(int16(), "[0, 100, 200, 255, 0]"));
}
288 
// A value that would overflow int16 must not make the cast fail when its
// slot is null.
TEST(Cast, OverflowInNullSlot) {
  auto int32_values = ArrayFromJSON(int32(), "[0, 87654321, 2000, 1000, 0]");
  // Hide the out-of-range value at index 1 behind a null
  auto masked = MaskArrayWithNullsAt(int32_values, {1});
  auto expected = ArrayFromJSON(int16(), "[0, null, 2000, 1000, 0]");
  CheckCast(masked, expected);
}
294 
// Narrowing integer casts with safe options: in-range values pass, anything
// outside the target range raises.
TEST(Cast, ToIntDowncastSafe) {
  const auto to_uint8 = CastOptions::Safe(uint8());
  const auto to_int16 = CastOptions::Safe(int16());

  // int16 to uint8, no overflow/underflow
  {
    const std::string in_range = "[0, null, 200, 1, 2]";
    CheckCast(ArrayFromJSON(int16(), in_range), ArrayFromJSON(uint8(), in_range));
  }

  // int16 to uint8, overflow
  CheckCastFails(ArrayFromJSON(int16(), "[0, null, 256, 0, 0]"), to_uint8);
  // ... and underflow
  CheckCastFails(ArrayFromJSON(int16(), "[0, null, -1, 0, 0]"), to_uint8);

  // int32 to int16, no overflow/underflow
  {
    const std::string in_range = "[0, null, 2000, 1, 2]";
    CheckCast(ArrayFromJSON(int32(), in_range), ArrayFromJSON(int16(), in_range));
  }

  // int32 to int16, overflow
  CheckCastFails(ArrayFromJSON(int32(), "[0, null, 2000, 70000, 2]"), to_int16);

  // ... and underflow
  CheckCastFails(ArrayFromJSON(int32(), "[0, null, 2000, -70000, 2]"), to_int16);

  // int32 to uint8, underflow
  CheckCastFails(ArrayFromJSON(int32(), "[0, null, 2000, -70000, 2]"), to_uint8);
}
322 
// Signed -> unsigned casts: negative or too-large inputs fail with safe
// options and wrap around when integer overflow is allowed.
TEST(Cast, IntegerSignedToUnsigned) {
  const auto with_negatives =
      ArrayFromJSON(int32(), "[-2147483648, null, -1, 65535, 2147483647]");

  // Same width, wider, then narrower: all fail under safe options
  for (const auto& unsigned_type : {uint32(), uint64(), uint16()}) {
    CheckCastFails(with_negatives, CastOptions::Safe(unsigned_type));
  }

  CastOptions wrapping;
  wrapping.allow_int_overflow = true;

  CheckCast(with_negatives,
            ArrayFromJSON(uint32(), "[2147483648, null, 4294967295, 65535, 2147483647]"),
            wrapping);
  CheckCast(with_negatives,
            ArrayFromJSON(
                uint64(),
                "[18446744071562067968, null, 18446744073709551615, 65535, 2147483647]"),
            wrapping);
  CheckCast(with_negatives, ArrayFromJSON(uint16(), "[0, null, 65535, 65535, 65535]"),
            wrapping);

  // Fail because of overflow (instead of underflow).
  const auto too_large = ArrayFromJSON(int32(), "[0, null, 0, 65536, 2147483647]");
  CheckCastFails(too_large, CastOptions::Safe(uint16()));

  CheckCast(too_large, ArrayFromJSON(uint16(), "[0, null, 0, 0, 65535]"), wrapping);
}
351 
// Unsigned -> signed casts: values above the signed maximum fail with safe
// options and wrap around when integer overflow is allowed.
TEST(Cast, IntegerUnsignedToSigned) {
  const auto unsigned_values = ArrayFromJSON(uint32(), "[4294967295, null, 0, 32768]");

  // Same width
  CheckCastFails(unsigned_values, CastOptions::Safe(int32()));

  // Narrower, for the full array and for a slice starting at index 1
  const auto to_int16 = CastOptions::Safe(int16());
  CheckCastFails(unsigned_values, to_int16);
  CheckCastFails(unsigned_values->Slice(1), to_int16);

  CastOptions wrapping;
  wrapping.allow_int_overflow = true;

  CheckCast(unsigned_values, ArrayFromJSON(int32(), "[-1, null, 0, 32768]"), wrapping);
  CheckCast(unsigned_values, ArrayFromJSON(int64(), "[4294967295, null, 0, 32768]"),
            wrapping);
  CheckCast(unsigned_values, ArrayFromJSON(int16(), "[-1, null, 0, -32768]"), wrapping);
}
368 
// Narrowing integer casts with allow_int_overflow: out-of-range values wrap
// around instead of raising.
TEST(Cast, ToIntDowncastUnsafe) {
  CastOptions wrapping;
  wrapping.allow_int_overflow = true;

  // int16 to uint8, no overflow/underflow
  {
    auto source = ArrayFromJSON(int16(), "[0, null, 200, 1, 2]");
    auto target = ArrayFromJSON(uint8(), "[0, null, 200, 1, 2]");
    CheckCast(source, target, wrapping);
  }

  // int16 to uint8, with overflow/underflow
  {
    auto source = ArrayFromJSON(int16(), "[0, null, 256, 1, 2, -1]");
    auto target = ArrayFromJSON(uint8(), "[0, null, 0, 1, 2, 255]");
    CheckCast(source, target, wrapping);
  }

  // int32 to int16, no overflow/underflow
  {
    auto source = ArrayFromJSON(int32(), "[0, null, 2000, 1, 2, -1]");
    auto target = ArrayFromJSON(int16(), "[0, null, 2000, 1, 2, -1]");
    CheckCast(source, target, wrapping);
  }

  // int32 to int16, with overflow/underflow
  {
    auto source = ArrayFromJSON(int32(), "[0, null, 2000, 70000, -70000]");
    auto target = ArrayFromJSON(int16(), "[0, null, 2000, 4464, -4464]");
    CheckCast(source, target, wrapping);
  }
}
389 
// Float -> integer casts: whole values always pass; fractional values fail
// unless allow_float_truncate is set, in which case the fraction is dropped.
TEST(Cast, FloatingToInt) {
  const std::string whole_json = "[1.0, null, 0.0, -1.0, 5.0]";
  const std::string fractional_json = "[1.5, 0.0, null, 0.5, -1.5, 5.5]";

  for (const auto& float_type : {float32(), float64()}) {
    for (const auto& int_type : {int32(), int64()}) {
      // float to int no truncation
      CheckCast(ArrayFromJSON(float_type, whole_json),
                ArrayFromJSON(int_type, "[1, null, 0, -1, 5]"));

      // float to int truncate error
      auto options = CastOptions::Safe(int_type);
      CheckCastFails(ArrayFromJSON(float_type, fractional_json), options);

      // float to int truncate allowed
      options.allow_float_truncate = true;
      CheckCast(ArrayFromJSON(float_type, fractional_json),
                ArrayFromJSON(int_type, "[1, 0, null, 0, -1, 5]"), options);
    }
  }
}
408 
// Integer -> float casts fail under safe options when a value cannot be
// represented exactly in the target floating point type.
TEST(Cast, IntToFloating) {
  // 2**24 and 2**24 + 1
  const std::string around_two_24 = "[16777216, 16777217]";
  for (const auto& int_type : {uint32(), int32()}) {
    CheckCastFails(ArrayFromJSON(int_type, around_two_24), CastOptions::Safe(float32()));

    // Keeping only the first element makes the cast succeed
    auto first_only = ArrayFromJSON(int_type, around_two_24)->Slice(0, 1);
    auto first_only_float = ArrayFromJSON(float32(), around_two_24)->Slice(0, 1);
    CheckCast(first_only, first_only_float);
  }

  auto int64_extremes = ArrayFromJSON(int64(),
                                      "[-9223372036854775808, -9223372036854775807, 0,"
                                      "  9223372036854775806,  9223372036854775807]");
  CheckCastFails(int64_extremes, CastOptions::Safe(float64()));

  // Masking those values with nulls makes this safe
  CheckCast(MaskArrayWithNullsAt(int64_extremes, {0, 1, 3, 4}),
            ArrayFromJSON(float64(), "[null, null, 0, null, null]"));

  // 2**53 and 2**53 + 1
  CheckCastFails(ArrayFromJSON(uint64(), "[9007199254740992, 9007199254740993]"),
                 CastOptions::Safe(float64()));
}
431 
TEST(Cast,Decimal128ToInt)432 TEST(Cast, Decimal128ToInt) {
433   auto options = CastOptions::Safe(int64());
434 
435   for (bool allow_int_overflow : {false, true}) {
436     for (bool allow_decimal_truncate : {false, true}) {
437       options.allow_int_overflow = allow_int_overflow;
438       options.allow_decimal_truncate = allow_decimal_truncate;
439 
440       auto no_overflow_no_truncation = ArrayFromJSON(decimal(38, 10), R"([
441           "02.0000000000",
442          "-11.0000000000",
443           "22.0000000000",
444         "-121.0000000000",
445         null])");
446       CheckCast(no_overflow_no_truncation,
447                 ArrayFromJSON(int64(), "[2, -11, 22, -121, null]"), options);
448     }
449   }
450 
451   for (bool allow_int_overflow : {false, true}) {
452     options.allow_int_overflow = allow_int_overflow;
453     auto truncation_but_no_overflow = ArrayFromJSON(decimal(38, 10), R"([
454           "02.1000000000",
455          "-11.0000004500",
456           "22.0000004500",
457         "-121.1210000000",
458         null])");
459 
460     options.allow_decimal_truncate = true;
461     CheckCast(truncation_but_no_overflow,
462               ArrayFromJSON(int64(), "[2, -11, 22, -121, null]"), options);
463 
464     options.allow_decimal_truncate = false;
465     CheckCastFails(truncation_but_no_overflow, options);
466   }
467 
468   for (bool allow_decimal_truncate : {false, true}) {
469     options.allow_decimal_truncate = allow_decimal_truncate;
470 
471     auto overflow_no_truncation = ArrayFromJSON(decimal(38, 10), R"([
472         "12345678901234567890000.0000000000",
473         "99999999999999999999999.0000000000",
474         null])");
475 
476     options.allow_int_overflow = true;
477     CheckCast(
478         overflow_no_truncation,
479         ArrayFromJSON(int64(),
480                       // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
481                       "[4807115922877858896, 200376420520689663, null]"),
482         options);
483 
484     options.allow_int_overflow = false;
485     CheckCastFails(overflow_no_truncation, options);
486   }
487 
488   for (bool allow_int_overflow : {false, true}) {
489     for (bool allow_decimal_truncate : {false, true}) {
490       options.allow_int_overflow = allow_int_overflow;
491       options.allow_decimal_truncate = allow_decimal_truncate;
492 
493       auto overflow_and_truncation = ArrayFromJSON(decimal(38, 10), R"([
494         "12345678901234567890000.0045345000",
495         "99999999999999999999999.0000344300",
496         null])");
497 
498       if (options.allow_int_overflow && options.allow_decimal_truncate) {
499         CheckCast(overflow_and_truncation,
500                   ArrayFromJSON(
501                       int64(),
502                       // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
503                       "[4807115922877858896, 200376420520689663, null]"),
504                   options);
505       } else {
506         CheckCastFails(overflow_and_truncation, options);
507       }
508     }
509   }
510 
511   Decimal128Builder builder(decimal(38, -4));
512   for (auto d : {Decimal128("1234567890000."), Decimal128("-120000.")}) {
513     ASSERT_OK_AND_ASSIGN(d, d.Rescale(0, -4));
514     ASSERT_OK(builder.Append(d));
515   }
516   ASSERT_OK_AND_ASSIGN(auto negative_scale, builder.Finish());
517   options.allow_int_overflow = true;
518   options.allow_decimal_truncate = true;
519   CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, -120000]"), options);
520 }
521 
TEST(Cast,Decimal256ToInt)522 TEST(Cast, Decimal256ToInt) {
523   auto options = CastOptions::Safe(int64());
524 
525   for (bool allow_int_overflow : {false, true}) {
526     for (bool allow_decimal_truncate : {false, true}) {
527       options.allow_int_overflow = allow_int_overflow;
528       options.allow_decimal_truncate = allow_decimal_truncate;
529 
530       auto no_overflow_no_truncation = ArrayFromJSON(decimal256(40, 10), R"([
531           "02.0000000000",
532          "-11.0000000000",
533           "22.0000000000",
534         "-121.0000000000",
535         null])");
536       CheckCast(no_overflow_no_truncation,
537                 ArrayFromJSON(int64(), "[2, -11, 22, -121, null]"), options);
538     }
539   }
540 
541   for (bool allow_int_overflow : {false, true}) {
542     options.allow_int_overflow = allow_int_overflow;
543     auto truncation_but_no_overflow = ArrayFromJSON(decimal256(40, 10), R"([
544           "02.1000000000",
545          "-11.0000004500",
546           "22.0000004500",
547         "-121.1210000000",
548         null])");
549 
550     options.allow_decimal_truncate = true;
551     CheckCast(truncation_but_no_overflow,
552               ArrayFromJSON(int64(), "[2, -11, 22, -121, null]"), options);
553 
554     options.allow_decimal_truncate = false;
555     CheckCastFails(truncation_but_no_overflow, options);
556   }
557 
558   for (bool allow_decimal_truncate : {false, true}) {
559     options.allow_decimal_truncate = allow_decimal_truncate;
560 
561     auto overflow_no_truncation = ArrayFromJSON(decimal256(40, 10), R"([
562         "1234567890123456789000000.0000000000",
563         "9999999999999999999999999.0000000000",
564         null])");
565 
566     options.allow_int_overflow = true;
567     CheckCast(overflow_no_truncation,
568               ArrayFromJSON(
569                   int64(),
570                   // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64
571                   "[1096246371337547584, 1590897978359414783, null]"),
572               options);
573 
574     options.allow_int_overflow = false;
575     CheckCastFails(overflow_no_truncation, options);
576   }
577 
578   for (bool allow_int_overflow : {false, true}) {
579     for (bool allow_decimal_truncate : {false, true}) {
580       options.allow_int_overflow = allow_int_overflow;
581       options.allow_decimal_truncate = allow_decimal_truncate;
582 
583       auto overflow_and_truncation = ArrayFromJSON(decimal256(40, 10), R"([
584         "1234567890123456789000000.0045345000",
585         "9999999999999999999999999.0000344300",
586         null])");
587 
588       if (options.allow_int_overflow && options.allow_decimal_truncate) {
589         CheckCast(
590             overflow_and_truncation,
591             ArrayFromJSON(
592                 int64(),
593                 // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64
594                 "[1096246371337547584, 1590897978359414783, null]"),
595             options);
596       } else {
597         CheckCastFails(overflow_and_truncation, options);
598       }
599     }
600   }
601 
602   Decimal256Builder builder(decimal256(40, -4));
603   for (auto d : {Decimal256("1234567890000."), Decimal256("-120000.")}) {
604     ASSERT_OK_AND_ASSIGN(d, d.Rescale(0, -4));
605     ASSERT_OK(builder.Append(d));
606   }
607   ASSERT_OK_AND_ASSIGN(auto negative_scale, builder.Finish());
608   options.allow_int_overflow = true;
609   options.allow_decimal_truncate = true;
610   CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, -120000]"), options);
611 }
612 
// Integer -> decimal casts. The target precision has to cover the widest
// value of the source integer type plus the target scale, as the
// "insufficient output precision" cases at the end demonstrate.
TEST(Cast, IntegerToDecimal) {
  // uint64 values can have 20 digits (18446744073709551615), so with scale 2
  // a precision of 22 is required for the cast to be accepted for every type
  // in kIntegerTypes.
  for (auto decimal_type : {decimal128(22, 2), decimal256(22, 2)}) {
    for (auto integer_type : kIntegerTypes) {
      CheckCast(
          ArrayFromJSON(integer_type, "[0, 7, null, 100, 99]"),
          ArrayFromJSON(decimal_type, R"(["0.00", "7.00", null, "100.00", "99.00"])"));
    }
  }

  // extreme value: int64 limits have 19 digits
  for (auto decimal_type : {decimal128(19, 0), decimal256(19, 0)}) {
    CheckCast(ArrayFromJSON(int64(), "[-9223372036854775808, 9223372036854775807]"),
              ArrayFromJSON(decimal_type,
                            R"(["-9223372036854775808", "9223372036854775807"])"));
  }
  // extreme value: the uint64 maximum has 20 digits
  for (auto decimal_type : {decimal128(20, 0), decimal256(20, 0)}) {
    CheckCast(ArrayFromJSON(uint64(), "[0, 18446744073709551615]"),
              ArrayFromJSON(decimal_type, R"(["0", "18446744073709551615"])"));
  }

  // insufficient output precision
  {
    CastOptions options;

    // int8 (3 digits) with scale 3 does not fit in precision 5
    options.to_type = decimal128(5, 3);
    CheckCastFails(ArrayFromJSON(int8(), "[0]"), options);

    // int32 (10 digits) with scale 67 does not fit in precision 76
    options.to_type = decimal256(76, 67);
    CheckCastFails(ArrayFromJSON(int32(), "[0]"), options);
  }
}
642 
// Decimal128 -> Decimal128 casts: rescaling and precision changes, with and
// without allow_decimal_truncate.
TEST(Cast, Decimal128ToDecimal128) {
  CastOptions options;

  // Whole values convert in both directions regardless of the truncate flag
  const auto whole_38_10 = ArrayFromJSON(decimal(38, 10), R"([
          "02.0000000000",
          "30.0000000000",
          "22.0000000000",
        "-121.0000000000",
        null])");
  const auto whole_28_0 = ArrayFromJSON(decimal(28, 0), R"([
          "02.",
          "30.",
          "22.",
        "-121.",
        null])");
  for (bool allow_decimal_truncate : {false, true}) {
    options.allow_decimal_truncate = allow_decimal_truncate;

    CheckCast(whole_38_10, whole_28_0, options);
    CheckCast(whole_28_0, whole_38_10, options);
  }

  // Same scale, different precision
  const auto same_scale_5_2 = ArrayFromJSON(decimal(5, 2), R"([
          "12.34",
           "0.56"])");
  const auto same_scale_4_2 = ArrayFromJSON(decimal(4, 2), R"([
          "12.34",
           "0.56"])");
  for (bool allow_decimal_truncate : {false, true}) {
    options.allow_decimal_truncate = allow_decimal_truncate;

    CheckCast(same_scale_5_2, same_scale_4_2, options);
    CheckCast(same_scale_4_2, same_scale_5_2, options);
  }

  const auto fractional_38_10 = ArrayFromJSON(decimal(38, 10), R"([
      "-02.1234567890",
       "30.1234567890",
      null])");

  const auto truncated_28_0 = ArrayFromJSON(decimal(28, 0), R"([
      "-02.",
       "30.",
      null])");

  const auto roundtripped_38_10 = ArrayFromJSON(decimal(38, 10), R"([
      "-02.0000000000",
       "30.0000000000",
      null])");

  // Rescale which leads to truncation
  options.allow_decimal_truncate = true;
  CheckCast(fractional_38_10, truncated_28_0, options);
  CheckCast(truncated_28_0, roundtripped_38_10, options);

  // Without the flag the lossy direction fails, the lossless one still works
  options.allow_decimal_truncate = false;
  options.to_type = truncated_28_0->type();
  CheckCastFails(fractional_38_10, options);
  CheckCast(truncated_28_0, roundtripped_38_10, options);

  // Precision loss without rescale leads to truncation
  const auto source_4_2 = ArrayFromJSON(decimal(4, 2), R"(["12.34"])");
  const ArrayVector lossy_targets = {
      ArrayFromJSON(decimal(3, 2), R"(["12.34"])"),
      ArrayFromJSON(decimal(4, 3), R"(["12.340"])"),
      ArrayFromJSON(decimal(2, 1), R"(["12.3"])"),
  };
  for (const auto& expected : lossy_targets) {
    options.allow_decimal_truncate = true;
    CheckCast(source_4_2, expected, options);

    options.allow_decimal_truncate = false;
    options.to_type = expected->type();
    CheckCastFails(source_4_2, options);
  }
}
721 
// Decimal256 -> Decimal256 casts: rescaling and precision changes, with and
// without allow_decimal_truncate.
TEST(Cast, Decimal256ToDecimal256) {
  CastOptions options;

  // Whole values convert in both directions regardless of the truncate flag
  const auto whole_38_10 = ArrayFromJSON(decimal256(38, 10), R"([
          "02.0000000000",
          "30.0000000000",
          "22.0000000000",
        "-121.0000000000",
        null])");
  const auto whole_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
          "02.",
          "30.",
          "22.",
        "-121.",
        null])");
  for (bool allow_decimal_truncate : {false, true}) {
    options.allow_decimal_truncate = allow_decimal_truncate;

    CheckCast(whole_38_10, whole_28_0, options);
    CheckCast(whole_28_0, whole_38_10, options);
  }

  // Same scale, different precision
  const auto same_scale_5_2 = ArrayFromJSON(decimal256(5, 2), R"([
          "12.34",
           "0.56"])");
  const auto same_scale_4_2 = ArrayFromJSON(decimal256(4, 2), R"([
          "12.34",
           "0.56"])");
  for (bool allow_decimal_truncate : {false, true}) {
    options.allow_decimal_truncate = allow_decimal_truncate;

    CheckCast(same_scale_5_2, same_scale_4_2, options);
    CheckCast(same_scale_4_2, same_scale_5_2, options);
  }

  const auto fractional_38_10 = ArrayFromJSON(decimal256(38, 10), R"([
      "-02.1234567890",
       "30.1234567890",
      null])");

  const auto truncated_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
      "-02.",
       "30.",
      null])");

  const auto roundtripped_38_10 = ArrayFromJSON(decimal256(38, 10), R"([
      "-02.0000000000",
       "30.0000000000",
      null])");

  // Rescale which leads to truncation
  options.allow_decimal_truncate = true;
  CheckCast(fractional_38_10, truncated_28_0, options);
  CheckCast(truncated_28_0, roundtripped_38_10, options);

  // Without the flag the lossy direction fails, the lossless one still works
  options.allow_decimal_truncate = false;
  options.to_type = truncated_28_0->type();
  CheckCastFails(fractional_38_10, options);
  CheckCast(truncated_28_0, roundtripped_38_10, options);

  // Precision loss without rescale leads to truncation
  const auto source_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])");
  const ArrayVector lossy_targets = {
      ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
      ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
      ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
  };
  for (const auto& expected : lossy_targets) {
    options.allow_decimal_truncate = true;
    CheckCast(source_4_2, expected, options);

    options.allow_decimal_truncate = false;
    options.to_type = expected->type();
    CheckCastFails(source_4_2, options);
  }
}
800 
// Exercises Decimal128 -> Decimal256 casts: rescales that drop only zero digits,
// precision-only changes, rescales that truncate, and precision loss.
TEST(Cast, Decimal128ToDecimal256) {
  CastOptions cast_opts;

  // Inputs whose fractional digits are all zero, and the same values at scale 0.
  const auto zero_fraction_128 = ArrayFromJSON(decimal(38, 10), R"([
          "02.0000000000",
          "30.0000000000",
          "22.0000000000",
        "-121.0000000000",
        null])");
  const auto integral_256 = ArrayFromJSON(decimal256(48, 0), R"([
          "02.",
          "30.",
          "22.",
        "-121.",
        null])");

  // Checked with truncation both disallowed and allowed.
  for (const bool truncate_ok : {false, true}) {
    cast_opts.allow_decimal_truncate = truncate_ok;
    CheckCast(zero_fraction_128, integral_256, cast_opts);
  }

  // Same scale, different precision
  const auto scale2_128 = ArrayFromJSON(decimal(5, 2), R"([
          "12.34",
           "0.56"])");
  const auto scale2_256_narrow = ArrayFromJSON(decimal256(4, 2), R"([
          "12.34",
           "0.56"])");
  const auto scale2_256_wide = ArrayFromJSON(decimal256(40, 2), R"([
          "12.34",
           "0.56"])");

  for (const bool truncate_ok : {false, true}) {
    cast_opts.allow_decimal_truncate = truncate_ok;
    CheckCast(scale2_128, scale2_256_narrow, cast_opts);
    CheckCast(scale2_128, scale2_256_wide, cast_opts);
  }

  const auto fractional_128 = ArrayFromJSON(decimal(38, 10), R"([
      "-02.1234567890",
       "30.1234567890",
      null])");

  const auto whole_128 = ArrayFromJSON(decimal(28, 0), R"([
      "-02.",
       "30.",
      null])");

  const auto whole_256 = ArrayFromJSON(decimal256(28, 0), R"([
      "-02.",
       "30.",
      null])");

  const auto rescaled_256 = ArrayFromJSON(decimal256(38, 10), R"([
      "-02.0000000000",
       "30.0000000000",
      null])");

  // Rescale which leads to truncation
  cast_opts.allow_decimal_truncate = true;
  CheckCast(fractional_128, whole_256, cast_opts);
  CheckCast(whole_128, rescaled_256, cast_opts);

  // Without permission to truncate, dropping non-zero digits must fail while the
  // upscaling direction keeps working.
  cast_opts.allow_decimal_truncate = false;
  cast_opts.to_type = whole_256->type();
  CheckCastFails(fractional_128, cast_opts);
  CheckCast(whole_128, rescaled_256, cast_opts);

  // Precision loss without rescale leads to truncation
  const auto four_digits_128 = ArrayFromJSON(decimal(4, 2), R"(["12.34"])");
  const std::vector<std::shared_ptr<Array>> lossy_targets = {
      ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
      ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
      ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
  };
  for (const auto& lossy : lossy_targets) {
    cast_opts.allow_decimal_truncate = true;
    CheckCast(four_digits_128, lossy, cast_opts);

    cast_opts.allow_decimal_truncate = false;
    cast_opts.to_type = lossy->type();
    CheckCastFails(four_digits_128, cast_opts);
  }
}
886 
// Exercises Decimal256 -> Decimal128 casts: rescales that drop only zero digits,
// precision-only changes, rescales that truncate, and precision loss.
TEST(Cast, Decimal256ToDecimal128) {
  CastOptions cast_opts;

  // Inputs whose fractional digits are all zero, and the same values at scale 0.
  const auto zero_fraction_256 = ArrayFromJSON(decimal256(42, 10), R"([
          "02.0000000000",
          "30.0000000000",
          "22.0000000000",
        "-121.0000000000",
        null])");
  const auto integral_128 = ArrayFromJSON(decimal(28, 0), R"([
          "02.",
          "30.",
          "22.",
        "-121.",
        null])");

  // Checked with truncation both disallowed and allowed.
  for (const bool truncate_ok : {false, true}) {
    cast_opts.allow_decimal_truncate = truncate_ok;
    CheckCast(zero_fraction_256, integral_128, cast_opts);
  }

  // Same scale, different precision
  const auto scale2_256 = ArrayFromJSON(decimal256(42, 2), R"([
          "12.34",
           "0.56"])");
  const auto scale2_128 = ArrayFromJSON(decimal(4, 2), R"([
          "12.34",
           "0.56"])");

  for (const bool truncate_ok : {false, true}) {
    cast_opts.allow_decimal_truncate = truncate_ok;
    CheckCast(scale2_256, scale2_128, cast_opts);
  }

  const auto fractional_256 = ArrayFromJSON(decimal256(52, 10), R"([
      "-02.1234567890",
       "30.1234567890",
      null])");

  const auto whole_256 = ArrayFromJSON(decimal256(42, 0), R"([
      "-02.",
       "30.",
      null])");

  const auto whole_128 = ArrayFromJSON(decimal(28, 0), R"([
      "-02.",
       "30.",
      null])");

  const auto rescaled_128 = ArrayFromJSON(decimal(38, 10), R"([
      "-02.0000000000",
       "30.0000000000",
      null])");

  // Rescale which leads to truncation
  cast_opts.allow_decimal_truncate = true;
  CheckCast(fractional_256, whole_128, cast_opts);
  CheckCast(whole_256, rescaled_128, cast_opts);

  // Without permission to truncate, dropping non-zero digits must fail while the
  // upscaling direction keeps working.
  cast_opts.allow_decimal_truncate = false;
  cast_opts.to_type = whole_128->type();
  CheckCastFails(fractional_256, cast_opts);
  CheckCast(whole_256, rescaled_128, cast_opts);

  // Precision loss without rescale leads to truncation
  const auto four_digits_256 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])");
  const std::vector<std::shared_ptr<Array>> lossy_targets = {
      ArrayFromJSON(decimal(3, 2), R"(["12.34"])"),
      ArrayFromJSON(decimal(4, 3), R"(["12.340"])"),
      ArrayFromJSON(decimal(2, 1), R"(["12.3"])"),
  };
  for (const auto& lossy : lossy_targets) {
    cast_opts.allow_decimal_truncate = true;
    CheckCast(four_digits_256, lossy, cast_opts);

    cast_opts.allow_decimal_truncate = false;
    cast_opts.to_type = lossy->type();
    CheckCastFails(four_digits_256, cast_opts);
  }
}
968 
// Casts float32/float64 arrays to Decimal128 and Decimal256.
TEST(Cast, FloatingToDecimal) {
  for (const auto& float_type : {float32(), float64()}) {
    for (const auto& decimal_type : {decimal(5, 2), decimal256(5, 2)}) {
      // Values that fit in (5, 2) once reduced to two fractional digits.
      const auto fitting_input =
          ArrayFromJSON(float_type, "[0.0, null, 123.45, 123.456, 999.994]");
      const auto fitting_output =
          ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "123.46", "999.99"])");
      CheckCast(fitting_input, fitting_output);

      // Overflow
      CastOptions overflow_opts;
      overflow_opts.to_type = decimal_type;
      CheckCastFails(ArrayFromJSON(float_type, "[999.996]"), overflow_opts);

      // With truncation allowed the overflowing element is expected as "0.00".
      overflow_opts.allow_decimal_truncate = true;
      const auto overflowing_input =
          ArrayFromJSON(float_type, "[0.0, null, 999.996, 123.45, 999.994]");
      const auto overflowing_output =
          ArrayFromJSON(decimal_type, R"(["0.00", null, "0.00", "123.45", "999.99"])");
      CheckCast(overflowing_input, overflowing_output, overflow_opts);
    }
  }

  // Same checks for both decimal widths; make_decimal is the type factory.
  for (const auto make_decimal : {decimal128, decimal256}) {
    const auto scale0_type = make_decimal(20, 0);
    const auto scale4_type = make_decimal(20, 4);

    // 2**64 + 2**41 (exactly representable as a float)
    CheckCast(ArrayFromJSON(float32(), "[1.8446746e+19, -1.8446746e+19]"),
              ArrayFromJSON(scale0_type,
                            R"(["18446746272732807168", "-18446746272732807168"])"));

    CheckCast(
        ArrayFromJSON(float64(), "[1.8446744073709556e+19, -1.8446744073709556e+19]"),
        ArrayFromJSON(scale0_type,
                      R"(["18446744073709555712", "-18446744073709555712"])"));

    CheckCast(ArrayFromJSON(float32(), "[1.8446746e+15, -1.8446746e+15]"),
              ArrayFromJSON(scale4_type,
                            R"(["1844674627273280.7168", "-1844674627273280.7168"])"));

    CheckCast(
        ArrayFromJSON(float64(), "[1.8446744073709556e+15, -1.8446744073709556e+15]"),
        ArrayFromJSON(scale4_type,
                      R"(["1844674407370955.5712", "-1844674407370955.5712"])"));

    // Edge cases are tested for Decimal128::FromReal() and Decimal256::FromReal
  }
}
1012 
// Casts Decimal128/Decimal256 (5, 2) arrays to float32 and float64.
TEST(Cast, DecimalToFloating) {
  for (const auto& float_type : {float32(), float64()}) {
    const auto expected_floats = ArrayFromJSON(float_type, "[0.0, null, 123.45, 999.99]");
    for (const auto& decimal_type : {decimal(5, 2), decimal256(5, 2)}) {
      const auto decimals =
          ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "999.99"])");
      CheckCast(decimals, expected_floats);
    }
  }

  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal()
}
1023 
TEST(Cast,TimestampToTimestamp)1024 TEST(Cast, TimestampToTimestamp) {
1025   struct TimestampTypePair {
1026     std::shared_ptr<DataType> coarse, fine;
1027   };
1028 
1029   CastOptions options;
1030 
1031   for (auto types : {
1032            TimestampTypePair{timestamp(TimeUnit::SECOND), timestamp(TimeUnit::MILLI)},
1033            TimestampTypePair{timestamp(TimeUnit::MILLI), timestamp(TimeUnit::MICRO)},
1034            TimestampTypePair{timestamp(TimeUnit::MICRO), timestamp(TimeUnit::NANO)},
1035        }) {
1036     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1037     auto promoted = ArrayFromJSON(types.fine, "[0, null, 200000, 1000, 2000]");
1038 
1039     // multiply/promote
1040     CheckCast(coarse, promoted);
1041 
1042     auto will_be_truncated = ArrayFromJSON(types.fine, "[0, null, 200456, 1123, 2456]");
1043 
1044     // with truncation disallowed, fails
1045     options.allow_time_truncate = false;
1046     options.to_type = types.coarse;
1047     CheckCastFails(will_be_truncated, options);
1048 
1049     // with truncation allowed, divide/truncate
1050     options.allow_time_truncate = true;
1051     CheckCast(will_be_truncated, coarse, options);
1052   }
1053 
1054   for (auto types : {
1055            TimestampTypePair{timestamp(TimeUnit::SECOND), timestamp(TimeUnit::MICRO)},
1056            TimestampTypePair{timestamp(TimeUnit::MILLI), timestamp(TimeUnit::NANO)},
1057        }) {
1058     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1059     auto promoted = ArrayFromJSON(types.fine, "[0, null, 200000000, 1000000, 2000000]");
1060 
1061     // multiply/promote
1062     CheckCast(coarse, promoted);
1063 
1064     auto will_be_truncated =
1065         ArrayFromJSON(types.fine, "[0, null, 200456000, 1123000, 2456000]");
1066 
1067     // with truncation disallowed, fails
1068     options.allow_time_truncate = false;
1069     options.to_type = types.coarse;
1070     CheckCastFails(will_be_truncated, options);
1071 
1072     // with truncation allowed, divide/truncate
1073     options.allow_time_truncate = true;
1074     CheckCast(will_be_truncated, coarse, options);
1075   }
1076 
1077   for (auto types : {
1078            TimestampTypePair{timestamp(TimeUnit::SECOND), timestamp(TimeUnit::NANO)},
1079        }) {
1080     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1081     auto promoted =
1082         ArrayFromJSON(types.fine, "[0, null, 200000000000, 1000000000, 2000000000]");
1083 
1084     // multiply/promote
1085     CheckCast(coarse, promoted);
1086 
1087     auto will_be_truncated =
1088         ArrayFromJSON(types.fine, "[0, null, 200456000000, 1123000000, 2456000000]");
1089 
1090     // with truncation disallowed, fails
1091     options.allow_time_truncate = false;
1092     options.to_type = types.coarse;
1093     CheckCastFails(will_be_truncated, options);
1094 
1095     // with truncation allowed, divide/truncate
1096     options.allow_time_truncate = true;
1097     CheckCast(will_be_truncated, coarse, options);
1098   }
1099 }
1100 
// Zero-copy casts between timestamp[s] and itself / int64, in both directions.
TEST(Cast, TimestampZeroCopy) {
  const char* values_json = "[0, null, 2000, 1000, 0]";
  const auto seconds_type = timestamp(TimeUnit::SECOND);

  // timestamp[s] -> timestamp[s]
  CheckCastZeroCopy(ArrayFromJSON(seconds_type, values_json), seconds_type);
  // ARROW-1773, cast to integer
  CheckCastZeroCopy(ArrayFromJSON(seconds_type, values_json), int64());
  // int64 -> timestamp[s]
  CheckCastZeroCopy(ArrayFromJSON(int64(), values_json), seconds_type);
}
1113 
// Casting these second-resolution timestamps to nanoseconds is expected to fail.
TEST(Cast, TimestampToTimestampMultiplyOverflow) {
  // 1000-01-01, 1800-01-01 , 2000-01-01, 2300-01-01, 3000-01-01
  const auto seconds_input =
      ArrayFromJSON(timestamp(TimeUnit::SECOND),
                    "[-30610224000, -5364662400, 946684800, 10413792000, 32503680000]");

  CastOptions to_nanos;
  to_nanos.to_type = timestamp(TimeUnit::NANO);
  CheckCastFails(seconds_input, to_nanos);
}
1123 
// JSON fixtures shared by the timestamp cast tests below.

// Sixteen timestamp strings followed by a null, with fractional seconds of up to
// nine digits. The tests parse it as timestamp(TimeUnit::NANO).
constexpr char kTimestampJson[] =
    R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
          "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
          "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
          "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])";
// The same entries as kTimestampJson with the fractional seconds removed, so the
// tests can parse it with any timestamp unit.
constexpr char kTimestampSecondsJson[] =
    R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
          "1899-01-01T00:59:20","2033-05-18T03:33:20",
          "2020-01-01T01:05:05", "2019-12-31T02:10:10",
          "2019-12-30T03:15:15", "2009-12-31T04:20:20",
          "2010-01-01T05:25:25", "2010-01-03T06:30:30",
          "2010-01-04T07:35:35", "2006-01-01T08:40:40",
          "2005-12-31T09:45:45", "2008-12-28", "2008-12-29",
          "2012-01-01 01:02:03", null])";
// Two timestamps, in the years 1677 and 2262, with six fractional digits. The
// tests parse it as timestamp(TimeUnit::MICRO).
constexpr char kTimestampExtremeJson[] =
    R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])";
1143 
// Casts timezone-less timestamps to date32 and date64.
TEST(Cast, TimestampToDate) {
  // See scalar_temporal_test.cc
  auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson);
  auto date_32 = ArrayFromJSON(date32(),
                               R"([
          0, 11016, -25932, 23148,
          18262, 18261, 18260, 14609,
          14610, 14612, 14613, 13149,
          13148, 14241, 14242, 15340, null
      ])");
  auto date_64 = ArrayFromJSON(date64(),
                               R"([
          0, 951782400000, -2240524800000, 1999987200000,
          1577836800000, 1577750400000, 1577664000000, 1262217600000,
          1262304000000, 1262476800000, 1262563200000, 1136073600000,
          1135987200000, 1230422400000, 1230508800000, 1325376000000, null
      ])");
  // See TestOutsideNanosecondRange in scalar_temporal_test.cc
  // Uses the shared fixture rather than a second copy of the same literal.
  auto timestamps_extreme =
      ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampExtremeJson);
  auto date_32_extreme = ArrayFromJSON(date32(), "[-106753, 106753]");
  auto date_64_extreme = ArrayFromJSON(date64(), "[-9223459200000, 9223459200000]");

  CheckCast(timestamps, date_32);
  CheckCast(timestamps, date_64);
  CheckCast(timestamps_extreme, date_32_extreme);
  CheckCast(timestamps_extreme, date_64_extreme);

  // Whole-second inputs give the same dates whatever the timestamp unit.
  for (auto u : TimeUnit::values()) {
    // Parsed once per unit and reused for both date targets.
    auto whole_seconds = ArrayFromJSON(timestamp(u), kTimestampSecondsJson);
    CheckCast(whole_seconds, date_32);
    CheckCast(whole_seconds, date_64);
  }
}
1178 
// Casts timezone-aware timestamps to date32 and date64, and checks that an
// unknown timezone makes the cast fail.
TEST(Cast, ZonedTimestampToDate) {
#ifdef _WIN32
  // TODO(ARROW-13168): we lack tzdb on Windows
  GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows";
#endif

  // Nanosecond timestamps in Pacific/Marquesas.
  // See TestZoned in scalar_temporal_test.cc
  const auto marquesas_timestamps =
      ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson);
  const auto marquesas_date32 = ArrayFromJSON(date32(), R"([
          -1, 11016, -25933, 23147,
          18261, 18260, 18259, 14608,
          14609, 14611, 14612, 13148,
          13148, 14240, 14241, 15339, null
      ])");
  const auto marquesas_date64 = ArrayFromJSON(date64(), R"([
          -86400000, 951782400000, -2240611200000, 1999900800000,
          1577750400000, 1577664000000, 1577577600000, 1262131200000,
          1262217600000, 1262390400000, 1262476800000, 1135987200000,
          1135987200000, 1230336000000, 1230422400000, 1325289600000, null
      ])");
  CheckCast(marquesas_timestamps, marquesas_date32);
  CheckCast(marquesas_timestamps, marquesas_date64);

  // Whole-second timestamps in Australia/Broken_Hill, for every time unit.
  const auto broken_hill_date32 = ArrayFromJSON(date32(), R"([
          0, 11017, -25932, 23148,
          18262, 18261, 18260, 14609,
          14610, 14612, 14613, 13149,
          13148, 14241, 14242, 15340, null
      ])");
  const auto broken_hill_date64 = ArrayFromJSON(date64(), R"([
          0, 951868800000, -2240524800000, 1999987200000, 1577836800000,
          1577750400000, 1577664000000, 1262217600000, 1262304000000,
          1262476800000, 1262563200000, 1136073600000, 1135987200000,
          1230422400000, 1230508800000, 1325376000000, null
      ])");

  for (const auto unit : TimeUnit::values()) {
    const auto broken_hill_timestamps =
        ArrayFromJSON(timestamp(unit, "Australia/Broken_Hill"), kTimestampSecondsJson);
    CheckCast(broken_hill_timestamps, broken_hill_date32);
    CheckCast(broken_hill_timestamps, broken_hill_date64);
  }

  // Invalid timezone
  for (const auto unit : TimeUnit::values()) {
    const auto unknown_zone_timestamps =
        ArrayFromJSON(timestamp(unit, "Mars/Mariner_Valley"), kTimestampSecondsJson);
    for (const auto& date_type : {date32(), date64()}) {
      CheckCastFails(unknown_zone_timestamps, CastOptions::Unsafe(date_type));
    }
  }
}
1234 
TEST(Cast,TimestampToTime)1235 TEST(Cast, TimestampToTime) {
1236   // See scalar_temporal_test.cc
1237   auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson);
1238   // See TestOutsideNanosecondRange in scalar_temporal_test.cc
1239   auto timestamps_extreme =
1240       ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampExtremeJson);
1241   auto timestamps_us = ArrayFromJSON(timestamp(TimeUnit::MICRO), R"([
1242           "1970-01-01T00:00:59.123456","2000-02-29T23:23:23.999999",
1243           "1899-01-01T00:59:20.001001","2033-05-18T03:33:20.000000",
1244           "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
1245           "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
1246           "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
1247           "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
1248           "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])");
1249   auto timestamps_ms = ArrayFromJSON(timestamp(TimeUnit::MILLI), R"([
1250           "1970-01-01T00:00:59.123","2000-02-29T23:23:23.999",
1251           "1899-01-01T00:59:20.001","2033-05-18T03:33:20.000",
1252           "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
1253           "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004",
1254           "2010-01-01T05:25:25.005", "2010-01-03T06:30:30.006",
1255           "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
1256           "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])");
1257   auto timestamps_s = ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson);
1258 
1259   auto times = ArrayFromJSON(time64(TimeUnit::NANO), R"([
1260           59123456789, 84203999999999, 3560001001001, 12800000000000,
1261           3905001000000, 7810002000000, 11715003000000, 15620004132000,
1262           19525005321000, 23430006163000, 27335000000000, 31240000000000,
1263           35145000000000, 0, 0, 3723000000000, null
1264       ])");
1265   auto times_ns_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
1266           59123456, 84203999999, 3560001001, 12800000000,
1267           3905001000, 7810002000, 11715003000, 15620004132,
1268           19525005321, 23430006163, 27335000000, 31240000000,
1269           35145000000, 0, 0, 3723000000, null
1270       ])");
1271   auto times_ns_ms = ArrayFromJSON(time32(TimeUnit::MILLI), R"([
1272           59123, 84203999, 3560001, 12800000,
1273           3905001, 7810002, 11715003, 15620004,
1274           19525005, 23430006, 27335000, 31240000,
1275           35145000, 0, 0, 3723000, null
1276       ])");
1277   auto times_us_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
1278           59123456000, 84203999999000, 3560001001000, 12800000000000,
1279           3905001000000, 7810002000000, 11715003000000, 15620004132000,
1280           19525005321000, 23430006163000, 27335000000000, 31240000000000,
1281           35145000000000, 0, 0, 3723000000000, null
1282       ])");
1283   auto times_ms_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
1284           59123000000, 84203999000000, 3560001000000, 12800000000000,
1285           3905001000000, 7810002000000, 11715003000000, 15620004000000,
1286           19525005000000, 23430006000000, 27335000000000, 31240000000000,
1287           35145000000000, 0, 0, 3723000000000, null
1288       ])");
1289   auto times_ms_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
1290           59123000, 84203999000, 3560001000, 12800000000,
1291           3905001000, 7810002000, 11715003000, 15620004000,
1292           19525005000, 23430006000, 27335000000, 31240000000,
1293           35145000000, 0, 0, 3723000000, null
1294       ])");
1295 
1296   auto times_extreme = ArrayFromJSON(time64(TimeUnit::MICRO), "[59123456, 84203999999]");
1297   auto times_s = ArrayFromJSON(time32(TimeUnit::SECOND), R"([
1298           59, 84203, 3560, 12800,
1299           3905, 7810, 11715, 15620,
1300           19525, 23430, 27335, 31240,
1301           35145, 0, 0, 3723, null
1302       ])");
1303   auto times_ms = ArrayFromJSON(time32(TimeUnit::MILLI), R"([
1304           59000, 84203000, 3560000, 12800000,
1305           3905000, 7810000, 11715000, 15620000,
1306           19525000, 23430000, 27335000, 31240000,
1307           35145000, 0, 0, 3723000, null
1308       ])");
1309   auto times_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
1310           59000000, 84203000000, 3560000000, 12800000000,
1311           3905000000, 7810000000, 11715000000, 15620000000,
1312           19525000000, 23430000000, 27335000000, 31240000000,
1313           35145000000, 0, 0, 3723000000, null
1314       ])");
1315   auto times_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
1316           59000000000, 84203000000000, 3560000000000, 12800000000000,
1317           3905000000000, 7810000000000, 11715000000000, 15620000000000,
1318           19525000000000, 23430000000000, 27335000000000, 31240000000000,
1319           35145000000000, 0, 0, 3723000000000, null
1320       ])");
1321 
1322   CheckCast(timestamps, times);
1323   CheckCastFails(timestamps, CastOptions::Safe(time64(TimeUnit::MICRO)));
1324   CheckCast(timestamps_extreme, times_extreme);
1325   CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson), times_s);
1326   CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson), times_ms);
1327   CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI), kTimestampSecondsJson), times_s);
1328   CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI), kTimestampSecondsJson), times_ms);
1329   CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_us);
1330   CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_ns);
1331   CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_ms);
1332   CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_s);
1333   CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_ns);
1334   CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_us);
1335   CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_ms);
1336   CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_s);
1337 
1338   CastOptions truncate = CastOptions::Safe();
1339   truncate.allow_time_truncate = true;
1340 
1341   // Truncation tests
1342   CheckCastFails(timestamps, CastOptions::Safe(time64(TimeUnit::MICRO)));
1343   CheckCastFails(timestamps, CastOptions::Safe(time32(TimeUnit::MILLI)));
1344   CheckCastFails(timestamps, CastOptions::Safe(time32(TimeUnit::SECOND)));
1345   CheckCastFails(timestamps_us, CastOptions::Safe(time32(TimeUnit::MILLI)));
1346   CheckCastFails(timestamps_us, CastOptions::Safe(time32(TimeUnit::SECOND)));
1347   CheckCastFails(timestamps_ms, CastOptions::Safe(time32(TimeUnit::SECOND)));
1348   CheckCast(timestamps, times_ns_us, truncate);
1349   CheckCast(timestamps, times_ns_ms, truncate);
1350   CheckCast(timestamps, times_s, truncate);
1351   CheckCast(timestamps_us, times_ns_ms, truncate);
1352   CheckCast(timestamps_us, times_s, truncate);
1353   CheckCast(timestamps_ms, times_s, truncate);
1354 
1355   // Upscaling tests
1356   CheckCast(timestamps_us, times_us_ns);
1357   CheckCast(timestamps_ms, times_ms_ns);
1358   CheckCast(timestamps_ms, times_ms_us);
1359   CheckCast(timestamps_s, times_ns);
1360   CheckCast(timestamps_s, times_us);
1361   CheckCast(timestamps_s, times_ms);
1362 
1363   // Invalid timezone
1364   for (auto u : TimeUnit::values()) {
1365     auto timestamps =
1366         ArrayFromJSON(timestamp(u, "Mars/Mariner_Valley"), kTimestampSecondsJson);
1367     if (u == TimeUnit::SECOND || u == TimeUnit::MILLI) {
1368       CheckCastFails(timestamps, CastOptions::Unsafe(time32(u)));
1369     } else {
1370       CheckCastFails(timestamps, CastOptions::Unsafe(time64(u)));
1371     }
1372   }
1373 }
1374 
// Casts timezone-aware timestamps to time32/time64.
TEST(Cast, ZonedTimestampToTime) {
#ifdef _WIN32
  // TODO(ARROW-13168): we lack tzdb on Windows
  GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows";
#endif

  // Nanosecond timestamps in Pacific/Marquesas -> time64[ns].
  const auto marquesas_timestamps =
      ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson);
  const auto marquesas_times = ArrayFromJSON(time64(TimeUnit::NANO), R"([
          52259123456789, 50003999999999, 56480001001001, 65000000000000,
          56105001000000, 60010002000000, 63915003000000, 67820004132000,
          71725005321000, 75630006163000, 79535000000000, 83440000000000,
          945000000000, 52200000000000, 52200000000000, 55923000000000, null
      ])");
  CheckCast(marquesas_timestamps, marquesas_times);

  // Expected Australia/Broken_Hill times for the whole-second fixture, one JSON
  // document per output unit.
  const char* time_s = R"([
          34259, 35603, 35960, 47000,
          41705, 45610, 49515, 53420,
          57325, 61230, 65135, 69040,
          72945, 37800, 37800, 41523, null
      ])";
  const char* time_ms = R"([
          34259000, 35603000, 35960000, 47000000,
          41705000, 45610000, 49515000, 53420000,
          57325000, 61230000, 65135000, 69040000,
          72945000, 37800000, 37800000, 41523000, null
      ])";
  const char* time_us = R"([
          34259000000, 35603000000, 35960000000, 47000000000,
          41705000000, 45610000000, 49515000000, 53420000000,
          57325000000, 61230000000, 65135000000, 69040000000,
          72945000000, 37800000000, 37800000000, 41523000000, null
      ])";
  const char* time_ns = R"([
          34259000000000, 35603000000000, 35960000000000, 47000000000000,
          41705000000000, 45610000000000, 49515000000000, 53420000000000,
          57325000000000, 61230000000000, 65135000000000, 69040000000000,
          72945000000000, 37800000000000, 37800000000000, 41523000000000, null
      ])";

  // One row per unit: zoned input type, output time type, expected JSON.
  struct BrokenHillCase {
    std::shared_ptr<DataType> timestamp_type;
    std::shared_ptr<DataType> time_type;
    const char* expected_json;
  };
  for (const auto& row : {
           BrokenHillCase{timestamp(TimeUnit::SECOND, "Australia/Broken_Hill"),
                          time32(TimeUnit::SECOND), time_s},
           BrokenHillCase{timestamp(TimeUnit::MILLI, "Australia/Broken_Hill"),
                          time32(TimeUnit::MILLI), time_ms},
           BrokenHillCase{timestamp(TimeUnit::MICRO, "Australia/Broken_Hill"),
                          time64(TimeUnit::MICRO), time_us},
           BrokenHillCase{timestamp(TimeUnit::NANO, "Australia/Broken_Hill"),
                          time64(TimeUnit::NANO), time_ns},
       }) {
    CheckCast(ArrayFromJSON(row.timestamp_type, kTimestampSecondsJson),
              ArrayFromJSON(row.time_type, row.expected_json));
  }
}
1426 
TEST(Cast,TimeToTime)1427 TEST(Cast, TimeToTime) {
1428   struct TimeTypePair {
1429     std::shared_ptr<DataType> coarse, fine;
1430   };
1431 
1432   CastOptions options;
1433 
1434   for (auto types : {
1435            TimeTypePair{time32(TimeUnit::SECOND), time32(TimeUnit::MILLI)},
1436            TimeTypePair{time32(TimeUnit::MILLI), time64(TimeUnit::MICRO)},
1437            TimeTypePair{time64(TimeUnit::MICRO), time64(TimeUnit::NANO)},
1438        }) {
1439     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1440     auto promoted = ArrayFromJSON(types.fine, "[0, null, 200000, 1000, 2000]");
1441 
1442     // multiply/promote
1443     CheckCast(coarse, promoted);
1444 
1445     auto will_be_truncated = ArrayFromJSON(types.fine, "[0, null, 200456, 1123, 2456]");
1446 
1447     // with truncation disallowed, fails
1448     options.allow_time_truncate = false;
1449     options.to_type = types.coarse;
1450     CheckCastFails(will_be_truncated, options);
1451 
1452     // with truncation allowed, divide/truncate
1453     options.allow_time_truncate = true;
1454     CheckCast(will_be_truncated, coarse, options);
1455   }
1456 
1457   for (auto types : {
1458            TimeTypePair{time32(TimeUnit::SECOND), time64(TimeUnit::MICRO)},
1459            TimeTypePair{time32(TimeUnit::MILLI), time64(TimeUnit::NANO)},
1460        }) {
1461     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1462     auto promoted = ArrayFromJSON(types.fine, "[0, null, 200000000, 1000000, 2000000]");
1463 
1464     // multiply/promote
1465     CheckCast(coarse, promoted);
1466 
1467     auto will_be_truncated =
1468         ArrayFromJSON(types.fine, "[0, null, 200456000, 1123000, 2456000]");
1469 
1470     // with truncation disallowed, fails
1471     options.allow_time_truncate = false;
1472     options.to_type = types.coarse;
1473     CheckCastFails(will_be_truncated, options);
1474 
1475     // with truncation allowed, divide/truncate
1476     options.allow_time_truncate = true;
1477     CheckCast(will_be_truncated, coarse, options);
1478   }
1479 
1480   for (auto types : {
1481            TimeTypePair{time32(TimeUnit::SECOND), time64(TimeUnit::NANO)},
1482        }) {
1483     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1484     auto promoted =
1485         ArrayFromJSON(types.fine, "[0, null, 200000000000, 1000000000, 2000000000]");
1486 
1487     // multiply/promote
1488     CheckCast(coarse, promoted);
1489 
1490     auto will_be_truncated =
1491         ArrayFromJSON(types.fine, "[0, null, 200456000000, 1123000000, 2456000000]");
1492 
1493     // with truncation disallowed, fails
1494     options.allow_time_truncate = false;
1495     options.to_type = types.coarse;
1496     CheckCastFails(will_be_truncated, options);
1497 
1498     // with truncation allowed, divide/truncate
1499     options.allow_time_truncate = true;
1500     CheckCast(will_be_truncated, coarse, options);
1501   }
1502 }
1503 
// Zero-copy casts between time32[s] / int32 and between time64[us] / int64.
TEST(Cast, TimeZeroCopy) {
  const char* values_json = "[0, null, 2000, 1000, 0]";

  // 32-bit: time32[s] to itself, to int32, and back from int32.
  const auto time32_seconds = time32(TimeUnit::SECOND);
  CheckCastZeroCopy(ArrayFromJSON(time32_seconds, values_json), time32_seconds);
  // ARROW-1773: cast to int32
  CheckCastZeroCopy(ArrayFromJSON(time32_seconds, values_json), int32());
  CheckCastZeroCopy(ArrayFromJSON(int32(), values_json), time32_seconds);

  // 64-bit: time64[us] to itself, to int64, and back from int64.
  const auto time64_micros = time64(TimeUnit::MICRO);
  CheckCastZeroCopy(ArrayFromJSON(time64_micros, values_json), time64_micros);
  // ARROW-1773: cast to int64
  CheckCastZeroCopy(ArrayFromJSON(time64_micros, values_json), int64());
  CheckCastZeroCopy(ArrayFromJSON(int64(), values_json), time64_micros);
}
1525 
// date32 / date64 values are expected to render as "YYYY-MM-DD" strings, for both
// utf8 and large_utf8 targets; nulls stay null.
TEST(Cast, DateToString) {
  const auto days = ArrayFromJSON(date32(), "[0, null]");
  const auto millis = ArrayFromJSON(date64(), "[86400000, null]");

  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto epoch_day = ArrayFromJSON(string_type, R"(["1970-01-01", null])");
    CheckCast(days, epoch_day);

    const auto next_day = ArrayFromJSON(string_type, R"(["1970-01-02", null])");
    CheckCast(millis, next_day);
  }
}
1534 
// time32[s] and time64[ns] values are expected to render as "HH:MM:SS" strings, with
// nine fractional digits for nanosecond resolution.
TEST(Cast, TimeToString) {
  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto seconds = ArrayFromJSON(time32(TimeUnit::SECOND), "[1, 62]");
    const auto seconds_text = ArrayFromJSON(string_type, R"(["00:00:01", "00:01:02"])");
    CheckCast(seconds, seconds_text);

    const auto nanos = ArrayFromJSON(time64(TimeUnit::NANO), "[0, 1]");
    const auto nanos_text =
        ArrayFromJSON(string_type, R"(["00:00:00.000000000", "00:00:00.000000001"])");
    CheckCast(nanos, nanos_text);
  }
}
1544 
// Second-resolution timestamps well before the epoch are expected to render as
// "YYYY-MM-DD HH:MM:SS" strings.
TEST(Cast, TimestampToString) {
  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto timestamps =
        ArrayFromJSON(timestamp(TimeUnit::SECOND), "[-30610224000, -5364662400]");
    const auto rendered =
        ArrayFromJSON(string_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])");
    CheckCast(timestamps, rendered);
  }
}
1552 
// date32 <-> date64 conversions: exact multiples of a day convert in both directions,
// while date64 values with a sub-day remainder only convert to date32 when
// allow_time_truncate is set.
TEST(Cast, DateToDate) {
  const auto as_date32 = ArrayFromJSON(date32(), "[0, null, 100, 1, 10]");
  const auto as_date64 = ArrayFromJSON(date64(), R"([
               0,
            null,
      8640000000,
        86400000,
       864000000])");

  // Multiply promotion
  CheckCast(as_date32, as_date64);

  // No truncation
  CheckCast(as_date64, as_date32);

  // Same days as above, each non-zero value carrying a few extra milliseconds
  const auto date64_with_remainder = ArrayFromJSON(date64(), R"([
               0,
            null,
      8640000123,
        86400456,
       864000789])");

  CastOptions to_date32;
  to_date32.to_type = date32();

  // Disallow truncate (default options)
  CheckCastFails(date64_with_remainder, to_date32);

  // Divide, truncate
  to_date32.allow_time_truncate = true;
  CheckCast(date64_with_remainder, as_date32, to_date32);
}
1584 
// Exercises CheckCastZeroCopy between date types and the integer type of the same
// width, in both directions, plus each date type cast to itself.
TEST(Cast, DateZeroCopy) {
  const std::string json = "[0, null, 2000, 1000, 0]";

  // date32 -> {date32, int32}, then int32 -> date32
  const std::vector<std::shared_ptr<DataType>> date32_targets = {
      date32(),
      int32(),  // ARROW-1773: cast to int32
  };
  for (const auto& target : date32_targets) {
    CheckCastZeroCopy(ArrayFromJSON(date32(), json), target);
  }
  CheckCastZeroCopy(ArrayFromJSON(int32(), json), date32());

  // date64 -> {date64, int64}, then int64 -> date64
  const std::vector<std::shared_ptr<DataType>> date64_targets = {
      date64(),
      int64(),  // ARROW-1773: cast to int64
  };
  for (const auto& target : date64_targets) {
    CheckCastZeroCopy(ArrayFromJSON(date64(), json), target);
  }
  CheckCastZeroCopy(ArrayFromJSON(int64(), json), date64());
}
1604 
TEST(Cast,DurationToDuration)1605 TEST(Cast, DurationToDuration) {
1606   struct DurationTypePair {
1607     std::shared_ptr<DataType> coarse, fine;
1608   };
1609 
1610   CastOptions options;
1611 
1612   for (auto types : {
1613            DurationTypePair{duration(TimeUnit::SECOND), duration(TimeUnit::MILLI)},
1614            DurationTypePair{duration(TimeUnit::MILLI), duration(TimeUnit::MICRO)},
1615            DurationTypePair{duration(TimeUnit::MICRO), duration(TimeUnit::NANO)},
1616        }) {
1617     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1618     auto promoted = ArrayFromJSON(types.fine, "[0, null, 200000, 1000, 2000]");
1619 
1620     // multiply/promote
1621     CheckCast(coarse, promoted);
1622 
1623     auto will_be_truncated = ArrayFromJSON(types.fine, "[0, null, 200456, 1123, 2456]");
1624 
1625     // with truncation disallowed, fails
1626     options.allow_time_truncate = false;
1627     options.to_type = types.coarse;
1628     CheckCastFails(will_be_truncated, options);
1629 
1630     // with truncation allowed, divide/truncate
1631     options.allow_time_truncate = true;
1632     CheckCast(will_be_truncated, coarse, options);
1633   }
1634 
1635   for (auto types : {
1636            DurationTypePair{duration(TimeUnit::SECOND), duration(TimeUnit::MICRO)},
1637            DurationTypePair{duration(TimeUnit::MILLI), duration(TimeUnit::NANO)},
1638        }) {
1639     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1640     auto promoted = ArrayFromJSON(types.fine, "[0, null, 200000000, 1000000, 2000000]");
1641 
1642     // multiply/promote
1643     CheckCast(coarse, promoted);
1644 
1645     auto will_be_truncated =
1646         ArrayFromJSON(types.fine, "[0, null, 200000456, 1000123, 2000456]");
1647 
1648     // with truncation disallowed, fails
1649     options.allow_time_truncate = false;
1650     options.to_type = types.coarse;
1651     CheckCastFails(will_be_truncated, options);
1652 
1653     // with truncation allowed, divide/truncate
1654     options.allow_time_truncate = true;
1655     CheckCast(will_be_truncated, coarse, options);
1656   }
1657 
1658   for (auto types : {
1659            DurationTypePair{duration(TimeUnit::SECOND), duration(TimeUnit::NANO)},
1660        }) {
1661     auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200, 1, 2]");
1662     auto promoted =
1663         ArrayFromJSON(types.fine, "[0, null, 200000000000, 1000000000, 2000000000]");
1664 
1665     // multiply/promote
1666     CheckCast(coarse, promoted);
1667 
1668     auto will_be_truncated =
1669         ArrayFromJSON(types.fine, "[0, null, 200000000456, 1000000123, 2000000456]");
1670 
1671     // with truncation disallowed, fails
1672     options.allow_time_truncate = false;
1673     options.to_type = types.coarse;
1674     CheckCastFails(will_be_truncated, options);
1675 
1676     // with truncation allowed, divide/truncate
1677     options.allow_time_truncate = true;
1678     CheckCast(will_be_truncated, coarse, options);
1679   }
1680 }
1681 
// Exercises CheckCastZeroCopy between duration[s] and int64 in both directions, plus
// duration[s] cast to itself.
TEST(Cast, DurationZeroCopy) {
  const std::string json = "[0, null, 2000, 1000, 0]";
  const auto seconds = duration(TimeUnit::SECOND);

  CheckCastZeroCopy(ArrayFromJSON(seconds, json), seconds);
  // ARROW-1773: cast to int64
  CheckCastZeroCopy(ArrayFromJSON(seconds, json), int64());

  CheckCastZeroCopy(ArrayFromJSON(int64(), json), seconds);
}
1694 
// 10000000000 s expressed in nanoseconds (1e19) does not fit in a 64-bit signed
// integer, so the seconds -> nanoseconds cast is expected to fail.
TEST(Cast, DurationToDurationMultiplyOverflow) {
  const auto too_large =
      ArrayFromJSON(duration(TimeUnit::SECOND), "[10000000000, 1, 2, 3, 10000000000]");

  CastOptions to_nanos;
  to_nanos.to_type = duration(TimeUnit::NANO);

  CheckCastFails(too_large, to_nanos);
}
1702 
// int16, float32 and boolean inputs cast to each floating point type; booleans map to
// 1 / 0.
TEST(Cast, MiscToFloating) {
  struct Case {
    std::shared_ptr<DataType> from_type;
    std::string input_json;
    std::string expected_json;
  };
  const std::vector<Case> cases = {
      {int16(), "[0, null, 200, 1, 2]", "[0, null, 200, 1, 2]"},
      {float32(), "[0, null, 200, 1, 2]", "[0, null, 200, 1, 2]"},
      {boolean(), "[true, null, false, false, true]", "[1, null, 0, 0, 1]"},
  };

  for (const auto& to_type : {float32(), float64()}) {
    for (const auto& c : cases) {
      CheckCast(ArrayFromJSON(c.from_type, c.input_json),
                ArrayFromJSON(to_type, c.expected_json));
    }
  }
}
1715 
// The target type (list) is castable-to in general, but not from int32: both entry
// points must report NotImplemented with a message naming the two types.
TEST(Cast, UnsupportedInputType) {
  const auto input = ArrayFromJSON(int32(), "[1, 2, 3]");
  const auto target_type = list(utf8());
  const std::string expected_substring = "Unsupported cast from int32 to list";

  // Concrete API: Cast()
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
                                  ::testing::HasSubstr(expected_substring),
                                  Cast(*input, target_type));

  // General kernel API: CallFunction("cast", ...)
  CastOptions options;
  options.to_type = target_type;
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
                                  ::testing::HasSubstr(expected_substring),
                                  CallFunction("cast", {input}, &options));
}
1734 
// Casting int32 to a dense union target: both entry points must report
// NotImplemented with a message naming the two types.
TEST(Cast, UnsupportedTargetType) {
  const auto input = ArrayFromJSON(int32(), "[1, 2, 3]");
  const auto target_type = dense_union({field("a", int32())});
  const std::string expected_substring = "Unsupported cast from int32 to dense_union";

  // Concrete API: Cast()
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
                                  ::testing::HasSubstr(expected_substring),
                                  Cast(*input, target_type));

  // General kernel API: CallFunction("cast", ...)
  CastOptions options;
  options.to_type = target_type;
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
                                  ::testing::HasSubstr(expected_substring),
                                  CallFunction("cast", {input}, &options));
}
1751 
// Accepted spellings: "true"/"false" with a lower- or upper-case first letter, and
// "1"/"0". Trailing whitespace or a bare "T" must be rejected.
TEST(Cast, StringToBoolean) {
  const auto expected = ArrayFromJSON(boolean(), "[false, null, true, true, false]");
  const auto to_boolean = CastOptions::Safe(boolean());

  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto words =
        ArrayFromJSON(string_type, R"(["False", null, "true", "True", "false"])");
    CheckCast(words, ArrayFromJSON(boolean(), "[false, null, true, true, false]"));

    const auto digits = ArrayFromJSON(string_type, R"(["0", null, "1", "1", "0"])");
    CheckCast(digits, expected);

    CheckCastFails(ArrayFromJSON(string_type, R"(["false "])"), to_boolean);
    CheckCastFails(ArrayFromJSON(string_type, R"(["T"])"), to_boolean);
  }
}
1765 
// Parsing of decimal and "0x"/"0X"-prefixed hexadecimal strings into every integer
// width, including the extreme values of the 32- and 64-bit types, followed by inputs
// that must be rejected for int8 and uint8.
TEST(Cast, StringToInt) {
  const std::vector<std::string> not_int8_values = {
      "z", "12 z", "128", "-129", "0.5", "0x", "0xfff", "-0xf0",
  };
  const std::vector<std::string> not_uint8_values = {
      "256", "-1", "0.5", "0x", "0x3wa", "0x123",
  };

  for (const auto& string_type : {utf8(), large_utf8()}) {
    // Values representable in every signed width
    for (const auto& signed_type : {int8(), int16(), int32(), int64()}) {
      const auto small_signed = ArrayFromJSON(
          string_type, R"(["0", null, "127", "-1", "0", "0x0", "0x7F"])");
      CheckCast(small_signed,
                ArrayFromJSON(signed_type, "[0, null, 127, -1, 0, 0, 127]"));
    }

    // int32 limits; hex literals with the top bit set map to negative values
    const auto int32_text =
        ArrayFromJSON(string_type, R"(["2147483647", null, "-2147483648", "0",
          "0X0", "0x7FFFFFFF", "0XFFFFfFfF", "0Xf0000000"])");
    const auto int32_values = ArrayFromJSON(
        int32(), "[2147483647, null, -2147483648, 0, 0, 2147483647, -1, -268435456]");
    CheckCast(int32_text, int32_values);

    // int64 limits
    const auto int64_text = ArrayFromJSON(
        string_type, R"(["9223372036854775807", null, "-9223372036854775808", "0",
                    "0x0", "0x7FFFFFFFFFFFFFFf", "0XF000000000000001"])");
    const auto int64_values =
        ArrayFromJSON(int64(),
                      "[9223372036854775807, null, -9223372036854775808, 0, 0, "
                      "9223372036854775807, -1152921504606846975]");
    CheckCast(int64_text, int64_values);

    // Values representable in every unsigned width
    for (const auto& unsigned_type : {uint8(), uint16(), uint32(), uint64()}) {
      const auto small_unsigned = ArrayFromJSON(
          string_type, R"(["0", null, "127", "255", "0", "0X0", "0xff", "0x7f"])");
      CheckCast(small_unsigned,
                ArrayFromJSON(unsigned_type, "[0, null, 127, 255, 0, 0, 255, 127]"));
    }

    // uint32 limits
    const auto uint32_text =
        ArrayFromJSON(string_type, R"(["2147483647", null, "4294967295", "0",
                                    "0x0", "0x7FFFFFFf", "0xFFFFFFFF"])");
    const auto uint32_values = ArrayFromJSON(
        uint32(), "[2147483647, null, 4294967295, 0, 0, 2147483647, 4294967295]");
    CheckCast(uint32_text, uint32_values);

    // uint64 limits
    const auto uint64_text = ArrayFromJSON(
        string_type, R"(["9223372036854775807", null, "18446744073709551615", "0",
                    "0x0", "0x7FFFFFFFFFFFFFFf", "0xfFFFFFFFFFFFFFFf"])");
    const auto uint64_values =
        ArrayFromJSON(uint64(),
                      "[9223372036854775807, null, 18446744073709551615, 0, 0, "
                      "9223372036854775807, 18446744073709551615]");
    CheckCast(uint64_text, uint64_values);

    // Malformed or out-of-range for int8
    const auto to_int8 = CastOptions::Safe(int8());
    for (const auto& text : not_int8_values) {
      CheckCastFails(ArrayFromJSON(string_type, "[\"" + text + "\"]"), to_int8);
    }

    // Malformed or out-of-range for uint8
    const auto to_uint8 = CastOptions::Safe(uint8());
    for (const auto& text : not_uint8_values) {
      CheckCastFails(ArrayFromJSON(string_type, "[\"" + text + "\"]"), to_uint8);
    }
  }
}
1826 
// Parsing of strings into float32 / float64:
//   * well-formed decimal and exponent notation parses to the expected values,
//   * non-numeric text is rejected for the floating point type under test,
//   * parsing gives the same result while a comma-decimal locale is active.
TEST(Cast, StringToFloating) {
  for (auto string_type : {utf8(), large_utf8()}) {
    for (auto float_type : {float32(), float64()}) {
      auto strings =
          ArrayFromJSON(string_type, R"(["0.1", null, "127.3", "1e3", "200.4", "0.5"])");
      auto floats = ArrayFromJSON(float_type, "[0.1, null, 127.3, 1000, 200.4, 0.5]");
      CheckCast(strings, floats);

      for (std::string not_float : {
               "z",
           }) {
        // Target the type of the current iteration so that the failure path is
        // exercised for float64 as well as float32.
        auto options = CastOptions::Safe(float_type);
        CheckCastFails(ArrayFromJSON(string_type, "[\"" + not_float + "\"]"), options);
      }

#if !defined(_WIN32) || defined(NDEBUG)
      // Test that casting is locale-independent
      // French locale uses the comma as decimal point
      LocaleGuard locale_guard("fr_FR.UTF-8");
      CheckCast(strings, floats);
#endif
    }
  }
}
1851 
// Date-only strings parse into timestamps at second and microsecond resolution;
// empty or non-date text is rejected for every unit.
TEST(Cast, StringToTimestamp) {
  const std::vector<std::string> not_timestamps = {"", "xxx"};

  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto dates = ArrayFromJSON(string_type, R"(["1970-01-01", null, "2000-02-29"])");

    const auto as_seconds =
        ArrayFromJSON(timestamp(TimeUnit::SECOND), "[0, null, 951782400]");
    CheckCast(dates, as_seconds);

    const auto as_micros =
        ArrayFromJSON(timestamp(TimeUnit::MICRO), "[0, null, 951782400000000]");
    CheckCast(dates, as_micros);

    for (const auto unit :
         {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
      const auto to_timestamp = CastOptions::Safe(timestamp(unit));
      for (const auto& text : not_timestamps) {
        CheckCastFails(ArrayFromJSON(string_type, "[\"" + text + "\"]"), to_timestamp);
      }
    }

    // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
  }
}
1876 
AssertBinaryZeroCopy(std::shared_ptr<Array> lhs,std::shared_ptr<Array> rhs)1877 static void AssertBinaryZeroCopy(std::shared_ptr<Array> lhs, std::shared_ptr<Array> rhs) {
1878   // null bitmap and data buffers are always zero-copied
1879   AssertBufferSame(*lhs, *rhs, 0);
1880   AssertBufferSame(*lhs, *rhs, 2);
1881 
1882   if (offset_bit_width(lhs->type_id()) == offset_bit_width(rhs->type_id())) {
1883     // offset buffer is zero copied if possible
1884     AssertBufferSame(*lhs, *rhs, 1);
1885     return;
1886   }
1887 
1888   // offset buffers are equivalent
1889   ArrayVector offsets;
1890   for (auto array : {lhs, rhs}) {
1891     auto length = array->length();
1892     auto buffer = array->data()->buffers[1];
1893     offsets.push_back(offset_bit_width(array->type_id()) == 32
1894                           ? *Cast(Int32Array(length, buffer), int64())
1895                           : std::make_shared<Int64Array>(length, buffer));
1896   }
1897   AssertArraysEqual(*offsets[0], *offsets[1]);
1898 }
1899 
// Casting binary, large_binary and fixed_size_binary to string types:
//   * empty input always works,
//   * invalid UTF-8 hidden behind a null is accepted,
//   * invalid UTF-8 in a valid slot fails unless allow_invalid_utf8 is set, in which
//     case the cast succeeds, the result fails ValidateFull() and the input buffers
//     are reused.
TEST(Cast, BinaryToString) {
  const std::vector<std::shared_ptr<DataType>> string_types = {utf8(), large_utf8()};

  for (const auto& binary_type : {binary(), large_binary()}) {
    for (const auto& string_type : string_types) {
      // empty -> empty always works
      CheckCast(ArrayFromJSON(binary_type, "[]"), ArrayFromJSON(string_type, "[]"));

      const auto bad_bytes = InvalidUtf8(binary_type);

      // invalid utf-8 masked by a null bit is not an error
      CheckCast(MaskArrayWithNullsAt(InvalidUtf8(binary_type), {4}),
                MaskArrayWithNullsAt(InvalidUtf8(string_type), {4}));

      // error: invalid utf-8
      auto to_string = CastOptions::Safe(string_type);
      CheckCastFails(bad_bytes, to_string);

      // override utf-8 check
      to_string.allow_invalid_utf8 = true;
      ASSERT_OK_AND_ASSIGN(auto unchecked, Cast(*bad_bytes, string_type, to_string));
      ASSERT_RAISES(Invalid, unchecked->ValidateFull());
      AssertBinaryZeroCopy(bad_bytes, unchecked);
    }
  }

  // Same scenarios starting from fixed_size_binary(3)
  const auto fixed_type = fixed_size_binary(3);
  const auto fixed_bad_bytes = FixedSizeInvalidUtf8(fixed_type);
  for (const auto& string_type : string_types) {
    CheckCast(ArrayFromJSON(fixed_type, "[]"), ArrayFromJSON(string_type, "[]"));

    // invalid utf-8 masked by a null bit is not an error
    CheckCast(MaskArrayWithNullsAt(fixed_bad_bytes, {4}),
              MaskArrayWithNullsAt(FixedSizeInvalidUtf8(string_type), {4}));

    // error: invalid utf-8
    auto to_string = CastOptions::Safe(string_type);
    CheckCastFails(fixed_bad_bytes, to_string);

    // override utf-8 check
    to_string.allow_invalid_utf8 = true;
    ASSERT_OK_AND_ASSIGN(auto unchecked, Cast(*fixed_bad_bytes, string_type, to_string));
    ASSERT_RAISES(Invalid, unchecked->ValidateFull());

    // N.B. null buffer is not always the same if input sliced
    AssertBufferSame(*fixed_bad_bytes, *unchecked, 0);
    // The fixed-size data buffer (index 1) becomes the string data buffer (index 2)
    ASSERT_EQ(fixed_bad_bytes->data()->buffers[1].get(),
              unchecked->data()->buffers[2].get());
  }
}
1947 
// Casting string, binary and fixed_size_binary inputs to binary types. Binary targets
// accept invalid UTF-8, and the result is checked for buffer reuse.
TEST(Cast, BinaryOrStringToBinary) {
  const std::vector<std::shared_ptr<DataType>> binary_types = {binary(), large_binary()};

  for (const auto& source_type : {utf8(), large_utf8(), binary(), large_binary()}) {
    for (const auto& target_type : binary_types) {
      // empty -> empty always works
      CheckCast(ArrayFromJSON(source_type, "[]"), ArrayFromJSON(target_type, "[]"));

      const auto bad_bytes = InvalidUtf8(source_type);

      // invalid utf-8 is not an error for binary
      ASSERT_OK_AND_ASSIGN(auto converted, Cast(*bad_bytes, target_type));
      ValidateOutput(*converted);
      AssertBinaryZeroCopy(bad_bytes, converted);

      // invalid utf-8 masked by a null bit is not an error
      CheckCast(MaskArrayWithNullsAt(InvalidUtf8(source_type), {4}),
                MaskArrayWithNullsAt(InvalidUtf8(target_type), {4}));
    }
  }

  // fixed_size_binary(3) source
  const auto fixed_type = fixed_size_binary(3);
  const auto fixed_bad_bytes = FixedSizeInvalidUtf8(fixed_type);

  // Same width casts to itself; a different width is rejected
  CheckCast(fixed_bad_bytes, fixed_bad_bytes);
  CheckCastFails(fixed_bad_bytes, CastOptions::Safe(fixed_size_binary(5)));

  for (const auto& target_type : binary_types) {
    CheckCast(ArrayFromJSON(fixed_type, "[]"), ArrayFromJSON(target_type, "[]"));

    ASSERT_OK_AND_ASSIGN(auto converted, Cast(*fixed_bad_bytes, target_type));
    ValidateOutput(*converted);

    // N.B. null buffer is not always the same if input sliced
    AssertBufferSame(*fixed_bad_bytes, *converted, 0);
    // The fixed-size data buffer (index 1) becomes the binary data buffer (index 2)
    ASSERT_EQ(fixed_bad_bytes->data()->buffers[1].get(),
              converted->data()->buffers[2].get());

    // invalid utf-8 masked by a null bit is not an error
    CheckCast(MaskArrayWithNullsAt(fixed_bad_bytes, {4}),
              MaskArrayWithNullsAt(FixedSizeInvalidUtf8(target_type), {4}));
  }
}
1985 
// Casting between utf8 and large_utf8 (all four combinations). With
// allow_invalid_utf8 set, invalid UTF-8 from a string-typed source passes through the
// cast and is only caught by ValidateFull() on the result.
TEST(Cast, StringToString) {
  const std::vector<std::shared_ptr<DataType>> string_types = {utf8(), large_utf8()};

  for (const auto& source_type : string_types) {
    for (const auto& target_type : string_types) {
      // empty -> empty always works
      CheckCast(ArrayFromJSON(source_type, "[]"), ArrayFromJSON(target_type, "[]"));

      const auto bad_strings = InvalidUtf8(source_type);

      // invalid utf-8 masked by a null bit is not an error
      CheckCast(MaskArrayWithNullsAt(bad_strings, {4}),
                MaskArrayWithNullsAt(InvalidUtf8(target_type), {4}));

      // override utf-8 check
      auto unchecked_options = CastOptions::Safe(target_type);
      unchecked_options.allow_invalid_utf8 = true;

      // utf-8 is not checked by Cast when the origin guarantees utf-8
      ASSERT_OK_AND_ASSIGN(auto converted,
                           Cast(*bad_strings, target_type, unchecked_options));
      ASSERT_RAISES(Invalid, converted->ValidateFull());
      AssertBinaryZeroCopy(bad_strings, converted);
    }
  }
}
2008 
// Every integer width, signed and unsigned, formats its zero, one, extreme and null
// values as decimal strings.
TEST(Cast, IntToString) {
  struct Case {
    std::shared_ptr<DataType> int_type;
    std::string ints_json;
    std::string strings_json;
  };
  const std::vector<Case> cases = {
      {int8(), "[0, 1, 127, -128, null]", R"(["0", "1", "127", "-128", null])"},
      {uint8(), "[0, 1, 255, null]", R"(["0", "1", "255", null])"},
      {int16(), "[0, 1, 32767, -32768, null]", R"(["0", "1", "32767", "-32768", null])"},
      {uint16(), "[0, 1, 65535, null]", R"(["0", "1", "65535", null])"},
      {int32(), "[0, 1, 2147483647, -2147483648, null]",
       R"(["0", "1", "2147483647", "-2147483648", null])"},
      {uint32(), "[0, 1, 4294967295, null]", R"(["0", "1", "4294967295", null])"},
      {int64(), "[0, 1, 9223372036854775807, -9223372036854775808, null]",
       R"(["0", "1", "9223372036854775807", "-9223372036854775808", null])"},
      {uint64(), "[0, 1, 18446744073709551615, null]",
       R"(["0", "1", "18446744073709551615", null])"},
  };

  for (const auto& string_type : {utf8(), large_utf8()}) {
    for (const auto& c : cases) {
      CheckCast(ArrayFromJSON(c.int_type, c.ints_json),
                ArrayFromJSON(string_type, c.strings_json));
    }
  }
}
2040 
// float32 and float64 format signed zeros, a simple fraction, infinities and NaN the
// same way; nulls stay null.
TEST(Cast, FloatingToString) {
  const std::string floats_json = "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]";
  const std::string strings_json = R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])";

  for (const auto& string_type : {utf8(), large_utf8()}) {
    for (const auto& float_type : {float32(), float64()}) {
      CheckCast(ArrayFromJSON(float_type, floats_json),
                ArrayFromJSON(string_type, strings_json));
    }
  }
}
2052 
// Booleans format as lowercase "true" / "false"; nulls stay null.
TEST(Cast, BooleanToString) {
  const auto flags = ArrayFromJSON(boolean(), "[true, true, false, null]");

  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto words = ArrayFromJSON(string_type, R"(["true", "true", "false", null])");
    CheckCast(ArrayFromJSON(boolean(), "[true, true, false, null]"), words);
  }

  (void)flags;
}
2059 
// Casting a list array to a non-nested type is reported as NotImplemented.
TEST(Cast, ListToPrimitive) {
  const auto int8_lists = ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]");
  ASSERT_RAISES(NotImplemented, Cast(*int8_lists, uint8()));

  const auto binary_lists =
      ArrayFromJSON(list(binary()), R"([["1", "2"], ["3", "4"]])");
  ASSERT_RAISES(NotImplemented, Cast(*binary_lists, utf8()));
}
2068 
// Function type of a list-type factory: takes a value type and returns a data type.
using make_list_t = std::shared_ptr<DataType>(const std::shared_ptr<DataType>&);

// The list-type factories (`list` and `large_list`) that the list cast tests below
// iterate over, as both source and destination.
static const auto list_factories = std::vector<make_list_t*>{&list, &large_list};
2072 
CheckListToList(const std::vector<std::shared_ptr<DataType>> & value_types,const std::string & json_data)2073 static void CheckListToList(const std::vector<std::shared_ptr<DataType>>& value_types,
2074                             const std::string& json_data) {
2075   for (auto make_src_list : list_factories) {
2076     for (auto make_dest_list : list_factories) {
2077       for (const auto& src_value_type : value_types) {
2078         for (const auto& dest_value_type : value_types) {
2079           const auto src_type = make_src_list(src_value_type);
2080           const auto dest_type = make_dest_list(dest_value_type);
2081           ARROW_SCOPED_TRACE("src_type = ", src_type->ToString(),
2082                              ", dest_type = ", dest_type->ToString());
2083           CheckCast(ArrayFromJSON(src_type, json_data),
2084                     ArrayFromJSON(dest_type, json_data));
2085         }
2086       }
2087     }
2088   }
2089 }
2090 
// List-to-list casts over int32 / float32 / int64 value types, with data containing
// null lists and an empty list.
TEST(Cast, ListToList) {
  const std::vector<std::shared_ptr<DataType>> value_types = {int32(), float32(),
                                                              int64()};
  const std::string lists_json =
      "[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]";
  CheckListToList(value_types, lists_json);
}
2095 
// ARROW-12568: same as ListToList, but with data that contains no null lists.
TEST(Cast, ListToListNoNulls) {
  const std::vector<std::shared_ptr<DataType>> value_types = {int32(), float32(),
                                                              int64()};
  const std::string lists_json = "[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]";
  CheckListToList(value_types, lists_json);
}
2101 
// Cast options given for a list cast must apply to the cast of the list values: an
// int32 value that overflows int16 fails by default and wraps around when
// allow_int_overflow is set.
TEST(Cast, ListToListOptionsPassthru) {
  for (make_list_t* src_factory : list_factories) {
    for (make_list_t* dest_factory : list_factories) {
      const auto overflowing = ArrayFromJSON(src_factory(int32()), "[[87654321]]");

      auto to_int16_list = CastOptions::Safe(dest_factory(int16()));
      CheckCastFails(overflowing, to_int16_list);

      to_int16_list.allow_int_overflow = true;
      const auto wrapped = ArrayFromJSON(dest_factory(int16()), "[[32689]]");
      CheckCast(overflowing, wrapped, to_int16_list);
    }
  }
}
2115 
// ARROW-4102: casting an array to its own type is checked with CheckCastZeroCopy for
// a representative set of types.
TEST(Cast, IdentityCasts) {
  auto check_same_type = [](const std::shared_ptr<DataType>& type,
                            const std::string& json) {
    const auto input = ArrayFromJSON(type, json);
    CheckCastZeroCopy(input, type);
  };

  check_same_type(null(), "[null, null, null]");
  check_same_type(boolean(), "[false, true, null, false]");

  for (const auto& numeric_type : kNumericTypes) {
    check_same_type(numeric_type, "[1, 2, null, 4]");
  }

  struct Case {
    std::shared_ptr<DataType> type;
    std::string json;
  };
  const std::vector<Case> remaining_cases = {
      // binary-like
      {binary(), R"(["foo", "bar"])"},
      {utf8(), R"(["foo", "bar"])"},
      {fixed_size_binary(3), R"(["foo", "bar"])"},
      // nested
      {list(int8()), "[[1, 2], [null], [], [3]]"},
      // temporal
      {time32(TimeUnit::MILLI), "[1, 2, 3, 4]"},
      {time64(TimeUnit::MICRO), "[1, 2, 3, 4]"},
      {date32(), "[1, 2, 3, 4]"},
      {date64(), "[86400000, 0]"},
      {timestamp(TimeUnit::SECOND), "[1, 2, 3, 4]"},
      // dictionary
      {dictionary(int8(), int8()), "[1, 2, 3, 1, null, 3]"},
  };
  for (const auto& c : remaining_cases) {
    check_same_type(c.type, c.json);
  }
}
2142 
// ARROW-4766: 0-length arrays should not segfault. Boolean <-> numeric casts are run
// on zero-length arrays whose buffers are null pointers.
TEST(Cast, EmptyCasts) {
  auto check_empty = [](std::shared_ptr<DataType> from, std::shared_ptr<DataType> to) {
    // Python creates array with nullptr instead of 0-length (valid) buffers.
    auto empty_data =
        ArrayData::Make(from, /* length */ 0, /* buffers */ {nullptr, nullptr});
    CheckCast(MakeArray(empty_data), ArrayFromJSON(to, "[]"));
  };

  for (const auto& numeric_type : kNumericTypes) {
    check_empty(boolean(), numeric_type);
    check_empty(numeric_type, boolean());
  }
}
2156 
// ARROW-12672 segfault when casting slightly malformed array
// (no validity bitmap but atomic null count non-zero).
// The cast result must be the same before and after the null count is reset to
// "unknown" on an array that has no validity bitmap.
TEST(Cast, CastWithNoValidityBitmapButUnknownNullCount) {
  auto input = ArrayFromJSON(boolean(), "[true, true, false]");

  // Reference result from the array as built
  ASSERT_OK_AND_ASSIGN(auto reference, Cast(*input, int8()));

  // Precondition: the array carries no validity bitmap
  ASSERT_EQ(input->data()->buffers[0], NULLPTR);

  input->data()->null_count = kUnknownNullCount;
  ASSERT_OK_AND_ASSIGN(auto after_reset, Cast(*input, int8()));

  AssertArraysEqual(*reference, *after_reset);
}
2170 
2171 // ----------------------------------------------------------------------
2172 // Test casting from NullType
2173 
// A NullArray of length 10 cast to each listed type must equal the array produced by
// MakeArrayOfNull for that type and length.
TEST(Cast, FromNull) {
  const std::vector<std::shared_ptr<DataType>> target_types = {
      null(),
      // integers
      uint8(), int8(), uint16(), int16(), uint32(), int32(), uint64(), int64(),
      // floating point
      float32(), float64(),
      // dates
      date32(), date64(),
      // binary-like
      fixed_size_binary(10), binary(), utf8(),
  };

  for (const auto& target_type : target_types) {
    ASSERT_OK_AND_ASSIGN(auto all_nulls, MakeArrayOfNull(target_type, 10));
    const auto null_input = std::make_shared<NullArray>(10);
    CheckCast(null_input, all_nulls);
  }
}
2197 
// A NullArray of length 10 cast to dictionary(int8, boolean) must equal the array
// produced by MakeArrayOfNull for that type and length.
TEST(Cast, FromNullToDictionary) {
  const auto dict_type = dictionary(int8(), boolean());
  const auto null_input = std::make_shared<NullArray>(10);

  ASSERT_OK_AND_ASSIGN(auto all_nulls, MakeArrayOfNull(dict_type, 10));
  CheckCast(null_input, all_nulls);
}
2205 
2206 // ----------------------------------------------------------------------
2207 // Test casting from DictionaryType
2208 
// Casting a dictionary array to the type of its dictionary values must give the same
// result as Take(values, indices). Covered: a null dictionary plus numeric and
// binary-like dictionaries, every dictionary index type, and (ARROW-3208) a dictionary
// array whose validity bitmap is a null pointer.
TEST(Cast, FromDictionary) {
  // Dictionaries to decode: one NullArray, then one array per numeric and
  // binary-like type.
  ArrayVector value_sets;
  value_sets.push_back(std::make_shared<NullArray>(5));

  for (const auto& numeric_type : kNumericTypes) {
    value_sets.push_back(ArrayFromJSON(numeric_type, "[23, 12, 45, 12, null]"));
  }

  for (const auto& binary_like_type : kBaseBinaryTypes) {
    value_sets.push_back(
        ArrayFromJSON(binary_like_type, R"(["foo", "bar", "baz", "foo", null])"));
  }

  // Every dictionary with every index type
  for (const auto& values : value_sets) {
    for (const auto& index_type : kDictionaryIndexTypes) {
      const auto indices = ArrayFromJSON(index_type, "[4, 0, 1, 2, 0, 4, null, 2]");
      ASSERT_OK_AND_ASSIGN(auto decoded, Take(*values, *indices));

      const auto dict_type = dictionary(index_type, values->type());
      ASSERT_OK_AND_ASSIGN(auto dict_arr,
                           DictionaryArray::FromArrays(dict_type, indices, values));
      CheckCast(dict_arr, decoded);
    }
  }

  for (const auto& values : value_sets) {
    if (values->type_id() == Type::NA) continue;

    // Test with a nullptr bitmap buffer (ARROW-3208)
    const auto non_null_indices = ArrayFromJSON(int8(), "[0, 0, 1, 2, 0, 3, 3, 2]");
    ASSERT_OK_AND_ASSIGN(auto no_nulls, Take(*values, *non_null_indices));
    ASSERT_EQ(no_nulls->null_count(), 0);

    ASSERT_OK_AND_ASSIGN(Datum encoded, DictionaryEncode(no_nulls));

    // Make a new dict array with nullptr bitmap buffer
    auto bitmapless_data = encoded.array()->Copy();
    bitmapless_data->buffers[0] = nullptr;
    bitmapless_data->null_count = 0;
    std::shared_ptr<Array> bitmapless_dict =
        std::make_shared<DictionaryArray>(bitmapless_data);
    ValidateOutput(*bitmapless_dict);

    CheckCast(bitmapless_dict, no_nulls);
  }
}
2254 
SmallintArrayFromJSON(const std::string & json_data)2255 std::shared_ptr<Array> SmallintArrayFromJSON(const std::string& json_data) {
2256   auto arr = ArrayFromJSON(int16(), json_data);
2257   auto ext_data = arr->data()->Copy();
2258   ext_data->type = smallint();
2259   return MakeArray(ext_data);
2260 }
2261 
// Casting the smallint extension type (built over int16 data, see
// SmallintArrayFromJSON) to plain integer types:
//   * to int16 goes through CheckCastZeroCopy,
//   * to uint8 succeeds when every value fits,
//   * to uint8 with an out-of-range value fails by default and wraps around when
//     allow_int_overflow is set.
TEST(Cast, ExtensionTypeToIntDowncast) {
  // Guard object for the smallint extension type, kept alive for the whole test
  // (presumably registering the type for that duration -- see ExtensionTypeGuard).
  auto smallint = std::make_shared<SmallintType>();
  ExtensionTypeGuard smallint_guard(smallint);

  // Smallint(int16) to int16
  CheckCastZeroCopy(SmallintArrayFromJSON("[0, 100, 200, 1, 2]"), int16());

  // Smallint(int16) to uint8, no overflow/underrun
  CheckCast(SmallintArrayFromJSON("[0, 100, 200, 1, 2]"),
            ArrayFromJSON(uint8(), "[0, 100, 200, 1, 2]"));

  // Checks that `smallint_json` cannot be cast to uint8 with default options, and
  // that it casts to `wrapped_json` once integer overflow is allowed.
  auto check_out_of_range = [](const std::string& smallint_json,
                               const std::string& wrapped_json) {
    CastOptions options;
    options.to_type = uint8();
    CheckCastFails(SmallintArrayFromJSON(smallint_json), options);

    options.allow_int_overflow = true;
    CheckCast(SmallintArrayFromJSON(smallint_json),
              ArrayFromJSON(uint8(), wrapped_json), options);
  };

  // Smallint(int16) to uint8, with overflow
  check_out_of_range("[0, null, 256, 1, 3]", "[0, null, 0, 1, 3]");

  // Smallint(int16) to uint8, with underflow
  check_out_of_range("[0, null, -1, 1, 3]", "[0, null, 255, 1, 3]");
}
2298 
// Casts between dictionary types whose index and/or value types differ.
TEST(Cast, DictTypeToAnotherDict) {
  // Parses `json_str` as `in_type`, casts it to `out_type` and compares against the
  // same JSON parsed as `out_type`. When both types are equal the input array itself
  // is the expectation.
  auto check_cast = [](const std::shared_ptr<DataType>& in_type,
                       const std::shared_ptr<DataType>& out_type,
                       const std::string& json_str,
                       const CastOptions& options = CastOptions()) {
    auto input = ArrayFromJSON(in_type, json_str);
    std::shared_ptr<Array> expected = input;
    if (!in_type->Equals(out_type)) {
      expected = ArrayFromJSON(out_type, json_str);
    }
    // this checks for scalars as well
    CheckCast(input, expected, options);
  };

  const auto int_dict_type = dictionary(int8(), int16());
  const std::string int_json = "[1, 2, 3, 1, null, 3]";

  // source and target types are the same
  check_cast(int_dict_type, int_dict_type, int_json);
  // both index and value types change
  check_cast(int_dict_type, dictionary(int32(), int64()), int_json);
  check_cast(int_dict_type, dictionary(int32(), float64()), int_json);
  // only the index type changes
  check_cast(dictionary(int32(), utf8()), dictionary(int8(), utf8()),
             R"(["a", "b", "a", null])");

  auto wide_values = ArrayFromJSON(dictionary(int32(), int32()), "[1, 1000]");
  const auto narrow_type = dictionary(int8(), int8());

  // Unsafe cast of out-of-range dictionary values. (Unsafe indices are not checked:
  // they would produce an invalid index array, which ValidateOutput would reject.)
  ASSERT_OK_AND_ASSIGN(auto unsafe_result,
                       Cast(wide_values, narrow_type, CastOptions::Unsafe()));
  ValidateOutput(unsafe_result);

  // The safe cast must report the value that does not fit.
  EXPECT_RAISES_WITH_MESSAGE_THAT(
      Invalid, testing::HasSubstr("Integer value 1000 not in range"),
      Cast(wide_values, narrow_type, CastOptions::Safe()));
}
2332 
2333 }  // namespace compute
2334 }  // namespace arrow
2335