1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <cstdint>
19 #include <cstdio>
20 #include <functional>
21 #include <memory>
22 #include <string>
23 #include <vector>
24
25 #include <gmock/gmock.h>
26 #include <gtest/gtest.h>
27
28 #include "arrow/array.h"
29 #include "arrow/array/builder_decimal.h"
30 #include "arrow/buffer.h"
31 #include "arrow/chunked_array.h"
32 #include "arrow/extension_type.h"
33 #include "arrow/status.h"
34 #include "arrow/testing/extension_type.h"
35 #include "arrow/testing/gtest_common.h"
36 #include "arrow/testing/gtest_util.h"
37 #include "arrow/testing/random.h"
38 #include "arrow/type.h"
39 #include "arrow/type_fwd.h"
40 #include "arrow/type_traits.h"
41 #include "arrow/util/bitmap.h"
42 #include "arrow/util/checked_cast.h"
43 #include "arrow/util/decimal.h"
44
45 #include "arrow/compute/api_vector.h"
46 #include "arrow/compute/cast.h"
47 #include "arrow/compute/kernel.h"
48 #include "arrow/compute/kernels/codegen_internal.h"
49 #include "arrow/compute/kernels/test_util.h"
50
51 namespace arrow {
52
53 using internal::checked_cast;
54 using internal::checked_pointer_cast;
55
56 namespace compute {
57
InvalidUtf8(std::shared_ptr<DataType> type)58 static std::shared_ptr<Array> InvalidUtf8(std::shared_ptr<DataType> type) {
59 return ArrayFromJSON(type,
60 "["
61 R"(
62 "Hi",
63 "olá mundo",
64 "你好世界",
65 "",
66 )"
67 "\"\xa0\xa1\""
68 "]");
69 }
70
FixedSizeInvalidUtf8(std::shared_ptr<DataType> type)71 static std::shared_ptr<Array> FixedSizeInvalidUtf8(std::shared_ptr<DataType> type) {
72 if (type->id() == Type::FIXED_SIZE_BINARY) {
73 // Assume a particular width for testing
74 EXPECT_EQ(3, checked_cast<const FixedSizeBinaryType&>(*type).byte_width());
75 }
76 return ArrayFromJSON(type,
77 "["
78 R"(
79 "Hi!",
80 "lá",
81 "你",
82 " ",
83 )"
84 "\"\xa0\xa1\xa2\""
85 "]");
86 }
87
88 static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
89 uint8(), int8(), uint16(), int16(), uint32(),
90 int32(), uint64(), int64(), float32(), float64()};
91
92 static std::vector<std::shared_ptr<DataType>> kIntegerTypes = {
93 int8(), uint8(), int16(), uint16(), int32(), uint32(), int64(), uint64()};
94
95 static std::vector<std::shared_ptr<DataType>> kDictionaryIndexTypes = kIntegerTypes;
96
97 static std::vector<std::shared_ptr<DataType>> kBaseBinaryTypes = {
98 binary(), utf8(), large_binary(), large_utf8()};
99
AssertBufferSame(const Array & left,const Array & right,int buffer_index)100 static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
101 ASSERT_EQ(left.data()->buffers[buffer_index].get(),
102 right.data()->buffers[buffer_index].get());
103 }
104
// Runs the "cast" function on `input` and checks the result against `expected`.
//
// The target type is always taken from `expected`; any `to_type` already present in
// `options` is overwritten.
static void CheckCast(std::shared_ptr<Array> input, std::shared_ptr<Array> expected,
                      CastOptions options = CastOptions{}) {
  // `options` is a by-value copy, so the caller's object is left untouched.
  options.to_type = expected->type();

  CheckScalarUnary("cast", input, expected, &options);
}
110
CheckCastFails(std::shared_ptr<Array> input,CastOptions options)111 static void CheckCastFails(std::shared_ptr<Array> input, CastOptions options) {
112 ASSERT_RAISES(Invalid, Cast(input, options))
113 << "\n to_type: " << options.to_type->ToString()
114 << "\n from_type: " << input->type()->ToString()
115 << "\n input: " << input->ToString();
116
117 // For the scalars, check that at least one of the input fails (since many
118 // of the tests contains a mix of passing and failing values). In some
119 // cases we will want to check more precisely
120 int64_t num_failing = 0;
121 for (int64_t i = 0; i < input->length(); ++i) {
122 ASSERT_OK_AND_ASSIGN(auto scalar, input->GetScalar(i));
123 num_failing += static_cast<int>(Cast(scalar, options).status().IsInvalid());
124 }
125 ASSERT_GT(num_failing, 0);
126 }
127
CheckCastZeroCopy(std::shared_ptr<Array> input,std::shared_ptr<DataType> to_type,CastOptions options=CastOptions::Safe ())128 static void CheckCastZeroCopy(std::shared_ptr<Array> input,
129 std::shared_ptr<DataType> to_type,
130 CastOptions options = CastOptions::Safe()) {
131 ASSERT_OK_AND_ASSIGN(auto converted, Cast(*input, to_type, options));
132 ValidateOutput(*converted);
133
134 ASSERT_EQ(input->data()->buffers.size(), converted->data()->buffers.size());
135 for (size_t i = 0; i < input->data()->buffers.size(); ++i) {
136 AssertBufferSame(*input, *converted, static_cast<int>(i));
137 }
138 }
139
MaskArrayWithNullsAt(std::shared_ptr<Array> input,std::vector<int> indices_to_mask)140 static std::shared_ptr<Array> MaskArrayWithNullsAt(std::shared_ptr<Array> input,
141 std::vector<int> indices_to_mask) {
142 auto masked = input->data()->Copy();
143 masked->buffers[0] = *AllocateEmptyBitmap(input->length());
144 masked->null_count = kUnknownNullCount;
145
146 using arrow::internal::Bitmap;
147 Bitmap is_valid(masked->buffers[0], 0, input->length());
148 if (auto original = input->null_bitmap()) {
149 is_valid.CopyFrom(Bitmap(original, input->offset(), input->length()));
150 } else {
151 is_valid.SetBitsTo(true);
152 }
153
154 for (int i : indices_to_mask) {
155 is_valid.SetBitTo(i, false);
156 }
157 return MakeArray(masked);
158 }
159
// Exercises CanCast() over a matrix of source/target type combinations.
TEST(Cast, CanCast) {
  using TypeVector = std::vector<std::shared_ptr<DataType>>;

  // Expects CanCast(from, to) == expected for each `to` in `to_set`.
  auto ExpectCanCast = [](const std::shared_ptr<DataType>& from,
                          const TypeVector& to_set, bool expected = true) {
    for (const auto& to : to_set) {
      EXPECT_EQ(CanCast(*from, *to), expected) << " from: " << from->ToString() << "\n"
                                               << " to: " << to->ToString();
    }
  };

  // Same as above with the expectation inverted.
  auto ExpectCannotCast = [&ExpectCanCast](const std::shared_ptr<DataType>& from,
                                           const TypeVector& to_set) {
    ExpectCanCast(from, to_set, /*expected=*/false);
  };

  // --- from null
  ExpectCanCast(null(), {boolean()});
  ExpectCanCast(null(), kNumericTypes);
  ExpectCanCast(null(), kBaseBinaryTypes);
  ExpectCanCast(
      null(), {date32(), date64(), time32(TimeUnit::MILLI), timestamp(TimeUnit::SECOND)});
  ExpectCanCast(dictionary(uint16(), null()), {null()});

  // --- from boolean
  ExpectCanCast(boolean(), {boolean()});
  ExpectCanCast(boolean(), kNumericTypes);
  ExpectCanCast(boolean(), {utf8(), large_utf8()});
  ExpectCanCast(dictionary(int32(), boolean()), {boolean()});

  ExpectCannotCast(boolean(), {null()});
  ExpectCannotCast(boolean(), {binary(), large_binary()});
  ExpectCannotCast(boolean(), {date32(), date64(), time32(TimeUnit::MILLI),
                               timestamp(TimeUnit::SECOND)});

  // --- from numeric types
  for (const auto& from_numeric : kNumericTypes) {
    ExpectCanCast(from_numeric, {boolean()});
    ExpectCanCast(from_numeric, kNumericTypes);
    ExpectCanCast(from_numeric, {utf8(), large_utf8()});
    ExpectCanCast(dictionary(int32(), from_numeric), {from_numeric});

    ExpectCannotCast(from_numeric, {null()});
  }

  // --- from binary / string types
  for (const auto& from_base_binary : kBaseBinaryTypes) {
    ExpectCanCast(from_base_binary, {boolean()});
    ExpectCanCast(from_base_binary, kNumericTypes);
    ExpectCanCast(from_base_binary, kBaseBinaryTypes);
    ExpectCanCast(dictionary(int64(), from_base_binary), {from_base_binary});

    // any cast which is valid for the dictionary is valid for the DictionaryArray
    ExpectCanCast(dictionary(uint32(), from_base_binary), kBaseBinaryTypes);
    ExpectCanCast(dictionary(int16(), from_base_binary), kNumericTypes);

    ExpectCannotCast(from_base_binary, {null()});
  }

  // --- strings <-> timestamps
  ExpectCanCast(utf8(), {timestamp(TimeUnit::MILLI)});
  ExpectCanCast(large_utf8(), {timestamp(TimeUnit::NANO)});
  ExpectCannotCast(timestamp(TimeUnit::MICRO),
                   {binary(), large_binary()});  // no formatting supported

  // --- from fixed-size binary
  ExpectCanCast(fixed_size_binary(3),
                {binary(), utf8(), large_binary(), large_utf8(), fixed_size_binary(3)});
  // Doesn't fail since a kernel exists (but it will return an error when executed)
  // ExpectCannotCast(fixed_size_binary(3), {fixed_size_binary(5)});

  // --- extension type
  ExtensionTypeGuard smallint_guard(smallint());
  ExpectCanCast(smallint(), {int16()});  // cast storage
  ExpectCanCast(smallint(),
                kNumericTypes);  // any cast which is valid for storage is supported
  ExpectCannotCast(null(), {smallint()});  // FIXME missing common cast from null

  // --- temporal types to strings
  ExpectCanCast(date32(), {utf8(), large_utf8()});
  ExpectCanCast(date64(), {utf8(), large_utf8()});
  ExpectCanCast(timestamp(TimeUnit::NANO), {utf8(), large_utf8()});
  ExpectCanCast(timestamp(TimeUnit::MICRO), {utf8(), large_utf8()});
  ExpectCanCast(time32(TimeUnit::MILLI), {utf8(), large_utf8()});
  ExpectCanCast(time64(TimeUnit::NANO), {utf8(), large_utf8()});
}
237
// Casting int32 to int32 is expected to hand back the input's own buffers.
TEST(Cast, SameTypeZeroCopy) {
  const std::shared_ptr<Array> arr = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]");
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, Cast(*arr, int32()));

  // Buffer 0 and buffer 1 must both be the same objects as in the input.
  for (int buffer_index : {0, 1}) {
    AssertBufferSame(*arr, *result, buffer_index);
  }
}
245
// Casting a chunked array with no chunks is expected to produce a chunked array
// with no chunks of the target type.
TEST(Cast, ZeroChunks) {
  const auto chunked_i32 = std::make_shared<ChunkedArray>(ArrayVector{}, int32());
  ASSERT_OK_AND_ASSIGN(Datum result, Cast(chunked_i32, utf8()));

  ASSERT_EQ(result.kind(), Datum::CHUNKED_ARRAY);

  const ChunkedArray expected({}, utf8());
  AssertChunkedEqual(*result.chunked_array(), expected);
}
253
// Calling the "cast" function without any options is expected to raise Invalid.
TEST(Cast, CastDoesNotProvideDefaultOptions) {
  const std::shared_ptr<Array> arr = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]");

  ASSERT_RAISES(Invalid, CallFunction("cast", {arr}));
}
258
// Booleans cast to int32: the same JSON text describes both the boolean input and
// the expected integer output.
TEST(Cast, FromBoolean) {
  const std::string vals = "[1, 0, null, 1, 0, 1, 1, null, 0, 0, 1]";

  const auto as_boolean = ArrayFromJSON(boolean(), vals);
  const auto as_int32 = ArrayFromJSON(int32(), vals);
  CheckCast(as_boolean, as_int32);
}
263
// Numeric values cast to boolean: zero becomes false, the other values used here
// become true.
TEST(Cast, ToBoolean) {
  // Both input sets below map to the same expected output.
  const auto expected = ArrayFromJSON(boolean(), "[false, null, true, true, false]");

  for (const auto& type : kNumericTypes) {
    CheckCast(ArrayFromJSON(type, "[0, null, 127, 1, 0]"), expected);
  }

  // Check negative numbers
  for (const auto& type : {int8(), float64()}) {
    CheckCast(ArrayFromJSON(type, "[0, null, 127, -1, 0]"), expected);
  }
}
276
// Widening integer casts keep every value (and every null) unchanged.
TEST(Cast, ToIntUpcast) {
  // int8 to int32
  CheckCast(ArrayFromJSON(int8(), "[0, null, 127, -1, 0]"),
            ArrayFromJSON(int32(), "[0, null, 127, -1, 0]"));

  // uint8 to int16, no overflow/underflow
  CheckCast(ArrayFromJSON(uint8(), "[0, 100, 200, 255, 0]"),
            ArrayFromJSON(int16(), "[0, 100, 200, 255, 0]"));
}
288
// A value that does not fit in int16 is expected to be ignored when its slot is
// null.
TEST(Cast, OverflowInNullSlot) {
  // Slot 1 holds 87654321 but is masked out as null.
  const auto input =
      MaskArrayWithNullsAt(ArrayFromJSON(int32(), "[0, 87654321, 2000, 1000, 0]"), {1});
  const auto expected = ArrayFromJSON(int16(), "[0, null, 2000, 1000, 0]");

  CheckCast(input, expected);
}
294
// Narrowing integer casts with safe options: in-range values pass, out-of-range
// values are expected to fail.
TEST(Cast, ToIntDowncastSafe) {
  const auto to_uint8 = CastOptions::Safe(uint8());
  const auto to_int16 = CastOptions::Safe(int16());

  // int16 to uint8, no overflow/underflow
  CheckCast(ArrayFromJSON(int16(), "[0, null, 200, 1, 2]"),
            ArrayFromJSON(uint8(), "[0, null, 200, 1, 2]"));

  // int16 to uint8, overflow
  CheckCastFails(ArrayFromJSON(int16(), "[0, null, 256, 0, 0]"), to_uint8);
  // ... and underflow
  CheckCastFails(ArrayFromJSON(int16(), "[0, null, -1, 0, 0]"), to_uint8);

  // int32 to int16, no overflow/underflow
  CheckCast(ArrayFromJSON(int32(), "[0, null, 2000, 1, 2]"),
            ArrayFromJSON(int16(), "[0, null, 2000, 1, 2]"));

  // int32 to int16, overflow
  CheckCastFails(ArrayFromJSON(int32(), "[0, null, 2000, 70000, 2]"), to_int16);

  // ... and underflow
  CheckCastFails(ArrayFromJSON(int32(), "[0, null, 2000, -70000, 2]"), to_int16);

  // int32 to uint8, with several values out of range
  CheckCastFails(ArrayFromJSON(int32(), "[0, null, 2000, -70000, 2]"), to_uint8);
}
322
// Signed to unsigned integer casts: expected to fail when safe, and to produce the
// listed values when integer overflow is allowed.
TEST(Cast, IntegerSignedToUnsigned) {
  CastOptions unsafe_options;
  unsafe_options.allow_int_overflow = true;

  {
    const auto with_negatives =
        ArrayFromJSON(int32(), "[-2147483648, null, -1, 65535, 2147483647]");

    // Same width
    CheckCastFails(with_negatives, CastOptions::Safe(uint32()));
    // Wider
    CheckCastFails(with_negatives, CastOptions::Safe(uint64()));
    // Narrower
    CheckCastFails(with_negatives, CastOptions::Safe(uint16()));

    CheckCast(
        with_negatives,
        ArrayFromJSON(uint32(), "[2147483648, null, 4294967295, 65535, 2147483647]"),
        unsafe_options);
    CheckCast(
        with_negatives,
        ArrayFromJSON(
            uint64(),
            "[18446744071562067968, null, 18446744073709551615, 65535, 2147483647]"),
        unsafe_options);
    CheckCast(with_negatives, ArrayFromJSON(uint16(), "[0, null, 65535, 65535, 65535]"),
              unsafe_options);
  }

  {
    // Fail because of overflow (instead of underflow).
    const auto too_large = ArrayFromJSON(int32(), "[0, null, 0, 65536, 2147483647]");
    CheckCastFails(too_large, CastOptions::Safe(uint16()));

    CheckCast(too_large, ArrayFromJSON(uint16(), "[0, null, 0, 0, 65535]"),
              unsafe_options);
  }
}
351
// Unsigned to signed integer casts: expected to fail when safe, and to produce the
// listed values when integer overflow is allowed.
TEST(Cast, IntegerUnsignedToSigned) {
  const auto u32s = ArrayFromJSON(uint32(), "[4294967295, null, 0, 32768]");

  // Same width
  CheckCastFails(u32s, CastOptions::Safe(int32()));

  // Narrower
  const auto safe_to_int16 = CastOptions::Safe(int16());
  CheckCastFails(u32s, safe_to_int16);
  // Dropping the first element still leaves 32768, which is checked to fail too.
  CheckCastFails(u32s->Slice(1), safe_to_int16);

  CastOptions unsafe_options;
  unsafe_options.allow_int_overflow = true;

  CheckCast(u32s, ArrayFromJSON(int32(), "[-1, null, 0, 32768]"), unsafe_options);
  CheckCast(u32s, ArrayFromJSON(int64(), "[4294967295, null, 0, 32768]"),
            unsafe_options);
  CheckCast(u32s, ArrayFromJSON(int16(), "[-1, null, 0, -32768]"), unsafe_options);
}
368
// Narrowing integer casts with overflow allowed: out-of-range values are expected
// to yield the listed wrapped results instead of failing.
TEST(Cast, ToIntDowncastUnsafe) {
  CastOptions unsafe_options;
  unsafe_options.allow_int_overflow = true;

  // int16 to uint8, no overflow/underflow
  {
    const auto input = ArrayFromJSON(int16(), "[0, null, 200, 1, 2]");
    const auto expected = ArrayFromJSON(uint8(), "[0, null, 200, 1, 2]");
    CheckCast(input, expected, unsafe_options);
  }

  // int16 to uint8, with overflow/underflow
  {
    const auto input = ArrayFromJSON(int16(), "[0, null, 256, 1, 2, -1]");
    const auto expected = ArrayFromJSON(uint8(), "[0, null, 0, 1, 2, 255]");
    CheckCast(input, expected, unsafe_options);
  }

  // int32 to int16, no overflow/underflow
  {
    const auto input = ArrayFromJSON(int32(), "[0, null, 2000, 1, 2, -1]");
    const auto expected = ArrayFromJSON(int16(), "[0, null, 2000, 1, 2, -1]");
    CheckCast(input, expected, unsafe_options);
  }

  // int32 to int16, with overflow/underflow
  {
    const auto input = ArrayFromJSON(int32(), "[0, null, 2000, 70000, -70000]");
    const auto expected = ArrayFromJSON(int16(), "[0, null, 2000, 4464, -4464]");
    CheckCast(input, expected, unsafe_options);
  }
}
389
// Floating point to integer casts: whole numbers always pass; fractional values
// pass only when float truncation is allowed.
TEST(Cast, FloatingToInt) {
  const std::string whole_numbers = "[1.0, null, 0.0, -1.0, 5.0]";
  const std::string fractional = "[1.5, 0.0, null, 0.5, -1.5, 5.5]";

  for (const auto& from : {float32(), float64()}) {
    for (const auto& to : {int32(), int64()}) {
      // float to int no truncation
      CheckCast(ArrayFromJSON(from, whole_numbers),
                ArrayFromJSON(to, "[1, null, 0, -1, 5]"));

      // float to int truncate error
      auto opts = CastOptions::Safe(to);
      CheckCastFails(ArrayFromJSON(from, fractional), opts);

      // float to int truncate allowed
      opts.allow_float_truncate = true;
      CheckCast(ArrayFromJSON(from, fractional),
                ArrayFromJSON(to, "[1, 0, null, 0, -1, 5]"), opts);
    }
  }
}
408
// Integer to floating point casts: expected to fail in safe mode for the integers
// listed below, and to succeed once those slots are sliced away or nulled out.
TEST(Cast, IntToFloating) {
  // 2**24 and 2**24 + 1
  const std::string two_24 = "[16777216, 16777217]";

  for (const auto& from : {uint32(), int32()}) {
    CheckCastFails(ArrayFromJSON(from, two_24), CastOptions::Safe(float32()));

    // Keeping only the first value makes the cast succeed.
    const auto first_only = ArrayFromJSON(from, two_24)->Slice(0, 1);
    const auto expected_first = ArrayFromJSON(float32(), two_24)->Slice(0, 1);
    CheckCast(first_only, expected_first);
  }

  const auto i64s = ArrayFromJSON(int64(),
                                  "[-9223372036854775808, -9223372036854775807, 0,"
                                  " 9223372036854775806, 9223372036854775807]");
  CheckCastFails(i64s, CastOptions::Safe(float64()));

  // Masking those values with nulls makes this safe
  CheckCast(MaskArrayWithNullsAt(i64s, {0, 1, 3, 4}),
            ArrayFromJSON(float64(), "[null, null, 0, null, null]"));

  // 2**53 and 2**53 + 1
  const auto u64s = ArrayFromJSON(uint64(), "[9007199254740992, 9007199254740993]");
  CheckCastFails(u64s, CastOptions::Safe(float64()));
}
431
// Decimal128 to int64 casts under every combination of `allow_int_overflow` and
// `allow_decimal_truncate`.
TEST(Cast, Decimal128ToInt) {
  auto options = CastOptions::Safe(int64());
  const auto from_type = decimal(38, 10);

  // Whole values inside the int64 range: expected to cast whatever the flags are.
  {
    const auto no_overflow_no_truncation = ArrayFromJSON(from_type, R"([
        "02.0000000000",
        "-11.0000000000",
        "22.0000000000",
        "-121.0000000000",
        null])");
    const auto expected = ArrayFromJSON(int64(), "[2, -11, 22, -121, null]");

    for (bool allow_int_overflow : {false, true}) {
      for (bool allow_decimal_truncate : {false, true}) {
        options.allow_int_overflow = allow_int_overflow;
        options.allow_decimal_truncate = allow_decimal_truncate;
        CheckCast(no_overflow_no_truncation, expected, options);
      }
    }
  }

  // Fractional digits present: only `allow_decimal_truncate` decides the outcome.
  {
    const auto truncation_but_no_overflow = ArrayFromJSON(from_type, R"([
        "02.1000000000",
        "-11.0000004500",
        "22.0000004500",
        "-121.1210000000",
        null])");
    const auto expected = ArrayFromJSON(int64(), "[2, -11, 22, -121, null]");

    for (bool allow_int_overflow : {false, true}) {
      options.allow_int_overflow = allow_int_overflow;

      options.allow_decimal_truncate = true;
      CheckCast(truncation_but_no_overflow, expected, options);

      options.allow_decimal_truncate = false;
      CheckCastFails(truncation_but_no_overflow, options);
    }
  }

  // Whole values outside the int64 range: only `allow_int_overflow` decides.
  {
    const auto overflow_no_truncation = ArrayFromJSON(from_type, R"([
        "12345678901234567890000.0000000000",
        "99999999999999999999999.0000000000",
        null])");
    // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
    const auto expected =
        ArrayFromJSON(int64(), "[4807115922877858896, 200376420520689663, null]");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;

      options.allow_int_overflow = true;
      CheckCast(overflow_no_truncation, expected, options);

      options.allow_int_overflow = false;
      CheckCastFails(overflow_no_truncation, options);
    }
  }

  // Out of range and fractional: both flags are required for success.
  {
    const auto overflow_and_truncation = ArrayFromJSON(from_type, R"([
        "12345678901234567890000.0045345000",
        "99999999999999999999999.0000344300",
        null])");
    // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
    const auto expected =
        ArrayFromJSON(int64(), "[4807115922877858896, 200376420520689663, null]");

    for (bool allow_int_overflow : {false, true}) {
      for (bool allow_decimal_truncate : {false, true}) {
        options.allow_int_overflow = allow_int_overflow;
        options.allow_decimal_truncate = allow_decimal_truncate;

        const bool should_succeed =
            options.allow_int_overflow && options.allow_decimal_truncate;
        if (!should_succeed) {
          CheckCastFails(overflow_and_truncation, options);
          continue;
        }
        CheckCast(overflow_and_truncation, expected, options);
      }
    }
  }

  // Negative scale: the values are built by hand, rescaled from scale 0 to -4.
  Decimal128Builder builder(decimal(38, -4));
  for (const auto& unscaled : {Decimal128("1234567890000."), Decimal128("-120000.")}) {
    ASSERT_OK_AND_ASSIGN(auto rescaled, unscaled.Rescale(0, -4));
    ASSERT_OK(builder.Append(rescaled));
  }
  ASSERT_OK_AND_ASSIGN(auto negative_scale, builder.Finish());
  options.allow_int_overflow = true;
  options.allow_decimal_truncate = true;
  CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, -120000]"), options);
}
521
// Decimal256 to int64 casts under every combination of `allow_int_overflow` and
// `allow_decimal_truncate`.
TEST(Cast, Decimal256ToInt) {
  auto options = CastOptions::Safe(int64());
  const auto from_type = decimal256(40, 10);

  // Whole values inside the int64 range: expected to cast whatever the flags are.
  {
    const auto no_overflow_no_truncation = ArrayFromJSON(from_type, R"([
        "02.0000000000",
        "-11.0000000000",
        "22.0000000000",
        "-121.0000000000",
        null])");
    const auto expected = ArrayFromJSON(int64(), "[2, -11, 22, -121, null]");

    for (bool allow_int_overflow : {false, true}) {
      for (bool allow_decimal_truncate : {false, true}) {
        options.allow_int_overflow = allow_int_overflow;
        options.allow_decimal_truncate = allow_decimal_truncate;
        CheckCast(no_overflow_no_truncation, expected, options);
      }
    }
  }

  // Fractional digits present: only `allow_decimal_truncate` decides the outcome.
  {
    const auto truncation_but_no_overflow = ArrayFromJSON(from_type, R"([
        "02.1000000000",
        "-11.0000004500",
        "22.0000004500",
        "-121.1210000000",
        null])");
    const auto expected = ArrayFromJSON(int64(), "[2, -11, 22, -121, null]");

    for (bool allow_int_overflow : {false, true}) {
      options.allow_int_overflow = allow_int_overflow;

      options.allow_decimal_truncate = true;
      CheckCast(truncation_but_no_overflow, expected, options);

      options.allow_decimal_truncate = false;
      CheckCastFails(truncation_but_no_overflow, options);
    }
  }

  // Whole values outside the int64 range: only `allow_int_overflow` decides.
  {
    const auto overflow_no_truncation = ArrayFromJSON(from_type, R"([
        "1234567890123456789000000.0000000000",
        "9999999999999999999999999.0000000000",
        null])");
    // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64
    const auto expected =
        ArrayFromJSON(int64(), "[1096246371337547584, 1590897978359414783, null]");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;

      options.allow_int_overflow = true;
      CheckCast(overflow_no_truncation, expected, options);

      options.allow_int_overflow = false;
      CheckCastFails(overflow_no_truncation, options);
    }
  }

  // Out of range and fractional: both flags are required for success.
  {
    const auto overflow_and_truncation = ArrayFromJSON(from_type, R"([
        "1234567890123456789000000.0045345000",
        "9999999999999999999999999.0000344300",
        null])");
    // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64
    const auto expected =
        ArrayFromJSON(int64(), "[1096246371337547584, 1590897978359414783, null]");

    for (bool allow_int_overflow : {false, true}) {
      for (bool allow_decimal_truncate : {false, true}) {
        options.allow_int_overflow = allow_int_overflow;
        options.allow_decimal_truncate = allow_decimal_truncate;

        const bool should_succeed =
            options.allow_int_overflow && options.allow_decimal_truncate;
        if (!should_succeed) {
          CheckCastFails(overflow_and_truncation, options);
          continue;
        }
        CheckCast(overflow_and_truncation, expected, options);
      }
    }
  }

  // Negative scale: the values are built by hand, rescaled from scale 0 to -4.
  Decimal256Builder builder(decimal256(40, -4));
  for (const auto& unscaled : {Decimal256("1234567890000."), Decimal256("-120000.")}) {
    ASSERT_OK_AND_ASSIGN(auto rescaled, unscaled.Rescale(0, -4));
    ASSERT_OK(builder.Append(rescaled));
  }
  ASSERT_OK_AND_ASSIGN(auto negative_scale, builder.Finish());
  options.allow_int_overflow = true;
  options.allow_decimal_truncate = true;
  CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, -120000]"), options);
}
612
// Integer to decimal casts, for both decimal widths.
TEST(Cast, IntegerToDecimal) {
  // Small values, every integer type.
  for (const auto& decimal_type : {decimal128(21, 2), decimal256(21, 2)}) {
    const auto expected =
        ArrayFromJSON(decimal_type, R"(["0.00", "7.00", null, "100.00", "99.00"])");
    for (const auto& integer_type : kIntegerTypes) {
      CheckCast(ArrayFromJSON(integer_type, "[0, 7, null, 100, 99]"), expected);
    }
  }

  // extreme value
  for (const auto& decimal_type : {decimal128(19, 0), decimal256(19, 0)}) {
    const auto int64_limits =
        ArrayFromJSON(int64(), "[-9223372036854775808, 9223372036854775807]");
    CheckCast(int64_limits,
              ArrayFromJSON(decimal_type,
                            R"(["-9223372036854775808", "9223372036854775807"])"));

    const auto uint64_limits = ArrayFromJSON(uint64(), "[0, 18446744073709551615]");
    CheckCast(uint64_limits,
              ArrayFromJSON(decimal_type, R"(["0", "18446744073709551615"])"));
  }

  // insufficient output precision
  {
    CastOptions to_decimal128;
    to_decimal128.to_type = decimal128(5, 3);
    CheckCastFails(ArrayFromJSON(int8(), "[0]"), to_decimal128);

    CastOptions to_decimal256;
    to_decimal256.to_type = decimal256(76, 67);
    CheckCastFails(ArrayFromJSON(int32(), "[0]"), to_decimal256);
  }
}
642
// Decimal128 to Decimal128 casts: rescaling and precision changes, with and
// without `allow_decimal_truncate`.
TEST(Cast, Decimal128ToDecimal128) {
  CastOptions options;

  // Rescaling that loses no digits: checked in both directions, either setting.
  {
    const auto no_truncation = ArrayFromJSON(decimal(38, 10), R"([
        "02.0000000000",
        "30.0000000000",
        "22.0000000000",
        "-121.0000000000",
        null])");
    const auto expected = ArrayFromJSON(decimal(28, 0), R"([
        "02.",
        "30.",
        "22.",
        "-121.",
        null])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(no_truncation, expected, options);
      CheckCast(expected, no_truncation, options);
    }
  }

  // Same scale, different precision
  {
    const auto d_5_2 = ArrayFromJSON(decimal(5, 2), R"([
        "12.34",
        "0.56"])");
    const auto d_4_2 = ArrayFromJSON(decimal(4, 2), R"([
        "12.34",
        "0.56"])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(d_5_2, d_4_2, options);
      CheckCast(d_4_2, d_5_2, options);
    }
  }

  // Rescale which leads to truncation
  {
    const auto d_38_10 = ArrayFromJSON(decimal(38, 10), R"([
        "-02.1234567890",
        "30.1234567890",
        null])");
    const auto d_28_0 = ArrayFromJSON(decimal(28, 0), R"([
        "-02.",
        "30.",
        null])");
    const auto d_38_10_roundtripped = ArrayFromJSON(decimal(38, 10), R"([
        "-02.0000000000",
        "30.0000000000",
        null])");

    options.allow_decimal_truncate = true;
    CheckCast(d_38_10, d_28_0, options);
    CheckCast(d_28_0, d_38_10_roundtripped, options);

    options.allow_decimal_truncate = false;
    options.to_type = d_28_0->type();
    CheckCastFails(d_38_10, options);
    CheckCast(d_28_0, d_38_10_roundtripped, options);
  }

  // Precision loss without rescale leads to truncation
  const auto d_4_2 = ArrayFromJSON(decimal(4, 2), R"(["12.34"])");
  for (const auto& expected : {
           ArrayFromJSON(decimal(3, 2), R"(["12.34"])"),
           ArrayFromJSON(decimal(4, 3), R"(["12.340"])"),
           ArrayFromJSON(decimal(2, 1), R"(["12.3"])"),
       }) {
    options.allow_decimal_truncate = true;
    CheckCast(d_4_2, expected, options);

    options.allow_decimal_truncate = false;
    options.to_type = expected->type();
    CheckCastFails(d_4_2, options);
  }
}
721
// Decimal256 to Decimal256 casts: rescaling and precision changes, with and
// without `allow_decimal_truncate`.
TEST(Cast, Decimal256ToDecimal256) {
  CastOptions options;

  // Rescaling that loses no digits: checked in both directions, either setting.
  {
    const auto no_truncation = ArrayFromJSON(decimal256(38, 10), R"([
        "02.0000000000",
        "30.0000000000",
        "22.0000000000",
        "-121.0000000000",
        null])");
    const auto expected = ArrayFromJSON(decimal256(28, 0), R"([
        "02.",
        "30.",
        "22.",
        "-121.",
        null])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(no_truncation, expected, options);
      CheckCast(expected, no_truncation, options);
    }
  }

  // Same scale, different precision
  {
    const auto d_5_2 = ArrayFromJSON(decimal256(5, 2), R"([
        "12.34",
        "0.56"])");
    const auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([
        "12.34",
        "0.56"])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(d_5_2, d_4_2, options);
      CheckCast(d_4_2, d_5_2, options);
    }
  }

  // Rescale which leads to truncation
  {
    const auto d_38_10 = ArrayFromJSON(decimal256(38, 10), R"([
        "-02.1234567890",
        "30.1234567890",
        null])");
    const auto d_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
        "-02.",
        "30.",
        null])");
    const auto d_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([
        "-02.0000000000",
        "30.0000000000",
        null])");

    options.allow_decimal_truncate = true;
    CheckCast(d_38_10, d_28_0, options);
    CheckCast(d_28_0, d_38_10_roundtripped, options);

    options.allow_decimal_truncate = false;
    options.to_type = d_28_0->type();
    CheckCastFails(d_38_10, options);
    CheckCast(d_28_0, d_38_10_roundtripped, options);
  }

  // Precision loss without rescale leads to truncation
  const auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])");
  for (const auto& expected : {
           ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
           ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
           ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
       }) {
    options.allow_decimal_truncate = true;
    CheckCast(d_4_2, expected, options);

    options.allow_decimal_truncate = false;
    options.to_type = expected->type();
    CheckCastFails(d_4_2, options);
  }
}
800
// Decimal128 to Decimal256 casts: rescaling and precision changes, with and
// without `allow_decimal_truncate`.
TEST(Cast, Decimal128ToDecimal256) {
  CastOptions options;

  // Rescaling that loses no digits: checked with either setting.
  {
    const auto no_truncation = ArrayFromJSON(decimal(38, 10), R"([
        "02.0000000000",
        "30.0000000000",
        "22.0000000000",
        "-121.0000000000",
        null])");
    const auto expected = ArrayFromJSON(decimal256(48, 0), R"([
        "02.",
        "30.",
        "22.",
        "-121.",
        null])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(no_truncation, expected, options);
    }
  }

  // Same scale, different precision
  {
    const auto d_5_2 = ArrayFromJSON(decimal(5, 2), R"([
        "12.34",
        "0.56"])");
    const auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([
        "12.34",
        "0.56"])");
    const auto d_40_2 = ArrayFromJSON(decimal256(40, 2), R"([
        "12.34",
        "0.56"])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(d_5_2, d_4_2, options);
      CheckCast(d_5_2, d_40_2, options);
    }
  }

  // Rescale which leads to truncation
  {
    const auto d128_38_10 = ArrayFromJSON(decimal(38, 10), R"([
        "-02.1234567890",
        "30.1234567890",
        null])");
    const auto d128_28_0 = ArrayFromJSON(decimal(28, 0), R"([
        "-02.",
        "30.",
        null])");
    const auto d256_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
        "-02.",
        "30.",
        null])");
    const auto d256_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([
        "-02.0000000000",
        "30.0000000000",
        null])");

    options.allow_decimal_truncate = true;
    CheckCast(d128_38_10, d256_28_0, options);
    CheckCast(d128_28_0, d256_38_10_roundtripped, options);

    options.allow_decimal_truncate = false;
    options.to_type = d256_28_0->type();
    CheckCastFails(d128_38_10, options);
    CheckCast(d128_28_0, d256_38_10_roundtripped, options);
  }

  // Precision loss without rescale leads to truncation
  const auto d128_4_2 = ArrayFromJSON(decimal(4, 2), R"(["12.34"])");
  for (const auto& expected : {
           ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
           ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
           ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
       }) {
    options.allow_decimal_truncate = true;
    CheckCast(d128_4_2, expected, options);

    options.allow_decimal_truncate = false;
    options.to_type = expected->type();
    CheckCastFails(d128_4_2, options);
  }
}
886
// Decimal256 to Decimal128 casts: rescaling and precision changes, with and
// without `allow_decimal_truncate`.
TEST(Cast, Decimal256ToDecimal128) {
  CastOptions options;

  // Rescaling that loses no digits: checked with either setting.
  {
    const auto no_truncation = ArrayFromJSON(decimal256(42, 10), R"([
        "02.0000000000",
        "30.0000000000",
        "22.0000000000",
        "-121.0000000000",
        null])");
    const auto expected = ArrayFromJSON(decimal(28, 0), R"([
        "02.",
        "30.",
        "22.",
        "-121.",
        null])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(no_truncation, expected, options);
    }
  }

  // Same scale, different precision
  {
    const auto d256_42_2 = ArrayFromJSON(decimal256(42, 2), R"([
        "12.34",
        "0.56"])");
    const auto d128_4_2 = ArrayFromJSON(decimal(4, 2), R"([
        "12.34",
        "0.56"])");

    for (bool allow_decimal_truncate : {false, true}) {
      options.allow_decimal_truncate = allow_decimal_truncate;
      CheckCast(d256_42_2, d128_4_2, options);
    }
  }

  // Rescale which leads to truncation
  {
    const auto d256_52_10 = ArrayFromJSON(decimal256(52, 10), R"([
        "-02.1234567890",
        "30.1234567890",
        null])");
    const auto d256_42_0 = ArrayFromJSON(decimal256(42, 0), R"([
        "-02.",
        "30.",
        null])");
    const auto d128_28_0 = ArrayFromJSON(decimal(28, 0), R"([
        "-02.",
        "30.",
        null])");
    const auto d128_38_10_roundtripped = ArrayFromJSON(decimal(38, 10), R"([
        "-02.0000000000",
        "30.0000000000",
        null])");

    options.allow_decimal_truncate = true;
    CheckCast(d256_52_10, d128_28_0, options);
    CheckCast(d256_42_0, d128_38_10_roundtripped, options);

    options.allow_decimal_truncate = false;
    options.to_type = d128_28_0->type();
    CheckCastFails(d256_52_10, options);
    CheckCast(d256_42_0, d128_38_10_roundtripped, options);
  }

  // Precision loss without rescale leads to truncation
  const auto d256_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])");
  for (const auto& expected : {
           ArrayFromJSON(decimal(3, 2), R"(["12.34"])"),
           ArrayFromJSON(decimal(4, 3), R"(["12.340"])"),
           ArrayFromJSON(decimal(2, 1), R"(["12.3"])"),
       }) {
    options.allow_decimal_truncate = true;
    CheckCast(d256_4_2, expected, options);

    options.allow_decimal_truncate = false;
    options.to_type = expected->type();
    CheckCastFails(d256_4_2, options);
  }
}
968
// float32/float64 -> decimal128/decimal256 casts, including rounding to the target
// scale, overflow of the target precision, and values beyond the 64-bit integer range.
TEST(Cast, FloatingToDecimal) {
  for (const auto& float_type : {float32(), float64()}) {
    for (const auto& decimal_type : {decimal(5, 2), decimal256(5, 2)}) {
      const auto in_range =
          ArrayFromJSON(float_type, "[0.0, null, 123.45, 123.456, 999.994]");
      const auto in_range_expected =
          ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "123.46", "999.99"])");
      CheckCast(in_range, in_range_expected);

      // Overflow
      CastOptions options;
      options.to_type = decimal_type;
      const auto too_large = ArrayFromJSON(float_type, "[999.996]");
      CheckCastFails(too_large, options);

      // Same overflowing value, but with truncation permitted the cast goes through.
      options.allow_decimal_truncate = true;
      const auto with_overflow =
          ArrayFromJSON(float_type, "[0.0, null, 999.996, 123.45, 999.994]");
      const auto with_overflow_expected =
          ArrayFromJSON(decimal_type, R"(["0.00", null, "0.00", "123.45", "999.99"])");
      CheckCast(with_overflow, with_overflow_expected, options);
    }
  }

  for (auto make_decimal : {decimal128, decimal256}) {
    const auto integral_type = make_decimal(20, 0);
    const auto scaled_type = make_decimal(20, 4);

    // 2**64 + 2**41 (exactly representable as a float)
    CheckCast(ArrayFromJSON(float32(), "[1.8446746e+19, -1.8446746e+19]"),
              ArrayFromJSON(integral_type,
                            R"(["18446746272732807168", "-18446746272732807168"])"));

    CheckCast(
        ArrayFromJSON(float64(), "[1.8446744073709556e+19, -1.8446744073709556e+19]"),
        ArrayFromJSON(integral_type,
                      R"(["18446744073709555712", "-18446744073709555712"])"));

    CheckCast(ArrayFromJSON(float32(), "[1.8446746e+15, -1.8446746e+15]"),
              ArrayFromJSON(scaled_type,
                            R"(["1844674627273280.7168", "-1844674627273280.7168"])"));

    CheckCast(
        ArrayFromJSON(float64(), "[1.8446744073709556e+15, -1.8446744073709556e+15]"),
        ArrayFromJSON(scaled_type,
                      R"(["1844674407370955.5712", "-1844674407370955.5712"])"));

    // Edge cases are tested for Decimal128::FromReal() and Decimal256::FromReal
  }
}
1012
// decimal128/decimal256 -> float32/float64 casts for a handful of ordinary values.
TEST(Cast, DecimalToFloating) {
  for (const auto& float_type : {float32(), float64()}) {
    for (const auto& decimal_type : {decimal(5, 2), decimal256(5, 2)}) {
      const auto decimals =
          ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "999.99"])");
      const auto floats = ArrayFromJSON(float_type, "[0.0, null, 123.45, 999.99]");
      CheckCast(decimals, floats);
    }
  }

  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal()
}
1023
// Casting timestamps between units: coarse -> fine multiplies, fine -> coarse divides
// and is rejected for inexact values unless allow_time_truncate is set.
TEST(Cast, TimestampToTimestamp) {
  struct TimestampTypePair {
    std::shared_ptr<DataType> coarse, fine;
  };

  CastOptions options;

  // Runs the promote / reject-truncation / allow-truncation sequence for one unit pair.
  // `promoted_json` is the coarse fixture expressed in the fine unit; `lossy_json`
  // holds fine-unit values that are not whole multiples of the coarse unit.
  auto check_pair = [&options](const TimestampTypePair& pair, const char* promoted_json,
                               const char* lossy_json) {
    auto coarse = ArrayFromJSON(pair.coarse, "[0, null, 200, 1, 2]");
    auto promoted = ArrayFromJSON(pair.fine, promoted_json);

    // multiply/promote
    CheckCast(coarse, promoted);

    auto lossy = ArrayFromJSON(pair.fine, lossy_json);

    // with truncation disallowed, fails
    options.allow_time_truncate = false;
    options.to_type = pair.coarse;
    CheckCastFails(lossy, options);

    // with truncation allowed, divide/truncate
    options.allow_time_truncate = true;
    CheckCast(lossy, coarse, options);
  };

  // Units a factor of 10^3 apart
  for (const auto& pair : {
           TimestampTypePair{timestamp(TimeUnit::SECOND), timestamp(TimeUnit::MILLI)},
           TimestampTypePair{timestamp(TimeUnit::MILLI), timestamp(TimeUnit::MICRO)},
           TimestampTypePair{timestamp(TimeUnit::MICRO), timestamp(TimeUnit::NANO)},
       }) {
    check_pair(pair, "[0, null, 200000, 1000, 2000]", "[0, null, 200456, 1123, 2456]");
  }

  // Units a factor of 10^6 apart
  for (const auto& pair : {
           TimestampTypePair{timestamp(TimeUnit::SECOND), timestamp(TimeUnit::MICRO)},
           TimestampTypePair{timestamp(TimeUnit::MILLI), timestamp(TimeUnit::NANO)},
       }) {
    check_pair(pair, "[0, null, 200000000, 1000000, 2000000]",
               "[0, null, 200456000, 1123000, 2456000]");
  }

  // Units a factor of 10^9 apart
  for (const auto& pair : {
           TimestampTypePair{timestamp(TimeUnit::SECOND), timestamp(TimeUnit::NANO)},
       }) {
    check_pair(pair, "[0, null, 200000000000, 1000000000, 2000000000]",
               "[0, null, 200456000000, 1123000000, 2456000000]");
  }
}
1100
// Casts between timestamp(SECOND) and itself or int64 are checked with
// CheckCastZeroCopy, in both directions.
TEST(Cast, TimestampZeroCopy) {
  const char* values_json = "[0, null, 2000, 1000, 0]";
  const auto seconds_type = timestamp(TimeUnit::SECOND);

  for (const auto& target_type : {
           seconds_type,
           int64(),  // ARROW-1773, cast to integer
       }) {
    CheckCastZeroCopy(ArrayFromJSON(seconds_type, values_json), target_type);
  }

  CheckCastZeroCopy(ArrayFromJSON(int64(), values_json), seconds_type);
}
1113
// Casting this second-resolution array to nanoseconds is expected to fail.
TEST(Cast, TimestampToTimestampMultiplyOverflow) {
  // 1000-01-01, 1800-01-01 , 2000-01-01, 2300-01-01, 3000-01-01
  const auto seconds = ArrayFromJSON(
      timestamp(TimeUnit::SECOND),
      "[-30610224000, -5364662400, 946684800, 10413792000, 32503680000]");

  CastOptions to_nanos;
  to_nanos.to_type = timestamp(TimeUnit::NANO);
  CheckCastFails(seconds, to_nanos);
}
1123
// JSON fixtures shared by the timestamp -> date/time cast tests below.

// Timestamp strings with fractional seconds of up to nine digits, followed by entries
// without a fractional part, date-only entries, one entry using a space instead of 'T',
// and a trailing null.
constexpr char kTimestampJson[] =
    R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
          "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
          "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
          "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])";
// The same entries as kTimestampJson with the fractional seconds removed.
constexpr char kTimestampSecondsJson[] =
    R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
          "1899-01-01T00:59:20","2033-05-18T03:33:20",
          "2020-01-01T01:05:05", "2019-12-31T02:10:10",
          "2019-12-30T03:15:15", "2009-12-31T04:20:20",
          "2010-01-01T05:25:25", "2010-01-03T06:30:30",
          "2010-01-04T07:35:35", "2006-01-01T08:40:40",
          "2005-12-31T09:45:45", "2008-12-28", "2008-12-29",
          "2012-01-01 01:02:03", null])";
// Two timestamps with six fractional digits, parsed below at microsecond resolution.
// NOTE(review): presumably chosen to lie outside the nanosecond-representable range
// (the tests reference TestOutsideNanosecondRange) -- confirm.
constexpr char kTimestampExtremeJson[] =
    R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])";
1143
// Casting timezone-less timestamps to date32 (days since epoch) and date64
// (milliseconds since epoch, at day granularity in the expected values below).
TEST(Cast, TimestampToDate) {
  // See scalar_temporal_test.cc
  auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson);
  auto date_32 = ArrayFromJSON(date32(),
                               R"([
          0, 11016, -25932, 23148,
          18262, 18261, 18260, 14609,
          14610, 14612, 14613, 13149,
          13148, 14241, 14242, 15340, null
      ])");
  auto date_64 = ArrayFromJSON(date64(),
                               R"([
          0, 951782400000, -2240524800000, 1999987200000,
          1577836800000, 1577750400000, 1577664000000, 1262217600000,
          1262304000000, 1262476800000, 1262563200000, 1136073600000,
          1135987200000, 1230422400000, 1230508800000, 1325376000000, null
      ])");
  // See TestOutsideNanosecondRange in scalar_temporal_test.cc
  // Uses the shared fixture rather than repeating the literal, matching
  // TEST(Cast, TimestampToTime).
  auto timestamps_extreme =
      ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampExtremeJson);
  auto date_32_extreme = ArrayFromJSON(date32(), "[-106753, 106753]");
  auto date_64_extreme = ArrayFromJSON(date64(), "[-9223459200000, 9223459200000]");

  CheckCast(timestamps, date_32);
  CheckCast(timestamps, date_64);
  CheckCast(timestamps_extreme, date_32_extreme);
  CheckCast(timestamps_extreme, date_64_extreme);

  // The whole-second fixture must yield the same dates for every timestamp unit.
  for (auto u : TimeUnit::values()) {
    auto unit = timestamp(u);
    CheckCast(ArrayFromJSON(unit, kTimestampSecondsJson), date_32);
    CheckCast(ArrayFromJSON(unit, kTimestampSecondsJson), date_64);
  }
}
1178
// Casting timezone-aware timestamps to date32/date64 for two named zones, plus a
// timezone name for which the cast must fail.
TEST(Cast, ZonedTimestampToDate) {
#ifdef _WIN32
  // TODO(ARROW-13168): we lack tzdb on Windows
  GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows";
#endif

  // See TestZoned in scalar_temporal_test.cc
  const auto marquesas_timestamps =
      ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson);
  const auto marquesas_date32 = ArrayFromJSON(date32(),
                                              R"([
          -1, 11016, -25933, 23147,
          18261, 18260, 18259, 14608,
          14609, 14611, 14612, 13148,
          13148, 14240, 14241, 15339, null
      ])");
  const auto marquesas_date64 = ArrayFromJSON(date64(), R"([
          -86400000, 951782400000, -2240611200000, 1999900800000,
          1577750400000, 1577664000000, 1577577600000, 1262131200000,
          1262217600000, 1262390400000, 1262476800000, 1135987200000,
          1135987200000, 1230336000000, 1230422400000, 1325289600000, null
      ])");
  CheckCast(marquesas_timestamps, marquesas_date32);
  CheckCast(marquesas_timestamps, marquesas_date64);

  const auto broken_hill_date32 = ArrayFromJSON(date32(), R"([
          0, 11017, -25932, 23148,
          18262, 18261, 18260, 14609,
          14610, 14612, 14613, 13149,
          13148, 14241, 14242, 15340, null
      ])");
  const auto broken_hill_date64 = ArrayFromJSON(date64(), R"([
          0, 951868800000, -2240524800000, 1999987200000, 1577836800000,
          1577750400000, 1577664000000, 1262217600000, 1262304000000,
          1262476800000, 1262563200000, 1136073600000, 1135987200000,
          1230422400000, 1230508800000, 1325376000000, null
      ])");

  // Every timestamp unit must produce the same dates for the whole-second fixture.
  for (const auto unit : TimeUnit::values()) {
    const auto zoned =
        ArrayFromJSON(timestamp(unit, "Australia/Broken_Hill"), kTimestampSecondsJson);
    CheckCast(zoned, broken_hill_date32);
    CheckCast(zoned, broken_hill_date64);
  }

  // Invalid timezone
  for (const auto unit : TimeUnit::values()) {
    const auto unknown_zone =
        ArrayFromJSON(timestamp(unit, "Mars/Mariner_Valley"), kTimestampSecondsJson);
    CheckCastFails(unknown_zone, CastOptions::Unsafe(date32()));
    CheckCastFails(unknown_zone, CastOptions::Unsafe(date64()));
  }
}
1234
// Casting timezone-less timestamps to time32/time64 (time of day): exact conversions,
// conversions needing allow_time_truncate, upscaling to finer units, and an invalid
// timezone name.
TEST(Cast, TimestampToTime) {
  // See scalar_temporal_test.cc
  auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson);
  // See TestOutsideNanosecondRange in scalar_temporal_test.cc
  auto timestamps_extreme =
      ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampExtremeJson);
  auto timestamps_us = ArrayFromJSON(timestamp(TimeUnit::MICRO), R"([
          "1970-01-01T00:00:59.123456","2000-02-29T23:23:23.999999",
          "1899-01-01T00:59:20.001001","2033-05-18T03:33:20.000000",
          "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
          "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])");
  auto timestamps_ms = ArrayFromJSON(timestamp(TimeUnit::MILLI), R"([
          "1970-01-01T00:00:59.123","2000-02-29T23:23:23.999",
          "1899-01-01T00:59:20.001","2033-05-18T03:33:20.000",
          "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004",
          "2010-01-01T05:25:25.005", "2010-01-03T06:30:30.006",
          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])");
  auto timestamps_s = ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson);

  // Whole-second fixture parsed at the remaining resolutions; each array is reused
  // for several checks below instead of being re-parsed per check.
  auto whole_seconds_ms = ArrayFromJSON(timestamp(TimeUnit::MILLI), kTimestampSecondsJson);
  auto whole_seconds_us = ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson);
  auto whole_seconds_ns = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson);

  // Expected time of day for `timestamps` at full nanosecond resolution
  auto times = ArrayFromJSON(time64(TimeUnit::NANO), R"([
          59123456789, 84203999999999, 3560001001001, 12800000000000,
          3905001000000, 7810002000000, 11715003000000, 15620004132000,
          19525005321000, 23430006163000, 27335000000000, 31240000000000,
          35145000000000, 0, 0, 3723000000000, null
      ])");
  // Expected results named times_<from>_<to>: source resolution, then target unit
  auto times_ns_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
          59123456, 84203999999, 3560001001, 12800000000,
          3905001000, 7810002000, 11715003000, 15620004132,
          19525005321, 23430006163, 27335000000, 31240000000,
          35145000000, 0, 0, 3723000000, null
      ])");
  auto times_ns_ms = ArrayFromJSON(time32(TimeUnit::MILLI), R"([
          59123, 84203999, 3560001, 12800000,
          3905001, 7810002, 11715003, 15620004,
          19525005, 23430006, 27335000, 31240000,
          35145000, 0, 0, 3723000, null
      ])");
  auto times_us_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
          59123456000, 84203999999000, 3560001001000, 12800000000000,
          3905001000000, 7810002000000, 11715003000000, 15620004132000,
          19525005321000, 23430006163000, 27335000000000, 31240000000000,
          35145000000000, 0, 0, 3723000000000, null
      ])");
  auto times_ms_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
          59123000000, 84203999000000, 3560001000000, 12800000000000,
          3905001000000, 7810002000000, 11715003000000, 15620004000000,
          19525005000000, 23430006000000, 27335000000000, 31240000000000,
          35145000000000, 0, 0, 3723000000000, null
      ])");
  auto times_ms_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
          59123000, 84203999000, 3560001000, 12800000000,
          3905001000, 7810002000, 11715003000, 15620004000,
          19525005000, 23430006000, 27335000000, 31240000000,
          35145000000, 0, 0, 3723000000, null
      ])");

  auto times_extreme = ArrayFromJSON(time64(TimeUnit::MICRO), "[59123456, 84203999999]");
  // Expected results for the whole-second fixture in each time unit
  auto times_s = ArrayFromJSON(time32(TimeUnit::SECOND), R"([
          59, 84203, 3560, 12800,
          3905, 7810, 11715, 15620,
          19525, 23430, 27335, 31240,
          35145, 0, 0, 3723, null
      ])");
  auto times_ms = ArrayFromJSON(time32(TimeUnit::MILLI), R"([
          59000, 84203000, 3560000, 12800000,
          3905000, 7810000, 11715000, 15620000,
          19525000, 23430000, 27335000, 31240000,
          35145000, 0, 0, 3723000, null
      ])");
  auto times_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
          59000000, 84203000000, 3560000000, 12800000000,
          3905000000, 7810000000, 11715000000, 15620000000,
          19525000000, 23430000000, 27335000000, 31240000000,
          35145000000, 0, 0, 3723000000, null
      ])");
  auto times_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
          59000000000, 84203000000000, 3560000000000, 12800000000000,
          3905000000000, 7810000000000, 11715000000000, 15620000000000,
          19525000000000, 23430000000000, 27335000000000, 31240000000000,
          35145000000000, 0, 0, 3723000000000, null
      ])");

  CheckCast(timestamps, times);
  CheckCast(timestamps_extreme, times_extreme);

  // Whole-second inputs convert without any truncation option being set.
  CheckCast(timestamps_s, times_s);
  CheckCast(timestamps_s, times_ms);
  CheckCast(whole_seconds_ms, times_s);
  CheckCast(whole_seconds_ms, times_ms);
  CheckCast(whole_seconds_us, times_us);
  CheckCast(whole_seconds_us, times_ns);
  CheckCast(whole_seconds_us, times_ms);
  CheckCast(whole_seconds_us, times_s);
  CheckCast(whole_seconds_ns, times_ns);
  CheckCast(whole_seconds_ns, times_us);
  CheckCast(whole_seconds_ns, times_ms);
  CheckCast(whole_seconds_ns, times_s);

  CastOptions truncate = CastOptions::Safe();
  truncate.allow_time_truncate = true;

  // Truncation tests
  CheckCastFails(timestamps, CastOptions::Safe(time64(TimeUnit::MICRO)));
  CheckCastFails(timestamps, CastOptions::Safe(time32(TimeUnit::MILLI)));
  CheckCastFails(timestamps, CastOptions::Safe(time32(TimeUnit::SECOND)));
  CheckCastFails(timestamps_us, CastOptions::Safe(time32(TimeUnit::MILLI)));
  CheckCastFails(timestamps_us, CastOptions::Safe(time32(TimeUnit::SECOND)));
  CheckCastFails(timestamps_ms, CastOptions::Safe(time32(TimeUnit::SECOND)));
  CheckCast(timestamps, times_ns_us, truncate);
  CheckCast(timestamps, times_ns_ms, truncate);
  CheckCast(timestamps, times_s, truncate);
  CheckCast(timestamps_us, times_ns_ms, truncate);
  CheckCast(timestamps_us, times_s, truncate);
  CheckCast(timestamps_ms, times_s, truncate);

  // Upscaling tests (timestamps_s -> times_ms is already covered above)
  CheckCast(timestamps_us, times_us_ns);
  CheckCast(timestamps_ms, times_ms_ns);
  CheckCast(timestamps_ms, times_ms_us);
  CheckCast(timestamps_s, times_ns);
  CheckCast(timestamps_s, times_us);

  // Invalid timezone
  for (auto u : TimeUnit::values()) {
    auto invalid_zone_timestamps =
        ArrayFromJSON(timestamp(u, "Mars/Mariner_Valley"), kTimestampSecondsJson);
    // time32 is used for second/millisecond units, time64 for the finer ones
    if (u == TimeUnit::SECOND || u == TimeUnit::MILLI) {
      CheckCastFails(invalid_zone_timestamps, CastOptions::Unsafe(time32(u)));
    } else {
      CheckCastFails(invalid_zone_timestamps, CastOptions::Unsafe(time64(u)));
    }
  }
}
1374
// Casting timezone-aware timestamps to time32/time64 for two named zones.
TEST(Cast, ZonedTimestampToTime) {
#ifdef _WIN32
  // TODO(ARROW-13168): we lack tzdb on Windows
  GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows";
#endif

  const auto marquesas_input =
      ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson);
  const auto marquesas_times = ArrayFromJSON(time64(TimeUnit::NANO), R"([
          52259123456789, 50003999999999, 56480001001001, 65000000000000,
          56105001000000, 60010002000000, 63915003000000, 67820004132000,
          71725005321000, 75630006163000, 79535000000000, 83440000000000,
          945000000000, 52200000000000, 52200000000000, 55923000000000, null
      ])");
  CheckCast(marquesas_input, marquesas_times);

  // Expected times for the whole-second fixture in Australia/Broken_Hill, one JSON
  // document per output unit.
  const char* time_s = R"([
          34259, 35603, 35960, 47000,
          41705, 45610, 49515, 53420,
          57325, 61230, 65135, 69040,
          72945, 37800, 37800, 41523, null
      ])";
  const char* time_ms = R"([
          34259000, 35603000, 35960000, 47000000,
          41705000, 45610000, 49515000, 53420000,
          57325000, 61230000, 65135000, 69040000,
          72945000, 37800000, 37800000, 41523000, null
      ])";
  const char* time_us = R"([
          34259000000, 35603000000, 35960000000, 47000000000,
          41705000000, 45610000000, 49515000000, 53420000000,
          57325000000, 61230000000, 65135000000, 69040000000,
          72945000000, 37800000000, 37800000000, 41523000000, null
      ])";
  const char* time_ns = R"([
          34259000000000, 35603000000000, 35960000000000, 47000000000000,
          41705000000000, 45610000000000, 49515000000000, 53420000000000,
          57325000000000, 61230000000000, 65135000000000, 69040000000000,
          72945000000000, 37800000000000, 37800000000000, 41523000000000, null
      ])";

  // One row per timestamp unit: the matching time type and its expected values.
  struct UnitCase {
    TimeUnit::type unit;
    std::shared_ptr<DataType> time_type;
    const char* expected_json;
  };
  for (const auto& unit_case : {
           UnitCase{TimeUnit::SECOND, time32(TimeUnit::SECOND), time_s},
           UnitCase{TimeUnit::MILLI, time32(TimeUnit::MILLI), time_ms},
           UnitCase{TimeUnit::MICRO, time64(TimeUnit::MICRO), time_us},
           UnitCase{TimeUnit::NANO, time64(TimeUnit::NANO), time_ns},
       }) {
    CheckCast(ArrayFromJSON(timestamp(unit_case.unit, "Australia/Broken_Hill"),
                            kTimestampSecondsJson),
              ArrayFromJSON(unit_case.time_type, unit_case.expected_json));
  }
}
1426
// Casting between time32/time64 units: coarse -> fine multiplies, fine -> coarse
// divides and is rejected for inexact values unless allow_time_truncate is set.
TEST(Cast, TimeToTime) {
  // A coarse/fine type pair together with the fine-unit fixtures that go with it.
  struct Scenario {
    std::shared_ptr<DataType> coarse, fine;
    const char* promoted_json;  // the coarse fixture expressed in the fine unit
    const char* lossy_json;     // fine values that are not whole coarse units
  };

  const char* promoted_e3 = "[0, null, 200000, 1000, 2000]";
  const char* lossy_e3 = "[0, null, 200456, 1123, 2456]";
  const char* promoted_e6 = "[0, null, 200000000, 1000000, 2000000]";
  const char* lossy_e6 = "[0, null, 200456000, 1123000, 2456000]";
  const char* promoted_e9 = "[0, null, 200000000000, 1000000000, 2000000000]";
  const char* lossy_e9 = "[0, null, 200456000000, 1123000000, 2456000000]";

  CastOptions options;

  for (const auto& scenario : {
           Scenario{time32(TimeUnit::SECOND), time32(TimeUnit::MILLI), promoted_e3,
                    lossy_e3},
           Scenario{time32(TimeUnit::MILLI), time64(TimeUnit::MICRO), promoted_e3,
                    lossy_e3},
           Scenario{time64(TimeUnit::MICRO), time64(TimeUnit::NANO), promoted_e3,
                    lossy_e3},
           Scenario{time32(TimeUnit::SECOND), time64(TimeUnit::MICRO), promoted_e6,
                    lossy_e6},
           Scenario{time32(TimeUnit::MILLI), time64(TimeUnit::NANO), promoted_e6,
                    lossy_e6},
           Scenario{time32(TimeUnit::SECOND), time64(TimeUnit::NANO), promoted_e9,
                    lossy_e9},
       }) {
    const auto coarse = ArrayFromJSON(scenario.coarse, "[0, null, 200, 1, 2]");
    const auto promoted = ArrayFromJSON(scenario.fine, scenario.promoted_json);

    // multiply/promote
    CheckCast(coarse, promoted);

    const auto lossy = ArrayFromJSON(scenario.fine, scenario.lossy_json);

    // with truncation disallowed, fails
    options.allow_time_truncate = false;
    options.to_type = scenario.coarse;
    CheckCastFails(lossy, options);

    // with truncation allowed, divide/truncate
    options.allow_time_truncate = true;
    CheckCast(lossy, coarse, options);
  }
}
1503
// time32 <-> int32 and time64 <-> int64 casts (and the identity casts) are checked
// with CheckCastZeroCopy.
TEST(Cast, TimeZeroCopy) {
  const char* values_json = "[0, null, 2000, 1000, 0]";

  const auto time32_type = time32(TimeUnit::SECOND);
  for (const auto& target_type : {
           time32_type,
           int32(),  // ARROW-1773: cast to int32
       }) {
    CheckCastZeroCopy(ArrayFromJSON(time32_type, values_json), target_type);
  }
  CheckCastZeroCopy(ArrayFromJSON(int32(), values_json), time32_type);

  const auto time64_type = time64(TimeUnit::MICRO);
  for (const auto& target_type : {
           time64_type,
           int64(),  // ARROW-1773: cast to int64
       }) {
    CheckCastZeroCopy(ArrayFromJSON(time64_type, values_json), target_type);
  }
  CheckCastZeroCopy(ArrayFromJSON(int64(), values_json), time64_type);
}
1525
// date32 and date64 values are formatted as "YYYY-MM-DD" for both string types.
TEST(Cast, DateToString) {
  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto epoch_day = ArrayFromJSON(date32(), "[0, null]");
    CheckCast(epoch_day, ArrayFromJSON(string_type, R"(["1970-01-01", null])"));

    const auto one_day_in_ms = ArrayFromJSON(date64(), "[86400000, null]");
    CheckCast(one_day_in_ms, ArrayFromJSON(string_type, R"(["1970-01-02", null])"));
  }
}
1534
// time32(SECOND) prints as "HH:MM:SS"; time64(NANO) adds nine fractional digits.
TEST(Cast, TimeToString) {
  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto seconds = ArrayFromJSON(time32(TimeUnit::SECOND), "[1, 62]");
    const auto seconds_text = ArrayFromJSON(string_type, R"(["00:00:01", "00:01:02"])");
    CheckCast(seconds, seconds_text);

    const auto nanos = ArrayFromJSON(time64(TimeUnit::NANO), "[0, 1]");
    const auto nanos_text =
        ArrayFromJSON(string_type, R"(["00:00:00.000000000", "00:00:00.000000001"])");
    CheckCast(nanos, nanos_text);
  }
}
1544
// Second-resolution timestamps are formatted as "YYYY-MM-DD HH:MM:SS" for both
// string types.
TEST(Cast, TimestampToString) {
  for (const auto& string_type : {utf8(), large_utf8()}) {
    const auto seconds =
        ArrayFromJSON(timestamp(TimeUnit::SECOND), "[-30610224000, -5364662400]");
    const auto formatted =
        ArrayFromJSON(string_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])");
    CheckCast(seconds, formatted);
  }
}
1552
// date32 <-> date64 casts: exact in both directions for whole-day values, and
// gated by allow_time_truncate when the date64 values are not whole days.
TEST(Cast, DateToDate) {
  const auto days = ArrayFromJSON(date32(), "[0, null, 100, 1, 10]");
  const auto whole_day_millis = ArrayFromJSON(date64(), R"([
      0,
      null,
      8640000000,
      86400000,
      864000000])");

  // Multiply promotion
  CheckCast(days, whole_day_millis);

  // No truncation
  CheckCast(whole_day_millis, days);

  const auto partial_day_millis = ArrayFromJSON(date64(), R"([
      0,
      null,
      8640000123,
      86400456,
      864000789])");

  // Disallow truncate
  CastOptions to_date32;
  to_date32.to_type = date32();
  CheckCastFails(partial_day_millis, to_date32);

  // Divide, truncate
  to_date32.allow_time_truncate = true;
  CheckCast(partial_day_millis, days, to_date32);
}
1584
// date32 <-> int32 and date64 <-> int64 casts (and the identity casts) are checked
// with CheckCastZeroCopy.
TEST(Cast, DateZeroCopy) {
  const char* values_json = "[0, null, 2000, 1000, 0]";

  for (const auto& target_type : {
           date32(),
           int32(),  // ARROW-1773: cast to int32
       }) {
    CheckCastZeroCopy(ArrayFromJSON(date32(), values_json), target_type);
  }
  CheckCastZeroCopy(ArrayFromJSON(int32(), values_json), date32());

  for (const auto& target_type : {
           date64(),
           int64(),  // ARROW-1773: cast to int64
       }) {
    CheckCastZeroCopy(ArrayFromJSON(date64(), values_json), target_type);
  }
  CheckCastZeroCopy(ArrayFromJSON(int64(), values_json), date64());
}
1604
// Casting durations between units: coarse -> fine multiplies, fine -> coarse divides
// and is rejected for inexact values unless allow_time_truncate is set.
TEST(Cast, DurationToDuration) {
  CastOptions options;

  // Exercises one coarse/fine unit pair. `promoted_json` is the coarse fixture in the
  // fine unit; `lossy_json` holds fine values that are not whole coarse units.
  auto check_units = [&options](TimeUnit::type coarse_unit, TimeUnit::type fine_unit,
                                const char* promoted_json, const char* lossy_json) {
    const auto coarse_type = duration(coarse_unit);
    const auto fine_type = duration(fine_unit);

    const auto coarse = ArrayFromJSON(coarse_type, "[0, null, 200, 1, 2]");
    const auto promoted = ArrayFromJSON(fine_type, promoted_json);

    // multiply/promote
    CheckCast(coarse, promoted);

    const auto lossy = ArrayFromJSON(fine_type, lossy_json);

    // with truncation disallowed, fails
    options.allow_time_truncate = false;
    options.to_type = coarse_type;
    CheckCastFails(lossy, options);

    // with truncation allowed, divide/truncate
    options.allow_time_truncate = true;
    CheckCast(lossy, coarse, options);
  };

  // Units a factor of 10^3 apart
  {
    const char* promoted = "[0, null, 200000, 1000, 2000]";
    const char* lossy = "[0, null, 200456, 1123, 2456]";
    check_units(TimeUnit::SECOND, TimeUnit::MILLI, promoted, lossy);
    check_units(TimeUnit::MILLI, TimeUnit::MICRO, promoted, lossy);
    check_units(TimeUnit::MICRO, TimeUnit::NANO, promoted, lossy);
  }

  // Units a factor of 10^6 apart
  {
    const char* promoted = "[0, null, 200000000, 1000000, 2000000]";
    const char* lossy = "[0, null, 200000456, 1000123, 2000456]";
    check_units(TimeUnit::SECOND, TimeUnit::MICRO, promoted, lossy);
    check_units(TimeUnit::MILLI, TimeUnit::NANO, promoted, lossy);
  }

  // Units a factor of 10^9 apart
  check_units(TimeUnit::SECOND, TimeUnit::NANO,
              "[0, null, 200000000000, 1000000000, 2000000000]",
              "[0, null, 200000000456, 1000000123, 2000000456]");
}
1681
// Casts between duration(SECOND) and itself or int64 are checked with
// CheckCastZeroCopy, in both directions.
TEST(Cast, DurationZeroCopy) {
  const char* values_json = "[0, null, 2000, 1000, 0]";
  const auto seconds_type = duration(TimeUnit::SECOND);

  for (const auto& target_type : {
           seconds_type,
           int64(),  // ARROW-1773: cast to int64
       }) {
    CheckCastZeroCopy(ArrayFromJSON(seconds_type, values_json), target_type);
  }

  CheckCastZeroCopy(ArrayFromJSON(int64(), values_json), seconds_type);
}
1694
// Casting this second-resolution duration array to nanoseconds is expected to fail.
TEST(Cast, DurationToDurationMultiplyOverflow) {
  const auto seconds =
      ArrayFromJSON(duration(TimeUnit::SECOND), "[10000000000, 1, 2, 3, 10000000000]");

  CastOptions to_nanos;
  to_nanos.to_type = duration(TimeUnit::NANO);
  CheckCastFails(seconds, to_nanos);
}
1702
// int16, float32 and boolean inputs cast to both floating-point types.
TEST(Cast, MiscToFloating) {
  for (const auto& float_type : {float32(), float64()}) {
    const auto numbers = ArrayFromJSON(float_type, "[0, null, 200, 1, 2]");

    CheckCast(ArrayFromJSON(int16(), "[0, null, 200, 1, 2]"), numbers);

    CheckCast(ArrayFromJSON(float32(), "[0, null, 200, 1, 2]"), numbers);

    // true/false become 1/0
    const auto flags = ArrayFromJSON(boolean(), "[true, null, false, false, true]");
    CheckCast(flags, ArrayFromJSON(float_type, "[1, null, 0, 0, 1]"));
  }
}
1715
// int32 -> list<utf8> must be reported as NotImplemented with a message naming both
// types, through Cast() as well as through CallFunction("cast", ...).
TEST(Cast, UnsupportedInputType) {
  // Casting to a supported target type, but with an unsupported input type
  // for the target type.
  const char* error_fragment = "Unsupported cast from int32 to list";
  const auto target_type = list(utf8());
  const auto int_array = ArrayFromJSON(int32(), "[1, 2, 3]");

  // Try through concrete API
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, ::testing::HasSubstr(error_fragment),
                                  Cast(*int_array, target_type));

  // Try through general kernel API
  CastOptions cast_options;
  cast_options.to_type = target_type;
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, ::testing::HasSubstr(error_fragment),
                                  CallFunction("cast", {int_array}, &cast_options));
}
1734
// Casting to a target type that is not supported at all (dense_union) must report
// NotImplemented through both entry points.
TEST(Cast, UnsupportedTargetType) {
  const auto int32_values = ArrayFromJSON(int32(), "[1, 2, 3]");
  const auto union_type = dense_union({field("a", int32())});
  const char* message = "Unsupported cast from int32 to dense_union";

  // Concrete API
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, ::testing::HasSubstr(message),
                                  Cast(*int32_values, union_type));

  // General kernel API
  CastOptions cast_options;
  cast_options.to_type = union_type;
  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, ::testing::HasSubstr(message),
                                  CallFunction("cast", {int32_values}, &cast_options));
}
1751
// Parses textual and numeric boolean spellings; malformed spellings must fail.
TEST(Cast, StringToBoolean) {
  for (const auto& text_type : {utf8(), large_utf8()}) {
    // Both spellings below map onto the same boolean values
    for (const char* accepted : {R"(["False", null, "true", "True", "false"])",
                                 R"(["0", null, "1", "1", "0"])"}) {
      CheckCast(ArrayFromJSON(text_type, accepted),
                ArrayFromJSON(boolean(), "[false, null, true, true, false]"));
    }

    // Trailing whitespace and abbreviations are rejected
    const auto to_boolean = CastOptions::Safe(boolean());
    for (const char* rejected : {R"(["false "])", R"(["T"])"}) {
      CheckCastFails(ArrayFromJSON(text_type, rejected), to_boolean);
    }
  }
}
1765
// Parses decimal and hexadecimal strings into every integer width, including the
// extreme values of the 32- and 64-bit types, and checks that malformed or
// out-of-range strings are rejected for int8 / uint8.
TEST(Cast, StringToInt) {
  // Wraps one token as a single-element JSON array of strings
  const auto as_json_string = [](const std::string& token) {
    return "[\"" + token + "\"]";
  };

  for (const auto& text_type : {utf8(), large_utf8()}) {
    // Values that fit in every signed width
    for (const auto& signed_type : {int8(), int16(), int32(), int64()}) {
      const auto text =
          ArrayFromJSON(text_type, R"(["0", null, "127", "-1", "0", "0x0", "0x7F"])");
      CheckCast(text, ArrayFromJSON(signed_type, "[0, null, 127, -1, 0, 0, 127]"));
    }

    // int32 extremes; hex literals with the top bit set come out negative
    const auto int32_text =
        ArrayFromJSON(text_type, R"(["2147483647", null, "-2147483648", "0",
          "0X0", "0x7FFFFFFF", "0XFFFFfFfF", "0Xf0000000"])");
    CheckCast(int32_text,
              ArrayFromJSON(
                  int32(),
                  "[2147483647, null, -2147483648, 0, 0, 2147483647, -1, -268435456]"));

    // int64 extremes
    const auto int64_text = ArrayFromJSON(
        text_type, R"(["9223372036854775807", null, "-9223372036854775808", "0",
                    "0x0", "0x7FFFFFFFFFFFFFFf", "0XF000000000000001"])");
    CheckCast(int64_text,
              ArrayFromJSON(int64(),
                            "[9223372036854775807, null, -9223372036854775808, 0, 0, "
                            "9223372036854775807, -1152921504606846975]"));

    // Values that fit in every unsigned width
    for (const auto& unsigned_type : {uint8(), uint16(), uint32(), uint64()}) {
      const auto text = ArrayFromJSON(
          text_type, R"(["0", null, "127", "255", "0", "0X0", "0xff", "0x7f"])");
      CheckCast(text, ArrayFromJSON(unsigned_type, "[0, null, 127, 255, 0, 0, 255, 127]"));
    }

    // uint32 extremes
    const auto uint32_text =
        ArrayFromJSON(text_type, R"(["2147483647", null, "4294967295", "0",
                                    "0x0", "0x7FFFFFFf", "0xFFFFFFFF"])");
    CheckCast(uint32_text,
              ArrayFromJSON(uint32(),
                            "[2147483647, null, 4294967295, 0, 0, 2147483647, 4294967295]"));

    // uint64 extremes
    const auto uint64_text = ArrayFromJSON(
        text_type, R"(["9223372036854775807", null, "18446744073709551615", "0",
                    "0x0", "0x7FFFFFFFFFFFFFFf", "0xfFFFFFFFFFFFFFFf"])");
    CheckCast(uint64_text,
              ArrayFromJSON(uint64(),
                            "[9223372036854775807, null, 18446744073709551615, 0, 0, "
                            "9223372036854775807, 18446744073709551615]"));

    // Strings that are not valid int8 values
    const auto to_int8 = CastOptions::Safe(int8());
    for (const char* bad_int8 : {
             "z",
             "12 z",
             "128",
             "-129",
             "0.5",
             "0x",
             "0xfff",
             "-0xf0",
         }) {
      CheckCastFails(ArrayFromJSON(text_type, as_json_string(bad_int8)), to_int8);
    }

    // Strings that are not valid uint8 values
    const auto to_uint8 = CastOptions::Safe(uint8());
    for (const char* bad_uint8 : {"256", "-1", "0.5", "0x", "0x3wa", "0x123"}) {
      CheckCastFails(ArrayFromJSON(text_type, as_json_string(bad_uint8)), to_uint8);
    }
  }
}
1826
// Parses decimal strings into float32 / float64, checks that non-numeric input is
// rejected, and (where enabled) repeats the successful parse under a French locale.
TEST(Cast, StringToFloating) {
  for (auto string_type : {utf8(), large_utf8()}) {
    for (auto float_type : {float32(), float64()}) {
      auto strings =
          ArrayFromJSON(string_type, R"(["0.1", null, "127.3", "1e3", "200.4", "0.5"])");
      auto floats = ArrayFromJSON(float_type, "[0.1, null, 127.3, 1000, 200.4, 0.5]");
      CheckCast(strings, floats);

      for (std::string not_float : {
               "z",
           }) {
        // Target the floating type of this iteration (previously always float32,
        // which left the float64 failure path untested)
        auto options = CastOptions::Safe(float_type);
        CheckCastFails(ArrayFromJSON(string_type, "[\"" + not_float + "\"]"), options);
      }

#if !defined(_WIN32) || defined(NDEBUG)
      // Test that casting is locale-independent
      // French locale uses the comma as decimal point
      LocaleGuard locale_guard("fr_FR.UTF-8");
      CheckCast(strings, floats);
#endif
    }
  }
}
1851
// Parses ISO dates into timestamps and rejects unparseable strings for every unit.
TEST(Cast, StringToTimestamp) {
  for (const auto& text_type : {utf8(), large_utf8()}) {
    const auto dates = ArrayFromJSON(text_type, R"(["1970-01-01", null, "2000-02-29"])");

    const auto as_seconds =
        ArrayFromJSON(timestamp(TimeUnit::SECOND), "[0, null, 951782400]");
    CheckCast(dates, as_seconds);

    const auto as_micros =
        ArrayFromJSON(timestamp(TimeUnit::MICRO), "[0, null, 951782400000000]");
    CheckCast(dates, as_micros);

    for (auto unit :
         {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
      const auto to_timestamp = CastOptions::Safe(timestamp(unit));
      for (const char* unparseable : {"", "xxx"}) {
        const std::string json = "[\"" + std::string(unparseable) + "\"]";
        CheckCastFails(ArrayFromJSON(text_type, json), to_timestamp);
      }
    }

    // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
  }
}
1876
AssertBinaryZeroCopy(std::shared_ptr<Array> lhs,std::shared_ptr<Array> rhs)1877 static void AssertBinaryZeroCopy(std::shared_ptr<Array> lhs, std::shared_ptr<Array> rhs) {
1878 // null bitmap and data buffers are always zero-copied
1879 AssertBufferSame(*lhs, *rhs, 0);
1880 AssertBufferSame(*lhs, *rhs, 2);
1881
1882 if (offset_bit_width(lhs->type_id()) == offset_bit_width(rhs->type_id())) {
1883 // offset buffer is zero copied if possible
1884 AssertBufferSame(*lhs, *rhs, 1);
1885 return;
1886 }
1887
1888 // offset buffers are equivalent
1889 ArrayVector offsets;
1890 for (auto array : {lhs, rhs}) {
1891 auto length = array->length();
1892 auto buffer = array->data()->buffers[1];
1893 offsets.push_back(offset_bit_width(array->type_id()) == 32
1894 ? *Cast(Int32Array(length, buffer), int64())
1895 : std::make_shared<Int64Array>(length, buffer));
1896 }
1897 AssertArraysEqual(*offsets[0], *offsets[1]);
1898 }
1899
// Casting binary-like data to string types: utf-8 validity is enforced unless
// allow_invalid_utf8 is set, in which case the result is checked against the input's
// buffers.
TEST(Cast, BinaryToString) {
  // Variable-width binary sources
  for (const auto& binary_type : {binary(), large_binary()}) {
    for (const auto& text_type : {utf8(), large_utf8()}) {
      // empty -> empty always works
      CheckCast(ArrayFromJSON(binary_type, "[]"), ArrayFromJSON(text_type, "[]"));

      const auto bad_bytes = InvalidUtf8(binary_type);

      // invalid utf-8 masked by a null bit is not an error
      const auto masked_input = MaskArrayWithNullsAt(InvalidUtf8(binary_type), {4});
      const auto masked_expected = MaskArrayWithNullsAt(InvalidUtf8(text_type), {4});
      CheckCast(masked_input, masked_expected);

      // error: invalid utf-8
      auto cast_options = CastOptions::Safe(text_type);
      CheckCastFails(bad_bytes, cast_options);

      // override utf-8 check: the cast succeeds but the output fails full validation
      cast_options.allow_invalid_utf8 = true;
      ASSERT_OK_AND_ASSIGN(auto unchecked, Cast(*bad_bytes, text_type, cast_options));
      ASSERT_RAISES(Invalid, unchecked->ValidateFull());
      AssertBinaryZeroCopy(bad_bytes, unchecked);
    }
  }

  // Fixed-width binary source
  const auto fixed_type = fixed_size_binary(3);
  const auto fixed_bad_bytes = FixedSizeInvalidUtf8(fixed_type);
  for (const auto& text_type : {utf8(), large_utf8()}) {
    CheckCast(ArrayFromJSON(fixed_type, "[]"), ArrayFromJSON(text_type, "[]"));

    // invalid utf-8 masked by a null bit is not an error
    const auto masked_expected =
        MaskArrayWithNullsAt(FixedSizeInvalidUtf8(text_type), {4});
    CheckCast(MaskArrayWithNullsAt(fixed_bad_bytes, {4}), masked_expected);

    // error: invalid utf-8
    auto cast_options = CastOptions::Safe(text_type);
    CheckCastFails(fixed_bad_bytes, cast_options);

    // override utf-8 check
    cast_options.allow_invalid_utf8 = true;
    ASSERT_OK_AND_ASSIGN(auto unchecked,
                         Cast(*fixed_bad_bytes, text_type, cast_options));
    ASSERT_RAISES(Invalid, unchecked->ValidateFull());

    // N.B. null buffer is not always the same if input sliced
    AssertBufferSame(*fixed_bad_bytes, *unchecked, 0);
    // The fixed-size data buffer (index 1) becomes the string data buffer (index 2)
    ASSERT_EQ(fixed_bad_bytes->data()->buffers[1].get(),
              unchecked->data()->buffers[2].get());
  }
}
1947
// Casting string or binary data to binary types never validates utf-8.
TEST(Cast, BinaryOrStringToBinary) {
  // Variable-width sources
  for (const auto& source_type : {utf8(), large_utf8(), binary(), large_binary()}) {
    for (const auto& target_type : {binary(), large_binary()}) {
      // empty -> empty always works
      CheckCast(ArrayFromJSON(source_type, "[]"), ArrayFromJSON(target_type, "[]"));

      const auto bad_bytes = InvalidUtf8(source_type);

      // invalid utf-8 is not an error for binary
      ASSERT_OK_AND_ASSIGN(auto as_binary, Cast(*bad_bytes, target_type));
      ValidateOutput(*as_binary);
      AssertBinaryZeroCopy(bad_bytes, as_binary);

      // invalid utf-8 masked by a null bit is not an error
      const auto masked_input = MaskArrayWithNullsAt(InvalidUtf8(source_type), {4});
      const auto masked_expected = MaskArrayWithNullsAt(InvalidUtf8(target_type), {4});
      CheckCast(masked_input, masked_expected);
    }
  }

  // Fixed-width source
  const auto fixed_type = fixed_size_binary(3);
  const auto fixed_bad_bytes = FixedSizeInvalidUtf8(fixed_type);

  // Identity cast succeeds; a different byte width is rejected
  CheckCast(fixed_bad_bytes, fixed_bad_bytes);
  CheckCastFails(fixed_bad_bytes, CastOptions::Safe(fixed_size_binary(5)));

  for (const auto& target_type : {binary(), large_binary()}) {
    CheckCast(ArrayFromJSON(fixed_type, "[]"), ArrayFromJSON(target_type, "[]"));

    ASSERT_OK_AND_ASSIGN(auto as_binary, Cast(*fixed_bad_bytes, target_type));
    ValidateOutput(*as_binary);

    // N.B. null buffer is not always the same if input sliced
    AssertBufferSame(*fixed_bad_bytes, *as_binary, 0);
    // The fixed-size data buffer (index 1) becomes the binary data buffer (index 2)
    ASSERT_EQ(fixed_bad_bytes->data()->buffers[1].get(),
              as_binary->data()->buffers[2].get());

    // invalid utf-8 masked by a null bit is not an error
    const auto masked_expected =
        MaskArrayWithNullsAt(FixedSizeInvalidUtf8(target_type), {4});
    CheckCast(MaskArrayWithNullsAt(fixed_bad_bytes, {4}), masked_expected);
  }
}
1985
// Casting between utf8 and large_utf8.
TEST(Cast, StringToString) {
  for (const auto& source_type : {utf8(), large_utf8()}) {
    for (const auto& target_type : {utf8(), large_utf8()}) {
      // empty -> empty always works
      CheckCast(ArrayFromJSON(source_type, "[]"), ArrayFromJSON(target_type, "[]"));

      const auto bad_text = InvalidUtf8(source_type);

      // invalid utf-8 masked by a null bit is not an error
      const auto masked_expected = MaskArrayWithNullsAt(InvalidUtf8(target_type), {4});
      CheckCast(MaskArrayWithNullsAt(bad_text, {4}), masked_expected);

      // override utf-8 check
      auto cast_options = CastOptions::Safe(target_type);
      cast_options.allow_invalid_utf8 = true;

      // utf-8 is not checked by Cast when the origin guarantees utf-8
      ASSERT_OK_AND_ASSIGN(auto recast, Cast(*bad_text, target_type, cast_options));
      ASSERT_RAISES(Invalid, recast->ValidateFull());
      AssertBinaryZeroCopy(bad_text, recast);
    }
  }
}
2008
// Formats the extreme values of every integer width as strings.
TEST(Cast, IntToString) {
  // One row per integer type: the input values and their expected string forms
  struct IntCase {
    std::shared_ptr<DataType> int_type;
    const char* ints;
    const char* strings;
  };
  const std::vector<IntCase> cases = {
      {int8(), "[0, 1, 127, -128, null]", R"(["0", "1", "127", "-128", null])"},
      {uint8(), "[0, 1, 255, null]", R"(["0", "1", "255", null])"},
      {int16(), "[0, 1, 32767, -32768, null]",
       R"(["0", "1", "32767", "-32768", null])"},
      {uint16(), "[0, 1, 65535, null]", R"(["0", "1", "65535", null])"},
      {int32(), "[0, 1, 2147483647, -2147483648, null]",
       R"(["0", "1", "2147483647", "-2147483648", null])"},
      {uint32(), "[0, 1, 4294967295, null]", R"(["0", "1", "4294967295", null])"},
      {int64(), "[0, 1, 9223372036854775807, -9223372036854775808, null]",
       R"(["0", "1", "9223372036854775807", "-9223372036854775808", null])"},
      {uint64(), "[0, 1, 18446744073709551615, null]",
       R"(["0", "1", "18446744073709551615", null])"},
  };

  for (const auto& text_type : {utf8(), large_utf8()}) {
    for (const auto& c : cases) {
      CheckCast(ArrayFromJSON(c.int_type, c.ints), ArrayFromJSON(text_type, c.strings));
    }
  }
}
2040
// Formats zeros, infinities and NaN from both floating point widths as strings.
TEST(Cast, FloatingToString) {
  for (const auto& text_type : {utf8(), large_utf8()}) {
    // float32 and float64 share the same input and expected output
    for (const auto& floating : {float32(), float64()}) {
      const auto numbers =
          ArrayFromJSON(floating, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]");
      const auto formatted = ArrayFromJSON(
          text_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])");
      CheckCast(numbers, formatted);
    }
  }
}
2052
// Booleans are formatted as lowercase "true" / "false".
TEST(Cast, BooleanToString) {
  for (const auto& text_type : {utf8(), large_utf8()}) {
    const auto flags = ArrayFromJSON(boolean(), "[true, true, false, null]");
    const auto formatted =
        ArrayFromJSON(text_type, R"(["true", "true", "false", null])");
    CheckCast(flags, formatted);
  }
}
2059
// Casting a list array to a non-list type is reported as NotImplemented.
TEST(Cast, ListToPrimitive) {
  const auto int8_lists = ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]");
  ASSERT_RAISES(NotImplemented, Cast(*int8_lists, uint8()));

  const auto binary_lists =
      ArrayFromJSON(list(binary()), R"([["1", "2"], ["3", "4"]])");
  ASSERT_RAISES(NotImplemented, Cast(*binary_lists, utf8()));
}
2068
2069 using make_list_t = std::shared_ptr<DataType>(const std::shared_ptr<DataType>&);
2070
2071 static const auto list_factories = std::vector<make_list_t*>{&list, &large_list};
2072
CheckListToList(const std::vector<std::shared_ptr<DataType>> & value_types,const std::string & json_data)2073 static void CheckListToList(const std::vector<std::shared_ptr<DataType>>& value_types,
2074 const std::string& json_data) {
2075 for (auto make_src_list : list_factories) {
2076 for (auto make_dest_list : list_factories) {
2077 for (const auto& src_value_type : value_types) {
2078 for (const auto& dest_value_type : value_types) {
2079 const auto src_type = make_src_list(src_value_type);
2080 const auto dest_type = make_dest_list(dest_value_type);
2081 ARROW_SCOPED_TRACE("src_type = ", src_type->ToString(),
2082 ", dest_type = ", dest_type->ToString());
2083 CheckCast(ArrayFromJSON(src_type, json_data),
2084 ArrayFromJSON(dest_type, json_data));
2085 }
2086 }
2087 }
2088 }
2089 }
2090
// List casts over data containing null lists and empty lists.
TEST(Cast, ListToList) {
  const std::vector<std::shared_ptr<DataType>> value_types = {int32(), float32(),
                                                              int64()};
  const std::string lists_with_nulls =
      "[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]";
  CheckListToList(value_types, lists_with_nulls);
}
2095
// ARROW-12568: list casts over data without any null lists.
TEST(Cast, ListToListNoNulls) {
  const std::vector<std::shared_ptr<DataType>> value_types = {int32(), float32(),
                                                              int64()};
  const std::string lists_without_nulls =
      "[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]";
  CheckListToList(value_types, lists_without_nulls);
}
2101
// Cast options set on a list cast must apply to the cast of the list values.
TEST(Cast, ListToListOptionsPassthru) {
  for (const auto source_factory : list_factories) {
    for (const auto target_factory : list_factories) {
      // 87654321 does not fit in int16
      const auto too_wide = ArrayFromJSON(source_factory(int32()), "[[87654321]]");

      // A safe cast rejects the overflowing value
      auto cast_options = CastOptions::Safe(target_factory(int16()));
      CheckCastFails(too_wide, cast_options);

      // With overflow allowed the value wraps: 87654321 mod 2^16 == 32689
      cast_options.allow_int_overflow = true;
      const auto wrapped = ArrayFromJSON(target_factory(int16()), "[[32689]]");
      CheckCast(too_wide, wrapped, cast_options);
    }
  }
}
2115
// ARROW-4102: casting an array to its own type goes through CheckCastZeroCopy.
TEST(Cast, IdentityCasts) {
  const auto expect_identity = [](const std::shared_ptr<DataType>& type,
                                  const std::string& json) {
    const auto input = ArrayFromJSON(type, json);
    CheckCastZeroCopy(input, type);
  };

  // Null and boolean
  expect_identity(null(), "[null, null, null]");
  expect_identity(boolean(), "[false, true, null, false]");

  // Numeric types
  for (const auto& numeric_type : kNumericTypes) {
    expect_identity(numeric_type, "[1, 2, null, 4]");
  }

  // Binary-like types
  expect_identity(binary(), R"(["foo", "bar"])");
  expect_identity(utf8(), R"(["foo", "bar"])");
  expect_identity(fixed_size_binary(3), R"(["foo", "bar"])");

  // Nested
  expect_identity(list(int8()), "[[1, 2], [null], [], [3]]");

  // Temporal types
  expect_identity(time32(TimeUnit::MILLI), "[1, 2, 3, 4]");
  expect_identity(time64(TimeUnit::MICRO), "[1, 2, 3, 4]");
  expect_identity(date32(), "[1, 2, 3, 4]");
  expect_identity(date64(), "[86400000, 0]");
  expect_identity(timestamp(TimeUnit::SECOND), "[1, 2, 3, 4]");

  // Dictionary
  expect_identity(dictionary(int8(), int8()), "[1, 2, 3, 1, null, 3]");
}
2142
// ARROW-4766: 0-length arrays should not segfault
TEST(Cast, EmptyCasts) {
  const auto expect_empty_cast = [](const std::shared_ptr<DataType>& from,
                                    const std::shared_ptr<DataType>& to) {
    // Python creates array with nullptr instead of 0-length (valid) buffers.
    const auto empty_data =
        ArrayData::Make(from, /* length */ 0, /* buffers */ {nullptr, nullptr});
    const auto empty_input = MakeArray(empty_data);
    CheckCast(empty_input, ArrayFromJSON(to, "[]"));
  };

  // boolean <-> every numeric type
  for (const auto& numeric_type : kNumericTypes) {
    expect_empty_cast(boolean(), numeric_type);
    expect_empty_cast(numeric_type, boolean());
  }
}
2156
// ARROW-12672 segfault when casting slightly malformed array
// (no validity bitmap but atomic null count non-zero)
TEST(Cast, CastWithNoValidityBitmapButUnknownNullCount) {
  const auto bools = ArrayFromJSON(boolean(), "[true, true, false]");

  // Reference result, computed before the array's metadata is altered
  ASSERT_OK_AND_ASSIGN(auto reference, Cast(*bools, int8()));

  // The input has no validity bitmap; mark its null count as unknown and cast again
  ASSERT_EQ(bools->data()->buffers[0], NULLPTR);
  bools->data()->null_count = kUnknownNullCount;
  ASSERT_OK_AND_ASSIGN(auto actual, Cast(*bools, int8()));

  AssertArraysEqual(*reference, *actual);
}
2170
2171 // ----------------------------------------------------------------------
2172 // Test casting from NullType
2173
// A NullArray casts to an all-null array of each listed target type.
TEST(Cast, FromNull) {
  const std::vector<std::shared_ptr<DataType>> target_types = {
      null(),
      uint8(),
      int8(),
      uint16(),
      int16(),
      uint32(),
      int32(),
      uint64(),
      int64(),
      float32(),
      float64(),
      date32(),
      date64(),
      fixed_size_binary(10),
      binary(),
      utf8(),
  };

  for (const auto& target_type : target_types) {
    ASSERT_OK_AND_ASSIGN(auto all_null, MakeArrayOfNull(target_type, 10));
    const auto nulls = std::make_shared<NullArray>(10);
    CheckCast(nulls, all_null);
  }
}
2197
// A NullArray casts to an all-null dictionary array.
TEST(Cast, FromNullToDictionary) {
  const auto dict_type = dictionary(int8(), boolean());
  ASSERT_OK_AND_ASSIGN(auto all_null, MakeArrayOfNull(dict_type, 10));

  const auto nulls = std::make_shared<NullArray>(10);
  CheckCast(nulls, all_null);
}
2205
2206 // ----------------------------------------------------------------------
2207 // Test casting from DictionaryType
2208
// Casting a dictionary array to its value type must match Take(dictionary, indices).
TEST(Cast, FromDictionary) {
  // Candidate dictionaries: null, every numeric type, every base binary type
  ArrayVector value_sets;
  value_sets.push_back(std::make_shared<NullArray>(5));
  for (const auto& numeric_type : kNumericTypes) {
    value_sets.push_back(ArrayFromJSON(numeric_type, "[23, 12, 45, 12, null]"));
  }
  for (const auto& binary_like_type : kBaseBinaryTypes) {
    value_sets.push_back(
        ArrayFromJSON(binary_like_type, R"(["foo", "bar", "baz", "foo", null])"));
  }

  // Every dictionary with every index type
  for (const auto& values : value_sets) {
    for (const auto& index_type : kDictionaryIndexTypes) {
      const auto indices = ArrayFromJSON(index_type, "[4, 0, 1, 2, 0, 4, null, 2]");
      ASSERT_OK_AND_ASSIGN(auto decoded, Take(*values, *indices));

      const auto dict_type = dictionary(index_type, values->type());
      ASSERT_OK_AND_ASSIGN(auto dict_arr,
                           DictionaryArray::FromArrays(dict_type, indices, values));
      CheckCast(dict_arr, decoded);
    }
  }

  for (const auto& values : value_sets) {
    if (values->type_id() == Type::NA) {
      continue;
    }

    // Test with a nullptr bitmap buffer (ARROW-3208)
    const auto indices = ArrayFromJSON(int8(), "[0, 0, 1, 2, 0, 3, 3, 2]");
    ASSERT_OK_AND_ASSIGN(auto no_nulls, Take(*values, *indices));
    ASSERT_EQ(no_nulls->null_count(), 0);

    ASSERT_OK_AND_ASSIGN(Datum encoded, DictionaryEncode(no_nulls));

    // Make a new dict array with nullptr bitmap buffer
    auto bitmap_free_data = encoded.array()->Copy();
    bitmap_free_data->buffers[0] = nullptr;
    bitmap_free_data->null_count = 0;
    std::shared_ptr<Array> dict_array =
        std::make_shared<DictionaryArray>(bitmap_free_data);
    ValidateOutput(*dict_array);

    CheckCast(dict_array, no_nulls);
  }
}
2254
SmallintArrayFromJSON(const std::string & json_data)2255 std::shared_ptr<Array> SmallintArrayFromJSON(const std::string& json_data) {
2256 auto arr = ArrayFromJSON(int16(), json_data);
2257 auto ext_data = arr->data()->Copy();
2258 ext_data->type = smallint();
2259 return MakeArray(ext_data);
2260 }
2261
// Casts arrays of the Smallint extension type (built from int16 data) to int16 and
// uint8, covering in-range values as well as overflow and underflow with and without
// allow_int_overflow.
TEST(Cast, ExtensionTypeToIntDowncast) {
  // NOTE(review): ExtensionTypeGuard presumably registers the type for the lifetime
  // of the guard -- confirm against arrow/testing/extension_type.h
  auto smallint = std::make_shared<SmallintType>();
  ExtensionTypeGuard smallint_guard(smallint);

  // Smallint(int16) to int16
  CheckCastZeroCopy(SmallintArrayFromJSON("[0, 100, 200, 1, 2]"), int16());

  // Smallint(int16) to uint8, no overflow/underrun
  CheckCast(SmallintArrayFromJSON("[0, 100, 200, 1, 2]"),
            ArrayFromJSON(uint8(), "[0, 100, 200, 1, 2]"));

  // Smallint(int16) to uint8, with overflow
  {
    CastOptions options;
    options.to_type = uint8();
    CheckCastFails(SmallintArrayFromJSON("[0, null, 256, 1, 3]"), options);

    // 256 wraps around to 0 once overflow is allowed
    options.allow_int_overflow = true;
    CheckCast(SmallintArrayFromJSON("[0, null, 256, 1, 3]"),
              ArrayFromJSON(uint8(), "[0, null, 0, 1, 3]"), options);
  }

  // Smallint(int16) to uint8, with underflow
  {
    CastOptions options;
    options.to_type = uint8();
    CheckCastFails(SmallintArrayFromJSON("[0, null, -1, 1, 3]"), options);

    // -1 wraps around to 255 once overflow is allowed
    options.allow_int_overflow = true;
    CheckCast(SmallintArrayFromJSON("[0, null, -1, 1, 3]"),
              ArrayFromJSON(uint8(), "[0, null, 255, 1, 3]"), options);
  }
}
2298
// Casting between dictionary types with different index and/or value types.
TEST(Cast, DictTypeToAnotherDict) {
  const auto expect_dict_cast = [](const std::shared_ptr<DataType>& from_type,
                                   const std::shared_ptr<DataType>& to_type,
                                   const std::string& json) {
    const auto input = ArrayFromJSON(from_type, json);
    // Reuse the input when the types match, otherwise parse the JSON as the target
    std::shared_ptr<Array> expected = input;
    if (!from_type->Equals(to_type)) {
      expected = ArrayFromJSON(to_type, json);
    }
    // this checks for scalars as well
    CheckCast(input, expected, CastOptions());
  };

  // check same type passed on to casting
  expect_dict_cast(dictionary(int8(), int16()), dictionary(int8(), int16()),
                   "[1, 2, 3, 1, null, 3]");
  // wider indices and values
  expect_dict_cast(dictionary(int8(), int16()), dictionary(int32(), int64()),
                   "[1, 2, 3, 1, null, 3]");
  // integer values to floating point values
  expect_dict_cast(dictionary(int8(), int16()), dictionary(int32(), float64()),
                   "[1, 2, 3, 1, null, 3]");
  // narrower indices, same string values
  expect_dict_cast(dictionary(int32(), utf8()), dictionary(int8(), utf8()),
                   R"(["a", "b", "a", null])");

  const auto wide_dict = ArrayFromJSON(dictionary(int32(), int32()), "[1, 1000]");
  const auto narrow_type = dictionary(int8(), int8());

  // check casting unsafe values (checking for unsafe indices is unnecessary, because it
  // would create an invalid index array which results in a ValidateOutput failure)
  ASSERT_OK_AND_ASSIGN(auto casted, Cast(wide_dict, narrow_type, CastOptions::Unsafe()));
  ValidateOutput(casted);

  // check safe casting values
  EXPECT_RAISES_WITH_MESSAGE_THAT(
      Invalid, testing::HasSubstr("Integer value 1000 not in range"),
      Cast(wide_dict, narrow_type, CastOptions::Safe()));
}
2332
2333 } // namespace compute
2334 } // namespace arrow
2335