1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "gandiva/projector.h"
19
20 #include <gtest/gtest.h>
21
22 #include <cmath>
23
24 #include "arrow/memory_pool.h"
25 #include "gandiva/literal_holder.h"
26 #include "gandiva/node.h"
27 #include "gandiva/tests/test_util.h"
28 #include "gandiva/tree_expr_builder.h"
29
30 namespace gandiva {
31
32 using arrow::boolean;
33 using arrow::float32;
34 using arrow::int32;
35
36 class TestProjector : public ::testing::Test {
37 public:
SetUp()38 void SetUp() { pool_ = arrow::default_memory_pool(); }
39
40 protected:
41 arrow::MemoryPool* pool_;
42 };
43
// Verifies which Projector::Make calls hand back an already-built projector
// and which ones produce a distinct instance.
TEST_F(TestProjector, TestProjectCache) {
  // Input columns.
  auto lhs = field("f0", int32());
  auto rhs = field("f2", int32());
  auto base_schema = arrow::schema({lhs, rhs});

  // Output columns.
  auto out_add = field("add", int32());
  auto out_subtract = field("subtract", int32());

  // Expressions: add(f0, f2) and subtract(f0, f2).
  auto add_expr = TreeExprBuilder::MakeExpression("add", {lhs, rhs}, out_add);
  auto subtract_expr =
      TreeExprBuilder::MakeExpression("subtract", {lhs, rhs}, out_subtract);

  auto config = TestConfiguration();

  // Baseline projector that every later result is compared with.
  std::shared_ptr<Projector> baseline;
  ASSERT_OK(Projector::Make(base_schema, {add_expr, subtract_expr}, config, &baseline));

  // Equal schema, same expressions, same configuration: expect the baseline.
  auto equal_schema = arrow::schema({lhs, rhs});
  std::shared_ptr<Projector> from_cache;
  ASSERT_OK(
      Projector::Make(equal_schema, {add_expr, subtract_expr}, config, &from_cache));
  EXPECT_EQ(from_cache, baseline);

  // One more column in the schema: expect a different projector.
  auto extra = field("f2", int32());
  auto wider_schema = arrow::schema({lhs, rhs, extra});
  std::shared_ptr<Projector> for_wider_schema;
  ASSERT_OK(Projector::Make(wider_schema, {add_expr, subtract_expr}, config,
                            &for_wider_schema));
  EXPECT_NE(from_cache, for_wider_schema);

  // Shorter expression list: expect a different projector.
  std::shared_ptr<Projector> for_fewer_exprs;
  ASSERT_OK(Projector::Make(base_schema, {add_expr}, config, &for_fewer_exprs));
  EXPECT_NE(from_cache, for_fewer_exprs);

  // A separately constructed TestConfiguration() must still resolve to the
  // baseline projector.
  ASSERT_OK(Projector::Make(base_schema, {add_expr, subtract_expr}, TestConfiguration(),
                            &from_cache));
  EXPECT_EQ(from_cache, baseline);
}
94
// Two expressions that apply the same function to different input fields must
// not resolve to the same projector.
TEST_F(TestProjector, TestProjectCacheFieldNames) {
  // Input columns.
  auto col0 = field("f0", int32());
  auto col1 = field("f1", int32());
  auto col2 = field("f2", int32());
  auto input_schema = arrow::schema({col0, col1, col2});

  // Output columns.
  auto out_01 = field("sum_01", int32());
  auto out_12 = field("sum_12", int32());

  // add(f0, f1)
  auto add_01 = TreeExprBuilder::MakeExpression("add", {col0, col1}, out_01);
  std::shared_ptr<Projector> proj_01;
  auto make_status =
      Projector::Make(input_schema, {add_01}, TestConfiguration(), &proj_01);
  EXPECT_TRUE(make_status.ok());

  // add(f1, f2)
  auto add_12 = TreeExprBuilder::MakeExpression("add", {col1, col2}, out_12);
  std::shared_ptr<Projector> proj_12;
  make_status = Projector::Make(input_schema, {add_12}, TestConfiguration(), &proj_12);
  EXPECT_TRUE(make_status.ok());

  // The two projectors must be distinct objects.
  EXPECT_TRUE(proj_01.get() != proj_12.get());
}
120
// Two double literals with close but different values must yield different
// projectors.
TEST_F(TestProjector, TestProjectCacheDouble) {
  auto empty_schema = arrow::schema({});
  auto out_field = field("result", arrow::float64());
  auto config = TestConfiguration();

  // The two literals differ only from the ninth significant digit onwards.
  double first_value = 1.23456788912345677E18;
  double second_value = 1.23456789012345677E18;

  // Projector for the first literal.
  auto first_literal = TreeExprBuilder::MakeLiteral(first_value);
  auto first_expr = TreeExprBuilder::MakeExpression(first_literal, out_field);
  std::shared_ptr<Projector> first_projector;
  auto make_status = Projector::Make(empty_schema, {first_expr}, config, &first_projector);
  EXPECT_TRUE(make_status.ok()) << make_status.message();

  // Projector for the second literal.
  auto second_literal = TreeExprBuilder::MakeLiteral(second_value);
  auto second_expr = TreeExprBuilder::MakeExpression(second_literal, out_field);
  std::shared_ptr<Projector> second_projector;
  make_status = Projector::Make(empty_schema, {second_expr}, config, &second_projector);
  EXPECT_TRUE(make_status.ok()) << make_status.message();

  EXPECT_TRUE(first_projector.get() != second_projector.get());
}
144
// Two float literals with close but different values must yield different
// projectors.
TEST_F(TestProjector, TestProjectCacheFloat) {
  auto empty_schema = arrow::schema({});
  auto out_field = field("result", arrow::float32());

  // Second value is derived from the first by subtracting 1000.
  float first_value = static_cast<float>(12345678891.000000);
  float second_value = first_value - 1000;

  // Projector for the first literal.
  auto first_literal = TreeExprBuilder::MakeLiteral(first_value);
  auto first_expr = TreeExprBuilder::MakeExpression(first_literal, out_field);
  std::shared_ptr<Projector> first_projector;
  auto make_status =
      Projector::Make(empty_schema, {first_expr}, TestConfiguration(), &first_projector);
  EXPECT_TRUE(make_status.ok()) << make_status.message();

  // Projector for the second literal.
  auto second_literal = TreeExprBuilder::MakeLiteral(second_value);
  auto second_expr = TreeExprBuilder::MakeExpression(second_literal, out_field);
  std::shared_ptr<Projector> second_projector;
  make_status =
      Projector::Make(empty_schema, {second_expr}, TestConfiguration(), &second_projector);
  EXPECT_TRUE(make_status.ok()) << make_status.message();

  EXPECT_TRUE(first_projector.get() != second_projector.get());
}
166
// Two decimal literals of the same precision/scale but different values must
// yield different projectors.
TEST_F(TestProjector, TestProjectCacheLiteral) {
  auto empty_schema = arrow::schema({});
  auto out_field = field("result", arrow::decimal(38, 5));

  DecimalScalar128 first_value("12345678", 38, 5);
  DecimalScalar128 second_value("98756432", 38, 5);

  // Projector for the first literal.
  auto first_literal = TreeExprBuilder::MakeDecimalLiteral(first_value);
  auto first_expr = TreeExprBuilder::MakeExpression(first_literal, out_field);
  std::shared_ptr<Projector> first_projector;
  ASSERT_OK(
      Projector::Make(empty_schema, {first_expr}, TestConfiguration(), &first_projector));

  // Projector for the second literal.
  auto second_literal = TreeExprBuilder::MakeDecimalLiteral(second_value);
  auto second_expr = TreeExprBuilder::MakeExpression(second_literal, out_field);
  std::shared_ptr<Projector> second_projector;
  ASSERT_OK(Projector::Make(empty_schema, {second_expr}, TestConfiguration(),
                            &second_projector));

  EXPECT_NE(first_projector.get(), second_projector.get());
}
186
// castDECIMAL projectors may only be shared when the output decimal type has
// the same precision and scale.
TEST_F(TestProjector, TestProjectCacheDecimalCast) {
  auto input_field = field("float64", arrow::float64());
  auto input_schema = arrow::schema({input_field});

  // Reference: cast to decimal(31, 13).
  auto out_scale_13 = field("result", arrow::decimal(31, 13));
  auto cast_scale_13 =
      TreeExprBuilder::MakeExpression("castDECIMAL", {input_field}, out_scale_13);
  std::shared_ptr<Projector> proj_scale_13;
  ASSERT_OK(Projector::Make(input_schema, {cast_scale_13}, TestConfiguration(),
                            &proj_scale_13));

  // Different output scale: must not be the reference projector.
  auto out_scale_14 = field("result", arrow::decimal(31, 14));
  auto cast_scale_14 =
      TreeExprBuilder::MakeExpression("castDECIMAL", {input_field}, out_scale_14);
  std::shared_ptr<Projector> proj_scale_14;
  ASSERT_OK(Projector::Make(input_schema, {cast_scale_14}, TestConfiguration(),
                            &proj_scale_14));
  EXPECT_NE(proj_scale_13.get(), proj_scale_14.get());

  // Same precision and scale through a freshly built field: must be the
  // reference projector again.
  auto out_scale_13_again = field("result", arrow::decimal(31, 13));
  auto cast_scale_13_again =
      TreeExprBuilder::MakeExpression("castDECIMAL", {input_field}, out_scale_13_again);
  std::shared_ptr<Projector> proj_scale_13_again;
  ASSERT_OK(Projector::Make(input_schema, {cast_scale_13_again}, TestConfiguration(),
                            &proj_scale_13_again));
  EXPECT_EQ(proj_scale_13.get(), proj_scale_13_again.get());
}
211
// Projects add(f0, f2) and subtract(f0, f2) over an int32 batch that contains
// nulls, then checks values and validity of both outputs.
TEST_F(TestProjector, TestIntSumSub) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_sum = field("add", int32());
  auto field_sub = field("subtract", int32());

  // Build expression
  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
  auto sub_expr =
      TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub);

  std::shared_ptr<Projector> projector;
  auto status =
      Projector::Make(schema, {sum_expr, sub_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto array1 = MakeArrowArrayInt32({11, 13, 15, 17}, {true, true, false, true});
  // expected output: a row is valid only when both inputs are valid
  auto exp_sum = MakeArrowArrayInt32({12, 15, 0, 0}, {true, true, false, false});
  auto exp_sub = MakeArrowArrayInt32({-10, -11, 0, 0}, {true, true, false, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
}
252
253 template <typename TYPE, typename C_TYPE>
TestArithmeticOpsForType(arrow::MemoryPool * pool)254 static void TestArithmeticOpsForType(arrow::MemoryPool* pool) {
255 auto atype = arrow::TypeTraits<TYPE>::type_singleton();
256
257 // schema for input fields
258 auto field0 = field("f0", atype);
259 auto field1 = field("f1", atype);
260 auto schema = arrow::schema({field0, field1});
261
262 // output fields
263 auto field_sum = field("add", atype);
264 auto field_sub = field("subtract", atype);
265 auto field_mul = field("multiply", atype);
266 auto field_div = field("divide", atype);
267 auto field_eq = field("equal", arrow::boolean());
268 auto field_lt = field("less_than", arrow::boolean());
269
270 // Build expression
271 auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
272 auto sub_expr =
273 TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub);
274 auto mul_expr =
275 TreeExprBuilder::MakeExpression("multiply", {field0, field1}, field_mul);
276 auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div);
277 auto eq_expr = TreeExprBuilder::MakeExpression("equal", {field0, field1}, field_eq);
278 auto lt_expr = TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_lt);
279
280 std::shared_ptr<Projector> projector;
281 auto status =
282 Projector::Make(schema, {sum_expr, sub_expr, mul_expr, div_expr, eq_expr, lt_expr},
283 TestConfiguration(), &projector);
284 EXPECT_TRUE(status.ok());
285
286 // Create a row-batch with some sample data
287 int num_records = 12;
288 std::vector<C_TYPE> input0 = {1, 2, 53, 84, 5, 15, 0, 1, 52, 83, 4, 120};
289 std::vector<C_TYPE> input1 = {10, 15, 23, 84, 4, 51, 68, 9, 16, 18, 19, 37};
290 std::vector<bool> validity = {true, true, true, true, true, true,
291 true, true, true, true, true, true};
292
293 auto array0 = MakeArrowArray<TYPE, C_TYPE>(input0, validity);
294 auto array1 = MakeArrowArray<TYPE, C_TYPE>(input1, validity);
295
296 // expected output
297 std::vector<C_TYPE> sum;
298 std::vector<C_TYPE> sub;
299 std::vector<C_TYPE> mul;
300 std::vector<C_TYPE> div;
301 std::vector<bool> eq;
302 std::vector<bool> lt;
303 for (int i = 0; i < num_records; i++) {
304 sum.push_back(static_cast<C_TYPE>(input0[i] + input1[i]));
305 sub.push_back(static_cast<C_TYPE>(input0[i] - input1[i]));
306 mul.push_back(static_cast<C_TYPE>(input0[i] * input1[i]));
307 div.push_back(static_cast<C_TYPE>(input0[i] / input1[i]));
308 eq.push_back(input0[i] == input1[i]);
309 lt.push_back(input0[i] < input1[i]);
310 }
311 auto exp_sum = MakeArrowArray<TYPE, C_TYPE>(sum, validity);
312 auto exp_sub = MakeArrowArray<TYPE, C_TYPE>(sub, validity);
313 auto exp_mul = MakeArrowArray<TYPE, C_TYPE>(mul, validity);
314 auto exp_div = MakeArrowArray<TYPE, C_TYPE>(div, validity);
315 auto exp_eq = MakeArrowArray<arrow::BooleanType, bool>(eq, validity);
316 auto exp_lt = MakeArrowArray<arrow::BooleanType, bool>(lt, validity);
317
318 // prepare input record batch
319 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
320
321 // Evaluate expression
322 arrow::ArrayVector outputs;
323 status = projector->Evaluate(*in_batch, pool, &outputs);
324 EXPECT_TRUE(status.ok());
325
326 // Validate results
327 EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
328 EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
329 EXPECT_ARROW_ARRAY_EQUALS(exp_mul, outputs.at(2));
330 EXPECT_ARROW_ARRAY_EQUALS(exp_div, outputs.at(3));
331 EXPECT_ARROW_ARRAY_EQUALS(exp_eq, outputs.at(4));
332 EXPECT_ARROW_ARRAY_EQUALS(exp_lt, outputs.at(5));
333 }
334
// Runs the arithmetic/comparison helper once for every signed and unsigned
// integer width from 8 to 64 bits.
TEST_F(TestProjector, TestAllIntTypes) {
  // Signed integers.
  TestArithmeticOpsForType<arrow::Int8Type, int8_t>(pool_);
  TestArithmeticOpsForType<arrow::Int16Type, int16_t>(pool_);
  TestArithmeticOpsForType<arrow::Int32Type, int32_t>(pool_);
  TestArithmeticOpsForType<arrow::Int64Type, int64_t>(pool_);

  // Unsigned integers.
  TestArithmeticOpsForType<arrow::UInt8Type, uint8_t>(pool_);
  TestArithmeticOpsForType<arrow::UInt16Type, uint16_t>(pool_);
  TestArithmeticOpsForType<arrow::UInt32Type, uint32_t>(pool_);
  TestArithmeticOpsForType<arrow::UInt64Type, uint64_t>(pool_);
}
345
// Evaluates cbrt, exp, log, log10, two-argument log and power on float64
// columns and compares each output, within a tolerance, with the long-double
// <cmath> function applied to the same inputs.
TEST_F(TestProjector, TestExtendedMath) {
  // schema for input fields
  auto field0 = arrow::field("f0", arrow::float64());
  auto field1 = arrow::field("f1", arrow::float64());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_cbrt = arrow::field("cbrt", arrow::float64());
  auto field_exp = arrow::field("exp", arrow::float64());
  auto field_log = arrow::field("log", arrow::float64());
  auto field_log10 = arrow::field("log10", arrow::float64());
  auto field_logb = arrow::field("logb", arrow::float64());
  auto field_power = arrow::field("power", arrow::float64());

  // Build expression
  auto cbrt_expr = TreeExprBuilder::MakeExpression("cbrt", {field0}, field_cbrt);
  auto exp_expr = TreeExprBuilder::MakeExpression("exp", {field0}, field_exp);
  auto log_expr = TreeExprBuilder::MakeExpression("log", {field0}, field_log);
  auto log10_expr = TreeExprBuilder::MakeExpression("log10", {field0}, field_log10);
  auto logb_expr = TreeExprBuilder::MakeExpression("log", {field0, field1}, field_logb);
  auto power_expr =
      TreeExprBuilder::MakeExpression("power", {field0, field1}, field_power);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(
      schema, {cbrt_expr, exp_expr, log_expr, log10_expr, logb_expr, power_expr},
      TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 4;
  std::vector<double> input0 = {16, 10, -14, 8.3};
  std::vector<double> input1 = {2, 3, 5, 7};
  std::vector<bool> validity = {true, true, true, true};

  auto array0 = MakeArrowArray<arrow::DoubleType, double>(input0, validity);
  auto array1 = MakeArrowArray<arrow::DoubleType, double>(input1, validity);

  // expected output
  std::vector<double> cbrt_vals;
  std::vector<double> exp_vals;
  std::vector<double> log_vals;
  std::vector<double> log10_vals;
  std::vector<double> logb_vals;
  std::vector<double> power_vals;
  for (int i = 0; i < num_records; i++) {
    cbrt_vals.push_back(static_cast<double>(cbrtl(input0[i])));
    exp_vals.push_back(static_cast<double>(expl(input0[i])));
    log_vals.push_back(static_cast<double>(logl(input0[i])));
    log10_vals.push_back(static_cast<double>(log10l(input0[i])));
    // Two-argument log: the reference treats f0 as the base and f1 as the
    // value, i.e. ln(f1) / ln(f0).
    logb_vals.push_back(static_cast<double>(logl(input1[i]) / logl(input0[i])));
    power_vals.push_back(static_cast<double>(powl(input0[i], input1[i])));
  }
  auto expected_cbrt = MakeArrowArray<arrow::DoubleType, double>(cbrt_vals, validity);
  auto expected_exp = MakeArrowArray<arrow::DoubleType, double>(exp_vals, validity);
  auto expected_log = MakeArrowArray<arrow::DoubleType, double>(log_vals, validity);
  auto expected_log10 = MakeArrowArray<arrow::DoubleType, double>(log10_vals, validity);
  auto expected_logb = MakeArrowArray<arrow::DoubleType, double>(logb_vals, validity);
  auto expected_power = MakeArrowArray<arrow::DoubleType, double>(power_vals, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  double epsilon = 1E-13;
  EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_cbrt, outputs.at(0), epsilon);
  EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_exp, outputs.at(1), epsilon);
  EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_log, outputs.at(2), epsilon);
  EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_log10, outputs.at(3), epsilon);
  EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_logb, outputs.at(4), epsilon);
  EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_power, outputs.at(5), epsilon);
}
423
// Evaluates less_than(f0, f2) on float32 columns and checks both the boolean
// values and the validity of the result.
TEST_F(TestProjector, TestFloatLessThan) {
  // schema for input fields
  auto field0 = field("f0", float32());
  auto field1 = field("f2", float32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression
  auto lt_expr =
      TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 3;
  auto array0 = MakeArrowArrayFloat32({1.0f, 8.9f, 3.0f}, {true, true, false});
  auto array1 = MakeArrowArrayFloat32({4.0f, 3.4f, 6.8f}, {true, true, true});
  // expected output: the last row is null because f0 is null there
  auto exp = MakeArrowArrayBool({true, false, false}, {true, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
460
// Evaluates isnotnull(f0) on a float32 column containing one null and checks
// that every output row is valid and carries the input's validity as its value.
TEST_F(TestProjector, TestIsNotNull) {
  // schema for input fields
  auto field0 = field("f0", float32());
  auto schema = arrow::schema({field0});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression
  auto myexpr = TreeExprBuilder::MakeExpression("isnotnull", {field0}, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {myexpr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 3;
  auto array0 = MakeArrowArrayFloat32({1.0f, 8.9f, 3.0f}, {true, true, false});
  // expected output: always valid, false only for the null input row
  auto exp = MakeArrowArrayBool({true, true, false}, {true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
494
// Evaluates castFLOAT4(f0) into output buffers supplied by the test instead of
// letting the projector allocate them, then checks the array built on top of
// those buffers.
TEST_F(TestProjector, TestZeroCopy) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // output fields
  auto res = field("res", float32());

  // Build expression
  auto cast_expr = TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {cast_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // expected output
  auto exp = MakeArrowArrayFloat32({1, 2, 3, 0}, {true, true, true, false});

  // allocate output buffers; the vectors own the storage and outlive the
  // MutableBuffer views created on top of them
  int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
  int64_t bitmap_capacity = arrow::BitUtil::RoundUpToMultipleOf64(bitmap_sz);
  std::vector<uint8_t> bitmap(bitmap_capacity);
  std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
      std::make_shared<arrow::MutableBuffer>(&bitmap[0], bitmap_capacity);

  // Size the backing storage from data_sz rather than from the bitmap
  // capacity, so it stays large enough if num_records grows. For the current
  // four records this rounds up to the same 64 bytes as before.
  int64_t data_sz = sizeof(float) * num_records;
  int64_t data_capacity = arrow::BitUtil::RoundUpToMultipleOf64(data_sz);
  std::vector<uint8_t> data(data_capacity);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(&data[0], data_sz);

  auto array_data =
      arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});

  // Evaluate expression
  status = projector->Evaluate(*in_batch, {array_data});
  // Fatal on failure: the buffers hold no result to compare.
  ASSERT_OK(status);

  // Validate results
  auto output = arrow::MakeArray(array_data);
  EXPECT_ARROW_ARRAY_EQUALS(exp, output);
}
541
// Feeds malformed arguments to the caller-allocated-output overload of
// Projector::Evaluate and checks that each one is rejected with
// StatusCode::Invalid.
TEST_F(TestProjector, TestZeroCopyNegative) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // output fields
  auto res = field("res", float32());

  // Build expression
  auto cast_expr = TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {cast_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // allocate output buffers
  int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
  std::unique_ptr<uint8_t[]> bitmap(new uint8_t[bitmap_sz]);
  std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
      std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz);

  int64_t data_sz = sizeof(float) * num_records;
  std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

  auto array_data =
      arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});

  // the batch can't be empty.
  auto bad_batch = arrow::RecordBatch::Make(schema, 0 /*num_records*/, {array0});
  status = projector->Evaluate(*bad_batch, {array_data});
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the output array can't be null.
  std::shared_ptr<arrow::ArrayData> null_array_data;
  status = projector->Evaluate(*in_batch, {null_array_data});
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the output array must have at least two buffers.
  auto bad_array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf});
  status = projector->Evaluate(*in_batch, {bad_array_data});
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the output buffers must have sufficiently sized data_buf.
  std::shared_ptr<arrow::MutableBuffer> bad_data_buf =
      std::make_shared<arrow::MutableBuffer>(data.get(), data_sz - 1);
  auto bad_array_data2 =
      arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, bad_data_buf});
  status = projector->Evaluate(*in_batch, {bad_array_data2});
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the output buffers must have sufficiently sized bitmap_buf.
  std::shared_ptr<arrow::MutableBuffer> bad_bitmap_buf =
      std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz - 1);
  auto bad_array_data3 =
      arrow::ArrayData::Make(float32(), num_records, {bad_bitmap_buf, data_buf});
  status = projector->Evaluate(*in_batch, {bad_array_data3});
  EXPECT_EQ(status.code(), StatusCode::Invalid);
}
610
// Checks that an int32 division by a valid zero fails the evaluation with an
// ExecutionError, and that the same projector then evaluates a batch without
// zero divisors successfully.
TEST_F(TestProjector, TestDivideZero) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_div = field("divide", int32());

  // Build expression
  auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {div_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data; the last two divisors are
  // valid zeros.
  int num_records = 5;
  auto array0 = MakeArrowArrayInt32({2, 3, 4, 5, 6}, {true, true, true, true, true});
  auto array1 = MakeArrowArrayInt32({1, 2, 2, 0, 0}, {true, true, false, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression; this one is expected to fail.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  EXPECT_EQ(status.code(), StatusCode::ExecutionError);
  std::string expected_error = "divide by zero error";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);

  // Testing for second batch that has no error should succeed.
  array0 = MakeArrowArrayInt32({2, 3, 4, 5, 6}, {true, true, true, true, true});
  array1 = MakeArrowArrayInt32({1, 2, 2, 1, 1}, {true, true, false, true, true});

  // prepare input record batch
  in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
  // expected output
  auto exp = MakeArrowArrayInt32({2, 1, 2, 5, 6}, {true, true, false, true, true});

  // Evaluate expression
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
659
// Evaluates mod(int64, int32) where one divisor is a valid zero. Per the
// expected output below, the evaluation succeeds and that row yields the
// dividend.
TEST_F(TestProjector, TestModZero) {
  // schema for input fields
  auto field0 = field("f0", arrow::int64());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_mod = field("mod", int32());

  // Build expression
  auto mod_expr = TreeExprBuilder::MakeExpression("mod", {field0, field1}, field_mod);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {mod_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt64({2, 3, 4, 5}, {true, true, true, true});
  auto array1 = MakeArrowArrayInt32({1, 2, 2, 0}, {true, true, false, true});
  // expected output
  auto exp_mod = MakeArrowArrayInt32({0, 1, 0, 5}, {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_mod, outputs.at(0));
}
694
// Evaluates concat(f0, f1) on utf8 columns with empty strings and nulls. Per
// the expected output below, every result row is valid and a null input
// contributes nothing to the concatenation.
TEST_F(TestProjector, TestConcat) {
  // schema for input fields
  auto field0 = field("f0", arrow::utf8());
  auto field1 = field("f1", arrow::utf8());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_concat = field("concat", arrow::utf8());

  // Build expression
  auto concat_expr =
      TreeExprBuilder::MakeExpression("concat", {field0, field1}, field_concat);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {concat_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data; rows marked invalid carry the
  // text "invalid" so that a leak of null data would show up in the output.
  int num_records = 6;
  auto array0 = MakeArrowArrayUtf8({"ab", "", "ab", "invalid", "valid", "invalid"},
                                   {true, true, true, false, true, false});
  auto array1 = MakeArrowArrayUtf8({"cd", "cd", "", "valid", "invalid", "invalid"},
                                   {true, true, true, true, false, false});
  // expected output
  auto exp_concat = MakeArrowArrayUtf8({"abcd", "cd", "ab", "valid", "valid", ""},
                                       {true, true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_concat, outputs.at(0));
}
733
// Evaluates add(f0, f1) on a batch whose columns are sliced, so the values
// and validity bits are read through a non-zero offset.
TEST_F(TestProjector, TestOffset) {
  // schema for input fields
  auto field0 = field("f0", arrow::int32());
  auto field1 = field("f1", arrow::int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_sum = field("sum", arrow::int32());

  // Build expression
  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector);
  // Fatal on failure: the projector is dereferenced below and would be null.
  ASSERT_OK(status);

  // Create a row-batch with some sample data. array0 is built with five
  // elements and sliced by one, leaving four elements behind an offset of 1.
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 5}, {true, true, true, true, false});
  array0 = array0->Slice(1);
  auto array1 = MakeArrowArrayInt32({5, 6, 7, 8}, {true, false, true, true});
  // expected output for the three rows left after the batch slice below:
  // f0 = {3, 4, null}, f1 = {null, 7, 8}
  auto exp_sum = MakeArrowArrayInt32({9, 11, 13}, {false, true, false});

  // prepare input record batch, then drop its first row so both columns are
  // offset once more
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
  in_batch = in_batch->Slice(1);

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal on failure: outputs is indexed right after.
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
}
770
771 // Test to ensure behaviour of cast functions when the validity is false for an input. The
772 // function should not run for that input.
TEST_F(TestProjector, TestCastFunction) {
  // schema for input fields: a single utf8 column that every cast reads from
  auto field0 = field("f0", arrow::utf8());
  auto schema = arrow::schema({field0});

  // output fields
  auto res_float4 = field("res_float4", arrow::float32());
  auto res_float8 = field("res_float8", arrow::float64());
  auto res_int4 = field("castINT", arrow::int32());
  auto res_int8 = field("castBIGINT", arrow::int64());

  // Build expression
  auto cast_expr_float4 =
      TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res_float4);
  auto cast_expr_float8 =
      TreeExprBuilder::MakeExpression("castFLOAT8", {field0}, res_float8);
  auto cast_expr_int4 = TreeExprBuilder::MakeExpression("castINT", {field0}, res_int4);
  auto cast_expr_int8 = TreeExprBuilder::MakeExpression("castBIGINT", {field0}, res_int8);

  // Build a single projector for all four casts; the output order below
  // follows the order of this expression list.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(
      schema, {cast_expr_float4, cast_expr_float8, cast_expr_int4, cast_expr_int8},
      TestConfiguration(), &projector);
  // Fatal assertion: the projector is dereferenced below, so there is no point
  // in continuing if it could not be built.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;

  // Last validity is false and the cast functions throw error when input is empty. Should
  // not be evaluated due to addition of NativeFunction::kCanReturnErrors
  auto array0 = MakeArrowArrayUtf8({"1", "2", "3", ""}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // expected output: the first three rows are converted, the last row stays null
  auto out_float4 = MakeArrowArrayFloat32({1, 2, 3, 0}, {true, true, true, false});
  auto out_float8 = MakeArrowArrayFloat64({1, 2, 3, 0}, {true, true, true, false});
  auto out_int4 = MakeArrowArrayInt32({1, 2, 3, 0}, {true, true, true, false});
  auto out_int8 = MakeArrowArrayInt64({1, 2, 3, 0}, {true, true, true, false});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: the outputs.at(i) accesses below require a successful
  // evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(out_float4, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(out_float8, outputs.at(1));
  EXPECT_ARROW_ARRAY_EQUALS(out_int4, outputs.at(2));
  EXPECT_ARROW_ARRAY_EQUALS(out_int8, outputs.at(3));
}
823
// Evaluates to_date(f0, "YYYY-MM-DD") over a utf8 column and compares the
// result against date64 values; a null input row is expected to produce a
// null output row.
TEST_F(TestProjector, TestToDate) {
  // schema for input fields
  auto field0 = field("f0", arrow::utf8());
  auto field_node = std::make_shared<FieldNode>(field0);
  auto schema = arrow::schema({field0});

  // output fields
  auto field_result = field("res", arrow::date64());

  // The date pattern is passed to to_date as a utf8 literal.
  auto pattern_node = std::make_shared<LiteralNode>(
      arrow::utf8(), LiteralHolder(std::string("YYYY-MM-DD")), false);

  // Build expression
  auto fn_node = TreeExprBuilder::MakeFunction("to_date", {field_node, pattern_node},
                                               arrow::date64());
  auto expr = TreeExprBuilder::MakeExpression(fn_node, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal assertion: the projector is dereferenced below, so there is no point
  // in continuing if it could not be built.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data. The last row does not hold a
  // date, but it is marked null.
  const int num_records = 3;
  auto array0 =
      MakeArrowArrayUtf8({"1986-12-01", "2012-12-01", "invalid"}, {true, true, false});
  // expected output: the two dates as milliseconds since the Unix epoch, then
  // a null for the null input row
  auto exp = MakeArrowArrayDate64({533779200000, 1354320000000, 0}, {true, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below requires a successful evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
864
865 // ARROW-11617
// Evaluates a nested if/else expression on a single row where f0 and f1 are
// null and f2 is not, and expects a null result.
TEST_F(TestProjector, TestIfElseOpt) {
  // schema for input
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto field2 = field("f2", int32());
  auto schema = arrow::schema({field0, field1, field2});

  auto f0 = std::make_shared<FieldNode>(field0);
  auto f1 = std::make_shared<FieldNode>(field1);
  auto f2 = std::make_shared<FieldNode>(field2);

  // output fields
  auto field_result = field("out", int32());

  // Expr - (f0, f1 - null; f2 non null)
  //
  // if (is not null(f0))
  // then f1
  // else add((
  //    if (is not null (f1))
  //    then f1
  //    else f2
  //    ), f1)

  auto cond_node_inner = TreeExprBuilder::MakeFunction("isnotnull", {f1}, boolean());
  auto if_node_inner = TreeExprBuilder::MakeIf(cond_node_inner, f1, f2, int32());

  auto cond_node_outer = TreeExprBuilder::MakeFunction("isnotnull", {f0}, boolean());
  auto else_node_outer =
      TreeExprBuilder::MakeFunction("add", {if_node_inner, f1}, int32());

  auto if_node_outer =
      TreeExprBuilder::MakeIf(cond_node_outer, f1, else_node_outer, int32());
  auto expr = TreeExprBuilder::MakeExpression(if_node_outer, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal assertion: the projector is dereferenced below, so there is no point
  // in continuing if it could not be built.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 1;
  auto array0 = MakeArrowArrayInt32({0}, {false});
  auto array1 = MakeArrowArrayInt32({0}, {false});
  auto array2 = MakeArrowArrayInt32({99}, {true});
  // expected output: f0 is null, so the outer else branch is taken; it adds
  // the inner result to the null f1 and the row is expected to be null.
  auto exp = MakeArrowArrayInt32({0}, {false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below requires a successful evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
925
926 } // namespace gandiva
927