1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "gandiva/projector.h"
19 
20 #include <gtest/gtest.h>
21 
22 #include <cmath>
23 
24 #include "arrow/memory_pool.h"
25 #include "gandiva/literal_holder.h"
26 #include "gandiva/node.h"
27 #include "gandiva/tests/test_util.h"
28 #include "gandiva/tree_expr_builder.h"
29 
30 namespace gandiva {
31 
32 using arrow::boolean;
33 using arrow::float32;
34 using arrow::int32;
35 
36 class TestProjector : public ::testing::Test {
37  public:
SetUp()38   void SetUp() { pool_ = arrow::default_memory_pool(); }
39 
40  protected:
41   arrow::MemoryPool* pool_;
42 };
43 
// Exercises the projector cache: an equal (schema, expressions, configuration)
// triple must hand back the already-built projector, whereas a different schema
// or a different expression list must produce a distinct instance.
TEST_F(TestProjector, TestProjectCache) {
  // result columns
  auto out_add = field("add", int32());
  auto out_subtract = field("subtract", int32());

  // input columns and the schema they form
  auto lhs = field("f0", int32());
  auto rhs = field("f2", int32());
  auto base_schema = arrow::schema({lhs, rhs});

  // expressions under test
  auto add_expr = TreeExprBuilder::MakeExpression("add", {lhs, rhs}, out_add);
  auto subtract_expr =
      TreeExprBuilder::MakeExpression("subtract", {lhs, rhs}, out_subtract);

  auto config = TestConfiguration();

  // Reference projector that the later lookups are compared against.
  std::shared_ptr<Projector> original;
  auto status = Projector::Make(base_schema, {add_expr, subtract_expr}, config, &original);
  ASSERT_OK(status);

  // Identical inputs (with a freshly built but equal schema): expect a cache hit.
  std::shared_ptr<Projector> from_cache;
  auto equal_schema = arrow::schema({lhs, rhs});
  status =
      Projector::Make(equal_schema, {add_expr, subtract_expr}, config, &from_cache);
  ASSERT_OK(status);
  EXPECT_EQ(from_cache, original);

  // A schema with an extra column: expect a new projector.
  auto extra = field("f2", int32());
  auto wider_schema = arrow::schema({lhs, rhs, extra});
  std::shared_ptr<Projector> for_wider_schema;
  status = Projector::Make(wider_schema, {add_expr, subtract_expr}, config,
                           &for_wider_schema);
  ASSERT_OK(status);
  EXPECT_NE(from_cache, for_wider_schema);

  // A shorter expression list: expect a new projector.
  std::shared_ptr<Projector> for_fewer_exprs;
  status = Projector::Make(base_schema, {add_expr}, config, &for_fewer_exprs);
  ASSERT_OK(status);
  EXPECT_NE(from_cache, for_fewer_exprs);

  // A separately created but equivalent configuration: expect a cache hit.
  status = Projector::Make(base_schema, {add_expr, subtract_expr}, TestConfiguration(),
                           &from_cache);
  ASSERT_OK(status);
  EXPECT_EQ(from_cache, original);
}
94 
// Two projectors that apply the same function to different input columns must
// not be served from the same cache entry.
TEST_F(TestProjector, TestProjectCacheFieldNames) {
  // result columns
  auto sum_01 = field("sum_01", int32());
  auto sum_12 = field("sum_12", int32());

  // three int32 input columns
  auto col0 = field("f0", int32());
  auto col1 = field("f1", int32());
  auto col2 = field("f2", int32());
  auto schema = arrow::schema({col0, col1, col2});

  // add(f0, f1) and add(f1, f2): same function, different arguments
  auto add_01 = TreeExprBuilder::MakeExpression("add", {col0, col1}, sum_01);
  auto add_12 = TreeExprBuilder::MakeExpression("add", {col1, col2}, sum_12);

  std::shared_ptr<Projector> projector_01;
  auto status = Projector::Make(schema, {add_01}, TestConfiguration(), &projector_01);
  EXPECT_TRUE(status.ok());

  std::shared_ptr<Projector> projector_12;
  status = Projector::Make(schema, {add_12}, TestConfiguration(), &projector_12);
  EXPECT_TRUE(status.ok());

  // the two expressions differ, so the projectors must be distinct objects
  EXPECT_TRUE(projector_01.get() != projector_12.get());
}
120 
// Two double literals that differ only in a low-order digit must map to
// different cached projectors.
TEST_F(TestProjector, TestProjectCacheDouble) {
  double first_value = 1.23456788912345677E18;
  double second_value = 1.23456789012345677E18;

  auto out_field = field("result", arrow::float64());
  auto empty_schema = arrow::schema({});
  auto config = TestConfiguration();

  // one single-literal expression per value
  auto first_expr =
      TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeLiteral(first_value), out_field);
  auto second_expr = TreeExprBuilder::MakeExpression(
      TreeExprBuilder::MakeLiteral(second_value), out_field);

  std::shared_ptr<Projector> first_projector;
  auto status = Projector::Make(empty_schema, {first_expr}, config, &first_projector);
  EXPECT_TRUE(status.ok()) << status.message();

  std::shared_ptr<Projector> second_projector;
  status = Projector::Make(empty_schema, {second_expr}, config, &second_projector);
  EXPECT_TRUE(status.ok()) << status.message();

  // distinct literal values => distinct projectors
  EXPECT_TRUE(first_projector.get() != second_projector.get());
}
144 
// Float counterpart of the double-literal cache test: two nearby float literals
// must not share a cached projector.
TEST_F(TestProjector, TestProjectCacheFloat) {
  float first_value = static_cast<float>(12345678891.000000);
  float second_value = first_value - 1000;

  auto out_field = field("result", arrow::float32());
  auto empty_schema = arrow::schema({});

  // one single-literal expression per value
  auto first_expr =
      TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeLiteral(first_value), out_field);
  auto second_expr = TreeExprBuilder::MakeExpression(
      TreeExprBuilder::MakeLiteral(second_value), out_field);

  std::shared_ptr<Projector> first_projector;
  auto status =
      Projector::Make(empty_schema, {first_expr}, TestConfiguration(), &first_projector);
  EXPECT_TRUE(status.ok()) << status.message();

  std::shared_ptr<Projector> second_projector;
  status =
      Projector::Make(empty_schema, {second_expr}, TestConfiguration(), &second_projector);
  EXPECT_TRUE(status.ok()) << status.message();

  // distinct literal values => distinct projectors
  EXPECT_TRUE(first_projector.get() != second_projector.get());
}
166 
// Decimal literals with the same precision/scale but different values must map
// to different cached projectors.
TEST_F(TestProjector, TestProjectCacheLiteral) {
  DecimalScalar128 first_value("12345678", 38, 5);
  DecimalScalar128 second_value("98756432", 38, 5);

  auto out_field = field("result", arrow::decimal(38, 5));
  auto empty_schema = arrow::schema({});

  // one single-literal expression per value
  auto first_expr = TreeExprBuilder::MakeExpression(
      TreeExprBuilder::MakeDecimalLiteral(first_value), out_field);
  auto second_expr = TreeExprBuilder::MakeExpression(
      TreeExprBuilder::MakeDecimalLiteral(second_value), out_field);

  std::shared_ptr<Projector> first_projector;
  ASSERT_OK(
      Projector::Make(empty_schema, {first_expr}, TestConfiguration(), &first_projector));

  std::shared_ptr<Projector> second_projector;
  ASSERT_OK(Projector::Make(empty_schema, {second_expr}, TestConfiguration(),
                            &second_projector));

  EXPECT_NE(first_projector.get(), second_projector.get());
}
186 
// The output decimal type takes part in the cache lookup: a different scale
// must miss, an equal precision/scale must hit.
TEST_F(TestProjector, TestProjectCacheDecimalCast) {
  auto input = field("float64", arrow::float64());
  auto schema = arrow::schema({input});

  // Three output fields: the baseline, one with another scale, and one that is
  // a separate object with the same precision/scale as the baseline.
  auto out_scale_13 = field("result", arrow::decimal(31, 13));
  auto out_scale_14 = field("result", arrow::decimal(31, 14));
  auto out_scale_13_again = field("result", arrow::decimal(31, 13));

  auto cast_13 = TreeExprBuilder::MakeExpression("castDECIMAL", {input}, out_scale_13);
  auto cast_14 = TreeExprBuilder::MakeExpression("castDECIMAL", {input}, out_scale_14);
  auto cast_13_again =
      TreeExprBuilder::MakeExpression("castDECIMAL", {input}, out_scale_13_again);

  std::shared_ptr<Projector> baseline;
  ASSERT_OK(Projector::Make(schema, {cast_13}, TestConfiguration(), &baseline));

  // different output scale: the cached projector can't be reused.
  std::shared_ptr<Projector> other_scale;
  ASSERT_OK(Projector::Make(schema, {cast_14}, TestConfiguration(), &other_scale));
  EXPECT_NE(baseline.get(), other_scale.get());

  // same output scale/precision: expect the cached projector.
  std::shared_ptr<Projector> same_type;
  ASSERT_OK(Projector::Make(schema, {cast_13_again}, TestConfiguration(), &same_type));
  EXPECT_EQ(baseline.get(), same_type.get());
}
211 
// Projects add(f0, f2) and subtract(f0, f2) over int32 columns that contain
// nulls and checks both the values and the validity of the results.
TEST_F(TestProjector, TestIntSumSub) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_sum = field("add", int32());
  auto field_sub = field("subtract", int32());

  // Build expression
  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
  auto sub_expr =
      TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub);

  // Fatal check: `projector` is dereferenced below, so the test must stop here
  // if it could not be built.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(
      Projector::Make(schema, {sum_expr, sub_expr}, TestConfiguration(), &projector));

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto array1 = MakeArrowArrayInt32({11, 13, 15, 17}, {true, true, false, true});
  // expected output: a row is null as soon as either input is null
  auto exp_sum = MakeArrowArrayInt32({12, 15, 0, 0}, {true, true, false, false});
  auto exp_sub = MakeArrowArrayInt32({-10, -11, 0, 0}, {true, true, false, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression; fatal check because `outputs` is indexed below.
  arrow::ArrayVector outputs;
  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
}
252 
253 template <typename TYPE, typename C_TYPE>
TestArithmeticOpsForType(arrow::MemoryPool * pool)254 static void TestArithmeticOpsForType(arrow::MemoryPool* pool) {
255   auto atype = arrow::TypeTraits<TYPE>::type_singleton();
256 
257   // schema for input fields
258   auto field0 = field("f0", atype);
259   auto field1 = field("f1", atype);
260   auto schema = arrow::schema({field0, field1});
261 
262   // output fields
263   auto field_sum = field("add", atype);
264   auto field_sub = field("subtract", atype);
265   auto field_mul = field("multiply", atype);
266   auto field_div = field("divide", atype);
267   auto field_eq = field("equal", arrow::boolean());
268   auto field_lt = field("less_than", arrow::boolean());
269 
270   // Build expression
271   auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
272   auto sub_expr =
273       TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub);
274   auto mul_expr =
275       TreeExprBuilder::MakeExpression("multiply", {field0, field1}, field_mul);
276   auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div);
277   auto eq_expr = TreeExprBuilder::MakeExpression("equal", {field0, field1}, field_eq);
278   auto lt_expr = TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_lt);
279 
280   std::shared_ptr<Projector> projector;
281   auto status =
282       Projector::Make(schema, {sum_expr, sub_expr, mul_expr, div_expr, eq_expr, lt_expr},
283                       TestConfiguration(), &projector);
284   EXPECT_TRUE(status.ok());
285 
286   // Create a row-batch with some sample data
287   int num_records = 12;
288   std::vector<C_TYPE> input0 = {1, 2, 53, 84, 5, 15, 0, 1, 52, 83, 4, 120};
289   std::vector<C_TYPE> input1 = {10, 15, 23, 84, 4, 51, 68, 9, 16, 18, 19, 37};
290   std::vector<bool> validity = {true, true, true, true, true, true,
291                                 true, true, true, true, true, true};
292 
293   auto array0 = MakeArrowArray<TYPE, C_TYPE>(input0, validity);
294   auto array1 = MakeArrowArray<TYPE, C_TYPE>(input1, validity);
295 
296   // expected output
297   std::vector<C_TYPE> sum;
298   std::vector<C_TYPE> sub;
299   std::vector<C_TYPE> mul;
300   std::vector<C_TYPE> div;
301   std::vector<bool> eq;
302   std::vector<bool> lt;
303   for (int i = 0; i < num_records; i++) {
304     sum.push_back(static_cast<C_TYPE>(input0[i] + input1[i]));
305     sub.push_back(static_cast<C_TYPE>(input0[i] - input1[i]));
306     mul.push_back(static_cast<C_TYPE>(input0[i] * input1[i]));
307     div.push_back(static_cast<C_TYPE>(input0[i] / input1[i]));
308     eq.push_back(input0[i] == input1[i]);
309     lt.push_back(input0[i] < input1[i]);
310   }
311   auto exp_sum = MakeArrowArray<TYPE, C_TYPE>(sum, validity);
312   auto exp_sub = MakeArrowArray<TYPE, C_TYPE>(sub, validity);
313   auto exp_mul = MakeArrowArray<TYPE, C_TYPE>(mul, validity);
314   auto exp_div = MakeArrowArray<TYPE, C_TYPE>(div, validity);
315   auto exp_eq = MakeArrowArray<arrow::BooleanType, bool>(eq, validity);
316   auto exp_lt = MakeArrowArray<arrow::BooleanType, bool>(lt, validity);
317 
318   // prepare input record batch
319   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
320 
321   // Evaluate expression
322   arrow::ArrayVector outputs;
323   status = projector->Evaluate(*in_batch, pool, &outputs);
324   EXPECT_TRUE(status.ok());
325 
326   // Validate results
327   EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
328   EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
329   EXPECT_ARROW_ARRAY_EQUALS(exp_mul, outputs.at(2));
330   EXPECT_ARROW_ARRAY_EQUALS(exp_div, outputs.at(3));
331   EXPECT_ARROW_ARRAY_EQUALS(exp_eq, outputs.at(4));
332   EXPECT_ARROW_ARRAY_EQUALS(exp_lt, outputs.at(5));
333 }
334 
// Runs the shared arithmetic/comparison check once for every signed and
// unsigned integer width. The invocations are independent of one another.
TEST_F(TestProjector, TestAllIntTypes) {
  // 8-bit
  TestArithmeticOpsForType<arrow::UInt8Type, uint8_t>(pool_);
  TestArithmeticOpsForType<arrow::Int8Type, int8_t>(pool_);

  // 16-bit
  TestArithmeticOpsForType<arrow::UInt16Type, uint16_t>(pool_);
  TestArithmeticOpsForType<arrow::Int16Type, int16_t>(pool_);

  // 32-bit
  TestArithmeticOpsForType<arrow::UInt32Type, uint32_t>(pool_);
  TestArithmeticOpsForType<arrow::Int32Type, int32_t>(pool_);

  // 64-bit
  TestArithmeticOpsForType<arrow::UInt64Type, uint64_t>(pool_);
  TestArithmeticOpsForType<arrow::Int64Type, int64_t>(pool_);
}
345 
TEST_F(TestProjector,TestExtendedMath)346 TEST_F(TestProjector, TestExtendedMath) {
347   // schema for input fields
348   auto field0 = arrow::field("f0", arrow::float64());
349   auto field1 = arrow::field("f1", arrow::float64());
350   auto schema = arrow::schema({field0, field1});
351 
352   // output fields
353   auto field_cbrt = arrow::field("cbrt", arrow::float64());
354   auto field_exp = arrow::field("exp", arrow::float64());
355   auto field_log = arrow::field("log", arrow::float64());
356   auto field_log10 = arrow::field("log10", arrow::float64());
357   auto field_logb = arrow::field("logb", arrow::float64());
358   auto field_power = arrow::field("power", arrow::float64());
359 
360   // Build expression
361   auto cbrt_expr = TreeExprBuilder::MakeExpression("cbrt", {field0}, field_cbrt);
362   auto exp_expr = TreeExprBuilder::MakeExpression("exp", {field0}, field_exp);
363   auto log_expr = TreeExprBuilder::MakeExpression("log", {field0}, field_log);
364   auto log10_expr = TreeExprBuilder::MakeExpression("log10", {field0}, field_log10);
365   auto logb_expr = TreeExprBuilder::MakeExpression("log", {field0, field1}, field_logb);
366   auto power_expr =
367       TreeExprBuilder::MakeExpression("power", {field0, field1}, field_power);
368 
369   std::shared_ptr<Projector> projector;
370   auto status = Projector::Make(
371       schema, {cbrt_expr, exp_expr, log_expr, log10_expr, logb_expr, power_expr},
372       TestConfiguration(), &projector);
373   EXPECT_TRUE(status.ok());
374 
375   // Create a row-batch with some sample data
376   int num_records = 4;
377   std::vector<double> input0 = {16, 10, -14, 8.3};
378   std::vector<double> input1 = {2, 3, 5, 7};
379   std::vector<bool> validity = {true, true, true, true};
380 
381   auto array0 = MakeArrowArray<arrow::DoubleType, double>(input0, validity);
382   auto array1 = MakeArrowArray<arrow::DoubleType, double>(input1, validity);
383 
384   // expected output
385   std::vector<double> cbrt_vals;
386   std::vector<double> exp_vals;
387   std::vector<double> log_vals;
388   std::vector<double> log10_vals;
389   std::vector<double> logb_vals;
390   std::vector<double> power_vals;
391   for (int i = 0; i < num_records; i++) {
392     cbrt_vals.push_back(static_cast<double>(cbrtl(input0[i])));
393     exp_vals.push_back(static_cast<double>(expl(input0[i])));
394     log_vals.push_back(static_cast<double>(logl(input0[i])));
395     log10_vals.push_back(static_cast<double>(log10l(input0[i])));
396     logb_vals.push_back(static_cast<double>(logl(input1[i]) / logl(input0[i])));
397     power_vals.push_back(static_cast<double>(powl(input0[i], input1[i])));
398   }
399   auto expected_cbrt = MakeArrowArray<arrow::DoubleType, double>(cbrt_vals, validity);
400   auto expected_exp = MakeArrowArray<arrow::DoubleType, double>(exp_vals, validity);
401   auto expected_log = MakeArrowArray<arrow::DoubleType, double>(log_vals, validity);
402   auto expected_log10 = MakeArrowArray<arrow::DoubleType, double>(log10_vals, validity);
403   auto expected_logb = MakeArrowArray<arrow::DoubleType, double>(logb_vals, validity);
404   auto expected_power = MakeArrowArray<arrow::DoubleType, double>(power_vals, validity);
405 
406   // prepare input record batch
407   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
408 
409   // Evaluate expression
410   arrow::ArrayVector outputs;
411   status = projector->Evaluate(*in_batch, pool_, &outputs);
412   EXPECT_TRUE(status.ok());
413 
414   // Validate results
415   double epsilon = 1E-13;
416   EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_cbrt, outputs.at(0), epsilon);
417   EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_exp, outputs.at(1), epsilon);
418   EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_log, outputs.at(2), epsilon);
419   EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_log10, outputs.at(3), epsilon);
420   EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_logb, outputs.at(4), epsilon);
421   EXPECT_ARROW_ARRAY_APPROX_EQUALS(expected_power, outputs.at(5), epsilon);
422 }
423 
// Evaluates less_than over two float32 columns, one of which contains a null,
// and checks the boolean result including its validity.
TEST_F(TestProjector, TestFloatLessThan) {
  // schema for input fields
  auto field0 = field("f0", float32());
  auto field1 = field("f2", float32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression
  auto lt_expr =
      TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_result);

  // Build a projector for the expressions. Fatal check: `projector` is
  // dereferenced below.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector));

  // Create a row-batch with some sample data
  int num_records = 3;
  auto array0 = MakeArrowArrayFloat32({1.0f, 8.9f, 3.0f}, {true, true, false});
  auto array1 = MakeArrowArrayFloat32({4.0f, 3.4f, 6.8f}, {true, true, true});
  // expected output: the last row is null because its left operand is null
  auto exp = MakeArrowArrayBool({true, false, false}, {true, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression; fatal check because `outputs` is indexed below.
  arrow::ArrayVector outputs;
  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
460 
// Evaluates isnotnull over a float32 column containing a null: every output row
// is expected to be valid, with `false` for the null input.
TEST_F(TestProjector, TestIsNotNull) {
  // schema for input fields
  auto field0 = field("f0", float32());
  auto schema = arrow::schema({field0});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression
  auto myexpr = TreeExprBuilder::MakeExpression("isnotnull", {field0}, field_result);

  // Build a projector for the expressions. Fatal check: `projector` is
  // dereferenced below.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(Projector::Make(schema, {myexpr}, TestConfiguration(), &projector));

  // Create a row-batch with some sample data
  int num_records = 3;
  auto array0 = MakeArrowArrayFloat32({1.0f, 8.9f, 3.0f}, {true, true, false});
  // expected output
  auto exp = MakeArrowArrayBool({true, true, false}, {true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression; fatal check because `outputs` is indexed below.
  arrow::ArrayVector outputs;
  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
494 
// Evaluates castFLOAT4 through the Evaluate overload that takes caller-supplied
// output ArrayData, then checks the array built from those buffers.
TEST_F(TestProjector, TestZeroCopy) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // output fields
  auto res = field("res", float32());

  // Build expression
  auto cast_expr = TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res);

  // Fatal check: `projector` is dereferenced below.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(Projector::Make(schema, {cast_expr}, TestConfiguration(), &projector));

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // expected output
  auto exp = MakeArrowArrayFloat32({1, 2, 3, 0}, {true, true, true, false});

  // allocate output buffers
  int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
  int64_t bitmap_capacity = arrow::BitUtil::RoundUpToMultipleOf64(bitmap_sz);
  std::vector<uint8_t> bitmap(bitmap_capacity);
  std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
      std::make_shared<arrow::MutableBuffer>(&bitmap[0], bitmap_capacity);

  // Size the data storage from the data size, not from the bitmap capacity:
  // the two are unrelated, and using the bitmap capacity would make the backing
  // storage too small for larger batches. The same rounding as for the bitmap
  // keeps the allocation at its previous size for this batch.
  int64_t data_sz = sizeof(float) * num_records;
  int64_t data_capacity = arrow::BitUtil::RoundUpToMultipleOf64(data_sz);
  std::vector<uint8_t> data(data_capacity);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(&data[0], data_sz);

  auto array_data =
      arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});

  // Evaluate expression; fatal check so the buffers are only inspected after a
  // successful run.
  ASSERT_OK(projector->Evaluate(*in_batch, {array_data}));

  // Validate results
  auto output = arrow::MakeArray(array_data);
  EXPECT_ARROW_ARRAY_EQUALS(exp, output);
}
541 
TEST_F(TestProjector,TestZeroCopyNegative)542 TEST_F(TestProjector, TestZeroCopyNegative) {
543   // schema for input fields
544   auto field0 = field("f0", int32());
545   auto schema = arrow::schema({field0});
546 
547   // output fields
548   auto res = field("res", float32());
549 
550   // Build expression
551   auto cast_expr = TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res);
552 
553   std::shared_ptr<Projector> projector;
554   auto status = Projector::Make(schema, {cast_expr}, TestConfiguration(), &projector);
555   EXPECT_TRUE(status.ok());
556 
557   // Create a row-batch with some sample data
558   int num_records = 4;
559   auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
560   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
561 
562   // expected output
563   auto exp = MakeArrowArrayFloat32({1, 2, 3, 0}, {true, true, true, false});
564 
565   // allocate output buffers
566   int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
567   std::unique_ptr<uint8_t[]> bitmap(new uint8_t[bitmap_sz]);
568   std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
569       std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz);
570 
571   int64_t data_sz = sizeof(float) * num_records;
572   std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
573   std::shared_ptr<arrow::MutableBuffer> data_buf =
574       std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);
575 
576   auto array_data =
577       arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});
578 
579   // the batch can't be empty.
580   auto bad_batch = arrow::RecordBatch::Make(schema, 0 /*num_records*/, {array0});
581   status = projector->Evaluate(*bad_batch, {array_data});
582   EXPECT_EQ(status.code(), StatusCode::Invalid);
583 
584   // the output array can't be null.
585   std::shared_ptr<arrow::ArrayData> null_array_data;
586   status = projector->Evaluate(*in_batch, {null_array_data});
587   EXPECT_EQ(status.code(), StatusCode::Invalid);
588 
589   // the output array must have at least two buffers.
590   auto bad_array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf});
591   status = projector->Evaluate(*in_batch, {bad_array_data});
592   EXPECT_EQ(status.code(), StatusCode::Invalid);
593 
594   // the output buffers must have sufficiently sized data_buf.
595   std::shared_ptr<arrow::MutableBuffer> bad_data_buf =
596       std::make_shared<arrow::MutableBuffer>(data.get(), data_sz - 1);
597   auto bad_array_data2 =
598       arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, bad_data_buf});
599   status = projector->Evaluate(*in_batch, {bad_array_data2});
600   EXPECT_EQ(status.code(), StatusCode::Invalid);
601 
602   // the output buffers must have sufficiently sized bitmap_buf.
603   std::shared_ptr<arrow::MutableBuffer> bad_bitmap_buf =
604       std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz - 1);
605   auto bad_array_data3 =
606       arrow::ArrayData::Make(float32(), num_records, {bad_bitmap_buf, data_buf});
607   status = projector->Evaluate(*in_batch, {bad_array_data3});
608   EXPECT_EQ(status.code(), StatusCode::Invalid);
609 }
610 
// Integer division by zero must surface as an ExecutionError carrying a
// "divide by zero error" message, and the same projector must still evaluate a
// later, error-free batch correctly.
TEST_F(TestProjector, TestDivideZero) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_div = field("divide", int32());

  // Build expression
  auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div);

  // Fatal check: `projector` is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {div_expr}, TestConfiguration(), &projector);
  ASSERT_OK(status);

  // Create a row-batch with some sample data; the last two rows divide by zero.
  int num_records = 5;
  auto array0 = MakeArrowArrayInt32({2, 3, 4, 5, 6}, {true, true, true, true, true});
  auto array1 = MakeArrowArrayInt32({1, 2, 2, 0, 0}, {true, true, false, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  EXPECT_EQ(status.code(), StatusCode::ExecutionError);
  std::string expected_error = "divide by zero error";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);

  // Testing for second batch that has no error should succeed.
  array0 = MakeArrowArrayInt32({2, 3, 4, 5, 6}, {true, true, true, true, true});
  array1 = MakeArrowArrayInt32({1, 2, 2, 1, 1}, {true, true, false, true, true});

  // prepare input record batch (same row count as the first batch)
  in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
  // expected output
  auto exp = MakeArrowArrayInt32({2, 1, 2, 5, 6}, {true, true, false, true, true});

  // Evaluate expression; fatal check because `outputs` is indexed below.
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_OK(status);

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
659 
// Evaluates mod(int64, int32). Unlike divide, a zero divisor is expected to
// succeed: the expected array holds the dividend (5) for the `5 mod 0` row.
TEST_F(TestProjector, TestModZero) {
  // schema for input fields
  auto field0 = field("f0", arrow::int64());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_mod = field("mod", int32());

  // Build expression
  auto mod_expr = TreeExprBuilder::MakeExpression("mod", {field0, field1}, field_mod);

  // Fatal check: `projector` is dereferenced below.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(Projector::Make(schema, {mod_expr}, TestConfiguration(), &projector));

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt64({2, 3, 4, 5}, {true, true, true, true});
  auto array1 = MakeArrowArrayInt32({1, 2, 2, 0}, {true, true, false, true});
  // expected output
  auto exp_mod = MakeArrowArrayInt32({0, 1, 0, 5}, {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression; fatal check because `outputs` is indexed below.
  arrow::ArrayVector outputs;
  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_mod, outputs.at(0));
}
694 
// Evaluates concat over two utf8 columns. Per the expected array, a null input
// contributes an empty string and the result is always valid.
TEST_F(TestProjector, TestConcat) {
  // schema for input fields
  auto field0 = field("f0", arrow::utf8());
  auto field1 = field("f1", arrow::utf8());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_concat = field("concat", arrow::utf8());

  // Build expression
  auto concat_expr =
      TreeExprBuilder::MakeExpression("concat", {field0, field1}, field_concat);

  // Fatal check: `projector` is dereferenced below.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(Projector::Make(schema, {concat_expr}, TestConfiguration(), &projector));

  // Create a row-batch with some sample data. The strings "valid"/"invalid"
  // mirror the validity bit of the slot they sit in.
  int num_records = 6;
  auto array0 = MakeArrowArrayUtf8({"ab", "", "ab", "invalid", "valid", "invalid"},
                                   {true, true, true, false, true, false});
  auto array1 = MakeArrowArrayUtf8({"cd", "cd", "", "valid", "invalid", "invalid"},
                                   {true, true, true, true, false, false});
  // expected output
  auto exp_concat = MakeArrowArrayUtf8({"abcd", "cd", "ab", "valid", "valid", ""},
                                       {true, true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression; fatal check because `outputs` is indexed below.
  arrow::ArrayVector outputs;
  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_concat, outputs.at(0));
}
733 
// Projects add(f0, f1) over inputs that carry non-zero offsets: the first column is
// sliced on its own, and the whole record batch is sliced again before evaluation.
TEST_F(TestProjector, TestOffset) {
  // schema for input fields
  auto field0 = field("f0", arrow::int32());
  auto field1 = field("f1", arrow::int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_sum = field("sum", arrow::int32());

  // Build expression
  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector);
  // Fatal check: `projector` is dereferenced below, so stop here if the build failed.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;
  // Five values are built and the first is sliced off, leaving num_records entries
  // that start at offset 1 in the underlying buffers.
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 5}, {true, true, true, true, false});
  array0 = array0->Slice(1);
  auto array1 = MakeArrowArrayInt32({5, 6, 7, 8}, {true, false, true, true});
  // expected output: the batch slice below drops one more row, so three rows remain
  // ({3, 4, 5} + {6, 7, 8}); a row is valid only when both of its inputs are valid.
  auto exp_sum = MakeArrowArrayInt32({9, 11, 13}, {false, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
  in_batch = in_batch->Slice(1);

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal check: `outputs.at(0)` is only meaningful if evaluation succeeded.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
}
770 
771 // Test to ensure behaviour of cast functions when the validity is false for an input. The
772 // function should not run for that input.
TEST_F(TestProjector, TestCastFunction) {
  // schema for input fields: a single utf8 column fed to every cast
  auto field0 = field("f0", arrow::utf8());
  auto schema = arrow::schema({field0});

  // output fields
  auto res_float4 = field("res_float4", arrow::float32());
  auto res_float8 = field("res_float8", arrow::float64());
  auto res_int4 = field("castINT", arrow::int32());
  auto res_int8 = field("castBIGINT", arrow::int64());

  // Build expression
  auto cast_expr_float4 =
      TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res_float4);
  auto cast_expr_float8 =
      TreeExprBuilder::MakeExpression("castFLOAT8", {field0}, res_float8);
  auto cast_expr_int4 = TreeExprBuilder::MakeExpression("castINT", {field0}, res_int4);
  auto cast_expr_int8 = TreeExprBuilder::MakeExpression("castBIGINT", {field0}, res_int8);

  // One projector evaluates all four casts; output order follows expression order.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(
      schema, {cast_expr_float4, cast_expr_float8, cast_expr_int4, cast_expr_int8},
      TestConfiguration(), &projector);
  // Fatal check: `projector` is dereferenced below, so stop here if the build failed.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;

  // Last validity is false and the cast functions throw error when input is empty. Should
  // not be evaluated due to addition of NativeFunction::kCanReturnErrors
  auto array0 = MakeArrowArrayUtf8({"1", "2", "3", ""}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // expected output: the null input row stays null in every result column
  auto out_float4 = MakeArrowArrayFloat32({1, 2, 3, 0}, {true, true, true, false});
  auto out_float8 = MakeArrowArrayFloat64({1, 2, 3, 0}, {true, true, true, false});
  auto out_int4 = MakeArrowArrayInt32({1, 2, 3, 0}, {true, true, true, false});
  auto out_int8 = MakeArrowArrayInt64({1, 2, 3, 0}, {true, true, true, false});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal check: the `outputs.at(i)` reads are only meaningful if evaluation succeeded.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(out_float4, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(out_float8, outputs.at(1));
  EXPECT_ARROW_ARRAY_EQUALS(out_int4, outputs.at(2));
  EXPECT_ARROW_ARRAY_EQUALS(out_int8, outputs.at(3));
}
823 
// Projects to_date(f0, "YYYY-MM-DD") from a utf8 column into a date64 column. The row
// whose input is marked null is expected to stay null in the output.
TEST_F(TestProjector, TestToDate) {
  // schema for input fields
  auto field0 = field("f0", arrow::utf8());
  auto field_node = std::make_shared<FieldNode>(field0);
  auto schema = arrow::schema({field0});

  // output fields
  auto field_result = field("res", arrow::date64());

  // The date pattern is passed to to_date as a utf8 literal argument.
  auto pattern_node = std::make_shared<LiteralNode>(
      arrow::utf8(), LiteralHolder(std::string("YYYY-MM-DD")), false);

  // Build expression
  auto fn_node = TreeExprBuilder::MakeFunction("to_date", {field_node, pattern_node},
                                               arrow::date64());
  auto expr = TreeExprBuilder::MakeExpression(fn_node, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal check: `projector` is dereferenced below, so stop here if the build failed.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 3;
  auto array0 =
      MakeArrowArrayUtf8({"1986-12-01", "2012-12-01", "invalid"}, {true, true, false});
  // expected output (date64 values, i.e. milliseconds since the UNIX epoch)
  auto exp = MakeArrowArrayDate64({533779200000, 1354320000000, 0}, {true, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal check: `outputs.at(0)` is only meaningful if evaluation succeeded.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
864 
865 // ARROW-11617
// Evaluates a nested if-else whose outer else-branch adds an inner if-else result to a
// null column; the single output row is expected to be null.
TEST_F(TestProjector, TestIfElseOpt) {
  // schema for input
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto field2 = field("f2", int32());
  auto schema = arrow::schema({field0, field1, field2});

  auto f0 = std::make_shared<FieldNode>(field0);
  auto f1 = std::make_shared<FieldNode>(field1);
  auto f2 = std::make_shared<FieldNode>(field2);

  // output fields
  auto field_result = field("out", int32());

  // Expr - (f0, f1 - null; f2 non null)
  //
  // if (is not null(f0))
  // then f1
  // else add((
  //    if (is not null (f1))
  //    then f1
  //    else f2
  //  ), f1)
  //
  // NOTE(review): this comment previously read "then f0", but the tree built below
  // passes f1 as the outer then-branch. The sample data never takes that branch
  // (f0 is null), so the expected result is unaffected; confirm which was intended.

  auto cond_node_inner = TreeExprBuilder::MakeFunction("isnotnull", {f1}, boolean());
  auto if_node_inner = TreeExprBuilder::MakeIf(cond_node_inner, f1, f2, int32());

  auto cond_node_outer = TreeExprBuilder::MakeFunction("isnotnull", {f0}, boolean());
  auto else_node_outer =
      TreeExprBuilder::MakeFunction("add", {if_node_inner, f1}, int32());

  auto if_node_outer =
      TreeExprBuilder::MakeIf(cond_node_outer, f1, else_node_outer, int32());
  auto expr = TreeExprBuilder::MakeExpression(if_node_outer, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal check: `projector` is dereferenced below, so stop here if the build failed.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 1;
  auto array0 = MakeArrowArrayInt32({0}, {false});
  auto array1 = MakeArrowArrayInt32({0}, {false});
  auto array2 = MakeArrowArrayInt32({99}, {true});
  // expected output: the inner if yields f2 (99), but adding the null f1 gives null
  auto exp = MakeArrowArrayInt32({0}, {false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal check: `outputs.at(0)` is only meaningful if evaluation succeeded.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
925 
926 }  // namespace gandiva
927