1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <gtest/gtest.h>
19 #include "arrow/memory_pool.h"
20 #include "gandiva/filter.h"
21 #include "gandiva/projector.h"
22 #include "gandiva/tests/test_util.h"
23 #include "gandiva/tree_expr_builder.h"
24 
25 namespace gandiva {
26 
27 using arrow::boolean;
28 using arrow::int32;
29 using arrow::utf8;
30 
31 class TestNullValidity : public ::testing::Test {
32  public:
SetUp()33   void SetUp() { pool_ = arrow::default_memory_pool(); }
34 
35  protected:
36   arrow::MemoryPool* pool_;
37 };
38 
39 // Create an array without a validity buffer.
MakeArrowArrayInt32WithNullValidity(std::vector<int32_t> in_data)40 ArrayPtr MakeArrowArrayInt32WithNullValidity(std::vector<int32_t> in_data) {
41   auto array = MakeArrowArrayInt32(in_data);
42   return std::make_shared<arrow::Int32Array>(in_data.size(), array->data()->buffers[1],
43                                              nullptr, 0);
44 }
45 
// Evaluates the filter `f0 + f1 < 10` on a batch whose first column was built
// without a validity buffer and whose second column contains one null entry.
TEST_F(TestNullValidity, TestFunc) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  // Fatal assertion: `filter` is dereferenced below, so continuing after a
  // failed Make would crash instead of reporting the real error.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 5;

  // Create an array without a validity buffer.
  auto array0 = MakeArrowArrayInt32WithNullValidity({1, 2, 3, 4, 6});
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint16({0, 4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  // Fatal for the same reason: `selection_vector` is dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
89 
// Projects `if (a > b) a else b` over a batch whose first column was built
// without a validity buffer.
TEST_F(TestNullValidity, TestIfElse) {
  // schema for input fields
  auto fielda = field("a", int32());
  auto fieldb = field("b", int32());
  auto schema = arrow::schema({fielda, fieldb});

  // output fields
  auto field_result = field("res", int32());

  // build expression.
  // if (a > b)
  //   a
  // else
  //   b
  auto node_a = TreeExprBuilder::MakeField(fielda);
  auto node_b = TreeExprBuilder::MakeField(fieldb);
  auto condition =
      TreeExprBuilder::MakeFunction("greater_than", {node_a, node_b}, boolean());
  auto if_node = TreeExprBuilder::MakeIf(condition, node_a, node_b, int32());

  auto expr = TreeExprBuilder::MakeExpression(if_node, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal assertion: `projector` is dereferenced below, so a failed Make must
  // stop the test here.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32WithNullValidity({10, 12, -20, 5});
  auto array1 = MakeArrowArrayInt32({5, 15, 15, 17});

  // expected output
  auto exp = MakeArrowArrayInt32({10, 15, 15, 17}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below throws if evaluation produced nothing.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
136 
// Projects `length(a)` over a utf8 column that is constructed without passing
// a validity buffer.
TEST_F(TestNullValidity, TestUtf8) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res1", int32());

  // build expressions.
  // length(a)
  auto expr = TreeExprBuilder::MakeExpression("length", {field_a}, res);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal assertion: `projector` is dereferenced below, so a failed Make must
  // stop the test here.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 5;
  auto array_v = MakeArrowArrayUtf8({"foo", "hello", "bye", "hi", "मदन"});
  // Rebuild the string array from buffers[1] and buffers[2] only, leaving the
  // null-bitmap argument at its default.
  auto array_a = std::make_shared<arrow::StringArray>(
      num_records, array_v->data()->buffers[1], array_v->data()->buffers[2]);

  // expected output
  auto exp = MakeArrowArrayInt32({3, 5, 3, 2, 3}, {true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below throws if evaluation produced nothing.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
174 
175 }  // namespace gandiva
176