1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19 #include "arrow/memory_pool.h"
20 #include "gandiva/filter.h"
21 #include "gandiva/projector.h"
22 #include "gandiva/tests/test_util.h"
23 #include "gandiva/tree_expr_builder.h"
24
25 namespace gandiva {
26
27 using arrow::boolean;
28 using arrow::int32;
29 using arrow::utf8;
30
31 class TestNullValidity : public ::testing::Test {
32 public:
SetUp()33 void SetUp() { pool_ = arrow::default_memory_pool(); }
34
35 protected:
36 arrow::MemoryPool* pool_;
37 };
38
39 // Create an array without a validity buffer.
MakeArrowArrayInt32WithNullValidity(std::vector<int32_t> in_data)40 ArrayPtr MakeArrowArrayInt32WithNullValidity(std::vector<int32_t> in_data) {
41 auto array = MakeArrowArrayInt32(in_data);
42 return std::make_shared<arrow::Int32Array>(in_data.size(), array->data()->buffers[1],
43 nullptr, 0);
44 }
45
// Filter test: evaluates `f0 + f1 < 10` where f0 is built without a validity
// buffer and f1 carries an explicit validity vector.
TEST_F(TestNullValidity, TestFunc) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  // Fatal check: `filter` is dereferenced below, so continuing after a failed
  // Make() would turn a test failure into a crash.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 5;

  // Create an array without a validity buffer.
  auto array0 = MakeArrowArrayInt32WithNullValidity({1, 2, 3, 4, 6});
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches).
  // Row 2 would satisfy 3 + 6 < 10, but f1 is flagged invalid there and the
  // expected selection leaves it out.
  auto exp = MakeArrowArrayUint16({0, 4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  // Fatal check: `selection_vector` is dereferenced when validating results.
  ASSERT_TRUE(status.ok()) << status.message();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  // Comparing the selection is meaningless if evaluation itself failed.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
89
// Projector test: evaluates `if (a > b) a else b` where column `a` is built
// without a validity buffer.
TEST_F(TestNullValidity, TestIfElse) {
  // schema for input fields
  auto fielda = field("a", int32());
  auto fieldb = field("b", int32());
  auto schema = arrow::schema({fielda, fieldb});

  // output fields
  auto field_result = field("res", int32());

  // build expression.
  // if (a > b)
  //   a
  // else
  //   b
  auto node_a = TreeExprBuilder::MakeField(fielda);
  auto node_b = TreeExprBuilder::MakeField(fieldb);
  auto condition =
      TreeExprBuilder::MakeFunction("greater_than", {node_a, node_b}, boolean());
  auto if_node = TreeExprBuilder::MakeIf(condition, node_a, node_b, int32());

  auto expr = TreeExprBuilder::MakeExpression(if_node, field_result);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal check: `projector` is dereferenced below, so continuing after a
  // failed Make() would turn a test failure into a crash.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;
  auto array0 = MakeArrowArrayInt32WithNullValidity({10, 12, -20, 5});
  auto array1 = MakeArrowArrayInt32({5, 15, 15, 17});

  // expected output: the larger of each (a, b) pair, every slot valid
  auto exp = MakeArrowArrayInt32({10, 15, 15, 17}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal check: outputs.at(0) below is only meaningful after a successful
  // evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
136
// Projector test: evaluates `length(a)` on a utf8 column whose StringArray is
// constructed from two buffers only, with no validity bitmap supplied.
TEST_F(TestNullValidity, TestUtf8) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res1", int32());

  // build expressions.
  // length(a)
  auto expr = TreeExprBuilder::MakeExpression("length", {field_a}, res);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal check: `projector` is dereferenced below, so continuing after a
  // failed Make() would turn a test failure into a crash.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 5;
  auto array_v = MakeArrowArrayUtf8({"foo", "hello", "bye", "hi", "मदन"});
  // Re-wrap buffers[1] and buffers[2] of the helper-built array in a new
  // StringArray; no validity bitmap argument is passed.
  auto array_a = std::make_shared<arrow::StringArray>(
      num_records, array_v->data()->buffers[1], array_v->data()->buffers[2]);

  // expected output: the last, non-ASCII entry is expected to report 3
  auto exp = MakeArrowArrayInt32({3, 5, 3, 2, 3}, {true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal check: outputs.at(0) below is only meaningful after a successful
  // evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
174
175 } // namespace gandiva
176