1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "gandiva/filter.h"
19 #include <gtest/gtest.h>
20 #include "arrow/memory_pool.h"
21 #include "gandiva/tests/test_util.h"
22 #include "gandiva/tree_expr_builder.h"
23
24 namespace gandiva {
25
26 using arrow::boolean;
27 using arrow::float32;
28 using arrow::int32;
29
30 class TestFilter : public ::testing::Test {
31 public:
SetUp()32 void SetUp() { pool_ = arrow::default_memory_pool(); }
33
34 protected:
35 arrow::MemoryPool* pool_;
36 };
37
// Checks when Filter::Make hands back an already-built filter: identical
// (schema, condition, configuration) inputs must yield the same object, while a
// different schema or a different condition must yield a distinct one.
TEST_F(TestFilter, TestFilterCache) {
  // Two int32 input columns.
  auto f0 = field("f0", int32());
  auto f1 = field("f1", int32());
  auto base_schema = arrow::schema({f0, f1});

  // Expression tree for: (f0 + f1) < 10
  auto f0_node = TreeExprBuilder::MakeField(f0);
  auto f1_node = TreeExprBuilder::MakeField(f1);
  auto add_node =
      TreeExprBuilder::MakeFunction("add", {f0_node, f1_node}, arrow::int32());
  auto ten = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto lt_node =
      TreeExprBuilder::MakeFunction("less_than", {add_node, ten}, arrow::boolean());
  auto lt_condition = TreeExprBuilder::MakeCondition(lt_node);
  auto config = TestConfiguration();

  // First build.
  std::shared_ptr<Filter> first;
  auto st = Filter::Make(base_schema, lt_condition, config, &first);
  EXPECT_TRUE(st.ok());

  // Same schema + same condition: the very same filter instance is expected.
  std::shared_ptr<Filter> second;
  st = Filter::Make(base_schema, lt_condition, config, &second);
  EXPECT_TRUE(st.ok());
  EXPECT_EQ(second.get(), first.get());

  // Adding a column changes the schema: a different filter instance is expected.
  auto f2 = field("f2", int32());
  auto wider_schema = arrow::schema({f0, f1, f2});
  std::shared_ptr<Filter> from_wider_schema;
  st = Filter::Make(wider_schema, lt_condition, config, &from_wider_schema);
  EXPECT_TRUE(st.ok());
  EXPECT_NE(second.get(), from_wider_schema.get());

  // Swapping the comparison changes the condition: again a different instance.
  auto gt_node = TreeExprBuilder::MakeFunction("greater_than", {add_node, ten},
                                               arrow::boolean());
  auto gt_condition = TreeExprBuilder::MakeCondition(gt_node);
  std::shared_ptr<Filter> from_gt_condition;
  st = Filter::Make(base_schema, gt_condition, config, &from_gt_condition);
  EXPECT_TRUE(st.ok());
  EXPECT_NE(second.get(), from_gt_condition.get());
}
83
// Evaluates the condition (f0 + f1) < 10 over a five-row batch containing nulls
// and checks the indices written to a pool-allocated 16-bit selection vector.
TEST_F(TestFilter, TestSimple) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  // Fatal assertion: `filter` is dereferenced below, so the test must stop here
  // rather than continue with a null pointer if construction failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 5;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true});
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint16({0, 4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Fatal assertion: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok());

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  EXPECT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
125
// Builds a filter for f0 != f1 using a configuration produced explicitly by
// ConfigurationBuilder and checks the selected indices for a four-row batch.
TEST_F(TestFilter, TestSimpleCustomConfig) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 != f1
  auto condition = TreeExprBuilder::MakeCondition("not_equal", {field0, field1});

  ConfigurationBuilder config_builder;
  std::shared_ptr<Configuration> config = config_builder.build();

  // Pass the explicitly built configuration to Filter::Make; otherwise `config`
  // is dead code and this test is indistinguishable from the default-config ones.
  // Fatal assertion: `filter` is dereferenced below.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, config, &filter);
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto array1 = MakeArrowArrayInt32({11, 2, 3, 17}, {true, true, false, true});
  // expected output
  auto exp = MakeArrowArrayUint16({0});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Fatal assertion: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok());

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  EXPECT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
163
// Evaluates an `isnotnull` condition into a selection vector that wraps a
// caller-provided buffer instead of one allocated from the memory pool.
TEST_F(TestFilter, TestZeroCopy) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // Build condition
  auto condition = TreeExprBuilder::MakeCondition("isnotnull", {field0});

  // Fatal assertion: `filter` is dereferenced below.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // expected output
  auto exp = MakeArrowArrayUint16({0, 1, 2});

  // allocate selection buffers: `data` owns the bytes and `data_buf` is only a
  // view over them, so `data` must outlive every use of the selection vector.
  int64_t data_sz = sizeof(int16_t) * num_records;
  std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

  // Fatal assertion: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, data_buf, &selection_vector);
  ASSERT_TRUE(status.ok());

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  EXPECT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
201
// Negative cases for Filter::Evaluate: each call below is expected to report
// StatusCode::Invalid (zero-row batch, null selection vector, undersized
// selection vector).
TEST_F(TestFilter, TestZeroCopyNegative) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // Build expression
  auto condition = TreeExprBuilder::MakeCondition("isnotnull", {field0});

  // Fatal assertion: `filter` is dereferenced by every check below.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // allocate output buffers: `data` owns the bytes, `data_buf` only views them.
  int64_t data_sz = sizeof(int16_t) * num_records;
  std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

  // A correctly sized selection vector, used for the empty-batch case so that
  // the batch is the only invalid argument.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, data_buf, &selection_vector);
  ASSERT_TRUE(status.ok());

  // the batch can't be empty.
  auto bad_batch = arrow::RecordBatch::Make(schema, 0 /*num_records*/, {array0});
  status = filter->Evaluate(*bad_batch, selection_vector);
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the selection_vector can't be null.
  std::shared_ptr<SelectionVector> null_selection;
  status = filter->Evaluate(*in_batch, null_selection);
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the selection vector must be suitably sized: build one with room for one
  // record fewer than the batch holds. Creation itself must succeed so that the
  // Invalid status below is attributable to Evaluate.
  std::shared_ptr<SelectionVector> bad_selection;
  status = SelectionVector::MakeInt16(num_records - 1, data_buf, &bad_selection);
  ASSERT_TRUE(status.ok());

  status = filter->Evaluate(*in_batch, bad_selection);
  EXPECT_EQ(status.code(), StatusCode::Invalid);
}
252
// Same scenario as the basic (f0 + f1) < 10 check, but the matching indices are
// collected in a selection vector created with SelectionVector::MakeInt32 and
// compared against a uint32 array.
TEST_F(TestFilter, TestSimpleSVInt32) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  // Fatal assertion: `filter` is dereferenced below.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 5;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true});
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint32({0, 4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Fatal assertion: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt32(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok());

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  EXPECT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
294
// Evaluates (f0 + f1) < 10 on inputs that carry non-zero offsets: `array0` is
// sliced at offset 1 before the batch is built, and the batch itself is then
// sliced at offset 1 again.
TEST_F(TestFilter, TestOffset) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  // Fatal assertion: `filter` is dereferenced below.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 5;
  // Six values are created and the first is sliced away, so this column has its
  // own offset in addition to the one introduced by slicing the batch.
  auto array0 =
      MakeArrowArrayInt32({0, 1, 2, 3, 4, 6}, {true, true, true, true, false, true});
  array0 = array0->Slice(1);
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint16({3});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
  in_batch = in_batch->Slice(1);

  // Fatal assertion: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok());

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  EXPECT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
339
340 } // namespace gandiva
341