1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "gandiva/filter.h"
19 #include <gtest/gtest.h>
20 #include "arrow/memory_pool.h"
21 #include "gandiva/tests/test_util.h"
22 #include "gandiva/tree_expr_builder.h"
23 
24 namespace gandiva {
25 
26 using arrow::boolean;
27 using arrow::float32;
28 using arrow::int32;
29 
30 class TestFilter : public ::testing::Test {
31  public:
SetUp()32   void SetUp() { pool_ = arrow::default_memory_pool(); }
33 
34  protected:
35   arrow::MemoryPool* pool_;
36 };
37 
// Checks which Filter::Make calls hand back the same Filter instance:
// identical (schema, condition, configuration) must yield the same object,
// while a different schema or a different condition must yield another one.
TEST_F(TestFilter, TestFilterCache) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);
  auto configuration = TestConfiguration();

  // Status checks are fatal: if a Make call fails its output pointer stays
  // null, and the pointer comparisons below would no longer be meaningful.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, configuration, &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // same schema and condition, should return the same filter as above.
  std::shared_ptr<Filter> cached_filter;
  status = Filter::Make(schema, condition, configuration, &cached_filter);
  ASSERT_TRUE(status.ok()) << status.ToString();
  EXPECT_EQ(cached_filter.get(), filter.get());

  // schema is different, should return a new filter.
  auto field2 = field("f2", int32());
  auto different_schema = arrow::schema({field0, field1, field2});
  std::shared_ptr<Filter> should_be_new_filter;
  status =
      Filter::Make(different_schema, condition, configuration, &should_be_new_filter);
  ASSERT_TRUE(status.ok()) << status.ToString();
  EXPECT_NE(cached_filter.get(), should_be_new_filter.get());

  // condition is different, should return a new filter.
  auto greater_than_10 = TreeExprBuilder::MakeFunction(
      "greater_than", {sum_func, literal_10}, arrow::boolean());
  auto new_condition = TreeExprBuilder::MakeCondition(greater_than_10);
  std::shared_ptr<Filter> should_be_new_filter1;
  status = Filter::Make(schema, new_condition, configuration, &should_be_new_filter1);
  ASSERT_TRUE(status.ok()) << status.ToString();
  EXPECT_NE(cached_filter.get(), should_be_new_filter1.get());
}
83 
// Evaluates the condition f0 + f1 < 10 over a five-row batch with some null
// entries and checks the 16-bit selection vector holds the matching indices.
TEST_F(TestFilter, TestSimple) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  // Fatal check: `filter` is dereferenced below, so stop here if Make failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Create a row-batch with some sample data
  int num_records = 5;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true});
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint16({0, 4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Fatal check: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
125 
// Evaluates the condition f0 != f1 using a configuration built explicitly via
// ConfigurationBuilder instead of the shared TestConfiguration() helper.
TEST_F(TestFilter, TestSimpleCustomConfig) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 != f1
  auto condition = TreeExprBuilder::MakeCondition("not_equal", {field0, field1});

  // Build the configuration by hand; this is the object handed to
  // Filter::Make below, which is what makes this a "custom config" test.
  ConfigurationBuilder config_builder;
  std::shared_ptr<Configuration> config = config_builder.build();

  // Fatal check: `filter` is dereferenced below, so stop here if Make failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, config, &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto array1 = MakeArrowArrayInt32({11, 2, 3, 17}, {true, true, false, true});
  // expected output
  auto exp = MakeArrowArrayUint16({0});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Fatal check: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
163 
// Evaluates an isnotnull filter into a selection vector that wraps a buffer
// allocated by the test itself rather than one allocated from the memory pool.
TEST_F(TestFilter, TestZeroCopy) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // Build condition
  auto condition = TreeExprBuilder::MakeCondition("isnotnull", {field0});

  // Fatal check: `filter` is dereferenced below, so stop here if Make failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // expected output
  auto exp = MakeArrowArrayUint16({0, 1, 2});

  // allocate selection buffers: one 16-bit slot per input record. `data` is
  // declared before the buffer and the selection vector, so it is destroyed
  // after both of them.
  int64_t data_sz = sizeof(int16_t) * num_records;
  std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

  // Fatal check: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, data_buf, &selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
201 
// Negative cases for Filter::Evaluate: an empty batch, a null selection
// vector, and a selection vector smaller than the batch must each be rejected
// with StatusCode::Invalid.
TEST_F(TestFilter, TestZeroCopyNegative) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto schema = arrow::schema({field0});

  // Build expression
  auto condition = TreeExprBuilder::MakeCondition("isnotnull", {field0});

  // Fatal check: `filter` is dereferenced below, so stop here if Make failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // allocate output buffers: one 16-bit slot per input record
  int64_t data_sz = sizeof(int16_t) * num_records;
  std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
  std::shared_ptr<arrow::MutableBuffer> data_buf =
      std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, data_buf, &selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // the batch can't be empty.
  auto bad_batch = arrow::RecordBatch::Make(schema, 0 /*num_records*/, {array0});
  status = filter->Evaluate(*bad_batch, selection_vector);
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the selection_vector can't be null.
  std::shared_ptr<SelectionVector> null_selection;
  status = filter->Evaluate(*in_batch, null_selection);
  EXPECT_EQ(status.code(), StatusCode::Invalid);

  // the selection vector must be suitably sized.
  std::shared_ptr<SelectionVector> bad_selection;
  status = SelectionVector::MakeInt16(num_records - 1, data_buf, &bad_selection);
  ASSERT_TRUE(status.ok()) << status.ToString();

  status = filter->Evaluate(*in_batch, bad_selection);
  EXPECT_EQ(status.code(), StatusCode::Invalid);
}
252 
// Same scenario as the f0 + f1 < 10 test, but the result is written to a
// 32-bit selection vector and compared against a uint32 index array.
TEST_F(TestFilter, TestSimpleSVInt32) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  // Fatal check: `filter` is dereferenced below, so stop here if Make failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Create a row-batch with some sample data
  int num_records = 5;
  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true});
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint32({0, 4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Fatal check: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt32(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
294 
// Evaluates f0 + f1 < 10 on sliced inputs: the f0 array is sliced by one
// element before the batch is built, and the batch itself is then sliced by
// one row, so the filter sees inputs that do not start at offset zero.
TEST_F(TestFilter, TestOffset) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f1", int32());
  auto schema = arrow::schema({field0, field1});

  // Build condition f0 + f1 < 10
  auto node_f0 = TreeExprBuilder::MakeField(field0);
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto sum_func =
      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
  auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                    arrow::boolean());
  auto condition = TreeExprBuilder::MakeCondition(less_than_10);

  // Fatal check: `filter` is dereferenced below, so stop here if Make failed.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Create a row-batch with some sample data
  int num_records = 5;
  // Six values are built and the first is sliced away, leaving five.
  auto array0 =
      MakeArrowArrayInt32({0, 1, 2, 3, 4, 6}, {true, true, true, true, false, true});
  array0 = array0->Slice(1);
  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true});
  // expected output (indices for which condition matches)
  auto exp = MakeArrowArrayUint16({3});

  // prepare input record batch, then drop its first row
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
  in_batch = in_batch->Slice(1);

  // Fatal check: `selection_vector` is dereferenced when validating results.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  ASSERT_TRUE(status.ok()) << status.ToString();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
}
339 
340 }  // namespace gandiva
341