1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <gtest/gtest.h>
19 #include "arrow/memory_pool.h"
20 #include "gandiva/filter.h"
21 #include "gandiva/projector.h"
22 #include "gandiva/tests/test_util.h"
23 #include "gandiva/tree_expr_builder.h"
24 
25 namespace gandiva {
26 
27 using arrow::boolean;
28 using arrow::float32;
29 using arrow::int32;
30 
31 class LARGE_MEMORY_TEST(TestHugeProjector) : public ::testing::Test {
32  public:
33   void SetUp() { pool_ = arrow::default_memory_pool(); }
34 
35  protected:
36   arrow::MemoryPool* pool_;
37 };
38 
39 class LARGE_MEMORY_TEST(TestHugeFilter) : public ::testing::Test {
40  public:
41   void SetUp() { pool_ = arrow::default_memory_pool(); }
42 
43  protected:
44   arrow::MemoryPool* pool_;
45 };
46 
47 TEST_F(LARGE_MEMORY_TEST(TestHugeProjector), SimpleTestSumHuge) {
48   auto atype = arrow::TypeTraits<arrow::Int32Type>::type_singleton();
49 
50   // schema for input fields
51   auto field0 = field("f0", atype);
52   auto field1 = field("f1", atype);
53   auto schema = arrow::schema({field0, field1});
54 
55   // output fields
56   auto field_sum = field("add", atype);
57 
58   // Build expression
59   auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
60   std::shared_ptr<Projector> projector;
61   auto status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector);
62   EXPECT_TRUE(status.ok());
63 
64   // Create a row-batch with some sample data
65   // Cause an overflow in int32_t
66   int64_t num_records = static_cast<int64_t>(INT32_MAX) + 3;
67   std::vector<int32_t> input0 = {2, 29, 5, 37, 11, 59, 17, 19};
68   std::vector<int32_t> input1 = {23, 3, 31, 7, 41, 47, 13};
69   std::vector<bool> validity;
70 
71   std::vector<int32_t> arr1;
72   std::vector<int32_t> arr2;
73   // expected output
74   std::vector<int32_t> sum1;
75 
76   for (int64_t i = 0; i < num_records; i++) {
77     arr1.push_back(input0[i % 8]);
78     arr2.push_back(input1[i % 7]);
79     sum1.push_back(input0[i % 8] + input1[i % 7]);
80     validity.push_back(true);
81   }
82 
83   auto exp_sum = MakeArrowArray<arrow::Int32Type, int32_t>(sum1, validity);
84   auto array0 = MakeArrowArray<arrow::Int32Type, int32_t>(arr1, validity);
85   auto array1 = MakeArrowArray<arrow::Int32Type, int32_t>(arr2, validity);
86 
87   // prepare input record batch
88   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
89 
90   // Evaluate expression
91   arrow::ArrayVector outputs;
92   status = projector->Evaluate(*in_batch, pool_, &outputs);
93   EXPECT_TRUE(status.ok());
94 
95   // Validate results
96   EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
97 }
98 
99 TEST_F(LARGE_MEMORY_TEST(TestHugeFilter), TestSimpleHugeFilter) {
100   // Create a row-batch with some sample data
101   // Cause an overflow in int32_t
102   int64_t num_records = static_cast<int64_t>(INT32_MAX) + 3;
103   std::vector<int32_t> input0 = {2, 29, 5, 37, 11, 59, 17, 19};
104   std::vector<int32_t> input1 = {23, 3, 31, 7, 41, 47, 13};
105   std::vector<bool> validity;
106 
107   std::vector<int32_t> arr1;
108   std::vector<int32_t> arr2;
109   // expected output
110   std::vector<uint64_t> sel;
111 
112   for (int64_t i = 0; i < num_records; i++) {
113     arr1.push_back(input0[i % 8]);
114     arr2.push_back(input1[i % 7]);
115     if (input0[i % 8] + input1[i % 7] > 50) {
116       sel.push_back(i);
117     }
118     validity.push_back(true);
119   }
120 
121   auto exp = MakeArrowArrayUint64(sel);
122 
123   // schema for input fields
124   auto field0 = field("f0", int32());
125   auto field1 = field("f1", int32());
126   auto schema = arrow::schema({field0, field1});
127 
128   // Build condition f0 + f1 < 50
129   auto node_f0 = TreeExprBuilder::MakeField(field0);
130   auto node_f1 = TreeExprBuilder::MakeField(field1);
131   auto sum_func =
132       TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
133   auto literal_50 = TreeExprBuilder::MakeLiteral((int32_t)50);
134   auto less_than_50 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_50},
135                                                     arrow::boolean());
136   auto condition = TreeExprBuilder::MakeCondition(less_than_50);
137 
138   std::shared_ptr<Filter> filter;
139   auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
140   EXPECT_TRUE(status.ok());
141 
142   // prepare input record batch
143   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arr1, arr2});
144 
145   std::shared_ptr<SelectionVector> selection_vector;
146   status = SelectionVector::MakeInt64(num_records, pool_, &selection_vector);
147   EXPECT_TRUE(status.ok());
148 
149   // Evaluate expression
150   status = filter->Evaluate(*in_batch, selection_vector);
151   EXPECT_TRUE(status.ok());
152 
153   // Validate results
154   EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
155 }
156 
157 }  // namespace gandiva
158