1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 #include <gtest/gtest.h> 19 #include "arrow/memory_pool.h" 20 #include "gandiva/filter.h" 21 #include "gandiva/projector.h" 22 #include "gandiva/tests/test_util.h" 23 #include "gandiva/tree_expr_builder.h" 24 25 namespace gandiva { 26 27 using arrow::boolean; 28 using arrow::float32; 29 using arrow::int32; 30 31 class LARGE_MEMORY_TEST(TestHugeProjector) : public ::testing::Test { 32 public: 33 void SetUp() { pool_ = arrow::default_memory_pool(); } 34 35 protected: 36 arrow::MemoryPool* pool_; 37 }; 38 39 class LARGE_MEMORY_TEST(TestHugeFilter) : public ::testing::Test { 40 public: 41 void SetUp() { pool_ = arrow::default_memory_pool(); } 42 43 protected: 44 arrow::MemoryPool* pool_; 45 }; 46 47 TEST_F(LARGE_MEMORY_TEST(TestHugeProjector), SimpleTestSumHuge) { 48 auto atype = arrow::TypeTraits<arrow::Int32Type>::type_singleton(); 49 50 // schema for input fields 51 auto field0 = field("f0", atype); 52 auto field1 = field("f1", atype); 53 auto schema = arrow::schema({field0, field1}); 54 55 // output fields 56 auto field_sum = field("add", atype); 57 58 // Build expression 59 auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum); 60 std::shared_ptr<Projector> projector; 61 auto status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector); 62 EXPECT_TRUE(status.ok()); 63 64 // Create a row-batch with some sample data 65 // Cause an overflow in int32_t 66 int64_t num_records = static_cast<int64_t>(INT32_MAX) + 3; 67 std::vector<int32_t> input0 = {2, 29, 5, 37, 11, 59, 17, 19}; 68 std::vector<int32_t> input1 = {23, 3, 31, 7, 41, 47, 13}; 69 std::vector<bool> validity; 70 71 std::vector<int32_t> arr1; 72 std::vector<int32_t> arr2; 73 // expected output 74 std::vector<int32_t> sum1; 75 76 for (int64_t i = 0; i < num_records; i++) { 77 arr1.push_back(input0[i % 8]); 78 arr2.push_back(input1[i % 7]); 79 sum1.push_back(input0[i % 8] + input1[i % 7]); 80 validity.push_back(true); 81 } 82 83 auto exp_sum = MakeArrowArray<arrow::Int32Type, int32_t>(sum1, validity); 84 auto array0 = MakeArrowArray<arrow::Int32Type, int32_t>(arr1, validity); 85 auto array1 = MakeArrowArray<arrow::Int32Type, int32_t>(arr2, validity); 86 87 // prepare input record batch 88 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); 89 90 // Evaluate expression 91 arrow::ArrayVector outputs; 92 status = projector->Evaluate(*in_batch, pool_, &outputs); 93 EXPECT_TRUE(status.ok()); 94 95 // Validate results 96 EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0)); 97 } 98 99 TEST_F(LARGE_MEMORY_TEST(TestHugeFilter), TestSimpleHugeFilter) { 100 // Create a row-batch with some sample data 101 // Cause an overflow in int32_t 102 int64_t num_records = static_cast<int64_t>(INT32_MAX) + 3; 103 std::vector<int32_t> input0 = {2, 29, 5, 37, 11, 59, 17, 19}; 104 std::vector<int32_t> input1 = {23, 3, 31, 7, 41, 47, 13}; 105 std::vector<bool> validity; 106 107 std::vector<int32_t> arr1; 108 std::vector<int32_t> arr2; 109 // expected output 110 std::vector<uint64_t> sel; 111 112 for (int64_t i = 0; i < num_records; i++) { 113 arr1.push_back(input0[i % 8]); 114 arr2.push_back(input1[i % 7]); 115 if (input0[i % 8] + input1[i % 7] > 50) { 116 sel.push_back(i); 117 } 118 validity.push_back(true); 119 } 120 121 auto exp = MakeArrowArrayUint64(sel); 122 123 // schema for input fields 124 auto field0 = field("f0", int32()); 125 auto field1 = field("f1", int32()); 126 auto schema = arrow::schema({field0, field1}); 127 128 // Build condition f0 + f1 < 50 129 auto node_f0 = TreeExprBuilder::MakeField(field0); 130 auto node_f1 = TreeExprBuilder::MakeField(field1); 131 auto sum_func = 132 TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32()); 133 auto literal_50 = TreeExprBuilder::MakeLiteral((int32_t)50); 134 auto less_than_50 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_50}, 135 arrow::boolean()); 136 auto condition = TreeExprBuilder::MakeCondition(less_than_50); 137 138 std::shared_ptr<Filter> filter; 139 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); 140 EXPECT_TRUE(status.ok()); 141 142 // prepare input record batch 143 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arr1, arr2}); 144 145 std::shared_ptr<SelectionVector> selection_vector; 146 status = SelectionVector::MakeInt64(num_records, pool_, &selection_vector); 147 EXPECT_TRUE(status.ok()); 148 149 // Evaluate expression 150 status = filter->Evaluate(*in_batch, selection_vector); 151 EXPECT_TRUE(status.ok()); 152 153 // Validate results 154 EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); 155 } 156 157 } // namespace gandiva 158