1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "gandiva/like_holder.h"
19 #include "gandiva/regex_util.h"
20
21 #include <memory>
22 #include <vector>
23
24 #include <gtest/gtest.h>
25
26 namespace gandiva {
27
28 class TestLikeHolder : public ::testing::Test {
29 public:
30 RE2::Options regex_op;
BuildLike(std::string pattern)31 FunctionNode BuildLike(std::string pattern) {
32 auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
33 auto pattern_node =
34 std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
35 return FunctionNode("like", {field, pattern_node}, arrow::boolean());
36 }
37
BuildLike(std::string pattern,char escape_char)38 FunctionNode BuildLike(std::string pattern, char escape_char) {
39 auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
40 auto pattern_node =
41 std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
42 auto escape_char_node = std::make_shared<LiteralNode>(
43 arrow::int8(), LiteralHolder((int8_t)escape_char), false);
44 return FunctionNode("like", {field, pattern_node, escape_char_node},
45 arrow::boolean());
46 }
47 };
48
// "ab%" is expected to match any input that begins with "ab", including "ab"
// itself, and nothing else.
TEST_F(TestLikeHolder, TestMatchAny) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("ab"));
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("abcd"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("cab"));
}
63
// "ab_" is expected to match exactly "ab" followed by one more character.
TEST_F(TestLikeHolder, TestMatchOne) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("abd"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}
78
// Regex metacharacters in a SQL LIKE pattern are expected to be matched
// literally; only the SQL wildcard '_' keeps its special meaning here.
TEST_F(TestLikeHolder, TestPcreSpecial) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like(".*abc"));  // . and * aren't special in sql regex
  EXPECT_FALSE(like("xxabc"));
}
89
// Checks the SQL-LIKE -> PCRE translation with '#' as the escape character:
// "#%" and "#_" come out as literal '%' and '_', "##" comes out as a single
// '#', and the unescaped '_' comes out as '.'.
TEST_F(TestLikeHolder, TestRegexEscape) {
  std::string pcre_pattern;
  const auto convert_status =
      RegexUtil::SqlLikePatternToPcre("#%hello#_abc_def##", '#', pcre_pattern);
  EXPECT_TRUE(convert_status.ok()) << convert_status.message();

  EXPECT_EQ(pcre_pattern, "%hello_abc.def#");
}
97
// A '.' in the LIKE pattern must not behave as a regex wildcard: "abc." is
// expected not to match "abcd".
TEST_F(TestLikeHolder, TestDot) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_FALSE(like("abcd"));
}
107
// Exercises LikeHolder::TryOptimize: patterns whose only wildcards are a
// leading and/or trailing '%' are expected to be rewritten to a cheaper string
// function; every other pattern must stay a 'like' node.
TEST_F(TestLikeHolder, TestOptimise) {
  // Trailing '%' only -> 'starts_with' on the prefix.
  const auto prefix_node = LikeHolder::TryOptimize(BuildLike("xy 123z%"));
  EXPECT_EQ(prefix_node.descriptor()->name(), "starts_with");
  EXPECT_EQ(prefix_node.ToString(),
            "bool starts_with((string) in, (const string) xy 123z)");

  // Leading '%' only -> 'ends_with' on the suffix.
  const auto suffix_node = LikeHolder::TryOptimize(BuildLike("%xyz"));
  EXPECT_EQ(suffix_node.descriptor()->name(), "ends_with");
  EXPECT_EQ(suffix_node.ToString(), "bool ends_with((string) in, (const string) xyz)");

  // '%' on both ends -> 'is_substr' on the middle part.
  const auto substr_node = LikeHolder::TryOptimize(BuildLike("%abc%"));
  EXPECT_EQ(substr_node.descriptor()->name(), "is_substr");
  EXPECT_EQ(substr_node.ToString(), "bool is_substr((string) in, (const string) abc)");

  // Any pattern containing '_' is left untouched.
  const auto trailing_one = LikeHolder::TryOptimize(BuildLike("xyz_"));
  EXPECT_EQ(trailing_one.descriptor()->name(), "like");

  const auto leading_one = LikeHolder::TryOptimize(BuildLike("_xyz"));
  EXPECT_EQ(leading_one.descriptor()->name(), "like");

  const auto both_ends_one = LikeHolder::TryOptimize(BuildLike("_xyz_"));
  EXPECT_EQ(both_ends_one.descriptor()->name(), "like");

  const auto any_then_one = LikeHolder::TryOptimize(BuildLike("%xyz_"));
  EXPECT_EQ(any_then_one.descriptor()->name(), "like");

  const auto inner_one = LikeHolder::TryOptimize(BuildLike("x_yz%"));
  EXPECT_EQ(inner_one.descriptor()->name(), "like");

  // A pattern with an explicit escape character is left untouched, and keeps
  // all three of its arguments.
  const auto escaped_node = LikeHolder::TryOptimize(BuildLike("\\%xyz", '\\'));
  EXPECT_EQ(escaped_node.descriptor()->name(), "like");
  EXPECT_EQ(escaped_node.ToString(),
            "bool like((string) in, (const string) \\%xyz, (const int8) \\)");
}
146
// With '\' as the escape character, "ab\_" is expected to match only the
// literal string "ab_"; the escaped '_' must not act as a wildcard.
TEST_F(TestLikeHolder, TestMatchOneEscape) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab_"));

  EXPECT_FALSE(like("abc"));
  EXPECT_FALSE(like("abd"));
  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}
163
// With '\' as the escape character, "ab\%" is expected to match only the
// literal string "ab%"; the escaped '%' must not act as a wildcard.
TEST_F(TestLikeHolder, TestMatchManyEscape) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab%"));

  EXPECT_FALSE(like("abc"));
  EXPECT_FALSE(like("abd"));
  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}
180
// An escaped escape character stands for itself: with '\' as the escape
// character, the pattern "ab\\" is expected to match the input "ab\".
TEST_F(TestLikeHolder, TestMatchEscape) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab\\"));

  EXPECT_FALSE(like("abc"));
}
193
// An empty escape string is accepted and means "no escaping": in "ab\_" the
// backslash is expected to match literally and '_' still matches exactly one
// character.
TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\_", "", &like_holder);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab\\c"));
  EXPECT_TRUE(like("ab\\_"));

  EXPECT_FALSE(like("ab\\_d"));
  EXPECT_FALSE(like("ab__"));
}
208
// An escape string longer than one character must be rejected by
// LikeHolder::Make.
TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
  std::shared_ptr<LikeHolder> holder;

  const auto make_status = LikeHolder::Make("ab\\_", "\\\\", &holder);
  EXPECT_FALSE(make_status.ok()) << make_status.message();
}
215 class TestILikeHolder : public ::testing::Test {
216 public:
217 RE2::Options regex_op;
BuildILike(std::string pattern)218 FunctionNode BuildILike(std::string pattern) {
219 auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
220 auto pattern_node =
221 std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
222 return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
223 }
224 };
225
// Case-insensitive variant: "ab%" is expected to match any input starting with
// "ab" regardless of letter case.
TEST_F(TestILikeHolder, TestMatchAny) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("ab"));
  EXPECT_TRUE(like("aBc"));
  EXPECT_TRUE(like("ABCD"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("cab"));
}
241
// Case-insensitive variant: "Ab_" is expected to match "ab" in any letter case
// followed by exactly one more character.
TEST_F(TestILikeHolder, TestMatchOne) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("aBd"));

  EXPECT_FALSE(like("A"));
  EXPECT_FALSE(like("Abcd"));
  EXPECT_FALSE(like("DaBc"));
}
257
// Case-insensitive variant: regex metacharacters in the pattern are still
// expected to be matched literally.
TEST_F(TestILikeHolder, TestPcreSpecial) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
  EXPECT_FALSE(like("xxAbc"));
}
269
// Case-insensitive variant: a '.' in the pattern must not behave as a regex
// wildcard, so "aBc." is expected not to match "abcd".
TEST_F(TestILikeHolder, TestDot) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
  // Fatal assertion: if Make failed, like_holder is still null and must not be
  // dereferenced below.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_FALSE(like("abcd"));
}
280
281 } // namespace gandiva
282