1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "gandiva/like_holder.h"
19 #include "gandiva/regex_util.h"
20 
21 #include <memory>
22 #include <vector>
23 
24 #include <gtest/gtest.h>
25 
26 namespace gandiva {
27 
28 class TestLikeHolder : public ::testing::Test {
29  public:
30   RE2::Options regex_op;
BuildLike(std::string pattern)31   FunctionNode BuildLike(std::string pattern) {
32     auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
33     auto pattern_node =
34         std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
35     return FunctionNode("like", {field, pattern_node}, arrow::boolean());
36   }
37 
BuildLike(std::string pattern,char escape_char)38   FunctionNode BuildLike(std::string pattern, char escape_char) {
39     auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
40     auto pattern_node =
41         std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
42     auto escape_char_node = std::make_shared<LiteralNode>(
43         arrow::int8(), LiteralHolder((int8_t)escape_char), false);
44     return FunctionNode("like", {field, pattern_node, escape_char_node},
45                         arrow::boolean());
46   }
47 };
48 
// A trailing '%' is expected to match any suffix (including an empty one),
// while the literal prefix "ab" must still appear at the start of the input.
TEST_F(TestLikeHolder, TestMatchAny) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("ab"));
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("abcd"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("cab"));
}
63 
// A trailing '_' is expected to match exactly one character: inputs that are
// too short, too long, or carry an extra leading character must not match.
TEST_F(TestLikeHolder, TestMatchOne) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("abd"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}
78 
// Regex metacharacters in a SQL LIKE pattern are expected to be matched
// literally rather than interpreted as regex syntax.
TEST_F(TestLikeHolder, TestPcreSpecial) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like(".*abc"));  // . and * aren't special in sql regex
  EXPECT_FALSE(like("xxabc"));
}
89 
// Converts a LIKE pattern that uses '#' as its escape character and checks
// the resulting regex text: escaped '%', '_' and '#' come out as literals,
// and the unescaped '_' comes out as '.'.
TEST_F(TestLikeHolder, TestRegexEscape) {
  const std::string expected_pcre = "%hello_abc.def#";

  std::string pcre_pattern;
  auto status =
      RegexUtil::SqlLikePatternToPcre("#%hello#_abc_def##", '#', pcre_pattern);

  EXPECT_TRUE(status.ok()) << status.message();
  EXPECT_EQ(pcre_pattern, expected_pcre);
}
97 
// A '.' in the LIKE pattern is expected to be a literal dot, so it must not
// match an arbitrary character.
TEST_F(TestLikeHolder, TestDot) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_FALSE(like("abcd"));
}
107 
// Checks which "like" nodes LikeHolder::TryOptimize rewrites into a simpler
// string function and which ones it leaves untouched.
TEST_F(TestLikeHolder, TestOptimise) {
  // Only a trailing '%': rewritten to 'starts_with'.
  {
    auto optimised = LikeHolder::TryOptimize(BuildLike("xy 123z%"));
    EXPECT_EQ(optimised.descriptor()->name(), "starts_with");
    EXPECT_EQ(optimised.ToString(),
              "bool starts_with((string) in, (const string) xy 123z)");
  }

  // Only a leading '%': rewritten to 'ends_with'.
  {
    auto optimised = LikeHolder::TryOptimize(BuildLike("%xyz"));
    EXPECT_EQ(optimised.descriptor()->name(), "ends_with");
    EXPECT_EQ(optimised.ToString(), "bool ends_with((string) in, (const string) xyz)");
  }

  // '%' on both ends: rewritten to 'is_substr'.
  {
    auto optimised = LikeHolder::TryOptimize(BuildLike("%abc%"));
    EXPECT_EQ(optimised.descriptor()->name(), "is_substr");
    EXPECT_EQ(optimised.ToString(), "bool is_substr((string) in, (const string) abc)");
  }

  // Any pattern containing '_' must stay a plain 'like'.
  for (const char* pattern : {"xyz_", "_xyz", "_xyz_", "%xyz_", "x_yz%"}) {
    auto unchanged = LikeHolder::TryOptimize(BuildLike(pattern));
    EXPECT_EQ(unchanged.descriptor()->name(), "like") << "pattern: " << pattern;
  }

  // A pattern supplied together with an escape character is not optimised.
  {
    auto unchanged = LikeHolder::TryOptimize(BuildLike("\\%xyz", '\\'));
    EXPECT_EQ(unchanged.descriptor()->name(), "like");
    EXPECT_EQ(unchanged.ToString(),
              "bool like((string) in, (const string) \\%xyz, (const int8) \\)");
  }
}
146 
// With '\' as the escape character, an escaped '_' is expected to match only
// a literal underscore instead of acting as a single-character wildcard.
TEST_F(TestLikeHolder, TestMatchOneEscape) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab_"));

  EXPECT_FALSE(like("abc"));
  EXPECT_FALSE(like("abd"));
  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}
163 
// With '\' as the escape character, an escaped '%' is expected to match only
// a literal percent sign instead of acting as a multi-character wildcard.
TEST_F(TestLikeHolder, TestMatchManyEscape) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab%"));

  EXPECT_FALSE(like("abc"));
  EXPECT_FALSE(like("abd"));
  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}
180 
// With '\' as the escape character, a doubled escape in the pattern is
// expected to match a single literal backslash.
TEST_F(TestLikeHolder, TestMatchEscape) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab\\"));

  EXPECT_FALSE(like("abc"));
}
193 
// An empty escape string is accepted. The assertions below expect the
// backslash in the pattern to be matched literally and '_' to keep acting as
// a single-character wildcard.
TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab\\_", "", &like_holder);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;

  EXPECT_TRUE(like("ab\\c"));
  EXPECT_TRUE(like("ab\\_"));

  EXPECT_FALSE(like("ab\\_d"));
  EXPECT_FALSE(like("ab__"));
}
208 
// An escape string longer than one character must be rejected by
// LikeHolder::Make.
TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
  std::shared_ptr<LikeHolder> holder;

  auto make_status = LikeHolder::Make("ab\\_", "\\\\", &holder);
  EXPECT_FALSE(make_status.ok()) << make_status.message();
}
215 class TestILikeHolder : public ::testing::Test {
216  public:
217   RE2::Options regex_op;
BuildILike(std::string pattern)218   FunctionNode BuildILike(std::string pattern) {
219     auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
220     auto pattern_node =
221         std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
222     return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
223   }
224 };
225 
// Case-insensitive variant: a trailing '%' is expected to match any suffix,
// and the "ab" prefix is expected to match regardless of letter case.
TEST_F(TestILikeHolder, TestMatchAny) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("ab"));
  EXPECT_TRUE(like("aBc"));
  EXPECT_TRUE(like("ABCD"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("cab"));
}
241 
// Case-insensitive variant: a trailing '_' is expected to match exactly one
// character, with the literal part compared regardless of letter case.
TEST_F(TestILikeHolder, TestMatchOne) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("aBd"));

  EXPECT_FALSE(like("A"));
  EXPECT_FALSE(like("Abcd"));
  EXPECT_FALSE(like("DaBc"));
}
257 
// Case-insensitive variant: regex metacharacters in the pattern are expected
// to be matched literally rather than interpreted as regex syntax.
TEST_F(TestILikeHolder, TestPcreSpecial) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
  EXPECT_FALSE(like("xxAbc"));
}
269 
// Case-insensitive variant: a '.' in the pattern is expected to be a literal
// dot, so it must not match an arbitrary character.
TEST_F(TestILikeHolder, TestDot) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
  // Fatal assertion: like_holder is dereferenced below and may be left unset
  // (null) when construction fails.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_FALSE(like("abcd"));
}
280 
281 }  // namespace gandiva
282