1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "gandiva/like_holder.h"
19
20 #include <regex>
21 #include "gandiva/node.h"
22 #include "gandiva/regex_util.h"
23
24 namespace gandiva {
25
26 RE2 LikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)");
27 RE2 LikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)");
28 RE2 LikeHolder::is_substr_regex_(R"(\.\*(\w|\s)*\.\*)");
29
30 // Short-circuit pattern matches for the following common sub cases :
31 // - starts_with, ends_with and is_substr
TryOptimize(const FunctionNode & node)32 const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) {
33 std::shared_ptr<LikeHolder> holder;
34 auto status = Make(node, &holder);
35 if (status.ok()) {
36 std::string& pattern = holder->pattern_;
37 auto literal_type = node.children().at(1)->return_type();
38
39 if (RE2::FullMatch(pattern, starts_with_regex_)) {
40 auto prefix = pattern.substr(0, pattern.length() - 2); // trim .*
41 auto prefix_node =
42 std::make_shared<LiteralNode>(literal_type, LiteralHolder(prefix), false);
43 return FunctionNode("starts_with", {node.children().at(0), prefix_node},
44 node.return_type());
45 } else if (RE2::FullMatch(pattern, ends_with_regex_)) {
46 auto suffix = pattern.substr(2); // skip .*
47 auto suffix_node =
48 std::make_shared<LiteralNode>(literal_type, LiteralHolder(suffix), false);
49 return FunctionNode("ends_with", {node.children().at(0), suffix_node},
50 node.return_type());
51 } else if (RE2::FullMatch(pattern, is_substr_regex_)) {
52 auto substr =
53 pattern.substr(2, pattern.length() - 4); // trim starting and ending .*
54 auto substr_node =
55 std::make_shared<LiteralNode>(literal_type, LiteralHolder(substr), false);
56 return FunctionNode("is_substr", {node.children().at(0), substr_node},
57 node.return_type());
58 }
59 }
60
61 // Could not optimize, return original node.
62 return node;
63 }
64
IsArrowStringLiteral(arrow::Type::type type)65 static bool IsArrowStringLiteral(arrow::Type::type type) {
66 return type == arrow::Type::STRING || type == arrow::Type::BINARY;
67 }
68
Make(const FunctionNode & node,std::shared_ptr<LikeHolder> * holder)69 Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) {
70 ARROW_RETURN_IF(node.children().size() != 2,
71 Status::Invalid("'like' function requires two parameters"));
72
73 auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
74 ARROW_RETURN_IF(
75 literal == nullptr,
76 Status::Invalid("'like' function requires a literal as the second parameter"));
77
78 auto literal_type = literal->return_type()->id();
79 ARROW_RETURN_IF(
80 !IsArrowStringLiteral(literal_type),
81 Status::Invalid(
82 "'like' function requires a string literal as the second parameter"));
83
84 return Make(arrow::util::get<std::string>(literal->holder()), holder);
85 }
86
Make(const std::string & sql_pattern,std::shared_ptr<LikeHolder> * holder)87 Status LikeHolder::Make(const std::string& sql_pattern,
88 std::shared_ptr<LikeHolder>* holder) {
89 std::string pcre_pattern;
90 ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
91
92 auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
93 ARROW_RETURN_IF(!lholder->regex_.ok(),
94 Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));
95
96 *holder = lholder;
97 return Status::OK();
98 }
99
100 } // namespace gandiva
101