1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "gandiva/like_holder.h"
19 
20 #include <regex>
21 #include "gandiva/node.h"
22 #include "gandiva/regex_util.h"
23 
24 namespace gandiva {
25 
26 RE2 LikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)");
27 RE2 LikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)");
28 RE2 LikeHolder::is_substr_regex_(R"(\.\*(\w|\s)*\.\*)");
29 
30 // Short-circuit pattern matches for the following common sub cases :
31 // - starts_with, ends_with and is_substr
TryOptimize(const FunctionNode & node)32 const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) {
33   std::shared_ptr<LikeHolder> holder;
34   auto status = Make(node, &holder);
35   if (status.ok()) {
36     std::string& pattern = holder->pattern_;
37     auto literal_type = node.children().at(1)->return_type();
38 
39     if (RE2::FullMatch(pattern, starts_with_regex_)) {
40       auto prefix = pattern.substr(0, pattern.length() - 2);  // trim .*
41       auto prefix_node =
42           std::make_shared<LiteralNode>(literal_type, LiteralHolder(prefix), false);
43       return FunctionNode("starts_with", {node.children().at(0), prefix_node},
44                           node.return_type());
45     } else if (RE2::FullMatch(pattern, ends_with_regex_)) {
46       auto suffix = pattern.substr(2);  // skip .*
47       auto suffix_node =
48           std::make_shared<LiteralNode>(literal_type, LiteralHolder(suffix), false);
49       return FunctionNode("ends_with", {node.children().at(0), suffix_node},
50                           node.return_type());
51     } else if (RE2::FullMatch(pattern, is_substr_regex_)) {
52       auto substr =
53           pattern.substr(2, pattern.length() - 4);  // trim starting and ending .*
54       auto substr_node =
55           std::make_shared<LiteralNode>(literal_type, LiteralHolder(substr), false);
56       return FunctionNode("is_substr", {node.children().at(0), substr_node},
57                           node.return_type());
58     }
59   }
60 
61   // Could not optimize, return original node.
62   return node;
63 }
64 
IsArrowStringLiteral(arrow::Type::type type)65 static bool IsArrowStringLiteral(arrow::Type::type type) {
66   return type == arrow::Type::STRING || type == arrow::Type::BINARY;
67 }
68 
Make(const FunctionNode & node,std::shared_ptr<LikeHolder> * holder)69 Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) {
70   ARROW_RETURN_IF(node.children().size() != 2,
71                   Status::Invalid("'like' function requires two parameters"));
72 
73   auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
74   ARROW_RETURN_IF(
75       literal == nullptr,
76       Status::Invalid("'like' function requires a literal as the second parameter"));
77 
78   auto literal_type = literal->return_type()->id();
79   ARROW_RETURN_IF(
80       !IsArrowStringLiteral(literal_type),
81       Status::Invalid(
82           "'like' function requires a string literal as the second parameter"));
83 
84   return Make(arrow::util::get<std::string>(literal->holder()), holder);
85 }
86 
Make(const std::string & sql_pattern,std::shared_ptr<LikeHolder> * holder)87 Status LikeHolder::Make(const std::string& sql_pattern,
88                         std::shared_ptr<LikeHolder>* holder) {
89   std::string pcre_pattern;
90   ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
91 
92   auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
93   ARROW_RETURN_IF(!lholder->regex_.ok(),
94                   Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));
95 
96   *holder = lholder;
97   return Status::OK();
98 }
99 
100 }  // namespace gandiva
101