1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
#include <cstdint>
#include <cstring>
#include <sstream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
24 
25 #include "arrow/status.h"
26 
27 #include "gandiva/arrow.h"
28 #include "gandiva/func_descriptor.h"
29 #include "gandiva/gandiva_aliases.h"
30 #include "gandiva/literal_holder.h"
31 #include "gandiva/node_visitor.h"
32 #include "gandiva/visibility.h"
33 
34 namespace gandiva {
35 
36 /// \brief Represents a node in the expression tree. Validity and value are
37 /// in a joined state.
38 class GANDIVA_EXPORT Node {
39  public:
Node(DataTypePtr return_type)40   explicit Node(DataTypePtr return_type) : return_type_(return_type) {}
41 
42   virtual ~Node() = default;
43 
return_type()44   const DataTypePtr& return_type() const { return return_type_; }
45 
46   /// Derived classes should simply invoke the Visit api of the visitor.
47   virtual Status Accept(NodeVisitor& visitor) const = 0;
48 
49   virtual std::string ToString() const = 0;
50 
51  protected:
52   DataTypePtr return_type_;
53 };
54 
55 /// \brief Node in the expression tree, representing a literal.
56 class GANDIVA_EXPORT LiteralNode : public Node {
57  public:
LiteralNode(DataTypePtr type,const LiteralHolder & holder,bool is_null)58   LiteralNode(DataTypePtr type, const LiteralHolder& holder, bool is_null)
59       : Node(type), holder_(holder), is_null_(is_null) {}
60 
Accept(NodeVisitor & visitor)61   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
62 
holder()63   const LiteralHolder& holder() const { return holder_; }
64 
is_null()65   bool is_null() const { return is_null_; }
66 
ToString()67   std::string ToString() const override {
68     std::stringstream ss;
69     ss << "(const " << return_type()->ToString() << ") ";
70     if (is_null()) {
71       ss << std::string("null");
72       return ss.str();
73     }
74 
75     ss << gandiva::ToString(holder_);
76     // The default formatter prints in decimal can cause a loss in precision. so,
77     // print in hex. Can't use hexfloat since gcc 4.9 doesn't support it.
78     if (return_type()->id() == arrow::Type::DOUBLE) {
79       double dvalue = arrow::util::get<double>(holder_);
80       uint64_t bits;
81       memcpy(&bits, &dvalue, sizeof(bits));
82       ss << " raw(" << std::hex << bits << ")";
83     } else if (return_type()->id() == arrow::Type::FLOAT) {
84       float fvalue = arrow::util::get<float>(holder_);
85       uint32_t bits;
86       memcpy(&bits, &fvalue, sizeof(bits));
87       ss << " raw(" << std::hex << bits << ")";
88     }
89     return ss.str();
90   }
91 
92  private:
93   LiteralHolder holder_;
94   bool is_null_;
95 };
96 
97 /// \brief Node in the expression tree, representing an arrow field.
98 class GANDIVA_EXPORT FieldNode : public Node {
99  public:
FieldNode(FieldPtr field)100   explicit FieldNode(FieldPtr field) : Node(field->type()), field_(field) {}
101 
Accept(NodeVisitor & visitor)102   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
103 
field()104   const FieldPtr& field() const { return field_; }
105 
ToString()106   std::string ToString() const override {
107     return "(" + field()->type()->ToString() + ") " + field()->name();
108   }
109 
110  private:
111   FieldPtr field_;
112 };
113 
114 /// \brief Node in the expression tree, representing a function.
115 class GANDIVA_EXPORT FunctionNode : public Node {
116  public:
117   FunctionNode(const std::string& name, const NodeVector& children, DataTypePtr retType);
118 
Accept(NodeVisitor & visitor)119   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
120 
descriptor()121   const FuncDescriptorPtr& descriptor() const { return descriptor_; }
children()122   const NodeVector& children() const { return children_; }
123 
ToString()124   std::string ToString() const override {
125     std::stringstream ss;
126     ss << descriptor()->return_type()->ToString() << " " << descriptor()->name() << "(";
127     bool skip_comma = true;
128     for (auto& child : children()) {
129       if (skip_comma) {
130         ss << child->ToString();
131         skip_comma = false;
132       } else {
133         ss << ", " << child->ToString();
134       }
135     }
136     ss << ")";
137     return ss.str();
138   }
139 
140  private:
141   FuncDescriptorPtr descriptor_;
142   NodeVector children_;
143 };
144 
FunctionNode(const std::string & name,const NodeVector & children,DataTypePtr return_type)145 inline FunctionNode::FunctionNode(const std::string& name, const NodeVector& children,
146                                   DataTypePtr return_type)
147     : Node(return_type), children_(children) {
148   DataTypeVector param_types;
149   for (auto& child : children) {
150     param_types.push_back(child->return_type());
151   }
152 
153   descriptor_ = FuncDescriptorPtr(new FuncDescriptor(name, param_types, return_type));
154 }
155 
156 /// \brief Node in the expression tree, representing an if-else expression.
157 class GANDIVA_EXPORT IfNode : public Node {
158  public:
IfNode(NodePtr condition,NodePtr then_node,NodePtr else_node,DataTypePtr result_type)159   IfNode(NodePtr condition, NodePtr then_node, NodePtr else_node, DataTypePtr result_type)
160       : Node(result_type),
161         condition_(condition),
162         then_node_(then_node),
163         else_node_(else_node) {}
164 
Accept(NodeVisitor & visitor)165   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
166 
condition()167   const NodePtr& condition() const { return condition_; }
then_node()168   const NodePtr& then_node() const { return then_node_; }
else_node()169   const NodePtr& else_node() const { return else_node_; }
170 
ToString()171   std::string ToString() const override {
172     std::stringstream ss;
173     ss << "if (" << condition()->ToString() << ") { ";
174     ss << then_node()->ToString() << " } else { ";
175     ss << else_node()->ToString() << " }";
176     return ss.str();
177   }
178 
179  private:
180   NodePtr condition_;
181   NodePtr then_node_;
182   NodePtr else_node_;
183 };
184 
185 /// \brief Node in the expression tree, representing an and/or boolean expression.
186 class GANDIVA_EXPORT BooleanNode : public Node {
187  public:
188   enum ExprType : char { AND, OR };
189 
BooleanNode(ExprType expr_type,const NodeVector & children)190   BooleanNode(ExprType expr_type, const NodeVector& children)
191       : Node(arrow::boolean()), expr_type_(expr_type), children_(children) {}
192 
Accept(NodeVisitor & visitor)193   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
194 
expr_type()195   ExprType expr_type() const { return expr_type_; }
196 
children()197   const NodeVector& children() const { return children_; }
198 
ToString()199   std::string ToString() const override {
200     std::stringstream ss;
201     bool first = true;
202     for (auto& child : children_) {
203       if (!first) {
204         if (expr_type() == BooleanNode::AND) {
205           ss << " && ";
206         } else {
207           ss << " || ";
208         }
209       }
210       ss << child->ToString();
211       first = false;
212     }
213     return ss.str();
214   }
215 
216  private:
217   ExprType expr_type_;
218   NodeVector children_;
219 };
220 
221 /// \brief Node in expression tree, representing an in expression.
222 template <typename Type>
223 class InExpressionNode : public Node {
224  public:
InExpressionNode(NodePtr eval_expr,const std::unordered_set<Type> & values)225   InExpressionNode(NodePtr eval_expr, const std::unordered_set<Type>& values)
226       : Node(arrow::boolean()), eval_expr_(eval_expr), values_(values) {}
227 
eval_expr()228   const NodePtr& eval_expr() const { return eval_expr_; }
229 
values()230   const std::unordered_set<Type>& values() const { return values_; }
231 
Accept(NodeVisitor & visitor)232   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
233 
ToString()234   std::string ToString() const override {
235     std::stringstream ss;
236     ss << eval_expr_->ToString() << " IN (";
237     bool add_comma = false;
238     for (auto& value : values_) {
239       if (add_comma) {
240         ss << ", ";
241       }
242       // add type in the front to differentiate
243       ss << value;
244       add_comma = true;
245     }
246     ss << ")";
247     return ss.str();
248   }
249 
250  private:
251   NodePtr eval_expr_;
252   std::unordered_set<Type> values_;
253 };
254 
255 template <>
256 class InExpressionNode<gandiva::DecimalScalar128> : public Node {
257  public:
InExpressionNode(NodePtr eval_expr,std::unordered_set<gandiva::DecimalScalar128> & values,int32_t precision,int32_t scale)258   InExpressionNode(NodePtr eval_expr,
259                    std::unordered_set<gandiva::DecimalScalar128>& values,
260                    int32_t precision, int32_t scale)
261       : Node(arrow::boolean()),
262         eval_expr_(std::move(eval_expr)),
263         values_(std::move(values)),
264         precision_(precision),
265         scale_(scale) {}
266 
get_precision()267   int32_t get_precision() const { return precision_; }
268 
get_scale()269   int32_t get_scale() const { return scale_; }
270 
eval_expr()271   const NodePtr& eval_expr() const { return eval_expr_; }
272 
values()273   const std::unordered_set<gandiva::DecimalScalar128>& values() const { return values_; }
274 
Accept(NodeVisitor & visitor)275   Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
276 
ToString()277   std::string ToString() const override {
278     std::stringstream ss;
279     ss << eval_expr_->ToString() << " IN (";
280     bool add_comma = false;
281     for (auto& value : values_) {
282       if (add_comma) {
283         ss << ", ";
284       }
285       // add type in the front to differentiate
286       ss << value;
287       add_comma = true;
288     }
289     ss << ")";
290     return ss.str();
291   }
292 
293  private:
294   NodePtr eval_expr_;
295   std::unordered_set<gandiva::DecimalScalar128> values_;
296   int32_t precision_, scale_;
297 };
298 
299 }  // namespace gandiva
300