1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <vector>
24 
25 #include "arrow/util/macros.h"
26 
27 #include "gandiva/annotator.h"
28 #include "gandiva/compiled_expr.h"
29 #include "gandiva/configuration.h"
30 #include "gandiva/dex_visitor.h"
31 #include "gandiva/engine.h"
32 #include "gandiva/execution_context.h"
33 #include "gandiva/function_registry.h"
34 #include "gandiva/gandiva_aliases.h"
35 #include "gandiva/llvm_types.h"
36 #include "gandiva/lvalue.h"
37 #include "gandiva/selection_vector.h"
38 #include "gandiva/value_validity_pair.h"
39 #include "gandiva/visibility.h"
40 
41 namespace gandiva {
42 
43 class FunctionHolder;
44 
45 /// Builds an LLVM module and generates code for the specified set of expressions.
46 class GANDIVA_EXPORT LLVMGenerator {
47  public:
48   /// \brief Factory method to initialize the generator.
49   static Status Make(std::shared_ptr<Configuration> config,
50                      std::unique_ptr<LLVMGenerator>* llvm_generator);
51 
52   /// \brief Build the code for the expression trees for default mode. Each
53   /// element in the vector represents an expression tree
54   Status Build(const ExpressionVector& exprs, SelectionVector::Mode mode);
55 
56   /// \brief Build the code for the expression trees for default mode. Each
57   /// element in the vector represents an expression tree
Build(const ExpressionVector & exprs)58   Status Build(const ExpressionVector& exprs) {
59     return Build(exprs, SelectionVector::Mode::MODE_NONE);
60   }
61 
62   /// \brief Execute the built expression against the provided arguments for
63   /// default mode.
64   Status Execute(const arrow::RecordBatch& record_batch,
65                  const ArrayDataVector& output_vector);
66 
67   /// \brief Execute the built expression against the provided arguments for
68   /// all modes. Only works on the records specified in the selection_vector.
69   Status Execute(const arrow::RecordBatch& record_batch,
70                  const SelectionVector* selection_vector,
71                  const ArrayDataVector& output_vector);
72 
selection_vector_mode()73   SelectionVector::Mode selection_vector_mode() { return selection_vector_mode_; }
types()74   LLVMTypes* types() { return engine_->types(); }
module()75   llvm::Module* module() { return engine_->module(); }
DumpIR()76   std::string DumpIR() { return engine_->DumpIR(); }
77 
78  private:
79   LLVMGenerator();
80 
81   FRIEND_TEST(TestLLVMGenerator, VerifyPCFunctions);
82   FRIEND_TEST(TestLLVMGenerator, TestAdd);
83   FRIEND_TEST(TestLLVMGenerator, TestNullInternal);
84 
context()85   llvm::LLVMContext* context() { return engine_->context(); }
ir_builder()86   llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); }
87 
88   /// Visitor to generate the code for a decomposed expression.
89   class Visitor : public DexVisitor {
90    public:
91     Visitor(LLVMGenerator* generator, llvm::Function* function,
92             llvm::BasicBlock* entry_block, llvm::Value* arg_addrs,
93             llvm::Value* arg_local_bitmaps, std::vector<llvm::Value*> slice_offsets,
94             llvm::Value* arg_context_ptr, llvm::Value* loop_var);
95 
96     void Visit(const VectorReadValidityDex& dex) override;
97     void Visit(const VectorReadFixedLenValueDex& dex) override;
98     void Visit(const VectorReadVarLenValueDex& dex) override;
99     void Visit(const LocalBitMapValidityDex& dex) override;
100     void Visit(const TrueDex& dex) override;
101     void Visit(const FalseDex& dex) override;
102     void Visit(const LiteralDex& dex) override;
103     void Visit(const NonNullableFuncDex& dex) override;
104     void Visit(const NullableNeverFuncDex& dex) override;
105     void Visit(const NullableInternalFuncDex& dex) override;
106     void Visit(const IfDex& dex) override;
107     void Visit(const BooleanAndDex& dex) override;
108     void Visit(const BooleanOrDex& dex) override;
109     void Visit(const InExprDexBase<int32_t>& dex) override;
110     void Visit(const InExprDexBase<int64_t>& dex) override;
111     void Visit(const InExprDexBase<std::string>& dex) override;
112     template <typename Type>
113     void VisitInExpression(const InExprDexBase<Type>& dex);
114 
result()115     LValuePtr result() { return result_; }
116 
has_arena_allocs()117     bool has_arena_allocs() { return has_arena_allocs_; }
118 
119    private:
120     enum BufferType { kBufferTypeValidity = 0, kBufferTypeData, kBufferTypeOffsets };
121 
ir_builder()122     llvm::IRBuilder<>* ir_builder() { return generator_->ir_builder(); }
module()123     llvm::Module* module() { return generator_->module(); }
124 
125     // Generate the code to build the combined validity (bitwise and) from the
126     // vector of validities.
127     llvm::Value* BuildCombinedValidity(const DexVector& validities);
128 
129     // Generate the code to build the validity and the value for the given pair.
130     LValuePtr BuildValueAndValidity(const ValueValidityPair& pair);
131 
132     // Generate code to build the params.
133     std::vector<llvm::Value*> BuildParams(FunctionHolder* holder,
134                                           const ValueValidityPairVector& args,
135                                           bool with_validity, bool with_context);
136 
137     // Generate code to onvoke a function call.
138     LValuePtr BuildFunctionCall(const NativeFunction* func, DataTypePtr arrow_return_type,
139                                 std::vector<llvm::Value*>* params);
140 
141     // Generate code for an if-else condition.
142     LValuePtr BuildIfElse(llvm::Value* condition, std::function<LValuePtr()> then_func,
143                           std::function<LValuePtr()> else_func,
144                           DataTypePtr arrow_return_type);
145 
146     // Switch to the entry_block and get reference of the validity/value/offsets buffer
147     llvm::Value* GetBufferReference(int idx, BufferType buffer_type, FieldPtr field);
148 
149     // Get the slice offset of the validity/value/offsets buffer
150     llvm::Value* GetSliceOffset(int idx);
151 
152     // Switch to the entry_block and get reference to the local bitmap.
153     llvm::Value* GetLocalBitMapReference(int idx);
154 
155     // Clear the bit in the local bitmap, if is_valid is 'false'
156     void ClearLocalBitMapIfNotValid(int local_bitmap_idx, llvm::Value* is_valid);
157 
158     LLVMGenerator* generator_;
159     LValuePtr result_;
160     llvm::Function* function_;
161     llvm::BasicBlock* entry_block_;
162     llvm::Value* arg_addrs_;
163     llvm::Value* arg_local_bitmaps_;
164     std::vector<llvm::Value*> slice_offsets_;
165     llvm::Value* arg_context_ptr_;
166     llvm::Value* loop_var_;
167     bool has_arena_allocs_;
168   };
169 
170   // Generate the code for one expression for default mode, with the output of
171   // the expression going to 'output'.
172   Status Add(const ExpressionPtr expr, const FieldDescriptorPtr output);
173 
174   /// Generate code to load the vector at specified index in the 'arg_addrs' array.
175   llvm::Value* LoadVectorAtIndex(llvm::Value* arg_addrs, int idx,
176                                  const std::string& name);
177 
178   /// Generate code to load the vector at specified index and cast it as bitmap.
179   llvm::Value* GetValidityReference(llvm::Value* arg_addrs, int idx, FieldPtr field);
180 
181   /// Generate code to load the vector at specified index and cast it as data array.
182   llvm::Value* GetDataReference(llvm::Value* arg_addrs, int idx, FieldPtr field);
183 
184   /// Generate code to load the vector at specified index and cast it as offsets array.
185   llvm::Value* GetOffsetsReference(llvm::Value* arg_addrs, int idx, FieldPtr field);
186 
187   /// Generate code to load the vector at specified index and cast it as buffer pointer.
188   llvm::Value* GetDataBufferPtrReference(llvm::Value* arg_addrs, int idx, FieldPtr field);
189 
190   /// Generate code for the value array of one expression.
191   Status CodeGenExprValue(DexPtr value_expr, int num_buffers, FieldDescriptorPtr output,
192                           int suffix_idx, llvm::Function** fn,
193                           SelectionVector::Mode selection_vector_mode);
194 
195   /// Generate code to load the local bitmap specified index and cast it as bitmap.
196   llvm::Value* GetLocalBitMapReference(llvm::Value* arg_bitmaps, int idx);
197 
198   /// Generate code to get the bit value at 'position' in the bitmap.
199   llvm::Value* GetPackedBitValue(llvm::Value* bitmap, llvm::Value* position);
200 
201   /// Generate code to get the bit value at 'position' in the validity bitmap.
202   llvm::Value* GetPackedValidityBitValue(llvm::Value* bitmap, llvm::Value* position);
203 
204   /// Generate code to set the bit value at 'position' in the bitmap to 'value'.
205   void SetPackedBitValue(llvm::Value* bitmap, llvm::Value* position, llvm::Value* value);
206 
207   /// Generate code to clear the bit value at 'position' in the bitmap if 'value'
208   /// is false.
209   void ClearPackedBitValueIfFalse(llvm::Value* bitmap, llvm::Value* position,
210                                   llvm::Value* value);
211 
212   // Generate code to build a DecimalLValue with specified value/precision/scale.
213   std::shared_ptr<DecimalLValue> BuildDecimalLValue(llvm::Value* value,
214                                                     DataTypePtr arrow_type);
215 
216   /// Generate code to make a function call (to a pre-compiled IR function) which takes
217   /// 'args' and has a return type 'ret_type'.
218   llvm::Value* AddFunctionCall(const std::string& full_name, llvm::Type* ret_type,
219                                const std::vector<llvm::Value*>& args);
220 
221   /// Compute the result bitmap for the expression.
222   ///
223   /// \param[in] compiled_expr the compiled expression (includes the bitmap indices to be
224   ///            used for computing the validity bitmap of the result).
225   /// \param[in] eval_batch (includes input/output buffer addresses)
226   /// \param[in] selection_vector the list of selected positions
227   void ComputeBitMapsForExpr(const CompiledExpr& compiled_expr,
228                              const EvalBatch& eval_batch,
229                              const SelectionVector* selection_vector);
230 
231   /// Replace the %T in the trace msg with the correct type corresponding to 'type'
232   /// eg. %d for int32, %ld for int64, ..
233   std::string ReplaceFormatInTrace(const std::string& msg, llvm::Value* value,
234                                    std::string* print_fn);
235 
236   /// Generate the code to print a trace msg with one optional argument (%T)
237   void AddTrace(const std::string& msg, llvm::Value* value = NULLPTR);
238 
239   std::unique_ptr<Engine> engine_;
240   std::vector<std::unique_ptr<CompiledExpr>> compiled_exprs_;
241   FunctionRegistry function_registry_;
242   Annotator annotator_;
243   SelectionVector::Mode selection_vector_mode_;
244 
245   // used for debug
246   bool enable_ir_traces_;
247   std::vector<std::string> trace_strings_;
248 };
249 
250 }  // namespace gandiva
251