1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
20 #include <list>
21 #include <string>
22 #include <unordered_map>
23 #include <vector>
24 
25 #include "arrow/util/logging.h"
26 #include "gandiva/arrow.h"
27 #include "gandiva/eval_batch.h"
28 #include "gandiva/gandiva_aliases.h"
29 #include "gandiva/visibility.h"
30 
31 namespace gandiva {
32 
33 /// \brief annotate the arrow fields in an expression, and use that
34 /// to convert the incoming arrow-format row batch to an EvalBatch.
35 class GANDIVA_EXPORT Annotator {
36  public:
Annotator()37   Annotator() : buffer_count_(0), local_bitmap_count_(0) {}
38 
39   /// Add an annotated field descriptor for a field in an input schema.
40   /// If the field is already annotated, returns that instead.
41   FieldDescriptorPtr CheckAndAddInputFieldDescriptor(FieldPtr field);
42 
43   /// Add an annotated field descriptor for an output field.
44   FieldDescriptorPtr AddOutputFieldDescriptor(FieldPtr field);
45 
46   /// Add a local bitmap (for saving validity bits of an intermediate node).
47   /// Returns the index of the bitmap in the list of local bitmaps.
AddLocalBitMap()48   int AddLocalBitMap() { return local_bitmap_count_++; }
49 
50   /// Prepare an eval batch for the incoming record batch.
51   EvalBatchPtr PrepareEvalBatch(const arrow::RecordBatch& record_batch,
52                                 const ArrayDataVector& out_vector);
53 
buffer_count()54   int buffer_count() { return buffer_count_; }
55 
56  private:
57   /// Annotate a field and return the descriptor.
58   FieldDescriptorPtr MakeDesc(FieldPtr field, bool is_output);
59 
60   /// Populate eval_batch by extracting the raw buffers from the arrow array, whose
61   /// contents are represent by the annotated descriptor 'desc'.
62   void PrepareBuffersForField(const FieldDescriptor& desc,
63                               const arrow::ArrayData& array_data, EvalBatch* eval_batch,
64                               bool is_output);
65 
66   /// The list of input/output buffers (includes bitmap buffers, value buffers and
67   /// offset buffers).
68   int buffer_count_;
69 
70   /// The number of local bitmaps. These are used to save the validity bits for
71   /// intermediate nodes in the expression tree.
72   int local_bitmap_count_;
73 
74   /// map between field name and annotated input field descriptor.
75   std::unordered_map<std::string, FieldDescriptorPtr> in_name_to_desc_;
76 
77   /// vector of annotated output field descriptors.
78   std::vector<FieldDescriptorPtr> out_descs_;
79 };
80 
81 }  // namespace gandiva
82