1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 #pragma once 19 20 #include <list> 21 #include <string> 22 #include <unordered_map> 23 #include <vector> 24 25 #include "arrow/util/logging.h" 26 #include "gandiva/arrow.h" 27 #include "gandiva/eval_batch.h" 28 #include "gandiva/gandiva_aliases.h" 29 #include "gandiva/visibility.h" 30 31 namespace gandiva { 32 33 /// \brief annotate the arrow fields in an expression, and use that 34 /// to convert the incoming arrow-format row batch to an EvalBatch. 35 class GANDIVA_EXPORT Annotator { 36 public: Annotator()37 Annotator() : buffer_count_(0), local_bitmap_count_(0) {} 38 39 /// Add an annotated field descriptor for a field in an input schema. 40 /// If the field is already annotated, returns that instead. 41 FieldDescriptorPtr CheckAndAddInputFieldDescriptor(FieldPtr field); 42 43 /// Add an annotated field descriptor for an output field. 44 FieldDescriptorPtr AddOutputFieldDescriptor(FieldPtr field); 45 46 /// Add a local bitmap (for saving validity bits of an intermediate node). 47 /// Returns the index of the bitmap in the list of local bitmaps. AddLocalBitMap()48 int AddLocalBitMap() { return local_bitmap_count_++; } 49 50 /// Prepare an eval batch for the incoming record batch. 51 EvalBatchPtr PrepareEvalBatch(const arrow::RecordBatch& record_batch, 52 const ArrayDataVector& out_vector); 53 buffer_count()54 int buffer_count() { return buffer_count_; } 55 56 private: 57 /// Annotate a field and return the descriptor. 58 FieldDescriptorPtr MakeDesc(FieldPtr field, bool is_output); 59 60 /// Populate eval_batch by extracting the raw buffers from the arrow array, whose 61 /// contents are represent by the annotated descriptor 'desc'. 62 void PrepareBuffersForField(const FieldDescriptor& desc, 63 const arrow::ArrayData& array_data, EvalBatch* eval_batch, 64 bool is_output); 65 66 /// The list of input/output buffers (includes bitmap buffers, value buffers and 67 /// offset buffers). 68 int buffer_count_; 69 70 /// The number of local bitmaps. These are used to save the validity bits for 71 /// intermediate nodes in the expression tree. 72 int local_bitmap_count_; 73 74 /// map between field name and annotated input field descriptor. 75 std::unordered_map<std::string, FieldDescriptorPtr> in_name_to_desc_; 76 77 /// vector of annotated output field descriptors. 78 std::vector<FieldDescriptorPtr> out_descs_; 79 }; 80 81 } // namespace gandiva 82