// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17 
18 #pragma once
19 
20 #include <cstdint>
21 #include <memory>
22 #include <vector>
23 
24 #include "arrow/builder.h"
25 #include "arrow/status.h"
26 #include "arrow/type.h"
27 #include "arrow/util/checked_cast.h"
28 #include "arrow/util/macros.h"
29 #include "arrow/util/visibility.h"
30 
31 namespace arrow {
32 
33 class MemoryPool;
34 class RecordBatch;
35 
36 /// \class RecordBatchBuilder
37 /// \brief Helper class for creating record batches iteratively given a known
38 /// schema
39 class ARROW_EXPORT RecordBatchBuilder {
40  public:
41   /// \brief Create an initialize a RecordBatchBuilder
42   /// \param[in] schema The schema for the record batch
43   /// \param[in] pool A MemoryPool to use for allocations
44   /// \param[in] builder the created builder instance
45   static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
46                      std::unique_ptr<RecordBatchBuilder>* builder);
47 
48   /// \brief Create an initialize a RecordBatchBuilder
49   /// \param[in] schema The schema for the record batch
50   /// \param[in] pool A MemoryPool to use for allocations
51   /// \param[in] initial_capacity The initial capacity for the builders
52   /// \param[in] builder the created builder instance
53   static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
54                      int64_t initial_capacity,
55                      std::unique_ptr<RecordBatchBuilder>* builder);
56 
57   /// \brief Get base pointer to field builder
58   /// \param i the field index
59   /// \return pointer to ArrayBuilder
GetField(int i)60   ArrayBuilder* GetField(int i) { return raw_field_builders_[i]; }
61 
62   /// \brief Return field builder casted to indicated specific builder type
63   /// \param i the field index
64   /// \return pointer to template type
65   template <typename T>
GetFieldAs(int i)66   T* GetFieldAs(int i) {
67     return internal::checked_cast<T*>(raw_field_builders_[i]);
68   }
69 
70   /// \brief Finish current batch and optionally reset
71   /// \param[in] reset_builders the resulting RecordBatch
72   /// \param[out] batch the resulting RecordBatch
73   /// \return Status
74   Status Flush(bool reset_builders, std::shared_ptr<RecordBatch>* batch);
75 
76   /// \brief Finish current batch and reset
77   /// \param[out] batch the resulting RecordBatch
78   /// \return Status
79   Status Flush(std::shared_ptr<RecordBatch>* batch);
80 
81   /// \brief Set the initial capacity for new builders
82   void SetInitialCapacity(int64_t capacity);
83 
84   /// \brief The initial capacity for builders
initial_capacity()85   int64_t initial_capacity() const { return initial_capacity_; }
86 
87   /// \brief The number of fields in the schema
num_fields()88   int num_fields() const { return schema_->num_fields(); }
89 
90   /// \brief The number of fields in the schema
schema()91   std::shared_ptr<Schema> schema() const { return schema_; }
92 
93  private:
94   ARROW_DISALLOW_COPY_AND_ASSIGN(RecordBatchBuilder);
95 
96   RecordBatchBuilder(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
97                      int64_t initial_capacity);
98 
99   Status CreateBuilders();
100   Status InitBuilders();
101 
102   std::shared_ptr<Schema> schema_;
103   int64_t initial_capacity_;
104   MemoryPool* pool_;
105 
106   std::vector<std::unique_ptr<ArrayBuilder>> field_builders_;
107   std::vector<ArrayBuilder*> raw_field_builders_;
108 };
109 
110 }  // namespace arrow
111