1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/builder.h"
19
20 #include <string>
21 #include <utility>
22 #include <vector>
23
24 #include "arrow/status.h"
25 #include "arrow/type.h"
26 #include "arrow/util/checked_cast.h"
27 #include "arrow/util/hashing.h"
28 #include "arrow/visitor_inline.h"
29
30 namespace arrow {
31
32 class MemoryPool;
33
34 // ----------------------------------------------------------------------
35 // Helper functions
36
37 struct DictionaryBuilderCase {
38 template <typename ValueType, typename Enable = typename ValueType::c_type>
Visitarrow::DictionaryBuilderCase39 Status Visit(const ValueType&) {
40 return CreateFor<ValueType>();
41 }
42
Visitarrow::DictionaryBuilderCase43 Status Visit(const BinaryType&) { return Create<BinaryDictionaryBuilder>(); }
Visitarrow::DictionaryBuilderCase44 Status Visit(const StringType&) { return Create<StringDictionaryBuilder>(); }
Visitarrow::DictionaryBuilderCase45 Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
46
Visitarrow::DictionaryBuilderCase47 Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
Visitarrow::DictionaryBuilderCase48 Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
NotImplementedarrow::DictionaryBuilderCase49 Status NotImplemented(const DataType& value_type) {
50 return Status::NotImplemented(
51 "MakeBuilder: cannot construct builder for dictionaries with value type ",
52 value_type);
53 }
54
55 template <typename ValueType>
CreateForarrow::DictionaryBuilderCase56 Status CreateFor() {
57 return Create<DictionaryBuilder<ValueType>>();
58 }
59
60 template <typename BuilderType>
Createarrow::DictionaryBuilderCase61 Status Create() {
62 if (dictionary != nullptr) {
63 out->reset(new BuilderType(dictionary, pool));
64 } else {
65 out->reset(new BuilderType(value_type, pool));
66 }
67 return Status::OK();
68 }
69
Makearrow::DictionaryBuilderCase70 Status Make() { return VisitTypeInline(*value_type, this); }
71
72 MemoryPool* pool;
73 const std::shared_ptr<DataType>& value_type;
74 const std::shared_ptr<Array>& dictionary;
75 std::unique_ptr<ArrayBuilder>* out;
76 };
77
// Expands to a `case` label for TYPE_CLASS##Type::type_id that stores a new
// TYPE_CLASS##Builder(type, pool) in `*out` and returns Status::OK().
// The expansion relies on `type`, `pool` and `out` being in scope at the
// point of use.
#define BUILDER_CASE(TYPE_CLASS)                     \
  case TYPE_CLASS##Type::type_id: {                  \
    out->reset(new TYPE_CLASS##Builder(type, pool)); \
    return Status::OK();                             \
  }
82
MakeBuilder(MemoryPool * pool,const std::shared_ptr<DataType> & type,std::unique_ptr<ArrayBuilder> * out)83 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
84 std::unique_ptr<ArrayBuilder>* out) {
85 switch (type->id()) {
86 case Type::NA: {
87 out->reset(new NullBuilder(pool));
88 return Status::OK();
89 }
90 BUILDER_CASE(UInt8);
91 BUILDER_CASE(Int8);
92 BUILDER_CASE(UInt16);
93 BUILDER_CASE(Int16);
94 BUILDER_CASE(UInt32);
95 BUILDER_CASE(Int32);
96 BUILDER_CASE(UInt64);
97 BUILDER_CASE(Int64);
98 BUILDER_CASE(Date32);
99 BUILDER_CASE(Date64);
100 BUILDER_CASE(Duration);
101 BUILDER_CASE(Time32);
102 BUILDER_CASE(Time64);
103 BUILDER_CASE(Timestamp);
104 BUILDER_CASE(MonthInterval);
105 BUILDER_CASE(DayTimeInterval);
106 BUILDER_CASE(Boolean);
107 BUILDER_CASE(HalfFloat);
108 BUILDER_CASE(Float);
109 BUILDER_CASE(Double);
110 BUILDER_CASE(String);
111 BUILDER_CASE(Binary);
112 BUILDER_CASE(LargeString);
113 BUILDER_CASE(LargeBinary);
114 BUILDER_CASE(FixedSizeBinary);
115 BUILDER_CASE(Decimal128);
116
117 case Type::DICTIONARY: {
118 const auto& dict_type = static_cast<const DictionaryType&>(*type);
119 DictionaryBuilderCase visitor = {pool, dict_type.value_type(), nullptr, out};
120 return visitor.Make();
121 }
122
123 case Type::LIST: {
124 std::unique_ptr<ArrayBuilder> value_builder;
125 std::shared_ptr<DataType> value_type =
126 internal::checked_cast<const ListType&>(*type).value_type();
127 RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
128 out->reset(new ListBuilder(pool, std::move(value_builder), type));
129 return Status::OK();
130 }
131
132 case Type::LARGE_LIST: {
133 std::unique_ptr<ArrayBuilder> value_builder;
134 std::shared_ptr<DataType> value_type =
135 internal::checked_cast<const LargeListType&>(*type).value_type();
136 RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
137 out->reset(new LargeListBuilder(pool, std::move(value_builder), type));
138 return Status::OK();
139 }
140
141 case Type::MAP: {
142 const auto& map_type = internal::checked_cast<const MapType&>(*type);
143 std::unique_ptr<ArrayBuilder> key_builder, item_builder;
144 RETURN_NOT_OK(MakeBuilder(pool, map_type.key_type(), &key_builder));
145 RETURN_NOT_OK(MakeBuilder(pool, map_type.item_type(), &item_builder));
146 out->reset(
147 new MapBuilder(pool, std::move(key_builder), std::move(item_builder), type));
148 return Status::OK();
149 }
150
151 case Type::FIXED_SIZE_LIST: {
152 const auto& list_type = internal::checked_cast<const FixedSizeListType&>(*type);
153 std::unique_ptr<ArrayBuilder> value_builder;
154 auto value_type = list_type.value_type();
155 RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
156 out->reset(new FixedSizeListBuilder(pool, std::move(value_builder), type));
157 return Status::OK();
158 }
159
160 case Type::STRUCT: {
161 const std::vector<std::shared_ptr<Field>>& fields = type->fields();
162 std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
163
164 for (const auto& it : fields) {
165 std::unique_ptr<ArrayBuilder> builder;
166 RETURN_NOT_OK(MakeBuilder(pool, it->type(), &builder));
167 field_builders.emplace_back(std::move(builder));
168 }
169 out->reset(new StructBuilder(type, pool, std::move(field_builders)));
170 return Status::OK();
171 }
172
173 case Type::UNION: {
174 const auto& union_type = internal::checked_cast<const UnionType&>(*type);
175 const std::vector<std::shared_ptr<Field>>& fields = type->fields();
176 std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
177
178 for (const auto& it : fields) {
179 std::unique_ptr<ArrayBuilder> builder;
180 RETURN_NOT_OK(MakeBuilder(pool, it->type(), &builder));
181 field_builders.emplace_back(std::move(builder));
182 }
183 if (union_type.mode() == UnionMode::DENSE) {
184 out->reset(new DenseUnionBuilder(pool, std::move(field_builders), type));
185 } else {
186 out->reset(new SparseUnionBuilder(pool, std::move(field_builders), type));
187 }
188 return Status::OK();
189 }
190
191 default:
192 break;
193 }
194 return Status::NotImplemented("MakeBuilder: cannot construct builder for type ",
195 type->ToString());
196 }
197
MakeDictionaryBuilder(MemoryPool * pool,const std::shared_ptr<DataType> & type,const std::shared_ptr<Array> & dictionary,std::unique_ptr<ArrayBuilder> * out)198 Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
199 const std::shared_ptr<Array>& dictionary,
200 std::unique_ptr<ArrayBuilder>* out) {
201 const auto& dict_type = static_cast<const DictionaryType&>(*type);
202 DictionaryBuilderCase visitor = {pool, dict_type.value_type(), dictionary, out};
203 return visitor.Make();
204 }
205
206 } // namespace arrow
207