1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "arrow/builder.h"
19 
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "arrow/status.h"
25 #include "arrow/type.h"
26 #include "arrow/util/checked_cast.h"
27 #include "arrow/util/hashing.h"
28 #include "arrow/visitor_inline.h"
29 
30 namespace arrow {
31 
32 class MemoryPool;
33 
34 // ----------------------------------------------------------------------
35 // Helper functions
36 
37 struct DictionaryBuilderCase {
38   template <typename ValueType, typename Enable = typename ValueType::c_type>
Visitarrow::DictionaryBuilderCase39   Status Visit(const ValueType&) {
40     return CreateFor<ValueType>();
41   }
42 
Visitarrow::DictionaryBuilderCase43   Status Visit(const BinaryType&) { return Create<BinaryDictionaryBuilder>(); }
Visitarrow::DictionaryBuilderCase44   Status Visit(const StringType&) { return Create<StringDictionaryBuilder>(); }
Visitarrow::DictionaryBuilderCase45   Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
46 
Visitarrow::DictionaryBuilderCase47   Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
Visitarrow::DictionaryBuilderCase48   Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
NotImplementedarrow::DictionaryBuilderCase49   Status NotImplemented(const DataType& value_type) {
50     return Status::NotImplemented(
51         "MakeBuilder: cannot construct builder for dictionaries with value type ",
52         value_type);
53   }
54 
55   template <typename ValueType>
CreateForarrow::DictionaryBuilderCase56   Status CreateFor() {
57     return Create<DictionaryBuilder<ValueType>>();
58   }
59 
60   template <typename BuilderType>
Createarrow::DictionaryBuilderCase61   Status Create() {
62     if (dictionary != nullptr) {
63       out->reset(new BuilderType(dictionary, pool));
64     } else {
65       out->reset(new BuilderType(value_type, pool));
66     }
67     return Status::OK();
68   }
69 
Makearrow::DictionaryBuilderCase70   Status Make() { return VisitTypeInline(*value_type, this); }
71 
72   MemoryPool* pool;
73   const std::shared_ptr<DataType>& value_type;
74   const std::shared_ptr<Array>& dictionary;
75   std::unique_ptr<ArrayBuilder>* out;
76 };
77 
// Expands to a `case` label for TYPE_CLASS##Type::type_id whose body constructs
// a TYPE_CLASS##Builder from the `type` and `pool` visible at the expansion
// site, stores it in `*out`, and returns Status::OK().
#define BUILDER_CASE(TYPE_CLASS)                     \
  case TYPE_CLASS##Type::type_id: {                  \
    out->reset(new TYPE_CLASS##Builder(type, pool)); \
    return Status::OK();                             \
  }
82 
MakeBuilder(MemoryPool * pool,const std::shared_ptr<DataType> & type,std::unique_ptr<ArrayBuilder> * out)83 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
84                    std::unique_ptr<ArrayBuilder>* out) {
85   switch (type->id()) {
86     case Type::NA: {
87       out->reset(new NullBuilder(pool));
88       return Status::OK();
89     }
90       BUILDER_CASE(UInt8);
91       BUILDER_CASE(Int8);
92       BUILDER_CASE(UInt16);
93       BUILDER_CASE(Int16);
94       BUILDER_CASE(UInt32);
95       BUILDER_CASE(Int32);
96       BUILDER_CASE(UInt64);
97       BUILDER_CASE(Int64);
98       BUILDER_CASE(Date32);
99       BUILDER_CASE(Date64);
100       BUILDER_CASE(Duration);
101       BUILDER_CASE(Time32);
102       BUILDER_CASE(Time64);
103       BUILDER_CASE(Timestamp);
104       BUILDER_CASE(MonthInterval);
105       BUILDER_CASE(DayTimeInterval);
106       BUILDER_CASE(Boolean);
107       BUILDER_CASE(HalfFloat);
108       BUILDER_CASE(Float);
109       BUILDER_CASE(Double);
110       BUILDER_CASE(String);
111       BUILDER_CASE(Binary);
112       BUILDER_CASE(LargeString);
113       BUILDER_CASE(LargeBinary);
114       BUILDER_CASE(FixedSizeBinary);
115       BUILDER_CASE(Decimal128);
116 
117     case Type::DICTIONARY: {
118       const auto& dict_type = static_cast<const DictionaryType&>(*type);
119       DictionaryBuilderCase visitor = {pool, dict_type.value_type(), nullptr, out};
120       return visitor.Make();
121     }
122 
123     case Type::LIST: {
124       std::unique_ptr<ArrayBuilder> value_builder;
125       std::shared_ptr<DataType> value_type =
126           internal::checked_cast<const ListType&>(*type).value_type();
127       RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
128       out->reset(new ListBuilder(pool, std::move(value_builder), type));
129       return Status::OK();
130     }
131 
132     case Type::LARGE_LIST: {
133       std::unique_ptr<ArrayBuilder> value_builder;
134       std::shared_ptr<DataType> value_type =
135           internal::checked_cast<const LargeListType&>(*type).value_type();
136       RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
137       out->reset(new LargeListBuilder(pool, std::move(value_builder), type));
138       return Status::OK();
139     }
140 
141     case Type::MAP: {
142       const auto& map_type = internal::checked_cast<const MapType&>(*type);
143       std::unique_ptr<ArrayBuilder> key_builder, item_builder;
144       RETURN_NOT_OK(MakeBuilder(pool, map_type.key_type(), &key_builder));
145       RETURN_NOT_OK(MakeBuilder(pool, map_type.item_type(), &item_builder));
146       out->reset(
147           new MapBuilder(pool, std::move(key_builder), std::move(item_builder), type));
148       return Status::OK();
149     }
150 
151     case Type::FIXED_SIZE_LIST: {
152       const auto& list_type = internal::checked_cast<const FixedSizeListType&>(*type);
153       std::unique_ptr<ArrayBuilder> value_builder;
154       auto value_type = list_type.value_type();
155       RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
156       out->reset(new FixedSizeListBuilder(pool, std::move(value_builder), type));
157       return Status::OK();
158     }
159 
160     case Type::STRUCT: {
161       const std::vector<std::shared_ptr<Field>>& fields = type->fields();
162       std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
163 
164       for (const auto& it : fields) {
165         std::unique_ptr<ArrayBuilder> builder;
166         RETURN_NOT_OK(MakeBuilder(pool, it->type(), &builder));
167         field_builders.emplace_back(std::move(builder));
168       }
169       out->reset(new StructBuilder(type, pool, std::move(field_builders)));
170       return Status::OK();
171     }
172 
173     case Type::UNION: {
174       const auto& union_type = internal::checked_cast<const UnionType&>(*type);
175       const std::vector<std::shared_ptr<Field>>& fields = type->fields();
176       std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
177 
178       for (const auto& it : fields) {
179         std::unique_ptr<ArrayBuilder> builder;
180         RETURN_NOT_OK(MakeBuilder(pool, it->type(), &builder));
181         field_builders.emplace_back(std::move(builder));
182       }
183       if (union_type.mode() == UnionMode::DENSE) {
184         out->reset(new DenseUnionBuilder(pool, std::move(field_builders), type));
185       } else {
186         out->reset(new SparseUnionBuilder(pool, std::move(field_builders), type));
187       }
188       return Status::OK();
189     }
190 
191     default:
192       break;
193   }
194   return Status::NotImplemented("MakeBuilder: cannot construct builder for type ",
195                                 type->ToString());
196 }
197 
MakeDictionaryBuilder(MemoryPool * pool,const std::shared_ptr<DataType> & type,const std::shared_ptr<Array> & dictionary,std::unique_ptr<ArrayBuilder> * out)198 Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
199                              const std::shared_ptr<Array>& dictionary,
200                              std::unique_ptr<ArrayBuilder>* out) {
201   const auto& dict_type = static_cast<const DictionaryType&>(*type);
202   DictionaryBuilderCase visitor = {pool, dict_type.value_type(), dictionary, out};
203   return visitor.Make();
204 }
205 
206 }  // namespace arrow
207