1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 // Object model for scalar (non-Array) values. Not intended for use with large
19 // amounts of data
20 //
21 // NOTE: This API is experimental as of the 0.13 version and subject to change
22 // without deprecation warnings
23
24 #pragma once
25
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30
31 #include "arrow/result.h"
32 #include "arrow/status.h"
33 #include "arrow/type.h"
34 #include "arrow/type_fwd.h"
35 #include "arrow/type_traits.h"
36 #include "arrow/util/compare.h"
37 #include "arrow/util/decimal.h"
38 #include "arrow/util/logging.h"
39 #include "arrow/util/string_view.h"
40 #include "arrow/util/visibility.h"
41
42 namespace arrow {
43
44 class Array;
45
46 /// \brief Base class for scalar values, representing a single value occupying
47 /// an array "slot"
48 struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
49 virtual ~Scalar() = default;
50
ScalarScalar51 explicit Scalar(std::shared_ptr<DataType> type) : type(std::move(type)) {}
52
53 /// \brief The type of the scalar value
54 std::shared_ptr<DataType> type;
55
56 /// \brief Whether the value is valid (not null) or not
57 bool is_valid = false;
58
59 using util::EqualityComparable<Scalar>::operator==;
60 using util::EqualityComparable<Scalar>::Equals;
61 bool Equals(const Scalar& other) const;
62
63 struct ARROW_EXPORT Hash {
operatorScalar::Hash64 size_t operator()(const Scalar& scalar) const { return hash(scalar); }
65
operatorScalar::Hash66 size_t operator()(const std::shared_ptr<Scalar>& scalar) const {
67 return hash(*scalar);
68 }
69
70 static size_t hash(const Scalar& scalar);
71 };
72
73 std::string ToString() const;
74
75 static Result<std::shared_ptr<Scalar>> Parse(const std::shared_ptr<DataType>& type,
76 util::string_view repr);
77
78 // TODO(bkietz) add compute::CastOptions
79 Result<std::shared_ptr<Scalar>> CastTo(std::shared_ptr<DataType> to) const;
80
81 protected:
ScalarScalar82 Scalar(std::shared_ptr<DataType> type, bool is_valid)
83 : type(std::move(type)), is_valid(is_valid) {}
84 };
85
86 /// \brief A scalar value for NullType. Never valid
87 struct ARROW_EXPORT NullScalar : public Scalar {
88 public:
89 using TypeClass = NullType;
90
NullScalarNullScalar91 NullScalar() : Scalar{null(), false} {}
92 };
93
94 namespace internal {
95
96 template <typename T, typename CType = typename T::c_type>
97 struct ARROW_EXPORT PrimitiveScalar : public Scalar {
98 using Scalar::Scalar;
99 using TypeClass = T;
100 using ValueType = CType;
101
102 // Non-null constructor.
PrimitiveScalarPrimitiveScalar103 PrimitiveScalar(ValueType value, std::shared_ptr<DataType> type)
104 : Scalar(std::move(type), true), value(value) {
105 ARROW_CHECK_EQ(this->type->id(), T::type_id);
106 }
107
PrimitiveScalarPrimitiveScalar108 explicit PrimitiveScalar(ValueType value)
109 : PrimitiveScalar(value, TypeTraits<T>::type_singleton()) {}
110
PrimitiveScalarPrimitiveScalar111 PrimitiveScalar() : Scalar(TypeTraits<T>::type_singleton()) {}
112
113 ValueType value{};
114 };
115
116 } // namespace internal
117
/// \brief Scalar for BooleanType values, stored as a C++ bool.
/// All constructors are inherited from internal::PrimitiveScalar.
struct ARROW_EXPORT BooleanScalar : public internal::PrimitiveScalar<BooleanType, bool> {
  using internal::PrimitiveScalar<BooleanType, bool>::PrimitiveScalar;
};
121
/// \brief Scalar template for numeric types; the stored C type is T::c_type
/// (the default CType of internal::PrimitiveScalar).
/// All constructors are inherited from internal::PrimitiveScalar<T>.
template <typename T>
struct NumericScalar : public internal::PrimitiveScalar<T> {
  using internal::PrimitiveScalar<T>::PrimitiveScalar;
};
126
/// \brief Scalar for Int8Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT Int8Scalar : public NumericScalar<Int8Type> {
  using NumericScalar<Int8Type>::NumericScalar;
};
130
/// \brief Scalar for Int16Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT Int16Scalar : public NumericScalar<Int16Type> {
  using NumericScalar<Int16Type>::NumericScalar;
};
134
/// \brief Scalar for Int32Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT Int32Scalar : public NumericScalar<Int32Type> {
  using NumericScalar<Int32Type>::NumericScalar;
};
138
/// \brief Scalar for Int64Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT Int64Scalar : public NumericScalar<Int64Type> {
  using NumericScalar<Int64Type>::NumericScalar;
};
142
/// \brief Scalar for UInt8Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT UInt8Scalar : public NumericScalar<UInt8Type> {
  using NumericScalar<UInt8Type>::NumericScalar;
};
146
/// \brief Scalar for UInt16Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT UInt16Scalar : public NumericScalar<UInt16Type> {
  using NumericScalar<UInt16Type>::NumericScalar;
};
150
/// \brief Scalar for UInt32Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT UInt32Scalar : public NumericScalar<UInt32Type> {
  using NumericScalar<UInt32Type>::NumericScalar;
};
154
/// \brief Scalar for UInt64Type values; constructors are inherited from NumericScalar
struct ARROW_EXPORT UInt64Scalar : public NumericScalar<UInt64Type> {
  using NumericScalar<UInt64Type>::NumericScalar;
};
158
/// \brief Scalar for HalfFloatType values, stored as HalfFloatType::c_type;
/// constructors are inherited from NumericScalar
struct ARROW_EXPORT HalfFloatScalar : public NumericScalar<HalfFloatType> {
  using NumericScalar<HalfFloatType>::NumericScalar;
};
162
/// \brief Scalar for FloatType values; constructors are inherited from NumericScalar
struct ARROW_EXPORT FloatScalar : public NumericScalar<FloatType> {
  using NumericScalar<FloatType>::NumericScalar;
};
166
/// \brief Scalar for DoubleType values; constructors are inherited from NumericScalar
struct ARROW_EXPORT DoubleScalar : public NumericScalar<DoubleType> {
  using NumericScalar<DoubleType>::NumericScalar;
};
170
171 struct ARROW_EXPORT BaseBinaryScalar : public Scalar {
172 using Scalar::Scalar;
173 using ValueType = std::shared_ptr<Buffer>;
174
175 std::shared_ptr<Buffer> value;
176
177 protected:
BaseBinaryScalarBaseBinaryScalar178 BaseBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
179 : Scalar{std::move(type), true}, value(std::move(value)) {}
180 };
181
182 struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar {
183 using BaseBinaryScalar::BaseBinaryScalar;
184 using TypeClass = BinaryScalar;
185
BinaryScalarBinaryScalar186 BinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
187 : BaseBinaryScalar(std::move(value), std::move(type)) {}
188
BinaryScalarBinaryScalar189 explicit BinaryScalar(std::shared_ptr<Buffer> value)
190 : BinaryScalar(std::move(value), binary()) {}
191
BinaryScalarBinaryScalar192 BinaryScalar() : BinaryScalar(binary()) {}
193 };
194
195 struct ARROW_EXPORT StringScalar : public BinaryScalar {
196 using BinaryScalar::BinaryScalar;
197 using TypeClass = StringType;
198
StringScalarStringScalar199 explicit StringScalar(std::shared_ptr<Buffer> value)
200 : StringScalar(std::move(value), utf8()) {}
201
202 explicit StringScalar(std::string s);
203
StringScalarStringScalar204 StringScalar() : StringScalar(utf8()) {}
205 };
206
207 struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar {
208 using BaseBinaryScalar::BaseBinaryScalar;
209 using TypeClass = LargeBinaryScalar;
210
LargeBinaryScalarLargeBinaryScalar211 LargeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
212 : BaseBinaryScalar(std::move(value), std::move(type)) {}
213
LargeBinaryScalarLargeBinaryScalar214 explicit LargeBinaryScalar(std::shared_ptr<Buffer> value)
215 : LargeBinaryScalar(std::move(value), large_binary()) {}
216
LargeBinaryScalarLargeBinaryScalar217 LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {}
218 };
219
220 struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar {
221 using LargeBinaryScalar::LargeBinaryScalar;
222 using TypeClass = LargeStringType;
223
LargeStringScalarLargeStringScalar224 explicit LargeStringScalar(std::shared_ptr<Buffer> value)
225 : LargeStringScalar(std::move(value), large_utf8()) {}
226
227 explicit LargeStringScalar(std::string s);
228
LargeStringScalarLargeStringScalar229 LargeStringScalar() : LargeStringScalar(large_utf8()) {}
230 };
231
232 struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar {
233 using TypeClass = FixedSizeBinaryType;
234
235 FixedSizeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type);
236
FixedSizeBinaryScalarFixedSizeBinaryScalar237 explicit FixedSizeBinaryScalar(std::shared_ptr<DataType> type) : BinaryScalar(type) {}
238 };
239
240 template <typename T>
241 struct ARROW_EXPORT TemporalScalar : public Scalar {
242 using Scalar::Scalar;
243 using TypeClass = T;
244 using ValueType = typename T::c_type;
245
TemporalScalarTemporalScalar246 TemporalScalar(ValueType value, std::shared_ptr<DataType> type)
247 : Scalar(std::move(type), true), value(value) {}
248
TemporalScalarTemporalScalar249 explicit TemporalScalar(std::shared_ptr<DataType> type)
250 : Scalar(std::move(type), false) {}
251
252 ValueType value;
253 };
254
255 template <typename T>
256 struct ARROW_EXPORT DateScalar : public TemporalScalar<T> {
257 using TemporalScalar<T>::TemporalScalar;
258 using ValueType = typename TemporalScalar<T>::ValueType;
259
DateScalarDateScalar260 explicit DateScalar(ValueType value)
261 : TemporalScalar<T>(std::move(value), TypeTraits<T>::type_singleton()) {}
DateScalarDateScalar262 DateScalar() : TemporalScalar<T>(TypeTraits<T>::type_singleton()) {}
263 };
264
/// \brief Scalar for Date32Type values; constructors are inherited from DateScalar
struct ARROW_EXPORT Date32Scalar : public DateScalar<Date32Type> {
  using DateScalar<Date32Type>::DateScalar;
};
268
/// \brief Scalar for Date64Type values; constructors are inherited from DateScalar
struct ARROW_EXPORT Date64Scalar : public DateScalar<Date64Type> {
  using DateScalar<Date64Type>::DateScalar;
};
272
/// \brief Scalar template for time-of-day types. It adds no constructors of
/// its own, so every constructor inherited from TemporalScalar takes a type.
template <typename T>
struct ARROW_EXPORT TimeScalar : public TemporalScalar<T> {
  using TemporalScalar<T>::TemporalScalar;
};
277
/// \brief Scalar for Time32Type values; constructors are inherited from TimeScalar
struct ARROW_EXPORT Time32Scalar : public TimeScalar<Time32Type> {
  using TimeScalar<Time32Type>::TimeScalar;
};
281
/// \brief Scalar for Time64Type values; constructors are inherited from TimeScalar
struct ARROW_EXPORT Time64Scalar : public TimeScalar<Time64Type> {
  using TimeScalar<Time64Type>::TimeScalar;
};
285
/// \brief Scalar for TimestampType values. Constructors are inherited from
/// TemporalScalar, all of which take a type.
struct ARROW_EXPORT TimestampScalar : public TemporalScalar<TimestampType> {
  using TemporalScalar<TimestampType>::TemporalScalar;
};
289
290 template <typename T>
291 struct ARROW_EXPORT IntervalScalar : public TemporalScalar<T> {
292 using TemporalScalar<T>::TemporalScalar;
293 using ValueType = typename TemporalScalar<T>::ValueType;
294
IntervalScalarIntervalScalar295 explicit IntervalScalar(ValueType value)
296 : TemporalScalar<T>(value, TypeTraits<T>::type_singleton()) {}
IntervalScalarIntervalScalar297 IntervalScalar() : TemporalScalar<T>(TypeTraits<T>::type_singleton()) {}
298 };
299
/// \brief Scalar for MonthIntervalType values; constructors are inherited
/// from IntervalScalar
struct ARROW_EXPORT MonthIntervalScalar : public IntervalScalar<MonthIntervalType> {
  using IntervalScalar<MonthIntervalType>::IntervalScalar;
};
303
/// \brief Scalar for DayTimeIntervalType values; constructors are inherited
/// from IntervalScalar
struct ARROW_EXPORT DayTimeIntervalScalar : public IntervalScalar<DayTimeIntervalType> {
  using IntervalScalar<DayTimeIntervalType>::IntervalScalar;
};
307
/// \brief Scalar for DurationType values. Constructors are inherited from
/// TemporalScalar, all of which take a type.
struct ARROW_EXPORT DurationScalar : public TemporalScalar<DurationType> {
  using TemporalScalar<DurationType>::TemporalScalar;
};
311
312 struct ARROW_EXPORT Decimal128Scalar : public Scalar {
313 using Scalar::Scalar;
314 using TypeClass = Decimal128Type;
315 using ValueType = Decimal128;
316
Decimal128ScalarDecimal128Scalar317 Decimal128Scalar(Decimal128 value, std::shared_ptr<DataType> type)
318 : Scalar(std::move(type), true), value(value) {}
319
320 Decimal128 value;
321 };
322
/// \brief Common base of the list-like scalars, whose value is an Array
struct ARROW_EXPORT BaseListScalar : public Scalar {
  using Scalar::Scalar;
  using ValueType = std::shared_ptr<Array>;

  /// \brief Construct from a value array and an explicit type (defined out of line)
  BaseListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);

  /// \brief The array held by this scalar
  std::shared_ptr<Array> value;
};
331
/// \brief Scalar for ListType values
struct ARROW_EXPORT ListScalar : public BaseListScalar {
  using TypeClass = ListType;
  using BaseListScalar::BaseListScalar;

  /// \brief Construct from a value array alone (defined out of line;
  /// presumably the list type is derived from the array's type -- confirm)
  explicit ListScalar(std::shared_ptr<Array> value);
};
338
/// \brief Scalar for LargeListType values
struct ARROW_EXPORT LargeListScalar : public BaseListScalar {
  using TypeClass = LargeListType;
  using BaseListScalar::BaseListScalar;

  /// \brief Construct from a value array alone (defined out of line;
  /// presumably the list type is derived from the array's type -- confirm)
  explicit LargeListScalar(std::shared_ptr<Array> value);
};
345
/// \brief Scalar for MapType values
struct ARROW_EXPORT MapScalar : public BaseListScalar {
  using TypeClass = MapType;
  using BaseListScalar::BaseListScalar;

  /// \brief Construct from a value array alone (defined out of line;
  /// presumably the map type is derived from the array's type -- confirm)
  explicit MapScalar(std::shared_ptr<Array> value);
};
352
/// \brief Scalar for FixedSizeListType values
struct ARROW_EXPORT FixedSizeListScalar : public BaseListScalar {
  using TypeClass = FixedSizeListType;
  using BaseListScalar::BaseListScalar;

  /// \brief Construct from a value array and an explicit type (defined out of line)
  FixedSizeListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);

  /// \brief Construct from a value array alone (defined out of line;
  /// presumably the type is derived from the array -- confirm)
  explicit FixedSizeListScalar(std::shared_ptr<Array> value);
};
361
362 struct ARROW_EXPORT StructScalar : public Scalar {
363 using TypeClass = StructType;
364 using ValueType = std::vector<std::shared_ptr<Scalar>>;
365
366 std::vector<std::shared_ptr<Scalar>> value;
367
StructScalarStructScalar368 StructScalar(ValueType value, std::shared_ptr<DataType> type)
369 : Scalar(std::move(type), true), value(std::move(value)) {}
370
StructScalarStructScalar371 explicit StructScalar(std::shared_ptr<DataType> type) : Scalar(std::move(type)) {}
372 };
373
/// \brief Scalar for UnionType. It declares no value member and only
/// inherits Scalar's constructors.
struct ARROW_EXPORT UnionScalar : public Scalar {
  using Scalar::Scalar;
  using TypeClass = UnionType;
};
378
379 struct ARROW_EXPORT DictionaryScalar : public Scalar {
380 using TypeClass = DictionaryType;
381 using ValueType = std::shared_ptr<Scalar>;
382 ValueType value;
383
384 explicit DictionaryScalar(std::shared_ptr<DataType> type);
385
DictionaryScalarDictionaryScalar386 DictionaryScalar(ValueType value, std::shared_ptr<DataType> type)
387 : Scalar(std::move(type), true), value(std::move(value)) {}
388 };
389
/// \brief Scalar for ExtensionType. It declares no value member and only
/// inherits Scalar's constructors.
struct ARROW_EXPORT ExtensionScalar : public Scalar {
  using Scalar::Scalar;
  using TypeClass = ExtensionType;
};
394
/// \brief Scalar factory taking only a type (defined out of line;
/// judging by the name it returns a null scalar of that type -- confirm)
ARROW_EXPORT
std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type);
397
398 namespace internal {
399
// Fallback overload: a C-style variadic parameter ranks below every other
// conversion in overload resolution, so this no-op is selected for any
// argument pair that does not match the FixedSizeBinaryType overload below.
inline Status CheckBufferLength(...) { return Status::OK(); }

// Overload selected for a FixedSizeBinaryType paired with a Buffer value
// (defined out of line; presumably verifies the buffer length against the
// type's byte width -- confirm).
ARROW_EXPORT Status CheckBufferLength(const FixedSizeBinaryType* t,
                                      const std::shared_ptr<Buffer>* b);
404
405 } // namespace internal
406
/// \brief Type visitor used by MakeScalar(type, value) to box a value into
/// the scalar class associated with a runtime DataType.
///
/// ValueRef is the reference type of the incoming value (MakeScalar
/// instantiates it with Value&&).
template <typename ValueRef>
struct MakeScalarImpl {
  // Participates in overload resolution only when the scalar class for T can
  // be constructed from (ValueType, std::shared_ptr<DataType>) and ValueRef
  // is convertible to that ValueType.
  template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType,
            typename ValueType = typename ScalarType::ValueType,
            typename Enable = typename std::enable_if<
                std::is_constructible<ScalarType, ValueType,
                                      std::shared_ptr<DataType>>::value &&
                std::is_convertible<ValueRef, ValueType>::value>::type>
  Status Visit(const T& t) {
    // A no-op except for the (FixedSizeBinaryType, Buffer) overload.
    ARROW_RETURN_NOT_OK(internal::CheckBufferLength(&t, &value_));
    // type_ is moved into the new scalar; `t` remains valid because the
    // scalar now shares ownership of the type.
    out_ = std::make_shared<ScalarType>(
        static_cast<ValueType>(static_cast<ValueRef>(value_)), std::move(type_));
    return Status::OK();
  }

  // Fallback for every type the constrained overload above rejects.
  Status Visit(const DataType& t) {
    return Status::NotImplemented("constructing scalars of type ", t,
                                  " from unboxed values");
  }

  // Rvalue-qualified: consumes the visitor. type_ is dereferenced
  // unconditionally, so it must not be null.
  Result<std::shared_ptr<Scalar>> Finish() && {
    ARROW_RETURN_NOT_OK(VisitTypeInline(*type_, this));
    return std::move(out_);
  }

  std::shared_ptr<DataType> type_;  // requested scalar type
  ValueRef value_;                  // reference to the caller's value
  std::shared_ptr<Scalar> out_;     // result, set by a successful Visit
};
436
437 template <typename Value>
MakeScalar(std::shared_ptr<DataType> type,Value && value)438 Result<std::shared_ptr<Scalar>> MakeScalar(std::shared_ptr<DataType> type,
439 Value&& value) {
440 return MakeScalarImpl<Value&&>{type, std::forward<Value>(value), NULLPTR}.Finish();
441 }
442
443 /// \brief type inferring scalar factory
444 template <typename Value, typename Traits = CTypeTraits<typename std::decay<Value>::type>,
445 typename ScalarType = typename Traits::ScalarType,
446 typename Enable = decltype(ScalarType(std::declval<Value>(),
447 Traits::type_singleton()))>
MakeScalar(Value value)448 std::shared_ptr<Scalar> MakeScalar(Value value) {
449 return std::make_shared<ScalarType>(std::move(value), Traits::type_singleton());
450 }
451
MakeScalar(std::string value)452 inline std::shared_ptr<Scalar> MakeScalar(std::string value) {
453 return std::make_shared<StringScalar>(std::move(value));
454 }
455
456 } // namespace arrow
457