1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
20 #include <atomic>  // IWYU pragma: export
21 #include <cstdint>
22 #include <iosfwd>
23 #include <memory>
24 #include <string>
25 #include <type_traits>
26 #include <utility>
27 #include <vector>
28 
29 #include "arrow/compare.h"
30 #include "arrow/type.h"
31 #include "arrow/type_fwd.h"
32 #include "arrow/type_traits.h"
33 #include "arrow/util/bit_util.h"
34 #include "arrow/util/checked_cast.h"
35 #include "arrow/util/macros.h"
36 #include "arrow/util/string_view.h"  // IWYU pragma: export
37 #include "arrow/util/visibility.h"
38 
39 namespace arrow {
40 
// Forward declarations of types that are only referred to by pointer or
// reference in this header.
class Array;
class ArrayVisitor;
class MemoryPool;
class Status;

/// \brief Sentinel meaning "the null count has not been computed yet"
///
/// Slicing an array does not tell us how many nulls fall inside the sliced
/// range without scanning it. Rather than scanning eagerly, the null count is
/// recorded as -1 (any negative number would do) and is computed on the first
/// call to Array::null_count. See ARROW-33.
constexpr int64_t kUnknownNullCount = -1;
52 
53 // ----------------------------------------------------------------------
54 // Generic array data container
55 
56 /// \class ArrayData
57 /// \brief Mutable container for generic Arrow array data
58 ///
59 /// This data structure is a self-contained representation of the memory and
60 /// metadata inside an Arrow array data structure (called vectors in Java). The
61 /// classes arrow::Array and its subclasses provide strongly-typed accessors
62 /// with support for the visitor pattern and other affordances.
63 ///
64 /// This class is designed for easy internal data manipulation, analytical data
65 /// processing, and data transport to and from IPC messages. For example, we
66 /// could cast from int64 to float64 like so:
67 ///
68 /// Int64Array arr = GetMyData();
69 /// auto new_data = arr.data()->Copy();
70 /// new_data->type = arrow::float64();
71 /// DoubleArray double_arr(new_data);
72 ///
73 /// This object is also useful in an analytics setting where memory may be
74 /// reused. For example, if we had a group of operations all returning doubles,
75 /// say:
76 ///
77 /// Log(Sqrt(Expr(arr)))
78 ///
79 /// Then the low-level implementations of each of these functions could have
80 /// the signatures
81 ///
82 /// void Log(const ArrayData& values, ArrayData* out);
83 ///
84 /// As another example a function may consume one or more memory buffers in an
85 /// input array and replace them with newly-allocated data, changing the output
86 /// data type as well.
87 struct ARROW_EXPORT ArrayData {
ArrayDataArrayData88   ArrayData() : length(0), null_count(0), offset(0) {}
89 
90   ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
91             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
typeArrayData92       : type(type), length(length), null_count(null_count), offset(offset) {}
93 
94   ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
95             std::vector<std::shared_ptr<Buffer>> buffers,
96             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
ArrayDataArrayData97       : ArrayData(type, length, null_count, offset) {
98     this->buffers = std::move(buffers);
99   }
100 
101   ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
102             std::vector<std::shared_ptr<Buffer>> buffers,
103             std::vector<std::shared_ptr<ArrayData>> child_data,
104             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
ArrayDataArrayData105       : ArrayData(type, length, null_count, offset) {
106     this->buffers = std::move(buffers);
107     this->child_data = std::move(child_data);
108   }
109 
110   static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
111                                          int64_t length,
112                                          std::vector<std::shared_ptr<Buffer>> buffers,
113                                          int64_t null_count = kUnknownNullCount,
114                                          int64_t offset = 0);
115 
116   static std::shared_ptr<ArrayData> Make(
117       const std::shared_ptr<DataType>& type, int64_t length,
118       std::vector<std::shared_ptr<Buffer>> buffers,
119       std::vector<std::shared_ptr<ArrayData>> child_data,
120       int64_t null_count = kUnknownNullCount, int64_t offset = 0);
121 
122   static std::shared_ptr<ArrayData> Make(
123       const std::shared_ptr<DataType>& type, int64_t length,
124       std::vector<std::shared_ptr<Buffer>> buffers,
125       std::vector<std::shared_ptr<ArrayData>> child_data,
126       std::shared_ptr<Array> dictionary, int64_t null_count = kUnknownNullCount,
127       int64_t offset = 0);
128 
129   static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
130                                          int64_t length,
131                                          int64_t null_count = kUnknownNullCount,
132                                          int64_t offset = 0);
133 
134   // Move constructor
ArrayDataArrayData135   ArrayData(ArrayData&& other) noexcept
136       : type(std::move(other.type)),
137         length(other.length),
138         offset(other.offset),
139         buffers(std::move(other.buffers)),
140         child_data(std::move(other.child_data)),
141         dictionary(std::move(other.dictionary)) {
142     SetNullCount(other.null_count);
143   }
144 
145   // Copy constructor
ArrayDataArrayData146   ArrayData(const ArrayData& other) noexcept
147       : type(other.type),
148         length(other.length),
149         offset(other.offset),
150         buffers(other.buffers),
151         child_data(other.child_data),
152         dictionary(other.dictionary) {
153     SetNullCount(other.null_count);
154   }
155 
156   // Move assignment
157   ArrayData& operator=(ArrayData&& other) {
158     type = std::move(other.type);
159     length = other.length;
160     SetNullCount(other.null_count);
161     offset = other.offset;
162     buffers = std::move(other.buffers);
163     child_data = std::move(other.child_data);
164     dictionary = std::move(other.dictionary);
165     return *this;
166   }
167 
168   // Copy assignment
169   ArrayData& operator=(const ArrayData& other) {
170     type = other.type;
171     length = other.length;
172     SetNullCount(other.null_count);
173     offset = other.offset;
174     buffers = other.buffers;
175     child_data = other.child_data;
176     dictionary = other.dictionary;
177     return *this;
178   }
179 
CopyArrayData180   std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
181 
182   // Access a buffer's data as a typed C pointer
183   template <typename T>
GetValuesArrayData184   inline const T* GetValues(int i, int64_t absolute_offset) const {
185     if (buffers[i]) {
186       return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
187     } else {
188       return NULLPTR;
189     }
190   }
191 
192   template <typename T>
GetValuesArrayData193   inline const T* GetValues(int i) const {
194     return GetValues<T>(i, offset);
195   }
196 
197   // Access a buffer's data as a typed C pointer
198   template <typename T>
GetMutableValuesArrayData199   inline T* GetMutableValues(int i, int64_t absolute_offset) {
200     if (buffers[i]) {
201       return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
202     } else {
203       return NULLPTR;
204     }
205   }
206 
207   template <typename T>
GetMutableValuesArrayData208   inline T* GetMutableValues(int i) {
209     return GetMutableValues<T>(i, offset);
210   }
211 
212   // Construct a zero-copy slice of the data with the indicated offset and length
213   ArrayData Slice(int64_t offset, int64_t length) const;
214 
SetNullCountArrayData215   void SetNullCount(int64_t v) { null_count.store(v); }
216 
217   /// \brief Return null count, or compute and set it if it's not known
218   int64_t GetNullCount() const;
219 
220   std::shared_ptr<DataType> type;
221   int64_t length;
222   mutable std::atomic<int64_t> null_count;
223   // The logical start point into the physical buffers (in values, not bytes).
224   // Note that, for child data, this must be *added* to the child data's own offset.
225   int64_t offset;
226   std::vector<std::shared_ptr<Buffer>> buffers;
227   std::vector<std::shared_ptr<ArrayData>> child_data;
228 
229   // The dictionary for this Array, if any. Only used for dictionary
230   // type
231   std::shared_ptr<Array> dictionary;
232 };
233 
234 /// \brief Create a strongly-typed Array instance from generic ArrayData
235 /// \param[in] data the array contents
236 /// \return the resulting Array instance
237 ARROW_EXPORT
238 std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data);
239 
240 /// \brief Create a strongly-typed Array instance with all elements null
241 /// \param[in] type the array type
242 /// \param[in] length the array length
243 /// \param[in] pool the memory pool to allocate memory from
244 ARROW_EXPORT
245 Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
246                                                int64_t length,
247                                                MemoryPool* pool = default_memory_pool());
248 
249 /// \brief Create an Array instance whose slots are the given scalar
250 /// \param[in] scalar the value with which to fill the array
251 /// \param[in] length the array length
252 /// \param[in] pool the memory pool to allocate memory from
253 ARROW_EXPORT
254 Result<std::shared_ptr<Array>> MakeArrayFromScalar(
255     const Scalar& scalar, int64_t length, MemoryPool* pool = default_memory_pool());
256 
257 /// \brief Create a strongly-typed Array instance with all elements null
258 /// \param[in] type the array type
259 /// \param[in] length the array length
260 /// \param[out] out resulting Array instance
261 ARROW_DEPRECATED("Use Result-returning version")
262 ARROW_EXPORT
263 Status MakeArrayOfNull(const std::shared_ptr<DataType>& type, int64_t length,
264                        std::shared_ptr<Array>* out);
265 
266 /// \brief Create a strongly-typed Array instance with all elements null
267 /// \param[in] pool the pool from which memory for this array will be allocated
268 /// \param[in] type the array type
269 /// \param[in] length the array length
270 /// \param[out] out resulting Array instance
271 ARROW_DEPRECATED("Use Result-returning version")
272 ARROW_EXPORT
273 Status MakeArrayOfNull(MemoryPool* pool, const std::shared_ptr<DataType>& type,
274                        int64_t length, std::shared_ptr<Array>* out);
275 
276 /// \brief Create an Array instance whose slots are the given scalar
277 /// \param[in] scalar the value with which to fill the array
278 /// \param[in] length the array length
279 /// \param[out] out resulting Array instance
280 ARROW_DEPRECATED("Use Result-returning version")
281 ARROW_EXPORT
282 Status MakeArrayFromScalar(const Scalar& scalar, int64_t length,
283                            std::shared_ptr<Array>* out);
284 
285 /// \brief Create a strongly-typed Array instance with all elements null
286 /// \param[in] pool the pool from which memory for this array will be allocated
287 /// \param[in] scalar the value with which to fill the array
288 /// \param[in] length the array length
289 /// \param[out] out resulting Array instance
290 ARROW_DEPRECATED("Use Result-returning version")
291 ARROW_EXPORT
292 Status MakeArrayFromScalar(MemoryPool* pool, const Scalar& scalar, int64_t length,
293                            std::shared_ptr<Array>* out);
294 
295 // ----------------------------------------------------------------------
296 // User array accessor types
297 
298 /// \brief Array base type
299 /// Immutable data array with some logical type and some length.
300 ///
301 /// Any memory is owned by the respective Buffer instance (or its parents).
302 ///
303 /// The base class is only required to have a null bitmap buffer if the null
304 /// count is greater than 0
305 ///
306 /// If known, the null count can be provided in the base Array constructor. If
307 /// the null count is not known, pass -1 to indicate that the null count is to
308 /// be computed on the first call to null_count()
309 class ARROW_EXPORT Array {
310  public:
311   virtual ~Array() = default;
312 
313   /// \brief Return true if value at index is null. Does not boundscheck
IsNull(int64_t i)314   bool IsNull(int64_t i) const {
315     return null_bitmap_data_ != NULLPTR &&
316            !BitUtil::GetBit(null_bitmap_data_, i + data_->offset);
317   }
318 
319   /// \brief Return true if value at index is valid (not null). Does not
320   /// boundscheck
IsValid(int64_t i)321   bool IsValid(int64_t i) const {
322     return null_bitmap_data_ == NULLPTR ||
323            BitUtil::GetBit(null_bitmap_data_, i + data_->offset);
324   }
325 
326   /// \brief Return a Scalar containing the value of this array at i
327   Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
328 
329   /// Size in the number of elements this array contains.
length()330   int64_t length() const { return data_->length; }
331 
332   /// A relative position into another array's data, to enable zero-copy
333   /// slicing. This value defaults to zero
offset()334   int64_t offset() const { return data_->offset; }
335 
336   /// The number of null entries in the array. If the null count was not known
337   /// at time of construction (and set to a negative value), then the null
338   /// count will be computed and cached on the first invocation of this
339   /// function
340   int64_t null_count() const;
341 
type()342   std::shared_ptr<DataType> type() const { return data_->type; }
type_id()343   Type::type type_id() const { return data_->type->id(); }
344 
345   /// Buffer for the null bitmap.
346   ///
347   /// Note that for `null_count == 0`, this can be null.
348   /// This buffer does not account for any slice offset
null_bitmap()349   std::shared_ptr<Buffer> null_bitmap() const { return data_->buffers[0]; }
350 
351   /// Raw pointer to the null bitmap.
352   ///
353   /// Note that for `null_count == 0`, this can be null.
354   /// This buffer does not account for any slice offset
null_bitmap_data()355   const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
356 
357   /// Equality comparison with another array
358   bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
359   bool Equals(const std::shared_ptr<Array>& arr,
360               const EqualOptions& = EqualOptions::Defaults()) const;
361 
362   /// \brief Return the formatted unified diff of arrow::Diff between this
363   /// Array and another Array
364   std::string Diff(const Array& other) const;
365 
366   /// Approximate equality comparison with another array
367   ///
368   /// epsilon is only used if this is FloatArray or DoubleArray
369   bool ApproxEquals(const std::shared_ptr<Array>& arr,
370                     const EqualOptions& = EqualOptions::Defaults()) const;
371   bool ApproxEquals(const Array& arr,
372                     const EqualOptions& = EqualOptions::Defaults()) const;
373 
374   /// Compare if the range of slots specified are equal for the given array and
375   /// this array.  end_idx exclusive.  This methods does not bounds check.
376   bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
377                    const Array& other) const;
378   bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
379                    const std::shared_ptr<Array>& other) const;
380   bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
381                    int64_t other_start_idx) const;
382   bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
383                    int64_t end_idx, int64_t other_start_idx) const;
384 
385   Status Accept(ArrayVisitor* visitor) const;
386 
387   /// Construct a zero-copy view of this array with the given type.
388   ///
389   /// This method checks if the types are layout-compatible.
390   /// Nested types are traversed in depth-first order. Data buffers must have
391   /// the same item sizes, even though the logical types may be different.
392   /// An error is returned if the types are not layout-compatible.
393   Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
394 
395   ARROW_DEPRECATED("Use Result-returning version")
396   Status View(const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out) const;
397 
398   /// Construct a zero-copy slice of the array with the indicated offset and
399   /// length
400   ///
401   /// \param[in] offset the position of the first element in the constructed
402   /// slice
403   /// \param[in] length the length of the slice. If there are not enough
404   /// elements in the array, the length will be adjusted accordingly
405   ///
406   /// \return a new object wrapped in std::shared_ptr<Array>
407   std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
408 
409   /// Slice from offset until end of the array
410   std::shared_ptr<Array> Slice(int64_t offset) const;
411 
data()412   std::shared_ptr<ArrayData> data() const { return data_; }
413 
num_fields()414   int num_fields() const { return static_cast<int>(data_->child_data.size()); }
415 
416   /// \return PrettyPrint representation of array suitable for debugging
417   std::string ToString() const;
418 
419   /// \brief Perform cheap validation checks to determine obvious inconsistencies
420   /// within the array's internal data.
421   ///
422   /// This is O(k) where k is the number of descendents.
423   ///
424   /// \return Status
425   Status Validate() const;
426 
427   /// \brief Perform extensive validation checks to determine inconsistencies
428   /// within the array's internal data.
429   ///
430   /// This is potentially O(k*n) where k is the number of descendents and n
431   /// is the array length.
432   ///
433   /// \return Status
434   Status ValidateFull() const;
435 
436  protected:
Array()437   Array() : null_bitmap_data_(NULLPTR) {}
438 
439   std::shared_ptr<ArrayData> data_;
440   const uint8_t* null_bitmap_data_;
441 
442   /// Protected method for constructors
SetData(const std::shared_ptr<ArrayData> & data)443   inline void SetData(const std::shared_ptr<ArrayData>& data) {
444     if (data->buffers.size() > 0 && data->buffers[0]) {
445       null_bitmap_data_ = data->buffers[0]->data();
446     } else {
447       null_bitmap_data_ = NULLPTR;
448     }
449     data_ = data;
450   }
451 
452  private:
453   ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
454 };
455 
456 namespace internal {
457 
458 /// Given a number of ArrayVectors, treat each ArrayVector as the
459 /// chunks of a chunked array.  Then rechunk each ArrayVector such that
460 /// all ArrayVectors are chunked identically.  It is mandatory that
461 /// all ArrayVectors contain the same total number of elements.
462 ARROW_EXPORT
463 std::vector<ArrayVector> RechunkArraysConsistently(const std::vector<ArrayVector>&);
464 
465 }  // namespace internal
466 
467 static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
468   os << x.ToString();
469   return os;
470 }
471 
472 /// Base class for non-nested arrays
473 class ARROW_EXPORT FlatArray : public Array {
474  protected:
475   using Array::Array;
476 };
477 
478 /// Degenerate null type Array
479 class ARROW_EXPORT NullArray : public FlatArray {
480  public:
481   using TypeClass = NullType;
482 
NullArray(const std::shared_ptr<ArrayData> & data)483   explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
484   explicit NullArray(int64_t length);
485 
486  private:
SetData(const std::shared_ptr<ArrayData> & data)487   inline void SetData(const std::shared_ptr<ArrayData>& data) {
488     null_bitmap_data_ = NULLPTR;
489     data->null_count = data->length;
490     data_ = data;
491   }
492 };
493 
494 /// Base class for arrays of fixed-size logical types
495 class ARROW_EXPORT PrimitiveArray : public FlatArray {
496  public:
497   PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
498                  const std::shared_ptr<Buffer>& data,
499                  const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
500                  int64_t null_count = kUnknownNullCount, int64_t offset = 0);
501 
502   /// Does not account for any slice offset
values()503   std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
504 
505  protected:
PrimitiveArray()506   PrimitiveArray() : raw_values_(NULLPTR) {}
507 
SetData(const std::shared_ptr<ArrayData> & data)508   inline void SetData(const std::shared_ptr<ArrayData>& data) {
509     auto values = data->buffers[1];
510     this->Array::SetData(data);
511     raw_values_ = values == NULLPTR ? NULLPTR : values->data();
512   }
513 
PrimitiveArray(const std::shared_ptr<ArrayData> & data)514   explicit inline PrimitiveArray(const std::shared_ptr<ArrayData>& data) {
515     SetData(data);
516   }
517 
518   const uint8_t* raw_values_;
519 };
520 
521 /// Concrete Array class for numeric data.
522 template <typename TYPE>
523 class NumericArray : public PrimitiveArray {
524  public:
525   using TypeClass = TYPE;
526   using value_type = typename TypeClass::c_type;
527 
NumericArray(const std::shared_ptr<ArrayData> & data)528   explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
529 
530   // Only enable this constructor without a type argument for types without additional
531   // metadata
532   template <typename T1 = TYPE>
533   NumericArray(enable_if_parameter_free<T1, int64_t> length,
534                const std::shared_ptr<Buffer>& data,
535                const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
536                int64_t null_count = kUnknownNullCount, int64_t offset = 0)
PrimitiveArray(TypeTraits<T1>::type_singleton (),length,data,null_bitmap,null_count,offset)537       : PrimitiveArray(TypeTraits<T1>::type_singleton(), length, data, null_bitmap,
538                        null_count, offset) {}
539 
raw_values()540   const value_type* raw_values() const {
541     return reinterpret_cast<const value_type*>(raw_values_) + data_->offset;
542   }
543 
Value(int64_t i)544   value_type Value(int64_t i) const { return raw_values()[i]; }
545 
546   // For API compatibility with BinaryArray etc.
GetView(int64_t i)547   value_type GetView(int64_t i) const { return Value(i); }
548 
549  protected:
550   using PrimitiveArray::PrimitiveArray;
551 };
552 
553 /// Concrete Array class for boolean data
554 class ARROW_EXPORT BooleanArray : public PrimitiveArray {
555  public:
556   using TypeClass = BooleanType;
557 
558   explicit BooleanArray(const std::shared_ptr<ArrayData>& data);
559 
560   BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
561                const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
562                int64_t null_count = kUnknownNullCount, int64_t offset = 0);
563 
Value(int64_t i)564   bool Value(int64_t i) const {
565     return BitUtil::GetBit(reinterpret_cast<const uint8_t*>(raw_values_),
566                            i + data_->offset);
567   }
568 
GetView(int64_t i)569   bool GetView(int64_t i) const { return Value(i); }
570 
571  protected:
572   using PrimitiveArray::PrimitiveArray;
573 };
574 
575 // ----------------------------------------------------------------------
576 // ListArray
577 
578 /// Base class for variable-sized list arrays, regardless of offset size.
579 template <typename TYPE>
580 class BaseListArray : public Array {
581  public:
582   using TypeClass = TYPE;
583   using offset_type = typename TypeClass::offset_type;
584 
list_type()585   const TypeClass* list_type() const { return list_type_; }
586 
587   /// \brief Return array object containing the list's values
values()588   std::shared_ptr<Array> values() const { return values_; }
589 
590   /// Note that this buffer does not account for any slice offset
value_offsets()591   std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
592 
value_type()593   std::shared_ptr<DataType> value_type() const { return list_type_->value_type(); }
594 
595   /// Return pointer to raw value offsets accounting for any slice offset
raw_value_offsets()596   const offset_type* raw_value_offsets() const {
597     return raw_value_offsets_ + data_->offset;
598   }
599 
600   // The following functions will not perform boundschecking
value_offset(int64_t i)601   offset_type value_offset(int64_t i) const {
602     return raw_value_offsets_[i + data_->offset];
603   }
value_length(int64_t i)604   offset_type value_length(int64_t i) const {
605     i += data_->offset;
606     return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
607   }
value_slice(int64_t i)608   std::shared_ptr<Array> value_slice(int64_t i) const {
609     return values_->Slice(value_offset(i), value_length(i));
610   }
611 
612  protected:
613   const TypeClass* list_type_ = NULLPTR;
614   std::shared_ptr<Array> values_;
615   const offset_type* raw_value_offsets_ = NULLPTR;
616 };
617 
618 /// Concrete Array class for list data
619 class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
620  public:
621   explicit ListArray(std::shared_ptr<ArrayData> data);
622 
623   ListArray(std::shared_ptr<DataType> type, int64_t length,
624             std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
625             std::shared_ptr<Buffer> null_bitmap = NULLPTR,
626             int64_t null_count = kUnknownNullCount, int64_t offset = 0);
627 
628   /// \brief Construct ListArray from array of offsets and child value array
629   ///
630   /// This function does the bare minimum of validation of the offsets and
631   /// input types, and will allocate a new offsets array if necessary (i.e. if
632   /// the offsets contain any nulls). If the offsets do not have nulls, they
633   /// are assumed to be well-formed
634   ///
635   /// \param[in] offsets Array containing n + 1 offsets encoding length and
636   /// size. Must be of int32 type
637   /// \param[in] values Array containing list values
638   /// \param[in] pool MemoryPool in case new offsets array needs to be
639   /// allocated because of null values
640   static Result<std::shared_ptr<Array>> FromArrays(
641       const Array& offsets, const Array& values,
642       MemoryPool* pool = default_memory_pool());
643 
644   ARROW_DEPRECATED("Use Result-returning version")
645   static Status FromArrays(const Array& offsets, const Array& values, MemoryPool* pool,
646                            std::shared_ptr<Array>* out);
647 
648   /// \brief Return an Array that is a concatenation of the lists in this array.
649   ///
650   /// Note that it's different from `values()` in that it takes into
651   /// consideration of this array's offsets as well as null elements backed
652   /// by non-empty lists (they are skipped, thus copying may be needed).
653   Result<std::shared_ptr<Array>> Flatten(
654       MemoryPool* memory_pool = default_memory_pool()) const;
655 
656  protected:
657   // This constructor defers SetData to a derived array class
658   ListArray() = default;
659   void SetData(const std::shared_ptr<ArrayData>& data,
660                Type::type expected_type_id = Type::LIST);
661 };
662 
663 /// Concrete Array class for large list data (with 64-bit offsets)
664 class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
665  public:
666   explicit LargeListArray(const std::shared_ptr<ArrayData>& data);
667 
668   LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
669                  const std::shared_ptr<Buffer>& value_offsets,
670                  const std::shared_ptr<Array>& values,
671                  const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
672                  int64_t null_count = kUnknownNullCount, int64_t offset = 0);
673 
674   /// \brief Construct LargeListArray from array of offsets and child value array
675   ///
676   /// This function does the bare minimum of validation of the offsets and
677   /// input types, and will allocate a new offsets array if necessary (i.e. if
678   /// the offsets contain any nulls). If the offsets do not have nulls, they
679   /// are assumed to be well-formed
680   ///
681   /// \param[in] offsets Array containing n + 1 offsets encoding length and
682   /// size. Must be of int64 type
683   /// \param[in] values Array containing list values
684   /// \param[in] pool MemoryPool in case new offsets array needs to be
685   /// allocated because of null values
686   static Result<std::shared_ptr<Array>> FromArrays(
687       const Array& offsets, const Array& values,
688       MemoryPool* pool = default_memory_pool());
689 
690   ARROW_DEPRECATED("Use Result-returning version")
691   static Status FromArrays(const Array& offsets, const Array& values, MemoryPool* pool,
692                            std::shared_ptr<Array>* out);
693 
694   /// \brief Return an Array that is a concatenation of the lists in this array.
695   ///
696   /// Note that it's different from `values()` in that it takes into
697   /// consideration of this array's offsets as well as null elements backed
698   /// by non-empty lists (they are skipped, thus copying may be needed).
699   Result<std::shared_ptr<Array>> Flatten(
700       MemoryPool* memory_pool = default_memory_pool()) const;
701 
702  protected:
703   void SetData(const std::shared_ptr<ArrayData>& data);
704 };
705 
706 // ----------------------------------------------------------------------
707 // MapArray
708 
/// Concrete Array class for map data
///
/// NB: "value" in this context refers to a pair of a key and the corresponding item
///
/// MapArray derives from ListArray and additionally holds a key array and an
/// item array, returned by keys() and items().
class ARROW_EXPORT MapArray : public ListArray {
 public:
  using TypeClass = MapType;

  /// \brief Construct MapArray from an ArrayData instance
  explicit MapArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct MapArray from a type, an offsets buffer and separate
  /// key and item arrays
  ///
  /// null_count defaults to kUnknownNullCount (not known up front).
  MapArray(const std::shared_ptr<DataType>& type, int64_t length,
           const std::shared_ptr<Buffer>& value_offsets,
           const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
           const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
           int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct MapArray from a type, an offsets buffer and a single
  /// array of values
  ///
  /// As noted on this class, a "value" is a pair of a key and its item.
  /// null_count defaults to kUnknownNullCount (not known up front).
  MapArray(const std::shared_ptr<DataType>& type, int64_t length,
           const std::shared_ptr<Buffer>& value_offsets,
           const std::shared_ptr<Array>& values,
           const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
           int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct MapArray from array of offsets and child key, item arrays
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types, and will allocate a new offsets array if necessary (i.e. if
  /// the offsets contain any nulls). If the offsets do not have nulls, they
  /// are assumed to be well-formed
  ///
  /// \param[in] offsets Array containing n + 1 offsets encoding length and
  /// size. Must be of int32 type
  /// \param[in] keys Array containing key values
  /// \param[in] items Array containing item values
  /// \param[in] pool MemoryPool in case new offsets array needs to be
  /// allocated because of null values
  static Result<std::shared_ptr<Array>> FromArrays(
      const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
      const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());

  /// Status-returning overload taking an `out` parameter; superseded by the
  /// Result-returning FromArrays() above. Note that `pool` has no default here.
  ARROW_DEPRECATED("Use Result-returning version")
  static Status FromArrays(const std::shared_ptr<Array>& offsets,
                           const std::shared_ptr<Array>& keys,
                           const std::shared_ptr<Array>& items, MemoryPool* pool,
                           std::shared_ptr<Array>* out);

  /// \brief Return the stored MapType pointer (non-owning)
  const MapType* map_type() const { return map_type_; }

  /// \brief Return array object containing all map keys
  std::shared_ptr<Array> keys() const { return keys_; }

  /// \brief Return array object containing all mapped items
  std::shared_ptr<Array> items() const { return items_; }

  /// Validate child data before constructing the actual MapArray.
  static Status ValidateChildData(
      const std::vector<std::shared_ptr<ArrayData>>& child_data);

 protected:
  // NOTE(review): presumably populates map_type_, keys_ and items_ from
  // `data`; the definition is not in this header, confirm there.
  void SetData(const std::shared_ptr<ArrayData>& data);

 private:
  // Backing storage for map_type(), keys() and items()
  const MapType* map_type_;
  std::shared_ptr<Array> keys_, items_;
};
772 
773 // ----------------------------------------------------------------------
774 // FixedSizeListArray
775 
776 /// Concrete Array class for fixed size list data
777 class ARROW_EXPORT FixedSizeListArray : public Array {
778  public:
779   using TypeClass = FixedSizeListType;
780   using offset_type = TypeClass::offset_type;
781 
782   explicit FixedSizeListArray(const std::shared_ptr<ArrayData>& data);
783 
784   FixedSizeListArray(const std::shared_ptr<DataType>& type, int64_t length,
785                      const std::shared_ptr<Array>& values,
786                      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
787                      int64_t null_count = kUnknownNullCount, int64_t offset = 0);
788 
789   const FixedSizeListType* list_type() const;
790 
791   /// \brief Return array object containing the list's values
792   std::shared_ptr<Array> values() const;
793 
794   std::shared_ptr<DataType> value_type() const;
795 
796   // The following functions will not perform boundschecking
value_offset(int64_t i)797   int32_t value_offset(int64_t i) const {
798     i += data_->offset;
799     return static_cast<int32_t>(list_size_ * i);
800   }
801   int32_t value_length(int64_t i = 0) const { return list_size_; }
value_slice(int64_t i)802   std::shared_ptr<Array> value_slice(int64_t i) const {
803     return values_->Slice(value_offset(i), value_length(i));
804   }
805 
806   /// \brief Construct FixedSizeListArray from child value array and value_length
807   ///
808   /// \param[in] values Array containing list values
809   /// \param[in] list_size The fixed length of each list
810   /// \return Will have length equal to values.length() / list_size
811   static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
812                                                    int32_t list_size);
813 
814  protected:
815   void SetData(const std::shared_ptr<ArrayData>& data);
816   int32_t list_size_;
817 
818  private:
819   std::shared_ptr<Array> values_;
820 };
821 
822 // ----------------------------------------------------------------------
823 // Binary and String
824 
825 /// Base class for variable-sized binary arrays, regardless of offset size
826 /// and logical interpretation.
827 template <typename TYPE>
828 class BaseBinaryArray : public FlatArray {
829  public:
830   using TypeClass = TYPE;
831   using offset_type = typename TypeClass::offset_type;
832 
833   /// Return the pointer to the given elements bytes
834   // XXX should GetValue(int64_t i) return a string_view?
GetValue(int64_t i,offset_type * out_length)835   const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
836     // Account for base offset
837     i += data_->offset;
838     const offset_type pos = raw_value_offsets_[i];
839     *out_length = raw_value_offsets_[i + 1] - pos;
840     return raw_data_ + pos;
841   }
842 
843   /// \brief Get binary value as a string_view
844   ///
845   /// \param i the value index
846   /// \return the view over the selected value
GetView(int64_t i)847   util::string_view GetView(int64_t i) const {
848     // Account for base offset
849     i += data_->offset;
850     const offset_type pos = raw_value_offsets_[i];
851     return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
852                              raw_value_offsets_[i + 1] - pos);
853   }
854 
855   /// \brief Get binary value as a std::string
856   ///
857   /// \param i the value index
858   /// \return the value copied into a std::string
GetString(int64_t i)859   std::string GetString(int64_t i) const { return std::string(GetView(i)); }
860 
861   /// Note that this buffer does not account for any slice offset
value_offsets()862   std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
863 
864   /// Note that this buffer does not account for any slice offset
value_data()865   std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
866 
raw_value_offsets()867   const offset_type* raw_value_offsets() const {
868     return raw_value_offsets_ + data_->offset;
869   }
870 
871   // Neither of these functions will perform boundschecking
value_offset(int64_t i)872   offset_type value_offset(int64_t i) const {
873     return raw_value_offsets_[i + data_->offset];
874   }
value_length(int64_t i)875   offset_type value_length(int64_t i) const {
876     i += data_->offset;
877     return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
878   }
879 
880  protected:
881   // For subclasses
BaseBinaryArray()882   BaseBinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {}
883 
884   // Protected method for constructors
SetData(const std::shared_ptr<ArrayData> & data)885   void SetData(const std::shared_ptr<ArrayData>& data) {
886     auto value_offsets = data->buffers[1];
887     auto value_data = data->buffers[2];
888     this->Array::SetData(data);
889     raw_data_ = value_data == NULLPTR ? NULLPTR : value_data->data();
890     raw_value_offsets_ =
891         value_offsets == NULLPTR
892             ? NULLPTR
893             : reinterpret_cast<const offset_type*>(value_offsets->data());
894   }
895 
896   const offset_type* raw_value_offsets_;
897   const uint8_t* raw_data_;
898 };
899 
/// Concrete Array class for variable-size binary data
///
/// Element accessors (GetValue(), GetView(), GetString(), ...) are inherited
/// from BaseBinaryArray<BinaryType>.
class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
 public:
  /// \brief Construct BinaryArray from an ArrayData instance
  explicit BinaryArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct BinaryArray from a length, an offsets buffer and a
  /// data buffer
  ///
  /// null_count defaults to kUnknownNullCount (not known up front).
  BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
              const std::shared_ptr<Buffer>& data,
              const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
              int64_t null_count = kUnknownNullCount, int64_t offset = 0);

 protected:
  // For subclasses such as StringArray
  BinaryArray() : BaseBinaryArray() {}
};
914 
/// Concrete Array class for variable-size string (utf-8) data
///
/// Derives from BinaryArray and overrides TypeClass with StringType.
class ARROW_EXPORT StringArray : public BinaryArray {
 public:
  using TypeClass = StringType;

  /// \brief Construct StringArray from an ArrayData instance
  explicit StringArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct StringArray from a length, an offsets buffer and a
  /// data buffer
  ///
  /// null_count defaults to kUnknownNullCount (not known up front).
  StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
              const std::shared_ptr<Buffer>& data,
              const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
              int64_t null_count = kUnknownNullCount, int64_t offset = 0);
};
927 
/// Concrete Array class for large variable-size binary data
///
/// Element accessors are inherited from BaseBinaryArray<LargeBinaryType>, so
/// the offset width is LargeBinaryType::offset_type.
class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
 public:
  /// \brief Construct LargeBinaryArray from an ArrayData instance
  explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct LargeBinaryArray from a length, an offsets buffer and
  /// a data buffer
  ///
  /// null_count defaults to kUnknownNullCount (not known up front).
  LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                   const std::shared_ptr<Buffer>& data,
                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);

 protected:
  // For subclasses such as LargeStringArray
  LargeBinaryArray() : BaseBinaryArray() {}
};
942 
/// Concrete Array class for large variable-size string (utf-8) data
///
/// Derives from LargeBinaryArray and overrides TypeClass with LargeStringType.
class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
 public:
  using TypeClass = LargeStringType;

  /// \brief Construct LargeStringArray from an ArrayData instance
  explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct LargeStringArray from a length, an offsets buffer and
  /// a data buffer
  ///
  /// null_count defaults to kUnknownNullCount (not known up front).
  LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                   const std::shared_ptr<Buffer>& data,
                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);
};
955 
956 // ----------------------------------------------------------------------
957 // Fixed width binary
958 
959 /// Concrete Array class for fixed-size binary data
960 class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
961  public:
962   using TypeClass = FixedSizeBinaryType;
963 
964   explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
965 
966   FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
967                        const std::shared_ptr<Buffer>& data,
968                        const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
969                        int64_t null_count = kUnknownNullCount, int64_t offset = 0);
970 
971   const uint8_t* GetValue(int64_t i) const;
Value(int64_t i)972   const uint8_t* Value(int64_t i) const { return GetValue(i); }
973 
GetView(int64_t i)974   util::string_view GetView(int64_t i) const {
975     return util::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width());
976   }
977 
GetString(int64_t i)978   std::string GetString(int64_t i) const { return std::string(GetView(i)); }
979 
byte_width()980   int32_t byte_width() const { return byte_width_; }
981 
raw_values()982   const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
983 
984  protected:
SetData(const std::shared_ptr<ArrayData> & data)985   inline void SetData(const std::shared_ptr<ArrayData>& data) {
986     this->PrimitiveArray::SetData(data);
987     byte_width_ =
988         internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
989   }
990 
991   int32_t byte_width_;
992 };
993 
994 /// DayTimeArray
995 /// ---------------------
996 /// \brief Array of Day and Millisecond values.
997 class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
998  public:
999   using TypeClass = DayTimeIntervalType;
1000 
1001   explicit DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data);
1002 
1003   DayTimeIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
1004                        const std::shared_ptr<Buffer>& data,
1005                        const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
1006                        int64_t null_count = kUnknownNullCount, int64_t offset = 0);
1007 
1008   TypeClass::DayMilliseconds GetValue(int64_t i) const;
Value(int64_t i)1009   TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }
1010 
1011   // For compatibility with Take kernel.
GetView(int64_t i)1012   TypeClass::DayMilliseconds GetView(int64_t i) const { return GetValue(i); }
1013 
byte_width()1014   int32_t byte_width() const { return sizeof(TypeClass::DayMilliseconds); }
1015 
raw_values()1016   const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
1017 
1018  protected:
SetData(const std::shared_ptr<ArrayData> & data)1019   inline void SetData(const std::shared_ptr<ArrayData>& data) {
1020     this->PrimitiveArray::SetData(data);
1021   }
1022 };
1023 
1024 // ----------------------------------------------------------------------
1025 // Decimal128Array
1026 
/// Concrete Array class for 128-bit decimal data
///
/// Derives from FixedSizeBinaryArray and inherits its constructors.
class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
 public:
  using TypeClass = Decimal128Type;

  // Make the FixedSizeBinaryArray constructors available on this class
  using FixedSizeBinaryArray::FixedSizeBinaryArray;

  /// \brief Construct Decimal128Array from ArrayData instance
  explicit Decimal128Array(const std::shared_ptr<ArrayData>& data);

  /// \brief Return element i formatted as a std::string
  ///
  /// NOTE(review): the exact textual format is defined out of line; confirm
  /// there before relying on it.
  std::string FormatValue(int64_t i) const;
};
1039 
// Backward compatibility: DecimalArray is kept as an alias of Decimal128Array
using DecimalArray = Decimal128Array;
1042 
1043 // ----------------------------------------------------------------------
1044 // Struct
1045 
/// Concrete Array class for struct data
class ARROW_EXPORT StructArray : public Array {
 public:
  using TypeClass = StructType;

  /// \brief Construct StructArray from an ArrayData instance
  explicit StructArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct StructArray from a type, a length and the child arrays
  ///
  /// null_count defaults to kUnknownNullCount (not known up front).
  StructArray(const std::shared_ptr<DataType>& type, int64_t length,
              const std::vector<std::shared_ptr<Array>>& children,
              std::shared_ptr<Buffer> null_bitmap = NULLPTR,
              int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return a StructArray from child arrays and field names.
  ///
  /// The length and data type are automatically inferred from the arguments.
  /// There should be at least one child array.
  static Result<std::shared_ptr<StructArray>> Make(
      const std::vector<std::shared_ptr<Array>>& children,
      const std::vector<std::string>& field_names,
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return a StructArray from child arrays and fields.
  ///
  /// The length is automatically inferred from the arguments.
  /// There should be at least one child array.  This method does not
  /// check that field types and child array types are consistent.
  static Result<std::shared_ptr<StructArray>> Make(
      const std::vector<std::shared_ptr<Array>>& children,
      const std::vector<std::shared_ptr<Field>>& fields,
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return this array's type as a StructType pointer
  const StructType* struct_type() const;

  // Return a shared pointer in case the requestor desires to share ownership
  // with this array.  The returned array has its offset, length and null
  // count adjusted.
  std::shared_ptr<Array> field(int pos) const;

  /// \brief Return the child arrays as a vector
  ///
  /// NOTE(review): presumably served from the boxed_fields_ cache below, in
  /// which case the thread-safety caveat on that member applies; confirm
  /// against the definition.
  const ArrayVector& fields() const;

  /// Returns null if name not found
  std::shared_ptr<Array> GetFieldByName(const std::string& name) const;

  /// \brief Flatten this array as a vector of arrays, one for each field
  ///
  /// \param[in] pool The pool to allocate null bitmaps from, if necessary
  Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;

  /// Status-returning overload taking an `out` parameter; superseded by the
  /// Result-returning Flatten() above. Note that `pool` has no default here.
  ARROW_DEPRECATED("Use Result-returning version")
  Status Flatten(MemoryPool* pool, ArrayVector* out) const;

 private:
  // For caching boxed child data
  // XXX This is not handled in a thread-safe manner.
  // Declared mutable so that const member functions can fill the cache.
  mutable ArrayVector boxed_fields_;
};
1104 
1105 // ----------------------------------------------------------------------
1106 // Union
1107 
/// Concrete Array class for union data
///
/// Covers both union modes (see mode()); the value offsets accessors are
/// only meaningful for dense unions.
class ARROW_EXPORT UnionArray : public Array {
 public:
  using TypeClass = UnionType;

  /// Integer type of the entries exposed by raw_type_codes()
  using type_code_t = int8_t;

  /// \brief Construct UnionArray from an ArrayData instance
  explicit UnionArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct UnionArray from a type, a length, the child arrays and
  /// a type ids buffer
  ///
  /// value_offsets and null_bitmap default to NULLPTR; null_count defaults to
  /// kUnknownNullCount (not known up front).
  UnionArray(const std::shared_ptr<DataType>& type, int64_t length,
             const std::vector<std::shared_ptr<Array>>& children,
             const std::shared_ptr<Buffer>& type_ids,
             const std::shared_ptr<Buffer>& value_offsets = NULLPTR,
             const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
             int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct Dense UnionArray from type_ids, value_offsets and children
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types. The value_offsets are assumed to be well-formed.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] value_offsets An array of signed int32 values indicating the
  /// relative offset into the respective child array for the type in a given slot.
  /// The respective offsets for each child value array must be in order / increasing.
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] field_names Vector of strings containing the name of each field.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> MakeDense(
      const Array& type_ids, const Array& value_offsets,
      const std::vector<std::shared_ptr<Array>>& children,
      const std::vector<std::string>& field_names = {},
      const std::vector<type_code_t>& type_codes = {});

  /// \brief Construct Dense UnionArray from type_ids, value_offsets and children
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types. The value_offsets are assumed to be well-formed.
  ///
  /// Forwards to the overload above with an empty field_names vector.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] value_offsets An array of signed int32 values indicating the
  /// relative offset into the respective child array for the type in a given slot.
  /// The respective offsets for each child value array must be in order / increasing.
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> MakeDense(
      const Array& type_ids, const Array& value_offsets,
      const std::vector<std::shared_ptr<Array>>& children,
      const std::vector<type_code_t>& type_codes) {
    return MakeDense(type_ids, value_offsets, children, std::vector<std::string>{},
                     type_codes);
  }

  // The four deprecated MakeDense overloads below forward to the
  // Result-returning versions and pass `out` to Result::Value().

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
                          const std::vector<std::shared_ptr<Array>>& children,
                          const std::vector<std::string>& field_names,
                          const std::vector<type_code_t>& type_codes,
                          std::shared_ptr<Array>* out) {
    return MakeDense(type_ids, value_offsets, children, field_names, type_codes)
        .Value(out);
  }

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
                          const std::vector<std::shared_ptr<Array>>& children,
                          const std::vector<std::string>& field_names,
                          std::shared_ptr<Array>* out) {
    return MakeDense(type_ids, value_offsets, children, field_names).Value(out);
  }

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
                          const std::vector<std::shared_ptr<Array>>& children,
                          const std::vector<type_code_t>& type_codes,
                          std::shared_ptr<Array>* out) {
    return MakeDense(type_ids, value_offsets, children, type_codes).Value(out);
  }

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
                          const std::vector<std::shared_ptr<Array>>& children,
                          std::shared_ptr<Array>* out) {
    return MakeDense(type_ids, value_offsets, children).Value(out);
  }

  /// \brief Construct Sparse UnionArray from type_ids and children
  ///
  /// This function does the bare minimum of validation of the input types
  /// (MakeSparse takes no value offsets).
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] field_names Vector of strings containing the name of each field.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> MakeSparse(
      const Array& type_ids, const std::vector<std::shared_ptr<Array>>& children,
      const std::vector<std::string>& field_names = {},
      const std::vector<type_code_t>& type_codes = {});

  /// \brief Construct Sparse UnionArray from type_ids and children
  ///
  /// This function does the bare minimum of validation of the input types
  /// (MakeSparse takes no value offsets).
  ///
  /// Forwards to the overload above with an empty field_names vector.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> MakeSparse(
      const Array& type_ids, const std::vector<std::shared_ptr<Array>>& children,
      const std::vector<type_code_t>& type_codes) {
    return MakeSparse(type_ids, children, std::vector<std::string>{}, type_codes);
  }

  // The four deprecated MakeSparse overloads below forward to the
  // Result-returning versions and pass `out` to Result::Value().

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeSparse(const Array& type_ids,
                           const std::vector<std::shared_ptr<Array>>& children,
                           const std::vector<std::string>& field_names,
                           const std::vector<type_code_t>& type_codes,
                           std::shared_ptr<Array>* out) {
    return MakeSparse(type_ids, children, field_names, type_codes).Value(out);
  }

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeSparse(const Array& type_ids,
                           const std::vector<std::shared_ptr<Array>>& children,
                           const std::vector<std::string>& field_names,
                           std::shared_ptr<Array>* out) {
    return MakeSparse(type_ids, children, field_names).Value(out);
  }

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeSparse(const Array& type_ids,
                           const std::vector<std::shared_ptr<Array>>& children,
                           const std::vector<type_code_t>& type_codes,
                           std::shared_ptr<Array>* out) {
    return MakeSparse(type_ids, children, type_codes).Value(out);
  }

  ARROW_DEPRECATED("Use Result-returning version")
  static Status MakeSparse(const Array& type_ids,
                           const std::vector<std::shared_ptr<Array>>& children,
                           std::shared_ptr<Array>* out) {
    return MakeSparse(type_ids, children).Value(out);
  }

  /// Note that this buffer does not account for any slice offset
  std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }

  /// Pointer to the type codes, advanced by the slice offset
  const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }

  /// The physical child id containing value at index.
  ///
  /// Looks up the type code of slot i (slice offset applied) in the union
  /// type's child_ids(). No bounds checking is performed.
  int child_id(int64_t i) const {
    return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
  }

  /// For dense arrays only.
  /// Note that this buffer does not account for any slice offset
  std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[2]; }

  /// For dense arrays only.
  /// Returns the offset stored for slot i (slice offset applied, no bounds
  /// checking).
  int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }

  /// For dense arrays only.
  /// Pointer to the value offsets, advanced by the slice offset.
  const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }

  /// \brief Return the stored UnionType pointer (non-owning)
  const UnionType* union_type() const { return union_type_; }

  /// \brief Return the union mode reported by the union type
  UnionMode::type mode() const { return union_type_->mode(); }

  // Return the given field as an individual array.
  // For sparse unions, the returned array has its offset, length and null
  // count adjusted.
  ARROW_DEPRECATED("Use field(pos)")
  std::shared_ptr<Array> child(int pos) const;

  /// \brief Return the given field as an individual array.
  ///
  /// For sparse unions, the returned array has its offset, length and null
  /// count adjusted.
  std::shared_ptr<Array> field(int pos) const;

 protected:
  void SetData(const std::shared_ptr<ArrayData>& data);

  // Raw views used by the accessors above; neither pointer includes the
  // slice offset (the accessors add data_->offset themselves).
  const type_code_t* raw_type_codes_;
  const int32_t* raw_value_offsets_;
  const UnionType* union_type_;

  // For caching boxed child data
  // Declared mutable so that const member functions can fill the cache.
  mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
};
1300 
1301 // ----------------------------------------------------------------------
1302 // DictionaryArray
1303 
/// \brief Array type for dictionary-encoded data with a
/// data-dependent dictionary
///
/// A dictionary array contains an array of non-negative integers (the
/// "dictionary indices") along with a data type containing a "dictionary"
/// corresponding to the distinct values represented in the data.
///
/// For example, the array
///
///   ["foo", "bar", "foo", "bar", "foo", "bar"]
///
/// with dictionary ["bar", "foo"], would have dictionary array representation
///
///   indices: [1, 0, 1, 0, 1, 0]
///   dictionary: ["bar", "foo"]
///
/// The indices in principle may have any integer type (signed or unsigned),
/// though presently data in IPC exchanges must be signed int32.
class ARROW_EXPORT DictionaryArray : public Array {
 public:
  using TypeClass = DictionaryType;

  /// \brief Construct DictionaryArray from an ArrayData instance
  explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct DictionaryArray from a dictionary type, an indices
  /// array and a dictionary array
  ///
  /// Use FromArrays() below for the validating variant.
  DictionaryArray(const std::shared_ptr<DataType>& type,
                  const std::shared_ptr<Array>& indices,
                  const std::shared_ptr<Array>& dictionary);

  /// \brief Construct DictionaryArray from dictionary and indices
  /// array and validate
  ///
  /// This function does the validation of the indices and input type. It checks if
  /// all indices are non-negative and smaller than the size of the dictionary
  ///
  /// \param[in] type a dictionary type
  /// \param[in] indices an array of non-negative signed
  /// integers smaller than the size of the dictionary
  /// \param[in] dictionary the dictionary with same value type as the
  /// type object
  static Result<std::shared_ptr<Array>> FromArrays(
      const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
      const std::shared_ptr<Array>& dictionary);

  /// Status-returning overload taking an `out` parameter; superseded by the
  /// Result-returning FromArrays() above.
  ARROW_DEPRECATED("Use Result-returning version")
  static Status FromArrays(const std::shared_ptr<DataType>& type,
                           const std::shared_ptr<Array>& indices,
                           const std::shared_ptr<Array>& dictionary,
                           std::shared_ptr<Array>* out);

  /// \brief Transpose this DictionaryArray
  ///
  /// This method constructs a new dictionary array with the given dictionary type,
  /// transposing indices using the transpose map.
  /// The type and the transpose map are typically computed using
  /// DictionaryUnifier.
  ///
  /// \param[in] type the new type object
  /// \param[in] dictionary the new dictionary
  /// \param[in] transpose_map transposition array of this array's indices
  ///   into the target array's indices
  /// \param[in] pool a pool to allocate the array data from
  Result<std::shared_ptr<Array>> Transpose(
      const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
      const int32_t* transpose_map, MemoryPool* pool = default_memory_pool()) const;

  /// Status-returning overload taking an `out` parameter; superseded by the
  /// Result-returning Transpose() above. Note the different argument order:
  /// `pool` comes first here and has no default.
  ARROW_DEPRECATED("Use Result-returning version")
  Status Transpose(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                   const std::shared_ptr<Array>& dictionary, const int32_t* transpose_map,
                   std::shared_ptr<Array>* out) const;

  /// \brief Determine whether dictionary arrays may be compared without unification
  bool CanCompareIndices(const DictionaryArray& other) const;

  /// \brief Return the dictionary for this array, which is stored as
  /// a member of the ArrayData internal structure
  std::shared_ptr<Array> dictionary() const;

  /// \brief Return the array of dictionary indices
  std::shared_ptr<Array> indices() const;

  /// \brief Return the ith value of indices, cast to int64_t
  int64_t GetValueIndex(int64_t i) const;

  /// \brief Return the stored DictionaryType pointer (non-owning)
  const DictionaryType* dict_type() const { return dict_type_; }

 private:
  void SetData(const std::shared_ptr<ArrayData>& data);
  // Backing storage for dict_type()
  const DictionaryType* dict_type_;
  // NOTE(review): presumably the array returned by indices(); confirm
  // against the out-of-line definitions.
  std::shared_ptr<Array> indices_;
};
1392 
1393 }  // namespace arrow
1394