1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 //! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable
19 //! of creating a [`Buffer`](crate::buffer::Buffer) which can be used
20 //! as an internal buffer in an [`ArrayData`](crate::array::ArrayData)
21 //! object.
22
23 use std::any::Any;
24 use std::collections::HashMap;
25 use std::io::Write;
26 use std::marker::PhantomData;
27 use std::mem;
28 use std::sync::Arc;
29
30 use crate::array::*;
31 use crate::buffer::{Buffer, MutableBuffer};
32 use crate::datatypes::*;
33 use crate::error::{ArrowError, Result};
34 use crate::util::bit_util;
35
36 /// Converts a `MutableBuffer` to a `BufferBuilder<T>`.
37 ///
38 /// `slots` is the number of array slots currently represented in the `MutableBuffer`.
mutable_buffer_to_builder<T: ArrowPrimitiveType>( mutable_buffer: MutableBuffer, slots: usize, ) -> BufferBuilder<T>39 pub(crate) fn mutable_buffer_to_builder<T: ArrowPrimitiveType>(
40 mutable_buffer: MutableBuffer,
41 slots: usize,
42 ) -> BufferBuilder<T> {
43 BufferBuilder::<T> {
44 buffer: mutable_buffer,
45 len: slots,
46 _marker: PhantomData,
47 }
48 }
49
50 /// Converts a `BufferBuilder<T>` into it's underlying `MutableBuffer`.
51 ///
52 /// `From` is not implemented because associated type bounds are unstable.
builder_to_mutable_buffer<T: ArrowPrimitiveType>( builder: BufferBuilder<T>, ) -> MutableBuffer53 pub(crate) fn builder_to_mutable_buffer<T: ArrowPrimitiveType>(
54 builder: BufferBuilder<T>,
55 ) -> MutableBuffer {
56 builder.buffer
57 }
58
/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object.
///
/// This builder is implemented for primitive types and creates a
/// buffer with a zero-copy `finish()` method.
///
/// See trait [`BufferBuilderTrait`](crate::array::BufferBuilderTrait)
/// for further documentation and examples.
///
/// A [`Buffer`](crate::buffer::Buffer) is the underlying data
/// structure of Arrow's [`Arrays`](crate::array::Array).
///
/// For all supported types, there are type definitions for the
/// generic version of `BufferBuilder<T>`, e.g. `UInt8BufferBuilder`.
///
/// # Example:
///
/// ```
/// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
///
/// # fn main() -> arrow::error::Result<()> {
/// let mut builder = UInt8BufferBuilder::new(100);
/// builder.append_slice(&[42, 43, 44])?;
/// builder.append(45)?;
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
/// # Ok(())
/// # }
/// ```
pub struct BufferBuilder<T: ArrowPrimitiveType> {
    // Raw byte storage that appended values are written into.
    buffer: MutableBuffer,
    // Number of elements (array slots) currently in the builder; not a byte count.
    len: usize,
    // Ties the builder to element type `T` without storing a value of that type.
    _marker: PhantomData<T>,
}
93
/// Trait for simplifying the construction of [`Buffers`](crate::buffer::Buffer).
///
/// This trait is used mainly to offer separate implementations for
/// numeric types and boolean types, while still being able to call methods on a
/// buffer builder with a generic primitive type.
/// Separate implementations of this trait allow adding implementation details,
/// e.g. the implementation for boolean types uses bit-packing.
pub trait BufferBuilderTrait<T: ArrowPrimitiveType> {
    /// Creates a new builder with initial capacity for _at least_ `capacity`
    /// elements of type `T`.
    ///
    /// The capacity can later be manually adjusted with the
    /// [`reserve()`](BufferBuilderTrait::reserve) method.
    /// Also the
    /// [`append()`](BufferBuilderTrait::append),
    /// [`append_slice()`](BufferBuilderTrait::append_slice) and
    /// [`advance()`](BufferBuilderTrait::advance)
    /// methods automatically increase the capacity if needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    ///
    /// assert!(builder.capacity() >= 10);
    /// ```
    fn new(capacity: usize) -> Self;

    /// Returns the current number of array elements in the internal buffer.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append(42).unwrap();
    ///
    /// assert_eq!(builder.len(), 1);
    /// ```
    fn len(&self) -> usize;

    /// Returns the actual capacity (number of elements) of the internal buffer.
    ///
    /// Note: the internal capacity returned by this method might be larger than
    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
    /// functions.
    fn capacity(&self) -> usize;

    /// Increases the number of elements in the internal buffer by `n`
    /// and resizes the buffer as needed.
    ///
    /// The values of the newly added elements are undefined.
    /// This method is usually used when appending `NULL` values to the buffer
    /// as they still require physical memory space.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.advance(2).unwrap();
    ///
    /// assert_eq!(builder.len(), 2);
    /// ```
    fn advance(&mut self, n: usize) -> Result<()>;

    /// Reserves memory for _at least_ `n` more elements of type `T`.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.reserve(10).unwrap();
    ///
    /// assert!(builder.capacity() >= 20);
    /// ```
    fn reserve(&mut self, n: usize) -> Result<()>;

    /// Appends a value of type `T` into the builder,
    /// growing the internal buffer as needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append(42).unwrap();
    ///
    /// assert_eq!(builder.len(), 1);
    /// ```
    fn append(&mut self, value: T::Native) -> Result<()>;

    /// Appends a value of type `T` into the builder `n` times,
    /// growing the internal buffer as needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append_n(10, 42).unwrap();
    ///
    /// assert_eq!(builder.len(), 10);
    /// ```
    fn append_n(&mut self, n: usize, value: T::Native) -> Result<()>;

    /// Appends a slice of type `T`, growing the internal buffer as needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append_slice(&[42, 44, 46]).unwrap();
    ///
    /// assert_eq!(builder.len(), 3);
    /// ```
    fn append_slice(&mut self, slice: &[T::Native]) -> Result<()>;

    /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer).
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append_slice(&[42, 44, 46]).unwrap();
    ///
    /// let buffer = builder.finish();
    ///
    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
    /// ```
    fn finish(&mut self) -> Buffer;
}
238
239 impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
new(capacity: usize) -> Self240 default fn new(capacity: usize) -> Self {
241 let buffer = MutableBuffer::new(capacity * mem::size_of::<T::Native>());
242 Self {
243 buffer,
244 len: 0,
245 _marker: PhantomData,
246 }
247 }
248
len(&self) -> usize249 fn len(&self) -> usize {
250 self.len
251 }
252
capacity(&self) -> usize253 fn capacity(&self) -> usize {
254 let bit_capacity = self.buffer.capacity() * 8;
255 bit_capacity / T::get_bit_width()
256 }
257
advance(&mut self, i: usize) -> Result<()>258 default fn advance(&mut self, i: usize) -> Result<()> {
259 let new_buffer_len = (self.len + i) * mem::size_of::<T::Native>();
260 self.buffer.resize(new_buffer_len)?;
261 self.len += i;
262 Ok(())
263 }
264
reserve(&mut self, n: usize) -> Result<()>265 default fn reserve(&mut self, n: usize) -> Result<()> {
266 let new_capacity = self.len + n;
267 let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
268 self.buffer.reserve(byte_capacity)?;
269 Ok(())
270 }
271
append(&mut self, v: T::Native) -> Result<()>272 default fn append(&mut self, v: T::Native) -> Result<()> {
273 self.reserve(1)?;
274 self.write_bytes(v.to_byte_slice(), 1)
275 }
276
append_n(&mut self, n: usize, v: T::Native) -> Result<()>277 default fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> {
278 self.reserve(n)?;
279 for _ in 0..n {
280 self.write_bytes(v.to_byte_slice(), 1)?;
281 }
282 Ok(())
283 }
284
append_slice(&mut self, slice: &[T::Native]) -> Result<()>285 default fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
286 let array_slots = slice.len();
287 self.reserve(array_slots)?;
288 self.write_bytes(slice.to_byte_slice(), array_slots)
289 }
290
finish(&mut self) -> Buffer291 default fn finish(&mut self) -> Buffer {
292 let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
293 self.len = 0;
294 buf.freeze()
295 }
296 }
297
298 impl<T: ArrowPrimitiveType> BufferBuilder<T> {
299 /// Writes a byte slice to the underlying buffer and updates the `len`, i.e. the
300 /// number array elements in the builder. Also, converts the `io::Result`
301 /// required by the `Write` trait to the Arrow `Result` type.
write_bytes(&mut self, bytes: &[u8], len_added: usize) -> Result<()>302 fn write_bytes(&mut self, bytes: &[u8], len_added: usize) -> Result<()> {
303 let write_result = self.buffer.write(bytes);
304 // `io::Result` has many options one of which we use, so pattern matching is
305 // overkill here
306 if write_result.is_err() {
307 Err(ArrowError::MemoryError(
308 "Could not write to Buffer, not big enough".to_string(),
309 ))
310 } else {
311 self.len += len_added;
312 Ok(())
313 }
314 }
315 }
316
317 impl BufferBuilderTrait<BooleanType> for BufferBuilder<BooleanType> {
new(capacity: usize) -> Self318 fn new(capacity: usize) -> Self {
319 let byte_capacity = bit_util::ceil(capacity, 8);
320 let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity);
321 let mut buffer = MutableBuffer::new(actual_capacity);
322 buffer.set_null_bits(0, actual_capacity);
323 Self {
324 buffer,
325 len: 0,
326 _marker: PhantomData,
327 }
328 }
329
advance(&mut self, i: usize) -> Result<()>330 fn advance(&mut self, i: usize) -> Result<()> {
331 let new_buffer_len = bit_util::ceil(self.len + i, 8);
332 self.buffer.resize(new_buffer_len)?;
333 self.len += i;
334 Ok(())
335 }
336
append(&mut self, v: bool) -> Result<()>337 fn append(&mut self, v: bool) -> Result<()> {
338 self.reserve(1)?;
339 if v {
340 // For performance the `len` of the buffer is not updated on each append but
341 // is updated in the `freeze` method instead.
342 unsafe {
343 bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len);
344 }
345 }
346 self.len += 1;
347 Ok(())
348 }
349
append_n(&mut self, n: usize, v: bool) -> Result<()>350 fn append_n(&mut self, n: usize, v: bool) -> Result<()> {
351 self.reserve(n)?;
352 if v {
353 unsafe {
354 bit_util::set_bits_raw(self.buffer.raw_data_mut(), self.len, self.len + n)
355 }
356 }
357 self.len += n;
358 Ok(())
359 }
360
append_slice(&mut self, slice: &[bool]) -> Result<()>361 fn append_slice(&mut self, slice: &[bool]) -> Result<()> {
362 self.reserve(slice.len())?;
363 for v in slice {
364 if *v {
365 // For performance the `len` of the buffer is not
366 // updated on each append but is updated in the
367 // `freeze` method instead.
368 unsafe {
369 bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len);
370 }
371 }
372 self.len += 1;
373 }
374 Ok(())
375 }
376
reserve(&mut self, n: usize) -> Result<()>377 fn reserve(&mut self, n: usize) -> Result<()> {
378 let new_capacity = self.len + n;
379 if new_capacity > self.capacity() {
380 let new_byte_capacity = bit_util::ceil(new_capacity, 8);
381 let existing_capacity = self.buffer.capacity();
382 let new_capacity = self.buffer.reserve(new_byte_capacity)?;
383 self.buffer
384 .set_null_bits(existing_capacity, new_capacity - existing_capacity);
385 }
386 Ok(())
387 }
388
finish(&mut self) -> Buffer389 fn finish(&mut self) -> Buffer {
390 // `append` does not update the buffer's `len` so do it before `freeze` is called.
391 let new_buffer_len = bit_util::ceil(self.len, 8);
392 debug_assert!(new_buffer_len >= self.buffer.len());
393 let mut buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
394 self.len = 0;
395 buf.resize(new_buffer_len).unwrap();
396 buf.freeze()
397 }
398 }
399
400 /// Trait for dealing with different array builders at runtime
401 pub trait ArrayBuilder: Any {
402 /// Returns the number of array slots in the builder
len(&self) -> usize403 fn len(&self) -> usize;
404
405 /// Builds the array
finish(&mut self) -> ArrayRef406 fn finish(&mut self) -> ArrayRef;
407
408 /// Returns the builder as a non-mutable `Any` reference.
409 ///
410 /// This is most useful when one wants to call non-mutable APIs on a specific builder
411 /// type. In this case, one can first cast this into a `Any`, and then use
412 /// `downcast_ref` to get a reference on the specific builder.
as_any(&self) -> &Any413 fn as_any(&self) -> &Any;
414
415 /// Returns the builder as a mutable `Any` reference.
416 ///
417 /// This is most useful when one wants to call mutable APIs on a specific builder
418 /// type. In this case, one can first cast this into a `Any`, and then use
419 /// `downcast_mut` to get a reference on the specific builder.
as_any_mut(&mut self) -> &mut Any420 fn as_any_mut(&mut self) -> &mut Any;
421
422 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>423 fn into_box_any(self: Box<Self>) -> Box<Any>;
424 }
425
/// Array builder for fixed-width primitive types
///
/// Values and validity are tracked in two parallel buffers that grow by one
/// slot per appended element.
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    // Value storage; null slots still advance this builder by one element.
    values_builder: BufferBuilder<T>,
    // Validity bits: `true` is appended for values, `false` for nulls.
    bitmap_builder: BooleanBufferBuilder,
}
431
432 impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
433 /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any434 fn as_any(&self) -> &Any {
435 self
436 }
437
438 /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any439 fn as_any_mut(&mut self) -> &mut Any {
440 self
441 }
442
443 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>444 fn into_box_any(self: Box<Self>) -> Box<Any> {
445 self
446 }
447
448 /// Returns the number of array slots in the builder
len(&self) -> usize449 fn len(&self) -> usize {
450 self.values_builder.len
451 }
452
453 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef454 fn finish(&mut self) -> ArrayRef {
455 Arc::new(self.finish())
456 }
457 }
458
459 impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
460 /// Creates a new primitive array builder
new(capacity: usize) -> Self461 pub fn new(capacity: usize) -> Self {
462 Self {
463 values_builder: BufferBuilder::<T>::new(capacity),
464 bitmap_builder: BooleanBufferBuilder::new(capacity),
465 }
466 }
467
468 /// Returns the capacity of this builder measured in slots of type `T`
capacity(&self) -> usize469 pub fn capacity(&self) -> usize {
470 self.values_builder.capacity()
471 }
472
473 /// Appends a value of type `T` into the builder
append_value(&mut self, v: T::Native) -> Result<()>474 pub fn append_value(&mut self, v: T::Native) -> Result<()> {
475 self.bitmap_builder.append(true)?;
476 self.values_builder.append(v)?;
477 Ok(())
478 }
479
480 /// Appends a null slot into the builder
append_null(&mut self) -> Result<()>481 pub fn append_null(&mut self) -> Result<()> {
482 self.bitmap_builder.append(false)?;
483 self.values_builder.advance(1)?;
484 Ok(())
485 }
486
487 /// Appends an `Option<T>` into the builder
append_option(&mut self, v: Option<T::Native>) -> Result<()>488 pub fn append_option(&mut self, v: Option<T::Native>) -> Result<()> {
489 match v {
490 None => self.append_null()?,
491 Some(v) => self.append_value(v)?,
492 };
493 Ok(())
494 }
495
496 /// Appends a slice of type `T` into the builder
append_slice(&mut self, v: &[T::Native]) -> Result<()>497 pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> {
498 self.bitmap_builder.append_n(v.len(), true)?;
499 self.values_builder.append_slice(v)?;
500 Ok(())
501 }
502
503 /// Builds the `PrimitiveArray` and reset this builder.
finish(&mut self) -> PrimitiveArray<T>504 pub fn finish(&mut self) -> PrimitiveArray<T> {
505 let len = self.len();
506 let null_bit_buffer = self.bitmap_builder.finish();
507 let null_count = len - bit_util::count_set_bits(null_bit_buffer.data());
508 let mut builder = ArrayData::builder(T::get_data_type())
509 .len(len)
510 .add_buffer(self.values_builder.finish());
511 if null_count > 0 {
512 builder = builder
513 .null_count(null_count)
514 .null_bit_buffer(null_bit_buffer);
515 }
516 let data = builder.build();
517 PrimitiveArray::<T>::from(data)
518 }
519
520 /// Builds the `DictionaryArray` and reset this builder.
finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T>521 pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
522 let len = self.len();
523 let null_bit_buffer = self.bitmap_builder.finish();
524 let null_count = len - bit_util::count_set_bits(null_bit_buffer.data());
525 let data_type = DataType::Dictionary(
526 Box::new(T::get_data_type()),
527 Box::new(values.data_type().clone()),
528 );
529 let mut builder = ArrayData::builder(data_type)
530 .len(len)
531 .add_buffer(self.values_builder.finish());
532 if null_count > 0 {
533 builder = builder
534 .null_count(null_count)
535 .null_bit_buffer(null_bit_buffer);
536 }
537 builder = builder.add_child_data(values.data());
538 DictionaryArray::<T>::from(builder.build())
539 }
540 }
541
/// Array builder for `ListArray`
///
/// Child values are appended through the wrapped values builder; each list slot
/// is then delimited by recording the child length as an offset.
pub struct ListBuilder<T: ArrayBuilder> {
    // `i32` end offsets into the child values; seeded with a leading `0`.
    offsets_builder: Int32BufferBuilder,
    // One validity bit per list slot.
    bitmap_builder: BooleanBufferBuilder,
    // Builder for the child values shared by all list slots.
    values_builder: T,
    // Number of list slots appended so far.
    len: usize,
}
549
550 impl<T: ArrayBuilder> ListBuilder<T> {
551 /// Creates a new `ListArrayBuilder` from a given values array builder
new(values_builder: T) -> Self552 pub fn new(values_builder: T) -> Self {
553 let capacity = values_builder.len();
554 Self::with_capacity(values_builder, capacity)
555 }
556
557 /// Creates a new `ListArrayBuilder` from a given values array builder
558 /// `capacity` is the number of items to pre-allocate space for in this builder
with_capacity(values_builder: T, capacity: usize) -> Self559 pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
560 let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
561 offsets_builder.append(0).unwrap();
562 Self {
563 offsets_builder,
564 bitmap_builder: BooleanBufferBuilder::new(capacity),
565 values_builder,
566 len: 0,
567 }
568 }
569 }
570
571 impl<T: ArrayBuilder> ArrayBuilder for ListBuilder<T>
572 where
573 T: 'static,
574 {
575 /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any576 fn as_any(&self) -> &Any {
577 self
578 }
579
580 /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any581 fn as_any_mut(&mut self) -> &mut Any {
582 self
583 }
584
585 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>586 fn into_box_any(self: Box<Self>) -> Box<Any> {
587 self
588 }
589
590 /// Returns the number of array slots in the builder
len(&self) -> usize591 fn len(&self) -> usize {
592 self.len
593 }
594
595 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef596 fn finish(&mut self) -> ArrayRef {
597 Arc::new(self.finish())
598 }
599 }
600
601 impl<T: ArrayBuilder> ListBuilder<T>
602 where
603 T: 'static,
604 {
605 /// Returns the child array builder as a mutable reference.
606 ///
607 /// This mutable reference can be used to append values into the child array builder,
608 /// but you must call `append` to delimit each distinct list value.
values(&mut self) -> &mut T609 pub fn values(&mut self) -> &mut T {
610 &mut self.values_builder
611 }
612
613 /// Finish the current variable-length list array slot
append(&mut self, is_valid: bool) -> Result<()>614 pub fn append(&mut self, is_valid: bool) -> Result<()> {
615 self.offsets_builder
616 .append(self.values_builder.len() as i32)?;
617 self.bitmap_builder.append(is_valid)?;
618 self.len += 1;
619 Ok(())
620 }
621
622 /// Builds the `ListArray` and reset this builder.
finish(&mut self) -> ListArray623 pub fn finish(&mut self) -> ListArray {
624 let len = self.len();
625 self.len = 0;
626 let values_arr = self
627 .values_builder
628 .as_any_mut()
629 .downcast_mut::<T>()
630 .unwrap()
631 .finish();
632 let values_data = values_arr.data();
633
634 let offset_buffer = self.offsets_builder.finish();
635 let null_bit_buffer = self.bitmap_builder.finish();
636 self.offsets_builder.append(0).unwrap();
637 let data =
638 ArrayData::builder(DataType::List(Box::new(values_data.data_type().clone())))
639 .len(len)
640 .null_count(len - bit_util::count_set_bits(null_bit_buffer.data()))
641 .add_buffer(offset_buffer)
642 .add_child_data(values_data)
643 .null_bit_buffer(null_bit_buffer)
644 .build();
645
646 ListArray::from(data)
647 }
648 }
649
/// Array builder for `FixedSizeListArray`
///
/// Every list slot is expected to contribute exactly `list_len` child values.
pub struct FixedSizeListBuilder<T: ArrayBuilder> {
    // One validity bit per list slot.
    bitmap_builder: BooleanBufferBuilder,
    // Builder for the child values shared by all list slots.
    values_builder: T,
    // Number of list slots appended so far.
    len: usize,
    // Number of child values in each list slot.
    list_len: i32,
}
657
658 impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
659 /// Creates a new `FixedSizeListBuilder` from a given values array builder
660 /// `length` is the number of values within each array
new(values_builder: T, length: i32) -> Self661 pub fn new(values_builder: T, length: i32) -> Self {
662 let capacity = values_builder.len();
663 Self::with_capacity(values_builder, length, capacity)
664 }
665
666 /// Creates a new `FixedSizeListBuilder` from a given values array builder
667 /// `length` is the number of values within each array
668 /// `capacity` is the number of items to pre-allocate space for in this builder
with_capacity(values_builder: T, length: i32, capacity: usize) -> Self669 pub fn with_capacity(values_builder: T, length: i32, capacity: usize) -> Self {
670 let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
671 offsets_builder.append(0).unwrap();
672 Self {
673 bitmap_builder: BooleanBufferBuilder::new(capacity),
674 values_builder,
675 len: 0,
676 list_len: length,
677 }
678 }
679 }
680
681 impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
682 where
683 T: 'static,
684 {
685 /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any686 fn as_any(&self) -> &Any {
687 self
688 }
689
690 /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any691 fn as_any_mut(&mut self) -> &mut Any {
692 self
693 }
694
695 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>696 fn into_box_any(self: Box<Self>) -> Box<Any> {
697 self
698 }
699
700 /// Returns the number of array slots in the builder
len(&self) -> usize701 fn len(&self) -> usize {
702 self.len
703 }
704
705 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef706 fn finish(&mut self) -> ArrayRef {
707 Arc::new(self.finish())
708 }
709 }
710
711 impl<T: ArrayBuilder> FixedSizeListBuilder<T>
712 where
713 T: 'static,
714 {
715 /// Returns the child array builder as a mutable reference.
716 ///
717 /// This mutable reference can be used to append values into the child array builder,
718 /// but you must call `append` to delimit each distinct list value.
values(&mut self) -> &mut T719 pub fn values(&mut self) -> &mut T {
720 &mut self.values_builder
721 }
722
value_length(&self) -> i32723 pub fn value_length(&self) -> i32 {
724 self.list_len
725 }
726
727 /// Finish the current variable-length list array slot
append(&mut self, is_valid: bool) -> Result<()>728 pub fn append(&mut self, is_valid: bool) -> Result<()> {
729 self.bitmap_builder.append(is_valid)?;
730 self.len += 1;
731 Ok(())
732 }
733
734 /// Builds the `FixedSizeListBuilder` and reset this builder.
finish(&mut self) -> FixedSizeListArray735 pub fn finish(&mut self) -> FixedSizeListArray {
736 let len = self.len();
737 self.len = 0;
738 let values_arr = self
739 .values_builder
740 .as_any_mut()
741 .downcast_mut::<T>()
742 .unwrap()
743 .finish();
744 let values_data = values_arr.data();
745
746 // check that values_data length is multiple of len if we have data
747 if len != 0 {
748 assert!(
749 values_data.len() / len == self.list_len as usize,
750 "Values of FixedSizeList must have equal lengths, values have length {} and list has {}",
751 values_data.len(),
752 len
753 );
754 }
755
756 let null_bit_buffer = self.bitmap_builder.finish();
757 let data = ArrayData::builder(DataType::FixedSizeList(
758 Box::new(values_data.data_type().clone()),
759 self.list_len,
760 ))
761 .len(len)
762 .null_count(len - bit_util::count_set_bits(null_bit_buffer.data()))
763 .add_child_data(values_data)
764 .null_bit_buffer(null_bit_buffer)
765 .build();
766
767 FixedSizeListArray::from(data)
768 }
769 }
770
/// Array builder for `BinaryArray`
///
/// Implemented as a thin wrapper around a list builder of bytes.
pub struct BinaryBuilder {
    // Each binary value is one list slot of `u8` child values.
    builder: ListBuilder<UInt8Builder>,
}
775
/// Array builder for `StringArray`
///
/// Implemented as a thin wrapper around a list builder of bytes.
pub struct StringBuilder {
    // Each string is one list slot holding the string's bytes as `u8` child values.
    builder: ListBuilder<UInt8Builder>,
}
779
/// Array builder for fixed-width binary values
///
/// Implemented as a thin wrapper around a fixed-size list builder of bytes.
pub struct FixedSizeBinaryBuilder {
    // Each value is one fixed-size list slot of `u8` child values.
    builder: FixedSizeListBuilder<UInt8Builder>,
}
783
/// Marker trait for the byte-oriented array builders in this module.
///
/// It adds no methods of its own beyond those of `ArrayBuilder`.
pub trait BinaryArrayBuilder: ArrayBuilder {}

// The three byte-oriented builders opt in to the marker trait.
impl BinaryArrayBuilder for BinaryBuilder {}
impl BinaryArrayBuilder for StringBuilder {}
impl BinaryArrayBuilder for FixedSizeBinaryBuilder {}
789
790 impl ArrayBuilder for BinaryBuilder {
791 /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any792 fn as_any(&self) -> &Any {
793 self
794 }
795
796 /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any797 fn as_any_mut(&mut self) -> &mut Any {
798 self
799 }
800
801 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>802 fn into_box_any(self: Box<Self>) -> Box<Any> {
803 self
804 }
805
806 /// Returns the number of array slots in the builder
len(&self) -> usize807 fn len(&self) -> usize {
808 self.builder.len()
809 }
810
811 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef812 fn finish(&mut self) -> ArrayRef {
813 Arc::new(self.finish())
814 }
815 }
816
817 impl ArrayBuilder for StringBuilder {
818 /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any819 fn as_any(&self) -> &Any {
820 self
821 }
822
823 /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any824 fn as_any_mut(&mut self) -> &mut Any {
825 self
826 }
827
828 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>829 fn into_box_any(self: Box<Self>) -> Box<Any> {
830 self
831 }
832
833 /// Returns the number of array slots in the builder
len(&self) -> usize834 fn len(&self) -> usize {
835 self.builder.len()
836 }
837
838 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef839 fn finish(&mut self) -> ArrayRef {
840 Arc::new(self.finish())
841 }
842 }
843
844 impl ArrayBuilder for FixedSizeBinaryBuilder {
845 /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any846 fn as_any(&self) -> &Any {
847 self
848 }
849
850 /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any851 fn as_any_mut(&mut self) -> &mut Any {
852 self
853 }
854
855 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>856 fn into_box_any(self: Box<Self>) -> Box<Any> {
857 self
858 }
859
860 /// Returns the number of array slots in the builder
len(&self) -> usize861 fn len(&self) -> usize {
862 self.builder.len()
863 }
864
865 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef866 fn finish(&mut self) -> ArrayRef {
867 Arc::new(self.finish())
868 }
869 }
870
871 impl BinaryBuilder {
872 /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
873 /// array
new(capacity: usize) -> Self874 pub fn new(capacity: usize) -> Self {
875 let values_builder = UInt8Builder::new(capacity);
876 Self {
877 builder: ListBuilder::new(values_builder),
878 }
879 }
880
881 /// Appends a single byte value into the builder's values array.
882 ///
883 /// Note, when appending individual byte values you must call `append` to delimit each
884 /// distinct list value.
append_byte(&mut self, value: u8) -> Result<()>885 pub fn append_byte(&mut self, value: u8) -> Result<()> {
886 self.builder.values().append_value(value)?;
887 Ok(())
888 }
889
890 /// Appends a byte slice into the builder.
891 ///
892 /// Automatically calls the `append` method to delimit the slice appended in as a
893 /// distinct array element.
append_value(&mut self, value: &[u8]) -> Result<()>894 pub fn append_value(&mut self, value: &[u8]) -> Result<()> {
895 self.builder.values().append_slice(value)?;
896 self.builder.append(true)?;
897 Ok(())
898 }
899
900 /// Finish the current variable-length list array slot.
append(&mut self, is_valid: bool) -> Result<()>901 pub fn append(&mut self, is_valid: bool) -> Result<()> {
902 self.builder.append(is_valid)
903 }
904
905 /// Append a null value to the array.
append_null(&mut self) -> Result<()>906 pub fn append_null(&mut self) -> Result<()> {
907 self.append(false)
908 }
909
910 /// Builds the `BinaryArray` and reset this builder.
finish(&mut self) -> BinaryArray911 pub fn finish(&mut self) -> BinaryArray {
912 BinaryArray::from(self.builder.finish())
913 }
914 }
915
916 impl StringBuilder {
917 /// Creates a new `StringBuilder`,
918 /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
new(capacity: usize) -> Self919 pub fn new(capacity: usize) -> Self {
920 let values_builder = UInt8Builder::new(capacity);
921 Self {
922 builder: ListBuilder::new(values_builder),
923 }
924 }
925
926 /// Creates a new `StringBuilder`,
927 /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder
928 /// `item_capacity` is the number of items to pre-allocate space for in this builder
with_capacity(item_capacity: usize, data_capacity: usize) -> Self929 pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
930 let values_builder = UInt8Builder::new(data_capacity);
931 Self {
932 builder: ListBuilder::with_capacity(values_builder, item_capacity),
933 }
934 }
935
936 /// Appends a string into the builder.
937 ///
938 /// Automatically calls the `append` method to delimit the string appended in as a
939 /// distinct array element.
append_value(&mut self, value: &str) -> Result<()>940 pub fn append_value(&mut self, value: &str) -> Result<()> {
941 self.builder.values().append_slice(value.as_bytes())?;
942 self.builder.append(true)?;
943 Ok(())
944 }
945
946 /// Finish the current variable-length list array slot.
append(&mut self, is_valid: bool) -> Result<()>947 pub fn append(&mut self, is_valid: bool) -> Result<()> {
948 self.builder.append(is_valid)
949 }
950
951 /// Append a null value to the array.
append_null(&mut self) -> Result<()>952 pub fn append_null(&mut self) -> Result<()> {
953 self.append(false)
954 }
955
956 /// Builds the `StringArray` and reset this builder.
finish(&mut self) -> StringArray957 pub fn finish(&mut self) -> StringArray {
958 StringArray::from(self.builder.finish())
959 }
960 }
961
962 impl FixedSizeBinaryBuilder {
963 /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
964 /// array
new(capacity: usize, byte_width: i32) -> Self965 pub fn new(capacity: usize, byte_width: i32) -> Self {
966 let values_builder = UInt8Builder::new(capacity);
967 Self {
968 builder: FixedSizeListBuilder::new(values_builder, byte_width),
969 }
970 }
971
972 /// Appends a byte slice into the builder.
973 ///
974 /// Automatically calls the `append` method to delimit the slice appended in as a
975 /// distinct array element.
append_value(&mut self, value: &[u8]) -> Result<()>976 pub fn append_value(&mut self, value: &[u8]) -> Result<()> {
977 assert_eq!(
978 self.builder.value_length(),
979 value.len() as i32,
980 "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
981 );
982 self.builder.values().append_slice(value)?;
983 self.builder.append(true)
984 }
985
986 /// Append a null value to the array.
append_null(&mut self) -> Result<()>987 pub fn append_null(&mut self) -> Result<()> {
988 let length: usize = self.builder.value_length() as usize;
989 self.builder.values().append_slice(&vec![0u8; length][..])?;
990 self.builder.append(false)
991 }
992
993 /// Builds the `FixedSizeBinaryArray` and reset this builder.
finish(&mut self) -> FixedSizeBinaryArray994 pub fn finish(&mut self) -> FixedSizeBinaryArray {
995 FixedSizeBinaryArray::from(self.builder.finish())
996 }
997 }
998
/// Array builder for Struct types.
///
/// Note that callers should make sure that methods of all the child field builders are
/// properly called to maintain the consistency of the data structure.
pub struct StructBuilder {
    // Field definitions; cloned into `DataType::Struct` when the array is built.
    fields: Vec<Field>,
    // `Any` handles to the child builders, used by `field_builder()` for
    // downcasting to a concrete builder type.
    field_anys: Vec<Box<Any>>,
    // The child builders themselves; each one is finished in `finish()`.
    // NOTE(review): `new()` creates these boxes from the same raw pointers as
    // `field_anys`, so both vectors refer to the same builders.
    field_builders: Vec<Box<ArrayBuilder>>,
    // Validity bits, one appended per call to `append()`.
    bitmap_builder: BooleanBufferBuilder,
    // Number of struct slots appended since the last `finish()`.
    len: usize,
}
1010
1011 impl ArrayBuilder for StructBuilder {
1012 /// Returns the number of array slots in the builder.
1013 ///
1014 /// Note that this always return the first child field builder's length, and it is
1015 /// the caller's responsibility to maintain the consistency that all the child field
1016 /// builder should have the equal number of elements.
len(&self) -> usize1017 fn len(&self) -> usize {
1018 self.len
1019 }
1020
1021 /// Builds the array.
finish(&mut self) -> ArrayRef1022 fn finish(&mut self) -> ArrayRef {
1023 Arc::new(self.finish())
1024 }
1025
1026 /// Returns the builder as a non-mutable `Any` reference.
1027 ///
1028 /// This is most useful when one wants to call non-mutable APIs on a specific builder
1029 /// type. In this case, one can first cast this into a `Any`, and then use
1030 /// `downcast_ref` to get a reference on the specific builder.
as_any(&self) -> &Any1031 fn as_any(&self) -> &Any {
1032 self
1033 }
1034
1035 /// Returns the builder as a mutable `Any` reference.
1036 ///
1037 /// This is most useful when one wants to call mutable APIs on a specific builder
1038 /// type. In this case, one can first cast this into a `Any`, and then use
1039 /// `downcast_mut` to get a reference on the specific builder.
as_any_mut(&mut self) -> &mut Any1040 fn as_any_mut(&mut self) -> &mut Any {
1041 self
1042 }
1043
1044 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>1045 fn into_box_any(self: Box<Self>) -> Box<Any> {
1046 self
1047 }
1048 }
1049
1050 impl StructBuilder {
new(fields: Vec<Field>, builders: Vec<Box<ArrayBuilder>>) -> Self1051 pub fn new(fields: Vec<Field>, builders: Vec<Box<ArrayBuilder>>) -> Self {
1052 let mut field_anys = Vec::with_capacity(builders.len());
1053 let mut field_builders = Vec::with_capacity(builders.len());
1054
1055 // Create and maintain two references for each of the input builder. We need the
1056 // extra `Any` reference because we need to cast the builder to a specific type
1057 // in `field_builder()` by calling `downcast_mut`.
1058 for f in builders.into_iter() {
1059 let raw_f = Box::into_raw(f);
1060 let raw_f_copy = raw_f;
1061 unsafe {
1062 field_anys.push(Box::from_raw(raw_f).into_box_any());
1063 field_builders.push(Box::from_raw(raw_f_copy));
1064 }
1065 }
1066
1067 Self {
1068 fields,
1069 field_anys,
1070 field_builders,
1071 bitmap_builder: BooleanBufferBuilder::new(0),
1072 len: 0,
1073 }
1074 }
1075
from_schema(schema: Schema, capacity: usize) -> Self1076 pub fn from_schema(schema: Schema, capacity: usize) -> Self {
1077 let fields = schema.fields();
1078 let mut builders = Vec::with_capacity(fields.len());
1079 for f in schema.fields() {
1080 builders.push(Self::from_field(f.clone(), capacity));
1081 }
1082 Self::new(schema.fields, builders)
1083 }
1084
from_field(f: Field, capacity: usize) -> Box<ArrayBuilder>1085 fn from_field(f: Field, capacity: usize) -> Box<ArrayBuilder> {
1086 match f.data_type() {
1087 DataType::Null => unimplemented!(),
1088 DataType::Boolean => Box::new(BooleanBuilder::new(capacity)),
1089 DataType::Int8 => Box::new(Int8Builder::new(capacity)),
1090 DataType::Int16 => Box::new(Int16Builder::new(capacity)),
1091 DataType::Int32 => Box::new(Int32Builder::new(capacity)),
1092 DataType::Int64 => Box::new(Int64Builder::new(capacity)),
1093 DataType::UInt8 => Box::new(UInt8Builder::new(capacity)),
1094 DataType::UInt16 => Box::new(UInt16Builder::new(capacity)),
1095 DataType::UInt32 => Box::new(UInt32Builder::new(capacity)),
1096 DataType::UInt64 => Box::new(UInt64Builder::new(capacity)),
1097 DataType::Float32 => Box::new(Float32Builder::new(capacity)),
1098 DataType::Float64 => Box::new(Float64Builder::new(capacity)),
1099 DataType::Binary => Box::new(BinaryBuilder::new(capacity)),
1100 DataType::FixedSizeBinary(len) => {
1101 Box::new(FixedSizeBinaryBuilder::new(capacity, *len))
1102 }
1103 DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
1104 DataType::Date32(DateUnit::Day) => Box::new(Date32Builder::new(capacity)),
1105 DataType::Date64(DateUnit::Millisecond) => {
1106 Box::new(Date64Builder::new(capacity))
1107 }
1108 DataType::Time32(TimeUnit::Second) => {
1109 Box::new(Time32SecondBuilder::new(capacity))
1110 }
1111 DataType::Time32(TimeUnit::Millisecond) => {
1112 Box::new(Time32MillisecondBuilder::new(capacity))
1113 }
1114 DataType::Time64(TimeUnit::Microsecond) => {
1115 Box::new(Time64MicrosecondBuilder::new(capacity))
1116 }
1117 DataType::Time64(TimeUnit::Nanosecond) => {
1118 Box::new(Time64NanosecondBuilder::new(capacity))
1119 }
1120 DataType::Timestamp(TimeUnit::Second, _) => {
1121 Box::new(TimestampSecondBuilder::new(capacity))
1122 }
1123 DataType::Timestamp(TimeUnit::Millisecond, _) => {
1124 Box::new(TimestampMillisecondBuilder::new(capacity))
1125 }
1126 DataType::Timestamp(TimeUnit::Microsecond, _) => {
1127 Box::new(TimestampMicrosecondBuilder::new(capacity))
1128 }
1129 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
1130 Box::new(TimestampNanosecondBuilder::new(capacity))
1131 }
1132 DataType::Interval(IntervalUnit::YearMonth) => {
1133 Box::new(IntervalYearMonthBuilder::new(capacity))
1134 }
1135 DataType::Interval(IntervalUnit::DayTime) => {
1136 Box::new(IntervalDayTimeBuilder::new(capacity))
1137 }
1138 DataType::Duration(TimeUnit::Second) => {
1139 Box::new(DurationSecondBuilder::new(capacity))
1140 }
1141 DataType::Duration(TimeUnit::Millisecond) => {
1142 Box::new(DurationMillisecondBuilder::new(capacity))
1143 }
1144 DataType::Duration(TimeUnit::Microsecond) => {
1145 Box::new(DurationMicrosecondBuilder::new(capacity))
1146 }
1147 DataType::Duration(TimeUnit::Nanosecond) => {
1148 Box::new(DurationNanosecondBuilder::new(capacity))
1149 }
1150 DataType::Struct(fields) => {
1151 let schema = Schema::new(fields.clone());
1152 Box::new(Self::from_schema(schema, capacity))
1153 }
1154 t => panic!("Data type {:?} is not currently supported", t),
1155 }
1156 }
1157
1158 /// Returns a mutable reference to the child field builder at index `i`.
1159 /// Result will be `None` if the input type `T` provided doesn't match the actual
1160 /// field builder's type.
field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T>1161 pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
1162 self.field_anys[i].downcast_mut::<T>()
1163 }
1164
1165 /// Returns the number of fields for the struct this builder is building.
num_fields(&self) -> usize1166 pub fn num_fields(&self) -> usize {
1167 self.field_builders.len()
1168 }
1169
1170 /// Appends an element (either null or non-null) to the struct. The actual elements
1171 /// should be appended for each child sub-array in a consistent way.
append(&mut self, is_valid: bool) -> Result<()>1172 pub fn append(&mut self, is_valid: bool) -> Result<()> {
1173 self.bitmap_builder.append(is_valid)?;
1174 self.len += 1;
1175 Ok(())
1176 }
1177
1178 /// Appends a null element to the struct.
append_null(&mut self) -> Result<()>1179 pub fn append_null(&mut self) -> Result<()> {
1180 self.append(false)
1181 }
1182
1183 /// Builds the `StructArray` and reset this builder.
finish(&mut self) -> StructArray1184 pub fn finish(&mut self) -> StructArray {
1185 let mut child_data = Vec::with_capacity(self.field_builders.len());
1186 for f in &mut self.field_builders {
1187 let arr = f.finish();
1188 child_data.push(arr.data());
1189 }
1190
1191 let null_bit_buffer = self.bitmap_builder.finish();
1192 let null_count = self.len - bit_util::count_set_bits(null_bit_buffer.data());
1193 let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone()))
1194 .len(self.len)
1195 .child_data(child_data);
1196 if null_count > 0 {
1197 builder = builder
1198 .null_count(null_count)
1199 .null_bit_buffer(null_bit_buffer);
1200 }
1201
1202 self.len = 0;
1203
1204 StructArray::from(builder.build())
1205 }
1206 }
1207
1208 impl Drop for StructBuilder {
drop(&mut self)1209 fn drop(&mut self) {
1210 // To avoid double drop on the field array builders.
1211 let builders = std::mem::replace(&mut self.field_builders, Vec::new());
1212 std::mem::forget(builders);
1213 }
1214 }
1215
/// Array builder for `DictionaryArray`. For example to map a set of byte indices
/// to f32 values. Note that the use of a `HashMap` here will not scale to very large
/// arrays or result in an ordered dictionary.
pub struct PrimitiveDictionaryBuilder<K, V>
where
    K: ArrowPrimitiveType,
    V: ArrowPrimitiveType,
{
    // Builder for the dictionary keys (one key per appended element).
    keys_builder: PrimitiveBuilder<K>,
    // Builder for the distinct dictionary values.
    values_builder: PrimitiveBuilder<V>,
    // Maps the byte representation of each value already seen to its key.
    map: HashMap<Box<[u8]>, K::Native>,
}
1228
1229 impl<K, V> PrimitiveDictionaryBuilder<K, V>
1230 where
1231 K: ArrowPrimitiveType,
1232 V: ArrowPrimitiveType,
1233 {
1234 /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder.
new( keys_builder: PrimitiveBuilder<K>, values_builder: PrimitiveBuilder<V>, ) -> Self1235 pub fn new(
1236 keys_builder: PrimitiveBuilder<K>,
1237 values_builder: PrimitiveBuilder<V>,
1238 ) -> Self {
1239 Self {
1240 keys_builder,
1241 values_builder,
1242 map: HashMap::new(),
1243 }
1244 }
1245 }
1246
1247 impl<K, V> ArrayBuilder for PrimitiveDictionaryBuilder<K, V>
1248 where
1249 K: ArrowPrimitiveType,
1250 V: ArrowPrimitiveType,
1251 {
1252 /// Returns the builder as an non-mutable `Any` reference.
as_any(&self) -> &Any1253 fn as_any(&self) -> &Any {
1254 self
1255 }
1256
1257 /// Returns the builder as an mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any1258 fn as_any_mut(&mut self) -> &mut Any {
1259 self
1260 }
1261
1262 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>1263 fn into_box_any(self: Box<Self>) -> Box<Any> {
1264 self
1265 }
1266
1267 /// Returns the number of array slots in the builder
len(&self) -> usize1268 fn len(&self) -> usize {
1269 self.keys_builder.len()
1270 }
1271
1272 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef1273 fn finish(&mut self) -> ArrayRef {
1274 Arc::new(self.finish())
1275 }
1276 }
1277
1278 impl<K, V> PrimitiveDictionaryBuilder<K, V>
1279 where
1280 K: ArrowPrimitiveType,
1281 V: ArrowPrimitiveType,
1282 {
1283 /// Append a primitive value to the array. Return an existing index
1284 /// if already present in the values array or a new index if the
1285 /// value is appended to the values array.
append(&mut self, value: V::Native) -> Result<K::Native>1286 pub fn append(&mut self, value: V::Native) -> Result<K::Native> {
1287 if let Some(&key) = self.map.get(value.to_byte_slice()) {
1288 // Append existing value.
1289 self.keys_builder.append_value(key)?;
1290 Ok(key)
1291 } else {
1292 // Append new value.
1293 let key = K::Native::from_usize(self.values_builder.len())
1294 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
1295 self.values_builder.append_value(value)?;
1296 self.keys_builder.append_value(key as K::Native)?;
1297 self.map.insert(value.to_byte_slice().into(), key);
1298 Ok(key)
1299 }
1300 }
1301
append_null(&mut self) -> Result<()>1302 pub fn append_null(&mut self) -> Result<()> {
1303 self.keys_builder.append_null()
1304 }
1305
1306 /// Builds the `DictionaryArray` and reset this builder.
finish(&mut self) -> DictionaryArray<K>1307 pub fn finish(&mut self) -> DictionaryArray<K> {
1308 self.map.clear();
1309 let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
1310 self.keys_builder.finish_dict(value_ref)
1311 }
1312 }
1313
/// Array builder for `DictionaryArray` whose dictionary values are strings.
/// Note that the use of a `HashMap` here will not scale to very large
/// arrays or result in an ordered dictionary.
pub struct StringDictionaryBuilder<K>
where
    K: ArrowDictionaryKeyType,
{
    // Builder for the dictionary keys (one key per appended element).
    keys_builder: PrimitiveBuilder<K>,
    // Builder for the distinct string values.
    values_builder: StringBuilder,
    // Maps the bytes of each string already seen to its key.
    map: HashMap<Box<[u8]>, K::Native>,
}
1325
1326 impl<K> StringDictionaryBuilder<K>
1327 where
1328 K: ArrowDictionaryKeyType,
1329 {
1330 /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder.
new(keys_builder: PrimitiveBuilder<K>, values_builder: StringBuilder) -> Self1331 pub fn new(keys_builder: PrimitiveBuilder<K>, values_builder: StringBuilder) -> Self {
1332 Self {
1333 keys_builder,
1334 values_builder,
1335 map: HashMap::new(),
1336 }
1337 }
1338
1339 /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary
1340 /// which is initialized with the given values.
1341 /// The indices of those dictionary values are used as keys.
1342 ///
1343 /// # Example
1344 ///
1345 /// ```
1346 /// use arrow::datatypes::Int16Type;
1347 /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder};
1348 /// use std::convert::TryFrom;
1349 ///
1350 /// let dictionary_values = StringArray::try_from(vec![None, Some("abc"), Some("def")]).unwrap();
1351 ///
1352 /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::<Int16Type>::new(3), &dictionary_values).unwrap();
1353 /// builder.append("def").unwrap();
1354 /// builder.append_null().unwrap();
1355 /// builder.append("abc").unwrap();
1356 ///
1357 /// let dictionary_array = builder.finish();
1358 ///
1359 /// let keys: Vec<Option<i16>> = dictionary_array.keys().collect();
1360 ///
1361 /// assert_eq!(keys, vec![Some(2), None, Some(1)]);
1362 /// ```
new_with_dictionary( keys_builder: PrimitiveBuilder<K>, dictionary_values: &StringArray, ) -> Result<Self>1363 pub fn new_with_dictionary(
1364 keys_builder: PrimitiveBuilder<K>,
1365 dictionary_values: &StringArray,
1366 ) -> Result<Self> {
1367 let dict_len = dictionary_values.len();
1368 let mut values_builder =
1369 StringBuilder::with_capacity(dict_len, dictionary_values.value_data().len());
1370 let mut map: HashMap<Box<[u8]>, K::Native> = HashMap::with_capacity(dict_len);
1371 for i in 0..dict_len {
1372 if dictionary_values.is_valid(i) {
1373 let value = dictionary_values.value(i);
1374 map.insert(
1375 value.as_bytes().into(),
1376 K::Native::from_usize(i)
1377 .ok_or(ArrowError::DictionaryKeyOverflowError)?,
1378 );
1379 values_builder.append_value(value)?;
1380 } else {
1381 values_builder.append_null()?;
1382 }
1383 }
1384 Ok(Self {
1385 keys_builder,
1386 values_builder,
1387 map,
1388 })
1389 }
1390 }
1391
1392 impl<K> ArrayBuilder for StringDictionaryBuilder<K>
1393 where
1394 K: ArrowDictionaryKeyType,
1395 {
1396 /// Returns the builder as an non-mutable `Any` reference.
as_any(&self) -> &Any1397 fn as_any(&self) -> &Any {
1398 self
1399 }
1400
1401 /// Returns the builder as an mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any1402 fn as_any_mut(&mut self) -> &mut Any {
1403 self
1404 }
1405
1406 /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>1407 fn into_box_any(self: Box<Self>) -> Box<Any> {
1408 self
1409 }
1410
1411 /// Returns the number of array slots in the builder
len(&self) -> usize1412 fn len(&self) -> usize {
1413 self.keys_builder.len()
1414 }
1415
1416 /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef1417 fn finish(&mut self) -> ArrayRef {
1418 Arc::new(self.finish())
1419 }
1420 }
1421
1422 impl<K> StringDictionaryBuilder<K>
1423 where
1424 K: ArrowDictionaryKeyType,
1425 {
1426 /// Append a primitive value to the array. Return an existing index
1427 /// if already present in the values array or a new index if the
1428 /// value is appended to the values array.
append(&mut self, value: &str) -> Result<K::Native>1429 pub fn append(&mut self, value: &str) -> Result<K::Native> {
1430 if let Some(&key) = self.map.get(value.as_bytes()) {
1431 // Append existing value.
1432 self.keys_builder.append_value(key)?;
1433 Ok(key)
1434 } else {
1435 // Append new value.
1436 let key = K::Native::from_usize(self.values_builder.len())
1437 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
1438 self.values_builder.append_value(value)?;
1439 self.keys_builder.append_value(key as K::Native)?;
1440 self.map.insert(value.as_bytes().into(), key);
1441 Ok(key)
1442 }
1443 }
1444
append_null(&mut self) -> Result<()>1445 pub fn append_null(&mut self) -> Result<()> {
1446 self.keys_builder.append_null()
1447 }
1448
1449 /// Builds the `DictionaryArray` and reset this builder.
finish(&mut self) -> DictionaryArray<K>1450 pub fn finish(&mut self) -> DictionaryArray<K> {
1451 self.map.clear();
1452 let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
1453 self.keys_builder.finish_dict(value_ref)
1454 }
1455 }
1456
1457 #[cfg(test)]
1458 mod tests {
1459 use super::*;
1460
1461 use crate::array::Array;
1462 use crate::bitmap::Bitmap;
1463 use std::convert::TryFrom;
1464
#[test]
fn test_builder_i32_empty() {
    // Nothing is appended: length stays zero while capacity is already reserved.
    let mut builder = Int32BufferBuilder::new(5);
    assert_eq!(0, builder.len());
    assert_eq!(16, builder.capacity());

    let buffer = builder.finish();
    assert_eq!(0, buffer.len());
}
1473
#[test]
fn test_builder_i32_alloc_zero_bytes() {
    // A builder created with zero capacity must still accept an append.
    let mut builder = Int32BufferBuilder::new(0);
    builder.append(123).unwrap();

    let buffer = builder.finish();
    assert_eq!(4, buffer.len());
}
1481
#[test]
fn test_builder_i32() {
    let mut builder = Int32BufferBuilder::new(5);
    for value in 0..5 {
        builder.append(value).unwrap();
    }
    assert_eq!(16, builder.capacity());

    // Five i32 values occupy 20 bytes in the finished buffer.
    let buffer = builder.finish();
    assert_eq!(20, buffer.len());
}
1492
#[test]
fn test_builder_i32_grow_buffer() {
    let mut builder = Int32BufferBuilder::new(2);
    assert_eq!(16, builder.capacity());

    // Appending past the initial capacity forces the buffer to grow.
    for value in 0..20 {
        builder.append(value).unwrap();
    }
    assert_eq!(32, builder.capacity());

    let buffer = builder.finish();
    assert_eq!(80, buffer.len());
}
1504
#[test]
fn test_builder_finish() {
    let mut builder = Int32BufferBuilder::new(5);
    assert_eq!(16, builder.capacity());
    for value in 0..10 {
        builder.append(value).unwrap();
    }
    let first = builder.finish();
    assert_eq!(40, first.len());
    // After `finish` the builder reports no length and no capacity.
    assert_eq!(0, builder.len());
    assert_eq!(0, builder.capacity());

    // Try build another buffer after cleaning up.
    for value in 0..20 {
        builder.append(value).unwrap()
    }
    assert_eq!(32, builder.capacity());
    let second = builder.finish();
    assert_eq!(80, second.len());
}
1525
#[test]
fn test_reserve() {
    // u8 builder: reserving within capacity is a no-op, one past it grows.
    let mut bytes_builder = UInt8BufferBuilder::new(2);
    assert_eq!(64, bytes_builder.capacity());
    bytes_builder.reserve(64).unwrap();
    assert_eq!(64, bytes_builder.capacity());
    bytes_builder.reserve(65).unwrap();
    assert_eq!(128, bytes_builder.capacity());

    // i32 builder: same behaviour, with capacity counted in elements.
    let mut ints_builder = Int32BufferBuilder::new(2);
    assert_eq!(16, ints_builder.capacity());
    ints_builder.reserve(16).unwrap();
    assert_eq!(16, ints_builder.capacity());
    ints_builder.reserve(17).unwrap();
    assert_eq!(32, ints_builder.capacity());
}
1542
#[test]
fn test_append_slice() {
    // Two byte slices appended back to back: 7 + 6 = 13 bytes.
    let mut bytes_builder = UInt8BufferBuilder::new(0);
    bytes_builder.append_slice(b"Hello, ").unwrap();
    bytes_builder.append_slice(b"World!").unwrap();
    let bytes = bytes_builder.finish();
    assert_eq!(13, bytes.len());

    // Two i32 values occupy 8 bytes.
    let mut ints_builder = Int32BufferBuilder::new(0);
    ints_builder.append_slice(&[32, 54]).unwrap();
    let ints = ints_builder.finish();
    assert_eq!(8, ints.len());
}
1556
#[test]
fn test_write_bytes() {
    let bits = [false, true, false, true];

    // Appending the bits one at a time.
    let mut one_by_one = BooleanBufferBuilder::new(4);
    for &bit in bits.iter() {
        one_by_one.append(bit).unwrap();
    }
    assert_eq!(4, one_by_one.len());
    assert_eq!(512, one_by_one.capacity());
    let buffer = one_by_one.finish();
    assert_eq!(1, buffer.len());

    // Appending the same bits as a single slice.
    let mut as_slice = BooleanBufferBuilder::new(4);
    as_slice.append_slice(&bits).unwrap();
    assert_eq!(4, as_slice.len());
    assert_eq!(512, as_slice.capacity());
    let buffer = as_slice.finish();
    assert_eq!(1, buffer.len());
}
1576
#[test]
fn test_write_bytes_i32() {
    let mut builder = Int32BufferBuilder::new(4);
    // Four values written as raw bytes, with the slot count given explicitly.
    let raw = [8, 16, 32, 64].to_byte_slice();
    builder.write_bytes(raw, 4).unwrap();
    assert_eq!(4, builder.len());
    assert_eq!(16, builder.capacity());

    let buffer = builder.finish();
    assert_eq!(16, buffer.len());
}
1587
#[test]
#[should_panic(expected = "Could not write to Buffer, not big enough")]
fn test_write_too_many_bytes() {
    // A zero-capacity builder cannot take the raw write; the `unwrap` panics.
    let mut builder = Int32BufferBuilder::new(0);
    let raw = [8, 16, 32, 64].to_byte_slice();
    builder.write_bytes(raw, 4).unwrap();
}
1595
#[test]
fn test_boolean_array_builder_append_slice() {
    let expected =
        BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);

    // Build the same array through the builder API.
    let mut builder = BooleanArray::builder(0);
    builder.append_slice(&[true, false]).unwrap();
    builder.append_null().unwrap();
    builder.append_null().unwrap();
    builder.append_value(false).unwrap();
    let actual = builder.finish();

    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected.offset(), actual.offset());
    assert_eq!(expected.null_count(), actual.null_count());
    for slot in 0..5 {
        assert_eq!(expected.is_null(slot), actual.is_null(slot));
        assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
        // Values are only compared for valid slots.
        if expected.is_valid(slot) {
            assert_eq!(expected.value(slot), actual.value(slot));
        }
    }
}
1619
#[test]
fn test_boolean_builder_increases_buffer_len() {
    // 00000010 01001000
    let expected = Buffer::from([72_u8, 2_u8]);
    let mut builder = BooleanBufferBuilder::new(8);

    // Bits 3, 6 and 9 are set; every other bit is cleared.
    for bit in 0..10 {
        let is_set = bit == 3 || bit == 6 || bit == 9;
        builder.append(is_set).unwrap();
    }
    let actual = builder.finish();

    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected.data(), actual.data());
}
1638
#[test]
fn test_primitive_array_builder_i32() {
    let mut builder = Int32Array::builder(5);
    for value in 0..5 {
        builder.append_value(value).unwrap();
    }
    let array = builder.finish();

    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    // Every slot is valid and holds its own index.
    for slot in 0..5 {
        assert!(!array.is_null(slot));
        assert!(array.is_valid(slot));
        assert_eq!(slot as i32, array.value(slot));
    }
}
1655
#[test]
fn test_primitive_array_builder_date32() {
    let mut builder = Date32Array::builder(5);
    for value in 0..5 {
        builder.append_value(value).unwrap();
    }
    let array = builder.finish();

    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    // Every slot is valid and holds its own index.
    for slot in 0..5 {
        assert!(!array.is_null(slot));
        assert!(array.is_valid(slot));
        assert_eq!(slot as i32, array.value(slot));
    }
}
1672
#[test]
fn test_primitive_array_builder_timestamp_second() {
    let mut builder = TimestampSecondArray::builder(5);
    for value in 0..5 {
        builder.append_value(value).unwrap();
    }
    let array = builder.finish();

    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    // Every slot is valid and holds its own index as an i64.
    for slot in 0..5 {
        assert!(!array.is_null(slot));
        assert!(array.is_valid(slot));
        assert_eq!(slot as i64, array.value(slot));
    }
}
1689
#[test]
fn test_primitive_array_builder_bool() {
    // 00000010 01001000
    let expected = Buffer::from([72_u8, 2_u8]);
    let mut builder = BooleanArray::builder(10);
    // Slots 3, 6 and 9 are true; every other slot is false.
    for slot in 0..10 {
        let is_set = slot == 3 || slot == 6 || slot == 9;
        builder.append_value(is_set).unwrap();
    }

    let array = builder.finish();
    assert_eq!(expected, array.values());
    assert_eq!(10, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    for i in 0..10 {
        assert!(!array.is_null(i));
        assert!(array.is_valid(i));
        assert_eq!(i == 3 || i == 6 || i == 9, array.value(i), "failed at {}", i)
    }
}
1714
#[test]
fn test_primitive_array_builder_append_option() {
    let expected = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

    // Feed the same sequence through `append_option`, one item at a time.
    let mut builder = Int32Array::builder(5);
    for item in [Some(0), None, Some(2), None, Some(4)].iter() {
        builder.append_option(*item).unwrap();
    }
    let actual = builder.finish();

    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected.offset(), actual.offset());
    assert_eq!(expected.null_count(), actual.null_count());
    for slot in 0..5 {
        assert_eq!(expected.is_null(slot), actual.is_null(slot));
        assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
        // Values are only compared for valid slots.
        if expected.is_valid(slot) {
            assert_eq!(expected.value(slot), actual.value(slot));
        }
    }
}
1738
#[test]
fn test_primitive_array_builder_append_null() {
    let expected = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

    // Build the same array with explicit value and null appends.
    let mut builder = Int32Array::builder(5);
    builder.append_value(0).unwrap();
    builder.append_value(2).unwrap();
    builder.append_null().unwrap();
    builder.append_null().unwrap();
    builder.append_value(4).unwrap();
    let actual = builder.finish();

    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected.offset(), actual.offset());
    assert_eq!(expected.null_count(), actual.null_count());
    for slot in 0..5 {
        assert_eq!(expected.is_null(slot), actual.is_null(slot));
        assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
        // Values are only compared for valid slots.
        if expected.is_valid(slot) {
            assert_eq!(expected.value(slot), actual.value(slot));
        }
    }
}
1762
#[test]
fn test_primitive_array_builder_append_slice() {
    let expected = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

    // The leading values go in as a slice, the rest one at a time.
    let mut builder = Int32Array::builder(5);
    builder.append_slice(&[0, 2]).unwrap();
    builder.append_null().unwrap();
    builder.append_null().unwrap();
    builder.append_value(4).unwrap();
    let actual = builder.finish();

    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected.offset(), actual.offset());
    assert_eq!(expected.null_count(), actual.null_count());
    for slot in 0..5 {
        assert_eq!(expected.is_null(slot), actual.is_null(slot));
        assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
        // Values are only compared for valid slots.
        if expected.is_valid(slot) {
            assert_eq!(expected.value(slot), actual.value(slot));
        }
    }
}
1785
#[test]
fn test_primitive_array_builder_finish() {
    let mut builder = Int32Builder::new(5);

    // First round: four values, after which the builder is empty again.
    builder.append_slice(&[2, 4, 6, 8]).unwrap();
    let first = builder.finish();
    assert_eq!(4, first.len());
    assert_eq!(0, builder.len());

    // Second round on the same builder: five values.
    builder.append_slice(&[1, 3, 5, 7, 9]).unwrap();
    let second = builder.finish();
    assert_eq!(5, second.len());
    assert_eq!(0, builder.len());
}
1799
#[test]
fn test_list_array_builder() {
    let mut builder = ListBuilder::new(Int32Builder::new(10));

    // [[0, 1, 2], [3, 4, 5], [6, 7]]
    let lists: [&[i32]; 3] = [&[0, 1, 2], &[3, 4, 5], &[6, 7]];
    for list in lists.iter() {
        for &value in list.iter() {
            builder.values().append_value(value).unwrap();
        }
        // Close the list once all of its values are in.
        builder.append(true).unwrap();
    }
    let list_array = builder.finish();

    let values = list_array.values().data().buffers()[0].clone();
    assert_eq!(
        Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()),
        values
    );
    assert_eq!(
        Buffer::from(&[0, 3, 6, 8].to_byte_slice()),
        list_array.data().buffers()[0].clone()
    );
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(3, list_array.len());
    assert_eq!(0, list_array.null_count());
    assert_eq!(6, list_array.value_offset(2));
    assert_eq!(2, list_array.value_length(2));
    for slot in 0..3 {
        assert!(list_array.is_valid(slot));
        assert!(!list_array.is_null(slot));
    }
}
1838
#[test]
fn test_list_array_builder_nulls() {
    let mut builder = ListBuilder::new(Int32Builder::new(10));

    // [[0, 1, 2], null, [3, null, 5], [6, 7]]
    let first: &[Option<i32>] = &[Some(0), Some(1), Some(2)];
    let third: &[Option<i32>] = &[Some(3), None, Some(5)];
    let fourth: &[Option<i32>] = &[Some(6), Some(7)];
    let rows = [Some(first), None, Some(third), Some(fourth)];

    for row in rows.iter() {
        match *row {
            Some(items) => {
                for item in items.iter() {
                    match *item {
                        Some(value) => builder.values().append_value(value).unwrap(),
                        None => builder.values().append_null().unwrap(),
                    }
                }
                builder.append(true).unwrap();
            }
            // A null list contributes no values at all.
            None => builder.append(false).unwrap(),
        }
    }
    let list_array = builder.finish();

    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(4, list_array.len());
    assert_eq!(1, list_array.null_count());
    assert_eq!(3, list_array.value_offset(2));
    assert_eq!(3, list_array.value_length(2));
}
1865
/// Builds `[[0, 1, 2], null, [3, null, 5], [6, 7, null]]` with a
/// `FixedSizeListBuilder` of list length 3 and checks counts and offsets.
#[test]
fn test_fixed_size_list_array_builder() {
    let mut list_builder = FixedSizeListBuilder::new(Int32Builder::new(10), 3);

    // Slot 0: [0, 1, 2]
    for value in 0..3 {
        list_builder.values().append_value(value).unwrap();
    }
    list_builder.append(true).unwrap();

    // Slot 1: null -- three null child values are appended before the slot is
    // marked as null.
    for _ in 0..3 {
        list_builder.values().append_null().unwrap();
    }
    list_builder.append(false).unwrap();

    // Slot 2: [3, null, 5]
    let child = list_builder.values();
    child.append_value(3).unwrap();
    child.append_null().unwrap();
    child.append_value(5).unwrap();
    list_builder.append(true).unwrap();

    // Slot 3: [6, 7, null]
    let child = list_builder.values();
    child.append_value(6).unwrap();
    child.append_value(7).unwrap();
    child.append_null().unwrap();
    list_builder.append(true).unwrap();

    let list_array = list_builder.finish();

    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(4, list_array.len());
    assert_eq!(1, list_array.null_count());
    assert_eq!(6, list_array.value_offset(2));
    assert_eq!(3, list_array.value_length());
}
1896
/// Checks that `ListBuilder::finish` resets the builder so it can be reused for
/// a second, independent array.
#[test]
fn test_list_array_builder_finish() {
    let mut list_builder = ListBuilder::new(Int32Array::builder(5));

    // First batch: two lists of three values each.
    let first_batch: [[i32; 3]; 2] = [[1, 2, 3], [4, 5, 6]];
    for list in first_batch.iter() {
        list_builder.values().append_slice(list).unwrap();
        list_builder.append(true).unwrap();
    }

    let first = list_builder.finish();
    assert_eq!(2, first.len());
    assert_eq!(0, list_builder.len());

    // Second batch: the same builder now produces a single-slot array.
    list_builder.values().append_slice(&[7, 8, 9]).unwrap();
    list_builder.append(true).unwrap();

    let second = list_builder.finish();
    assert_eq!(1, second.len());
    assert_eq!(0, list_builder.len());
}
1917
/// Finishing a `FixedSizeListBuilder` that never received any data must yield an
/// empty array and leave the builder empty.
#[test]
fn test_fixed_size_list_array_builder_empty() {
    let mut list_builder = FixedSizeListBuilder::new(Int32Array::builder(5), 3);

    let empty = list_builder.finish();

    assert_eq!(0, empty.len());
    assert_eq!(0, list_builder.len());
}
1927
/// Checks that `FixedSizeListBuilder::finish` resets the builder so it can be
/// reused for a second, independent array.
#[test]
fn test_fixed_size_list_array_builder_finish() {
    let mut list_builder = FixedSizeListBuilder::new(Int32Array::builder(5), 3);

    // First batch: two lists of exactly three values.
    let first_batch: [[i32; 3]; 2] = [[1, 2, 3], [4, 5, 6]];
    for list in first_batch.iter() {
        list_builder.values().append_slice(list).unwrap();
        list_builder.append(true).unwrap();
    }

    let first = list_builder.finish();
    assert_eq!(2, first.len());
    assert_eq!(0, list_builder.len());

    // Second batch: the same builder now produces a single-slot array.
    list_builder.values().append_slice(&[7, 8, 9]).unwrap();
    list_builder.append(true).unwrap();

    let second = list_builder.finish();
    assert_eq!(1, second.len());
    assert_eq!(0, list_builder.len());
}
1948
/// Builds the nested list
/// `[[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]`
/// with a `ListBuilder` wrapping another `ListBuilder`, then checks all three levels.
#[test]
fn test_list_list_array_builder() {
    let mut outer = ListBuilder::new(ListBuilder::new(Int32Builder::new(10)));

    // Outer slot 0: [[1, 2], [3, 4]]
    for value in 1..=2 {
        outer.values().values().append_value(value).unwrap();
    }
    outer.values().append(true).unwrap();
    for value in 3..=4 {
        outer.values().values().append_value(value).unwrap();
    }
    outer.values().append(true).unwrap();
    outer.append(true).unwrap();

    // Outer slot 1: [[5, 6, 7], null, [8]]
    for value in 5..=7 {
        outer.values().values().append_value(value).unwrap();
    }
    outer.values().append(true).unwrap();
    outer.values().append(false).unwrap();
    outer.values().values().append_value(8).unwrap();
    outer.values().append(true).unwrap();
    outer.append(true).unwrap();

    // Outer slot 2: null
    outer.append(false).unwrap();

    // Outer slot 3: [[9, 10]]
    for value in 9..=10 {
        outer.values().values().append_value(value).unwrap();
    }
    outer.values().append(true).unwrap();
    outer.append(true).unwrap();

    let list_array = outer.finish();

    // Outermost level: four slots, one of them null.
    assert_eq!(4, list_array.len());
    assert_eq!(1, list_array.null_count());
    assert_eq!(
        Buffer::from(&[0, 2, 5, 5, 6].to_byte_slice()),
        list_array.data().buffers()[0].clone()
    );

    // Middle level: six inner lists, one of them null.
    let middle = list_array.values();
    let middle_data = middle.data();
    assert_eq!(6, middle_data.len());
    assert_eq!(1, middle_data.null_count());
    assert_eq!(
        Buffer::from(&[0, 2, 4, 7, 7, 8, 10].to_byte_slice()),
        middle_data.buffers()[0].clone()
    );

    // Innermost level: the ten primitive values, none null.
    let leaf_data = &middle_data.child_data()[0];
    assert_eq!(10, leaf_data.len());
    assert_eq!(0, leaf_data.null_count());
    assert_eq!(
        Buffer::from(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].to_byte_slice()),
        leaf_data.buffers()[0].clone()
    );
}
2003
/// Feeds a `BinaryBuilder` byte by byte to produce `["hello", "", "world"]` and
/// checks the resulting `BinaryArray`.
#[test]
fn test_binary_array_builder() {
    let mut binary_builder = BinaryBuilder::new(20);

    // Slot 0: "hello"
    for byte in b"hello".iter() {
        binary_builder.append_byte(*byte).unwrap();
    }
    binary_builder.append(true).unwrap();

    // Slot 1: closed immediately, with no bytes appended.
    binary_builder.append(true).unwrap();

    // Slot 2: "world"
    for byte in b"world".iter() {
        binary_builder.append_byte(*byte).unwrap();
    }
    binary_builder.append(true).unwrap();

    let binary_array = BinaryArray::from(binary_builder.finish());

    assert_eq!(3, binary_array.len());
    assert_eq!(0, binary_array.null_count());
    assert_eq!(&b"hello"[..], binary_array.value(0));
    assert_eq!(&b""[..], binary_array.value(1));
    assert_eq!(&b"world"[..], binary_array.value(2));
    assert_eq!(5, binary_array.value_offset(2));
    assert_eq!(5, binary_array.value_length(2));
}
2034
/// Appends "hello", an empty slot and "world" to a `StringBuilder` and checks the
/// resulting `StringArray`.
#[test]
fn test_string_array_builder() {
    let mut string_builder = StringBuilder::new(20);

    string_builder.append_value("hello").unwrap();
    // Closing a slot without appending a value; asserted below to be a valid "".
    string_builder.append(true).unwrap();
    string_builder.append_value("world").unwrap();

    let string_array = StringArray::from(string_builder.finish());

    assert_eq!(3, string_array.len());
    assert_eq!(0, string_array.null_count());

    let expected_values = ["hello", "", "world"];
    for (slot, expected) in expected_values.iter().enumerate() {
        assert_eq!(*expected, string_array.value(slot));
    }

    assert_eq!(5, string_array.value_offset(2));
    assert_eq!(5, string_array.value_length(2));
}
2055
/// Builds `[b"hello", null, b"arrow"]` with a `FixedSizeBinaryBuilder` of value
/// width 5 and checks type, counts and offsets.
#[test]
fn test_fixed_size_binary_builder() {
    let mut binary_builder = FixedSizeBinaryBuilder::new(15, 5);

    binary_builder.append_value(b"hello").unwrap();
    binary_builder.append_null().unwrap();
    binary_builder.append_value(b"arrow").unwrap();

    let array: FixedSizeBinaryArray = binary_builder.finish();

    assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
    assert_eq!(3, array.len());
    assert_eq!(1, array.null_count());
    assert_eq!(10, array.value_offset(2));
    assert_eq!(5, array.value_length());
}
2075
/// Checks that `StringBuilder::finish` resets the builder so it can be reused for
/// a second, independent array.
#[test]
fn test_string_array_builder_finish() {
    let mut string_builder = StringBuilder::new(10);

    // First batch: two values.
    for text in ["hello", "world"].iter() {
        string_builder.append_value(text).unwrap();
    }
    let first = string_builder.finish();
    assert_eq!(2, first.len());
    assert_eq!(0, string_builder.len());

    // Second batch: a single value from the same builder.
    string_builder.append_value("arrow").unwrap();
    let second = string_builder.finish();
    assert_eq!(1, second.len());
    assert_eq!(0, string_builder.len());
}
2092
/// Same scenario as the plain string builder test, but the first value is passed
/// as a reference to an owned `String` rather than a string literal.
#[test]
fn test_string_array_builder_append_string() {
    let mut string_builder = StringBuilder::new(20);

    let owned = String::from("hello");
    string_builder.append_value(&owned).unwrap();
    // Closing a slot without appending a value; asserted below to be a valid "".
    string_builder.append(true).unwrap();
    string_builder.append_value("world").unwrap();

    let string_array = StringArray::from(string_builder.finish());

    assert_eq!(3, string_array.len());
    assert_eq!(0, string_array.null_count());

    let expected_values = ["hello", "", "world"];
    for (slot, expected) in expected_values.iter().enumerate() {
        assert_eq!(*expected, string_array.value(slot));
    }

    assert_eq!(5, string_array.value_offset(2));
    assert_eq!(5, string_array.value_length(2));
}
2114
/// Builds a four-row struct array with a string field and an int field, where
/// row 2 is a null struct, and checks the struct-level and per-column data.
#[test]
fn test_struct_array_builder() {
    let fields = vec![
        Field::new("f1", DataType::Utf8, false),
        Field::new("f2", DataType::Int32, false),
    ];
    // The field builders are type-erased behind `dyn ArrayBuilder`; they are
    // recovered below through `field_builder::<T>()`.
    let field_builders = vec![
        Box::new(StringBuilder::new(4)) as Box<dyn ArrayBuilder>,
        Box::new(Int32Builder::new(4)) as Box<dyn ArrayBuilder>,
    ];

    let mut builder = StructBuilder::new(fields, field_builders);
    assert_eq!(2, builder.num_fields());

    // Column "f1": ["joe", null, null, "mark"]
    let string_builder = builder
        .field_builder::<StringBuilder>(0)
        .expect("builder at field 0 should be string builder");
    string_builder.append_value("joe").unwrap();
    string_builder.append_null().unwrap();
    string_builder.append_null().unwrap();
    string_builder.append_value("mark").unwrap();

    // Column "f2": [1, 2, null, 4]
    let int_builder = builder
        .field_builder::<Int32Builder>(1)
        .expect("builder at field 1 should be int builder");
    int_builder.append_value(1).unwrap();
    int_builder.append_value(2).unwrap();
    int_builder.append_null().unwrap();
    int_builder.append_value(4).unwrap();

    // Struct-level validity: rows 0, 1 and 3 are valid, row 2 is null.
    builder.append(true).unwrap();
    builder.append(true).unwrap();
    builder.append_null().unwrap();
    builder.append(true).unwrap();

    let arr = builder.finish();

    let struct_data = arr.data();
    assert_eq!(4, struct_data.len());
    assert_eq!(1, struct_data.null_count());
    // 11 == 0b1011: bits 0, 1 and 3 set, bit 2 (the null row) cleared.
    assert_eq!(
        &Some(Bitmap::from(Buffer::from(&[11_u8]))),
        struct_data.null_bitmap()
    );

    // 9 == 0b1001: only rows 0 and 3 of the string column are valid.
    let expected_string_data = ArrayData::builder(DataType::Utf8)
        .len(4)
        .null_count(2)
        .null_bit_buffer(Buffer::from(&[9_u8]))
        .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
        .add_buffer(Buffer::from("joemark".as_bytes()))
        .build();

    let expected_int_data = ArrayData::builder(DataType::Int32)
        .len(4)
        .null_count(1)
        .null_bit_buffer(Buffer::from(&[11_u8]))
        .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
        .build();

    assert_eq!(expected_string_data, arr.column(0).data());

    // TODO: implement equality for ArrayData
    let actual_int_data = arr.column(1).data();
    assert_eq!(expected_int_data.len(), actual_int_data.len());
    assert_eq!(expected_int_data.null_count(), actual_int_data.null_count());
    assert_eq!(expected_int_data.null_bitmap(), actual_int_data.null_bitmap());

    // Compare the value buffers four bytes (one i32) at a time; only non-null
    // slots are compared.
    let expected_value_buf = expected_int_data.buffers()[0].clone();
    let actual_value_buf = actual_int_data.buffers()[0].clone();
    for i in 0..expected_int_data.len() {
        if !expected_int_data.is_null(i) {
            assert_eq!(
                expected_value_buf.data()[i * 4..(i + 1) * 4],
                actual_value_buf.data()[i * 4..(i + 1) * 4]
            );
        }
    }
}
2199
/// Checks that `StructBuilder::finish` resets the builder so a second batch of
/// rows can be built with the same field builders.
#[test]
fn test_struct_array_builder_finish() {
    let fields = vec![
        Field::new("f1", DataType::Int32, false),
        Field::new("f2", DataType::Boolean, false),
    ];
    // The field builders are type-erased behind `dyn ArrayBuilder`; they are
    // recovered below through `field_builder::<T>()`.
    let field_builders = vec![
        Box::new(Int32Builder::new(10)) as Box<dyn ArrayBuilder>,
        Box::new(BooleanBuilder::new(10)) as Box<dyn ArrayBuilder>,
    ];

    let mut builder = StructBuilder::new(fields, field_builders);

    // First batch: ten rows.
    builder
        .field_builder::<Int32Builder>(0)
        .unwrap()
        .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        .unwrap();
    builder
        .field_builder::<BooleanBuilder>(1)
        .unwrap()
        .append_slice(&[
            false, true, false, true, false, true, false, true, false, true,
        ])
        .unwrap();

    // Append slot values - all are valid. `unwrap` surfaces the error on failure.
    for _ in 0..10 {
        builder.append(true).unwrap();
    }

    assert_eq!(10, builder.len());

    let arr = builder.finish();

    assert_eq!(10, arr.len());
    assert_eq!(0, builder.len());

    // Second batch: five rows through the same builder.
    builder
        .field_builder::<Int32Builder>(0)
        .unwrap()
        .append_slice(&[1, 3, 5, 7, 9])
        .unwrap();
    builder
        .field_builder::<BooleanBuilder>(1)
        .unwrap()
        .append_slice(&[false, true, false, true, false])
        .unwrap();

    // Append slot values - all are valid.
    for _ in 0..5 {
        builder.append(true).unwrap();
    }

    assert_eq!(5, builder.len());

    let arr = builder.finish();

    assert_eq!(5, arr.len());
    assert_eq!(0, builder.len());
}
2261
/// `StructBuilder::from_schema` should create one field builder per schema field,
/// each of the builder type matching the field's data type (including a nested struct).
#[test]
fn test_struct_array_builder_from_schema() {
    let sub_fields = vec![
        Field::new("g1", DataType::Int32, false),
        Field::new("g2", DataType::Boolean, false),
    ];
    let fields = vec![
        Field::new("f1", DataType::Float32, false),
        Field::new("f2", DataType::Utf8, false),
        Field::new("f3", DataType::Struct(sub_fields), false),
    ];

    let mut builder = StructBuilder::from_schema(Schema::new(fields), 5);

    assert_eq!(3, builder.num_fields());
    assert!(builder.field_builder::<Float32Builder>(0).is_some());
    assert!(builder.field_builder::<StringBuilder>(1).is_some());
    assert!(builder.field_builder::<StructBuilder>(2).is_some());
}
2279
/// `StructBuilder::from_schema` is expected to panic when the schema contains a
/// `List(Int64)` field.
#[test]
#[should_panic(expected = "Data type List(Int64) is not currently supported")]
fn test_struct_array_builder_from_schema_unsupported_type() {
    let fields = vec![
        Field::new("f1", DataType::Int16, false),
        Field::new("f2", DataType::List(Box::new(DataType::Int64)), false),
    ];

    let _ = StructBuilder::from_schema(Schema::new(fields), 5);
}
2290
/// Requesting a field builder with the wrong concrete type must return `None`
/// rather than a mis-typed builder.
#[test]
fn test_struct_array_builder_field_builder_type_mismatch() {
    let fields = vec![Field::new("f1", DataType::Int32, false)];
    let field_builders = vec![Box::new(Int32Builder::new(10)) as Box<dyn ArrayBuilder>];

    let mut builder = StructBuilder::new(fields, field_builders);

    // Field 0 is backed by an `Int32Builder`, so asking for a `BinaryBuilder` fails.
    assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
}
2303
/// Appends two distinct values around a null to a `PrimitiveDictionaryBuilder`
/// and checks the resulting keys, dictionary values and validity.
#[test]
fn test_primitive_dictionary_builder() {
    let key_builder = PrimitiveBuilder::<UInt8Type>::new(3);
    let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
    let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
    builder.append(12345678).unwrap();
    builder.append_null().unwrap();
    builder.append(22345678).unwrap();
    let array = builder.finish();

    // Keys are strongly typed.
    let aks: Vec<_> = array.keys().collect();

    // Values are polymorphic and so require a downcast.
    let av = array.values();
    let ava: &UInt32Array = av.as_any().downcast_ref::<UInt32Array>().unwrap();
    let avs: &[u32] = ava.value_slice(0, array.values().len());

    // Only the middle slot was appended as null.
    assert!(!array.is_null(0));
    assert!(array.is_null(1));
    assert!(!array.is_null(2));

    assert_eq!(aks, vec![Some(0), None, Some(1)]);
    assert_eq!(avs, &[12345678, 22345678]);
}
2329
/// Appends repeated strings and a null to a `StringDictionaryBuilder` and checks
/// that repeated strings reuse the same key.
#[test]
fn test_string_dictionary_builder() {
    let mut dict_builder = StringDictionaryBuilder::new(
        PrimitiveBuilder::<Int8Type>::new(5),
        StringBuilder::new(2),
    );

    // `None` stands for a null entry; everything else is appended as a string.
    let inputs = [Some("abc"), None, Some("def"), Some("def"), Some("abc")];
    for input in inputs.iter() {
        match *input {
            Some(text) => {
                dict_builder.append(text).unwrap();
            }
            None => dict_builder.append_null().unwrap(),
        }
    }
    let array = dict_builder.finish();

    // Keys are strongly typed.
    let keys: Vec<_> = array.keys().collect();

    // Values are polymorphic and so require a downcast.
    let values_ref = array.values();
    let values: &StringArray = values_ref
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap();

    assert_eq!(keys, vec![Some(0), None, Some(1), Some(1), Some(0)]);
    assert_eq!(values.value(0), "abc");
    assert_eq!(values.value(1), "def");
}
2353
/// Seeds a `StringDictionaryBuilder` with an existing dictionary
/// `[null, "def", "abc"]` and checks that known strings reuse their existing
/// keys while a new string ("ghi") gets the next key.
#[test]
fn test_string_dictionary_builder_with_existing_dictionary() {
    let dictionary =
        StringArray::try_from(vec![None, Some("def"), Some("abc")]).unwrap();

    let key_builder = PrimitiveBuilder::<Int8Type>::new(6);
    let mut builder =
        StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary)
            .unwrap();
    builder.append("abc").unwrap();
    builder.append_null().unwrap();
    builder.append("def").unwrap();
    builder.append("def").unwrap();
    builder.append("abc").unwrap();
    builder.append("ghi").unwrap();
    let array = builder.finish();

    // Keys are strongly typed.
    let aks: Vec<_> = array.keys().collect();

    // Values are polymorphic and so require a downcast.
    let av = array.values();
    let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(aks, vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)]);
    // Dictionary slot 0 is the null entry carried over from the seed dictionary.
    assert!(!ava.is_valid(0));
    assert_eq!(ava.value(1), "def");
    assert_eq!(ava.value(2), "abc");
    assert_eq!(ava.value(3), "ghi");
}
2384
/// Seeds a `StringDictionaryBuilder` with a dictionary containing only a null
/// entry and checks the keys assigned to subsequently appended strings.
#[test]
fn test_string_dictionary_builder_with_reserved_null_value() {
    let dictionary = StringArray::try_from(vec![None]).unwrap();

    let key_builder = PrimitiveBuilder::<Int16Type>::new(4);
    let mut builder =
        StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary)
            .unwrap();
    builder.append("abc").unwrap();
    builder.append_null().unwrap();
    builder.append("def").unwrap();
    builder.append("abc").unwrap();
    let array = builder.finish();

    assert!(array.is_null(1));
    assert!(!array.is_valid(1));

    // Reinterpret the dictionary array's data as its raw Int16 key array.
    let keys: Int16Array = array.data().into();

    // Dictionary slot 0 is taken by the seeded null, so "abc" gets key 1.
    assert_eq!(keys.value(0), 1);
    assert!(keys.is_null(1));
    // zero initialization is currently guaranteed by Buffer allocation and resizing
    assert_eq!(keys.value(1), 0);
    assert_eq!(keys.value(2), 2);
    assert_eq!(keys.value(3), 1);
}
2411
/// Appending more distinct values than the `UInt8` key type used here can index
/// is expected to fail with `DictionaryKeyOverflowError`.
#[test]
fn test_primitive_dictionary_overflow() {
    let keys = PrimitiveBuilder::<UInt8Type>::new(257);
    let values = PrimitiveBuilder::<UInt32Type>::new(257);
    let mut dict_builder = PrimitiveDictionaryBuilder::new(keys, values);

    // Fill the dictionary with 256 distinct values (1000..=1255).
    for value in 1000..1256 {
        dict_builder.append(value).unwrap();
    }

    // A 257th distinct value is expected to be rejected with the dedicated
    // overflow error.
    let overflow = dict_builder.append(1257);
    assert_eq!(overflow, Err(ArrowError::DictionaryKeyOverflowError));
}
2427 }
2428