1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 //! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable
19 //! of creating a [`Buffer`](crate::buffer::Buffer) which can be used
20 //! as an internal buffer in an [`ArrayData`](crate::array::ArrayData)
21 //! object.
22 
23 use std::any::Any;
24 use std::collections::HashMap;
25 use std::io::Write;
26 use std::marker::PhantomData;
27 use std::mem;
28 use std::sync::Arc;
29 
30 use crate::array::*;
31 use crate::buffer::{Buffer, MutableBuffer};
32 use crate::datatypes::*;
33 use crate::error::{ArrowError, Result};
34 use crate::util::bit_util;
35 
36 ///  Converts a `MutableBuffer` to a `BufferBuilder<T>`.
37 ///
38 /// `slots` is the number of array slots currently represented in the `MutableBuffer`.
mutable_buffer_to_builder<T: ArrowPrimitiveType>( mutable_buffer: MutableBuffer, slots: usize, ) -> BufferBuilder<T>39 pub(crate) fn mutable_buffer_to_builder<T: ArrowPrimitiveType>(
40     mutable_buffer: MutableBuffer,
41     slots: usize,
42 ) -> BufferBuilder<T> {
43     BufferBuilder::<T> {
44         buffer: mutable_buffer,
45         len: slots,
46         _marker: PhantomData,
47     }
48 }
49 
50 ///  Converts a `BufferBuilder<T>` into it's underlying `MutableBuffer`.
51 ///
52 /// `From` is not implemented because associated type bounds are unstable.
builder_to_mutable_buffer<T: ArrowPrimitiveType>( builder: BufferBuilder<T>, ) -> MutableBuffer53 pub(crate) fn builder_to_mutable_buffer<T: ArrowPrimitiveType>(
54     builder: BufferBuilder<T>,
55 ) -> MutableBuffer {
56     builder.buffer
57 }
58 
/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object.
///
/// This builder is implemented for primitive types. The finished buffer is
/// obtained through `finish()`, which takes the internal buffer out of the
/// builder, freezes it and leaves the builder empty.
///
/// See trait [`BufferBuilderTrait`](crate::array::BufferBuilderTrait)
/// for further documentation and examples.
///
/// A [`Buffer`](crate::buffer::Buffer) is the underlying data
/// structure of Arrow's [`Arrays`](crate::array::Array).
///
/// For all supported types, there are type definitions for the
/// generic version of `BufferBuilder<T>`, e.g. `UInt8BufferBuilder`.
///
/// # Example:
///
/// ```
/// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
///
/// # fn main() -> arrow::error::Result<()> {
/// let mut builder = UInt8BufferBuilder::new(100);
/// builder.append_slice(&[42, 43, 44])?;
/// builder.append(45)?;
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
/// # Ok(())
/// # }
/// ```
pub struct BufferBuilder<T: ArrowPrimitiveType> {
    /// Raw storage that appended values are written into.
    buffer: MutableBuffer,
    /// Number of array elements (slots) currently held, not a byte count.
    len: usize,
    /// Ties the builder to the element type `T` without storing a value of it.
    _marker: PhantomData<T>,
}
93 
/// Trait for simplifying the construction of [`Buffers`](crate::buffer::Buffer).
///
/// This trait is used mainly to offer separate implementations for
/// numeric types and boolean types, while still being able to call methods on a
/// buffer builder with a generic primitive type.
/// Separate implementations of this trait allow adding implementation details,
/// e.g. the implementation for boolean types uses bit-packing.
pub trait BufferBuilderTrait<T: ArrowPrimitiveType> {
    /// Creates a new builder with initial capacity for _at least_ `capacity`
    /// elements of type `T`.
    ///
    /// The capacity can later be manually adjusted with the
    /// [`reserve()`](BufferBuilderTrait::reserve) method.
    /// Also the
    /// [`append()`](BufferBuilderTrait::append),
    /// [`append_slice()`](BufferBuilderTrait::append_slice) and
    /// [`advance()`](BufferBuilderTrait::advance)
    /// methods automatically increase the capacity if needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    ///
    /// assert!(builder.capacity() >= 10);
    /// ```
    fn new(capacity: usize) -> Self;

    /// Returns the current number of array elements in the internal buffer.
    ///
    /// This is a count of elements (slots), not of bytes.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append(42);
    ///
    /// assert_eq!(builder.len(), 1);
    /// ```
    fn len(&self) -> usize;

    /// Returns the actual capacity (number of elements) of the internal buffer.
    ///
    /// Note: the internal capacity returned by this method might be larger than
    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
    /// functions.
    fn capacity(&self) -> usize;

    /// Increases the number of elements in the internal buffer by `n`
    /// and resizes the buffer as needed.
    ///
    /// The values of the newly added elements are undefined.
    /// This method is usually used when appending `NULL` values to the buffer
    /// as they still require physical memory space.
    ///
    /// The implementations in this file return an error when resizing the
    /// internal buffer fails; the element count is not changed in that case.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.advance(2);
    ///
    /// assert_eq!(builder.len(), 2);
    /// ```
    fn advance(&mut self, n: usize) -> Result<()>;

    /// Reserves memory for _at least_ `n` more elements of type `T`.
    ///
    /// The number of elements reported by `len()` is not changed by this call.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.reserve(10);
    ///
    /// assert!(builder.capacity() >= 20);
    /// ```
    fn reserve(&mut self, n: usize) -> Result<()>;

    /// Appends a value of type `T` into the builder,
    /// growing the internal buffer as needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append(42);
    ///
    /// assert_eq!(builder.len(), 1);
    /// ```
    fn append(&mut self, value: T::Native) -> Result<()>;

    /// Appends a value of type `T` into the builder `n` times,
    /// growing the internal buffer as needed.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append_n(10, 42);
    ///
    /// assert_eq!(builder.len(), 10);
    /// ```
    fn append_n(&mut self, n: usize, value: T::Native) -> Result<()>;

    /// Appends a slice of type `T`, growing the internal buffer as needed.
    ///
    /// The builder's length grows by the number of elements in `slice`.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append_slice(&[42, 44, 46]);
    ///
    /// assert_eq!(builder.len(), 3);
    /// ```
    fn append_slice(&mut self, slice: &[T::Native]) -> Result<()>;

    /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer).
    ///
    /// The implementations in this file swap in a fresh, empty internal buffer and
    /// set the element count back to zero, so the builder can be reused afterwards.
    ///
    /// # Example:
    ///
    /// ```
    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
    ///
    /// let mut builder = UInt8BufferBuilder::new(10);
    /// builder.append_slice(&[42, 44, 46]);
    ///
    /// let buffer = builder.finish();
    ///
    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
    /// ```
    fn finish(&mut self) -> Buffer;
}
238 
239 impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
new(capacity: usize) -> Self240     default fn new(capacity: usize) -> Self {
241         let buffer = MutableBuffer::new(capacity * mem::size_of::<T::Native>());
242         Self {
243             buffer,
244             len: 0,
245             _marker: PhantomData,
246         }
247     }
248 
len(&self) -> usize249     fn len(&self) -> usize {
250         self.len
251     }
252 
capacity(&self) -> usize253     fn capacity(&self) -> usize {
254         let bit_capacity = self.buffer.capacity() * 8;
255         bit_capacity / T::get_bit_width()
256     }
257 
advance(&mut self, i: usize) -> Result<()>258     default fn advance(&mut self, i: usize) -> Result<()> {
259         let new_buffer_len = (self.len + i) * mem::size_of::<T::Native>();
260         self.buffer.resize(new_buffer_len)?;
261         self.len += i;
262         Ok(())
263     }
264 
reserve(&mut self, n: usize) -> Result<()>265     default fn reserve(&mut self, n: usize) -> Result<()> {
266         let new_capacity = self.len + n;
267         let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
268         self.buffer.reserve(byte_capacity)?;
269         Ok(())
270     }
271 
append(&mut self, v: T::Native) -> Result<()>272     default fn append(&mut self, v: T::Native) -> Result<()> {
273         self.reserve(1)?;
274         self.write_bytes(v.to_byte_slice(), 1)
275     }
276 
append_n(&mut self, n: usize, v: T::Native) -> Result<()>277     default fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> {
278         self.reserve(n)?;
279         for _ in 0..n {
280             self.write_bytes(v.to_byte_slice(), 1)?;
281         }
282         Ok(())
283     }
284 
append_slice(&mut self, slice: &[T::Native]) -> Result<()>285     default fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
286         let array_slots = slice.len();
287         self.reserve(array_slots)?;
288         self.write_bytes(slice.to_byte_slice(), array_slots)
289     }
290 
finish(&mut self) -> Buffer291     default fn finish(&mut self) -> Buffer {
292         let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
293         self.len = 0;
294         buf.freeze()
295     }
296 }
297 
298 impl<T: ArrowPrimitiveType> BufferBuilder<T> {
299     /// Writes a byte slice to the underlying buffer and updates the `len`, i.e. the
300     /// number array elements in the builder.  Also, converts the `io::Result`
301     /// required by the `Write` trait to the Arrow `Result` type.
write_bytes(&mut self, bytes: &[u8], len_added: usize) -> Result<()>302     fn write_bytes(&mut self, bytes: &[u8], len_added: usize) -> Result<()> {
303         let write_result = self.buffer.write(bytes);
304         // `io::Result` has many options one of which we use, so pattern matching is
305         // overkill here
306         if write_result.is_err() {
307             Err(ArrowError::MemoryError(
308                 "Could not write to Buffer, not big enough".to_string(),
309             ))
310         } else {
311             self.len += len_added;
312             Ok(())
313         }
314     }
315 }
316 
317 impl BufferBuilderTrait<BooleanType> for BufferBuilder<BooleanType> {
new(capacity: usize) -> Self318     fn new(capacity: usize) -> Self {
319         let byte_capacity = bit_util::ceil(capacity, 8);
320         let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity);
321         let mut buffer = MutableBuffer::new(actual_capacity);
322         buffer.set_null_bits(0, actual_capacity);
323         Self {
324             buffer,
325             len: 0,
326             _marker: PhantomData,
327         }
328     }
329 
advance(&mut self, i: usize) -> Result<()>330     fn advance(&mut self, i: usize) -> Result<()> {
331         let new_buffer_len = bit_util::ceil(self.len + i, 8);
332         self.buffer.resize(new_buffer_len)?;
333         self.len += i;
334         Ok(())
335     }
336 
append(&mut self, v: bool) -> Result<()>337     fn append(&mut self, v: bool) -> Result<()> {
338         self.reserve(1)?;
339         if v {
340             // For performance the `len` of the buffer is not updated on each append but
341             // is updated in the `freeze` method instead.
342             unsafe {
343                 bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len);
344             }
345         }
346         self.len += 1;
347         Ok(())
348     }
349 
append_n(&mut self, n: usize, v: bool) -> Result<()>350     fn append_n(&mut self, n: usize, v: bool) -> Result<()> {
351         self.reserve(n)?;
352         if v {
353             unsafe {
354                 bit_util::set_bits_raw(self.buffer.raw_data_mut(), self.len, self.len + n)
355             }
356         }
357         self.len += n;
358         Ok(())
359     }
360 
append_slice(&mut self, slice: &[bool]) -> Result<()>361     fn append_slice(&mut self, slice: &[bool]) -> Result<()> {
362         self.reserve(slice.len())?;
363         for v in slice {
364             if *v {
365                 // For performance the `len` of the buffer is not
366                 // updated on each append but is updated in the
367                 // `freeze` method instead.
368                 unsafe {
369                     bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len);
370                 }
371             }
372             self.len += 1;
373         }
374         Ok(())
375     }
376 
reserve(&mut self, n: usize) -> Result<()>377     fn reserve(&mut self, n: usize) -> Result<()> {
378         let new_capacity = self.len + n;
379         if new_capacity > self.capacity() {
380             let new_byte_capacity = bit_util::ceil(new_capacity, 8);
381             let existing_capacity = self.buffer.capacity();
382             let new_capacity = self.buffer.reserve(new_byte_capacity)?;
383             self.buffer
384                 .set_null_bits(existing_capacity, new_capacity - existing_capacity);
385         }
386         Ok(())
387     }
388 
finish(&mut self) -> Buffer389     fn finish(&mut self) -> Buffer {
390         // `append` does not update the buffer's `len` so do it before `freeze` is called.
391         let new_buffer_len = bit_util::ceil(self.len, 8);
392         debug_assert!(new_buffer_len >= self.buffer.len());
393         let mut buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
394         self.len = 0;
395         buf.resize(new_buffer_len).unwrap();
396         buf.freeze()
397     }
398 }
399 
/// Trait for dealing with different array builders at runtime.
///
/// It extends `Any`, and the `as_any*` / `into_box_any` methods let callers
/// recover the concrete builder type by downcasting.
pub trait ArrayBuilder: Any {
    /// Returns the number of array slots in the builder
    fn len(&self) -> usize;

    /// Builds the array and returns it as an `ArrayRef`.
    ///
    /// The implementations visible in this file are documented as also resetting
    /// the builder.
    fn finish(&mut self) -> ArrayRef;

    /// Returns the builder as a non-mutable `Any` reference.
    ///
    /// This is most useful when one wants to call non-mutable APIs on a specific builder
    /// type. In this case, one can first cast this into an `Any`, and then use
    /// `downcast_ref` to get a reference on the specific builder.
    fn as_any(&self) -> &Any;

    /// Returns the builder as a mutable `Any` reference.
    ///
    /// This is most useful when one wants to call mutable APIs on a specific builder
    /// type. In this case, one can first cast this into an `Any`, and then use
    /// `downcast_mut` to get a reference on the specific builder.
    fn as_any_mut(&mut self) -> &mut Any;

    /// Returns the boxed builder as a box of `Any`.
    fn into_box_any(self: Box<Self>) -> Box<Any>;
}
425 
/// Array builder for fixed-width primitive types.
///
/// Values and validity are accumulated side by side: every appended slot adds
/// one entry to each of the two inner builders.
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    /// Holds the values; null slots still occupy space here.
    values_builder: BufferBuilder<T>,
    /// Holds one bit per slot: `true` for a value, `false` for a null.
    bitmap_builder: BooleanBufferBuilder,
}
431 
432 impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
433     /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any434     fn as_any(&self) -> &Any {
435         self
436     }
437 
438     /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any439     fn as_any_mut(&mut self) -> &mut Any {
440         self
441     }
442 
443     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>444     fn into_box_any(self: Box<Self>) -> Box<Any> {
445         self
446     }
447 
448     /// Returns the number of array slots in the builder
len(&self) -> usize449     fn len(&self) -> usize {
450         self.values_builder.len
451     }
452 
453     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef454     fn finish(&mut self) -> ArrayRef {
455         Arc::new(self.finish())
456     }
457 }
458 
459 impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
460     /// Creates a new primitive array builder
new(capacity: usize) -> Self461     pub fn new(capacity: usize) -> Self {
462         Self {
463             values_builder: BufferBuilder::<T>::new(capacity),
464             bitmap_builder: BooleanBufferBuilder::new(capacity),
465         }
466     }
467 
468     /// Returns the capacity of this builder measured in slots of type `T`
capacity(&self) -> usize469     pub fn capacity(&self) -> usize {
470         self.values_builder.capacity()
471     }
472 
473     /// Appends a value of type `T` into the builder
append_value(&mut self, v: T::Native) -> Result<()>474     pub fn append_value(&mut self, v: T::Native) -> Result<()> {
475         self.bitmap_builder.append(true)?;
476         self.values_builder.append(v)?;
477         Ok(())
478     }
479 
480     /// Appends a null slot into the builder
append_null(&mut self) -> Result<()>481     pub fn append_null(&mut self) -> Result<()> {
482         self.bitmap_builder.append(false)?;
483         self.values_builder.advance(1)?;
484         Ok(())
485     }
486 
487     /// Appends an `Option<T>` into the builder
append_option(&mut self, v: Option<T::Native>) -> Result<()>488     pub fn append_option(&mut self, v: Option<T::Native>) -> Result<()> {
489         match v {
490             None => self.append_null()?,
491             Some(v) => self.append_value(v)?,
492         };
493         Ok(())
494     }
495 
496     /// Appends a slice of type `T` into the builder
append_slice(&mut self, v: &[T::Native]) -> Result<()>497     pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> {
498         self.bitmap_builder.append_n(v.len(), true)?;
499         self.values_builder.append_slice(v)?;
500         Ok(())
501     }
502 
503     /// Builds the `PrimitiveArray` and reset this builder.
finish(&mut self) -> PrimitiveArray<T>504     pub fn finish(&mut self) -> PrimitiveArray<T> {
505         let len = self.len();
506         let null_bit_buffer = self.bitmap_builder.finish();
507         let null_count = len - bit_util::count_set_bits(null_bit_buffer.data());
508         let mut builder = ArrayData::builder(T::get_data_type())
509             .len(len)
510             .add_buffer(self.values_builder.finish());
511         if null_count > 0 {
512             builder = builder
513                 .null_count(null_count)
514                 .null_bit_buffer(null_bit_buffer);
515         }
516         let data = builder.build();
517         PrimitiveArray::<T>::from(data)
518     }
519 
520     /// Builds the `DictionaryArray` and reset this builder.
finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T>521     pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
522         let len = self.len();
523         let null_bit_buffer = self.bitmap_builder.finish();
524         let null_count = len - bit_util::count_set_bits(null_bit_buffer.data());
525         let data_type = DataType::Dictionary(
526             Box::new(T::get_data_type()),
527             Box::new(values.data_type().clone()),
528         );
529         let mut builder = ArrayData::builder(data_type)
530             .len(len)
531             .add_buffer(self.values_builder.finish());
532         if null_count > 0 {
533             builder = builder
534                 .null_count(null_count)
535                 .null_bit_buffer(null_bit_buffer);
536         }
537         builder = builder.add_child_data(values.data());
538         DictionaryArray::<T>::from(builder.build())
539     }
540 }
541 
/// Array builder for `ListArray`.
///
/// Child values are appended through the inner values builder; each call to
/// `append` then closes one list slot.
pub struct ListBuilder<T: ArrayBuilder> {
    /// `i32` offsets: starts with a single `0`, then one entry per list slot
    /// holding the child builder's length at the time the slot was closed.
    offsets_builder: Int32BufferBuilder,
    /// One bit per list slot, taken from the `is_valid` flag passed to `append`.
    bitmap_builder: BooleanBufferBuilder,
    /// Builder for the child values shared by all list slots.
    values_builder: T,
    /// Number of list slots appended so far.
    len: usize,
}
549 
550 impl<T: ArrayBuilder> ListBuilder<T> {
551     /// Creates a new `ListArrayBuilder` from a given values array builder
new(values_builder: T) -> Self552     pub fn new(values_builder: T) -> Self {
553         let capacity = values_builder.len();
554         Self::with_capacity(values_builder, capacity)
555     }
556 
557     /// Creates a new `ListArrayBuilder` from a given values array builder
558     /// `capacity` is the number of items to pre-allocate space for in this builder
with_capacity(values_builder: T, capacity: usize) -> Self559     pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
560         let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
561         offsets_builder.append(0).unwrap();
562         Self {
563             offsets_builder,
564             bitmap_builder: BooleanBufferBuilder::new(capacity),
565             values_builder,
566             len: 0,
567         }
568     }
569 }
570 
571 impl<T: ArrayBuilder> ArrayBuilder for ListBuilder<T>
572 where
573     T: 'static,
574 {
575     /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any576     fn as_any(&self) -> &Any {
577         self
578     }
579 
580     /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any581     fn as_any_mut(&mut self) -> &mut Any {
582         self
583     }
584 
585     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>586     fn into_box_any(self: Box<Self>) -> Box<Any> {
587         self
588     }
589 
590     /// Returns the number of array slots in the builder
len(&self) -> usize591     fn len(&self) -> usize {
592         self.len
593     }
594 
595     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef596     fn finish(&mut self) -> ArrayRef {
597         Arc::new(self.finish())
598     }
599 }
600 
601 impl<T: ArrayBuilder> ListBuilder<T>
602 where
603     T: 'static,
604 {
605     /// Returns the child array builder as a mutable reference.
606     ///
607     /// This mutable reference can be used to append values into the child array builder,
608     /// but you must call `append` to delimit each distinct list value.
values(&mut self) -> &mut T609     pub fn values(&mut self) -> &mut T {
610         &mut self.values_builder
611     }
612 
613     /// Finish the current variable-length list array slot
append(&mut self, is_valid: bool) -> Result<()>614     pub fn append(&mut self, is_valid: bool) -> Result<()> {
615         self.offsets_builder
616             .append(self.values_builder.len() as i32)?;
617         self.bitmap_builder.append(is_valid)?;
618         self.len += 1;
619         Ok(())
620     }
621 
622     /// Builds the `ListArray` and reset this builder.
finish(&mut self) -> ListArray623     pub fn finish(&mut self) -> ListArray {
624         let len = self.len();
625         self.len = 0;
626         let values_arr = self
627             .values_builder
628             .as_any_mut()
629             .downcast_mut::<T>()
630             .unwrap()
631             .finish();
632         let values_data = values_arr.data();
633 
634         let offset_buffer = self.offsets_builder.finish();
635         let null_bit_buffer = self.bitmap_builder.finish();
636         self.offsets_builder.append(0).unwrap();
637         let data =
638             ArrayData::builder(DataType::List(Box::new(values_data.data_type().clone())))
639                 .len(len)
640                 .null_count(len - bit_util::count_set_bits(null_bit_buffer.data()))
641                 .add_buffer(offset_buffer)
642                 .add_child_data(values_data)
643                 .null_bit_buffer(null_bit_buffer)
644                 .build();
645 
646         ListArray::from(data)
647     }
648 }
649 
/// Array builder for `FixedSizeListArray`.
///
/// Child values are appended through the inner values builder; each call to
/// `append` then accounts for one list slot.
pub struct FixedSizeListBuilder<T: ArrayBuilder> {
    /// One bit per list slot, taken from the `is_valid` flag passed to `append`.
    bitmap_builder: BooleanBufferBuilder,
    /// Builder for the child values shared by all list slots.
    values_builder: T,
    /// Number of list slots appended so far.
    len: usize,
    /// Number of child values in each list slot.
    list_len: i32,
}
657 
658 impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
659     /// Creates a new `FixedSizeListBuilder` from a given values array builder
660     /// `length` is the number of values within each array
new(values_builder: T, length: i32) -> Self661     pub fn new(values_builder: T, length: i32) -> Self {
662         let capacity = values_builder.len();
663         Self::with_capacity(values_builder, length, capacity)
664     }
665 
666     /// Creates a new `FixedSizeListBuilder` from a given values array builder
667     /// `length` is the number of values within each array
668     /// `capacity` is the number of items to pre-allocate space for in this builder
with_capacity(values_builder: T, length: i32, capacity: usize) -> Self669     pub fn with_capacity(values_builder: T, length: i32, capacity: usize) -> Self {
670         let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
671         offsets_builder.append(0).unwrap();
672         Self {
673             bitmap_builder: BooleanBufferBuilder::new(capacity),
674             values_builder,
675             len: 0,
676             list_len: length,
677         }
678     }
679 }
680 
681 impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
682 where
683     T: 'static,
684 {
685     /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any686     fn as_any(&self) -> &Any {
687         self
688     }
689 
690     /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any691     fn as_any_mut(&mut self) -> &mut Any {
692         self
693     }
694 
695     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>696     fn into_box_any(self: Box<Self>) -> Box<Any> {
697         self
698     }
699 
700     /// Returns the number of array slots in the builder
len(&self) -> usize701     fn len(&self) -> usize {
702         self.len
703     }
704 
705     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef706     fn finish(&mut self) -> ArrayRef {
707         Arc::new(self.finish())
708     }
709 }
710 
711 impl<T: ArrayBuilder> FixedSizeListBuilder<T>
712 where
713     T: 'static,
714 {
715     /// Returns the child array builder as a mutable reference.
716     ///
717     /// This mutable reference can be used to append values into the child array builder,
718     /// but you must call `append` to delimit each distinct list value.
values(&mut self) -> &mut T719     pub fn values(&mut self) -> &mut T {
720         &mut self.values_builder
721     }
722 
value_length(&self) -> i32723     pub fn value_length(&self) -> i32 {
724         self.list_len
725     }
726 
727     /// Finish the current variable-length list array slot
append(&mut self, is_valid: bool) -> Result<()>728     pub fn append(&mut self, is_valid: bool) -> Result<()> {
729         self.bitmap_builder.append(is_valid)?;
730         self.len += 1;
731         Ok(())
732     }
733 
734     /// Builds the `FixedSizeListBuilder` and reset this builder.
finish(&mut self) -> FixedSizeListArray735     pub fn finish(&mut self) -> FixedSizeListArray {
736         let len = self.len();
737         self.len = 0;
738         let values_arr = self
739             .values_builder
740             .as_any_mut()
741             .downcast_mut::<T>()
742             .unwrap()
743             .finish();
744         let values_data = values_arr.data();
745 
746         // check that values_data length is multiple of len if we have data
747         if len != 0 {
748             assert!(
749                 values_data.len() / len == self.list_len as usize,
750                 "Values of FixedSizeList must have equal lengths, values have length {} and list has {}",
751                 values_data.len(),
752                 len
753             );
754         }
755 
756         let null_bit_buffer = self.bitmap_builder.finish();
757         let data = ArrayData::builder(DataType::FixedSizeList(
758             Box::new(values_data.data_type().clone()),
759             self.list_len,
760         ))
761         .len(len)
762         .null_count(len - bit_util::count_set_bits(null_bit_buffer.data()))
763         .add_child_data(values_data)
764         .null_bit_buffer(null_bit_buffer)
765         .build();
766 
767         FixedSizeListArray::from(data)
768     }
769 }
770 
/// Array builder for `BinaryArray`.
pub struct BinaryBuilder {
    /// Inner list-of-`u8` builder; `len()` is forwarded to it.
    builder: ListBuilder<UInt8Builder>,
}
775 
/// Array builder backed by a list-of-`u8` builder.
///
/// NOTE(review): presumably this builds a string array; confirm against the
/// inherent `finish` of this type.
pub struct StringBuilder {
    /// Inner list-of-`u8` builder; `len()` is forwarded to it.
    builder: ListBuilder<UInt8Builder>,
}
779 
/// Array builder backed by a fixed-size list-of-`u8` builder.
///
/// NOTE(review): presumably this builds a fixed-size binary array; confirm
/// against the inherent `finish` of this type.
pub struct FixedSizeBinaryBuilder {
    /// Inner fixed-size list-of-`u8` builder; `len()` is forwarded to it.
    builder: FixedSizeListBuilder<UInt8Builder>,
}
783 
/// Marker trait (no methods of its own) for the byte-oriented array builders
/// in this file.
pub trait BinaryArrayBuilder: ArrayBuilder {}

// The three builders that wrap a `u8` list builder all carry the marker.
impl BinaryArrayBuilder for BinaryBuilder {}
impl BinaryArrayBuilder for StringBuilder {}
impl BinaryArrayBuilder for FixedSizeBinaryBuilder {}
789 
790 impl ArrayBuilder for BinaryBuilder {
791     /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any792     fn as_any(&self) -> &Any {
793         self
794     }
795 
796     /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any797     fn as_any_mut(&mut self) -> &mut Any {
798         self
799     }
800 
801     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>802     fn into_box_any(self: Box<Self>) -> Box<Any> {
803         self
804     }
805 
806     /// Returns the number of array slots in the builder
len(&self) -> usize807     fn len(&self) -> usize {
808         self.builder.len()
809     }
810 
811     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef812     fn finish(&mut self) -> ArrayRef {
813         Arc::new(self.finish())
814     }
815 }
816 
817 impl ArrayBuilder for StringBuilder {
818     /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any819     fn as_any(&self) -> &Any {
820         self
821     }
822 
823     /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any824     fn as_any_mut(&mut self) -> &mut Any {
825         self
826     }
827 
828     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>829     fn into_box_any(self: Box<Self>) -> Box<Any> {
830         self
831     }
832 
833     /// Returns the number of array slots in the builder
len(&self) -> usize834     fn len(&self) -> usize {
835         self.builder.len()
836     }
837 
838     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef839     fn finish(&mut self) -> ArrayRef {
840         Arc::new(self.finish())
841     }
842 }
843 
844 impl ArrayBuilder for FixedSizeBinaryBuilder {
845     /// Returns the builder as a non-mutable `Any` reference.
as_any(&self) -> &Any846     fn as_any(&self) -> &Any {
847         self
848     }
849 
850     /// Returns the builder as a mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any851     fn as_any_mut(&mut self) -> &mut Any {
852         self
853     }
854 
855     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>856     fn into_box_any(self: Box<Self>) -> Box<Any> {
857         self
858     }
859 
860     /// Returns the number of array slots in the builder
len(&self) -> usize861     fn len(&self) -> usize {
862         self.builder.len()
863     }
864 
865     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef866     fn finish(&mut self) -> ArrayRef {
867         Arc::new(self.finish())
868     }
869 }
870 
871 impl BinaryBuilder {
872     /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
873     /// array
new(capacity: usize) -> Self874     pub fn new(capacity: usize) -> Self {
875         let values_builder = UInt8Builder::new(capacity);
876         Self {
877             builder: ListBuilder::new(values_builder),
878         }
879     }
880 
881     /// Appends a single byte value into the builder's values array.
882     ///
883     /// Note, when appending individual byte values you must call `append` to delimit each
884     /// distinct list value.
append_byte(&mut self, value: u8) -> Result<()>885     pub fn append_byte(&mut self, value: u8) -> Result<()> {
886         self.builder.values().append_value(value)?;
887         Ok(())
888     }
889 
890     /// Appends a byte slice into the builder.
891     ///
892     /// Automatically calls the `append` method to delimit the slice appended in as a
893     /// distinct array element.
append_value(&mut self, value: &[u8]) -> Result<()>894     pub fn append_value(&mut self, value: &[u8]) -> Result<()> {
895         self.builder.values().append_slice(value)?;
896         self.builder.append(true)?;
897         Ok(())
898     }
899 
900     /// Finish the current variable-length list array slot.
append(&mut self, is_valid: bool) -> Result<()>901     pub fn append(&mut self, is_valid: bool) -> Result<()> {
902         self.builder.append(is_valid)
903     }
904 
905     /// Append a null value to the array.
append_null(&mut self) -> Result<()>906     pub fn append_null(&mut self) -> Result<()> {
907         self.append(false)
908     }
909 
910     /// Builds the `BinaryArray` and reset this builder.
finish(&mut self) -> BinaryArray911     pub fn finish(&mut self) -> BinaryArray {
912         BinaryArray::from(self.builder.finish())
913     }
914 }
915 
916 impl StringBuilder {
917     /// Creates a new `StringBuilder`,
918     /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
new(capacity: usize) -> Self919     pub fn new(capacity: usize) -> Self {
920         let values_builder = UInt8Builder::new(capacity);
921         Self {
922             builder: ListBuilder::new(values_builder),
923         }
924     }
925 
926     /// Creates a new `StringBuilder`,
927     /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder
928     /// `item_capacity` is the number of items to pre-allocate space for in this builder
with_capacity(item_capacity: usize, data_capacity: usize) -> Self929     pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
930         let values_builder = UInt8Builder::new(data_capacity);
931         Self {
932             builder: ListBuilder::with_capacity(values_builder, item_capacity),
933         }
934     }
935 
936     /// Appends a string into the builder.
937     ///
938     /// Automatically calls the `append` method to delimit the string appended in as a
939     /// distinct array element.
append_value(&mut self, value: &str) -> Result<()>940     pub fn append_value(&mut self, value: &str) -> Result<()> {
941         self.builder.values().append_slice(value.as_bytes())?;
942         self.builder.append(true)?;
943         Ok(())
944     }
945 
946     /// Finish the current variable-length list array slot.
append(&mut self, is_valid: bool) -> Result<()>947     pub fn append(&mut self, is_valid: bool) -> Result<()> {
948         self.builder.append(is_valid)
949     }
950 
951     /// Append a null value to the array.
append_null(&mut self) -> Result<()>952     pub fn append_null(&mut self) -> Result<()> {
953         self.append(false)
954     }
955 
956     /// Builds the `StringArray` and reset this builder.
finish(&mut self) -> StringArray957     pub fn finish(&mut self) -> StringArray {
958         StringArray::from(self.builder.finish())
959     }
960 }
961 
962 impl FixedSizeBinaryBuilder {
963     /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
964     /// array
new(capacity: usize, byte_width: i32) -> Self965     pub fn new(capacity: usize, byte_width: i32) -> Self {
966         let values_builder = UInt8Builder::new(capacity);
967         Self {
968             builder: FixedSizeListBuilder::new(values_builder, byte_width),
969         }
970     }
971 
972     /// Appends a byte slice into the builder.
973     ///
974     /// Automatically calls the `append` method to delimit the slice appended in as a
975     /// distinct array element.
append_value(&mut self, value: &[u8]) -> Result<()>976     pub fn append_value(&mut self, value: &[u8]) -> Result<()> {
977         assert_eq!(
978             self.builder.value_length(),
979             value.len() as i32,
980             "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
981         );
982         self.builder.values().append_slice(value)?;
983         self.builder.append(true)
984     }
985 
986     /// Append a null value to the array.
append_null(&mut self) -> Result<()>987     pub fn append_null(&mut self) -> Result<()> {
988         let length: usize = self.builder.value_length() as usize;
989         self.builder.values().append_slice(&vec![0u8; length][..])?;
990         self.builder.append(false)
991     }
992 
993     /// Builds the `FixedSizeBinaryArray` and reset this builder.
finish(&mut self) -> FixedSizeBinaryArray994     pub fn finish(&mut self) -> FixedSizeBinaryArray {
995         FixedSizeBinaryArray::from(self.builder.finish())
996     }
997 }
998 
999 /// Array builder for Struct types.
1000 ///
1001 /// Note that callers should make sure that methods of all the child field builders are
1002 /// properly called to maintain the consistency of the data structure.
1003 pub struct StructBuilder {
1004     fields: Vec<Field>,
1005     field_anys: Vec<Box<Any>>,
1006     field_builders: Vec<Box<ArrayBuilder>>,
1007     bitmap_builder: BooleanBufferBuilder,
1008     len: usize,
1009 }
1010 
1011 impl ArrayBuilder for StructBuilder {
1012     /// Returns the number of array slots in the builder.
1013     ///
1014     /// Note that this always return the first child field builder's length, and it is
1015     /// the caller's responsibility to maintain the consistency that all the child field
1016     /// builder should have the equal number of elements.
len(&self) -> usize1017     fn len(&self) -> usize {
1018         self.len
1019     }
1020 
1021     /// Builds the array.
finish(&mut self) -> ArrayRef1022     fn finish(&mut self) -> ArrayRef {
1023         Arc::new(self.finish())
1024     }
1025 
1026     /// Returns the builder as a non-mutable `Any` reference.
1027     ///
1028     /// This is most useful when one wants to call non-mutable APIs on a specific builder
1029     /// type. In this case, one can first cast this into a `Any`, and then use
1030     /// `downcast_ref` to get a reference on the specific builder.
as_any(&self) -> &Any1031     fn as_any(&self) -> &Any {
1032         self
1033     }
1034 
1035     /// Returns the builder as a mutable `Any` reference.
1036     ///
1037     /// This is most useful when one wants to call mutable APIs on a specific builder
1038     /// type. In this case, one can first cast this into a `Any`, and then use
1039     /// `downcast_mut` to get a reference on the specific builder.
as_any_mut(&mut self) -> &mut Any1040     fn as_any_mut(&mut self) -> &mut Any {
1041         self
1042     }
1043 
1044     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>1045     fn into_box_any(self: Box<Self>) -> Box<Any> {
1046         self
1047     }
1048 }
1049 
1050 impl StructBuilder {
new(fields: Vec<Field>, builders: Vec<Box<ArrayBuilder>>) -> Self1051     pub fn new(fields: Vec<Field>, builders: Vec<Box<ArrayBuilder>>) -> Self {
1052         let mut field_anys = Vec::with_capacity(builders.len());
1053         let mut field_builders = Vec::with_capacity(builders.len());
1054 
1055         // Create and maintain two references for each of the input builder. We need the
1056         // extra `Any` reference because we need to cast the builder to a specific type
1057         // in `field_builder()` by calling `downcast_mut`.
1058         for f in builders.into_iter() {
1059             let raw_f = Box::into_raw(f);
1060             let raw_f_copy = raw_f;
1061             unsafe {
1062                 field_anys.push(Box::from_raw(raw_f).into_box_any());
1063                 field_builders.push(Box::from_raw(raw_f_copy));
1064             }
1065         }
1066 
1067         Self {
1068             fields,
1069             field_anys,
1070             field_builders,
1071             bitmap_builder: BooleanBufferBuilder::new(0),
1072             len: 0,
1073         }
1074     }
1075 
from_schema(schema: Schema, capacity: usize) -> Self1076     pub fn from_schema(schema: Schema, capacity: usize) -> Self {
1077         let fields = schema.fields();
1078         let mut builders = Vec::with_capacity(fields.len());
1079         for f in schema.fields() {
1080             builders.push(Self::from_field(f.clone(), capacity));
1081         }
1082         Self::new(schema.fields, builders)
1083     }
1084 
from_field(f: Field, capacity: usize) -> Box<ArrayBuilder>1085     fn from_field(f: Field, capacity: usize) -> Box<ArrayBuilder> {
1086         match f.data_type() {
1087             DataType::Null => unimplemented!(),
1088             DataType::Boolean => Box::new(BooleanBuilder::new(capacity)),
1089             DataType::Int8 => Box::new(Int8Builder::new(capacity)),
1090             DataType::Int16 => Box::new(Int16Builder::new(capacity)),
1091             DataType::Int32 => Box::new(Int32Builder::new(capacity)),
1092             DataType::Int64 => Box::new(Int64Builder::new(capacity)),
1093             DataType::UInt8 => Box::new(UInt8Builder::new(capacity)),
1094             DataType::UInt16 => Box::new(UInt16Builder::new(capacity)),
1095             DataType::UInt32 => Box::new(UInt32Builder::new(capacity)),
1096             DataType::UInt64 => Box::new(UInt64Builder::new(capacity)),
1097             DataType::Float32 => Box::new(Float32Builder::new(capacity)),
1098             DataType::Float64 => Box::new(Float64Builder::new(capacity)),
1099             DataType::Binary => Box::new(BinaryBuilder::new(capacity)),
1100             DataType::FixedSizeBinary(len) => {
1101                 Box::new(FixedSizeBinaryBuilder::new(capacity, *len))
1102             }
1103             DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
1104             DataType::Date32(DateUnit::Day) => Box::new(Date32Builder::new(capacity)),
1105             DataType::Date64(DateUnit::Millisecond) => {
1106                 Box::new(Date64Builder::new(capacity))
1107             }
1108             DataType::Time32(TimeUnit::Second) => {
1109                 Box::new(Time32SecondBuilder::new(capacity))
1110             }
1111             DataType::Time32(TimeUnit::Millisecond) => {
1112                 Box::new(Time32MillisecondBuilder::new(capacity))
1113             }
1114             DataType::Time64(TimeUnit::Microsecond) => {
1115                 Box::new(Time64MicrosecondBuilder::new(capacity))
1116             }
1117             DataType::Time64(TimeUnit::Nanosecond) => {
1118                 Box::new(Time64NanosecondBuilder::new(capacity))
1119             }
1120             DataType::Timestamp(TimeUnit::Second, _) => {
1121                 Box::new(TimestampSecondBuilder::new(capacity))
1122             }
1123             DataType::Timestamp(TimeUnit::Millisecond, _) => {
1124                 Box::new(TimestampMillisecondBuilder::new(capacity))
1125             }
1126             DataType::Timestamp(TimeUnit::Microsecond, _) => {
1127                 Box::new(TimestampMicrosecondBuilder::new(capacity))
1128             }
1129             DataType::Timestamp(TimeUnit::Nanosecond, _) => {
1130                 Box::new(TimestampNanosecondBuilder::new(capacity))
1131             }
1132             DataType::Interval(IntervalUnit::YearMonth) => {
1133                 Box::new(IntervalYearMonthBuilder::new(capacity))
1134             }
1135             DataType::Interval(IntervalUnit::DayTime) => {
1136                 Box::new(IntervalDayTimeBuilder::new(capacity))
1137             }
1138             DataType::Duration(TimeUnit::Second) => {
1139                 Box::new(DurationSecondBuilder::new(capacity))
1140             }
1141             DataType::Duration(TimeUnit::Millisecond) => {
1142                 Box::new(DurationMillisecondBuilder::new(capacity))
1143             }
1144             DataType::Duration(TimeUnit::Microsecond) => {
1145                 Box::new(DurationMicrosecondBuilder::new(capacity))
1146             }
1147             DataType::Duration(TimeUnit::Nanosecond) => {
1148                 Box::new(DurationNanosecondBuilder::new(capacity))
1149             }
1150             DataType::Struct(fields) => {
1151                 let schema = Schema::new(fields.clone());
1152                 Box::new(Self::from_schema(schema, capacity))
1153             }
1154             t => panic!("Data type {:?} is not currently supported", t),
1155         }
1156     }
1157 
1158     /// Returns a mutable reference to the child field builder at index `i`.
1159     /// Result will be `None` if the input type `T` provided doesn't match the actual
1160     /// field builder's type.
field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T>1161     pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
1162         self.field_anys[i].downcast_mut::<T>()
1163     }
1164 
1165     /// Returns the number of fields for the struct this builder is building.
num_fields(&self) -> usize1166     pub fn num_fields(&self) -> usize {
1167         self.field_builders.len()
1168     }
1169 
1170     /// Appends an element (either null or non-null) to the struct. The actual elements
1171     /// should be appended for each child sub-array in a consistent way.
append(&mut self, is_valid: bool) -> Result<()>1172     pub fn append(&mut self, is_valid: bool) -> Result<()> {
1173         self.bitmap_builder.append(is_valid)?;
1174         self.len += 1;
1175         Ok(())
1176     }
1177 
1178     /// Appends a null element to the struct.
append_null(&mut self) -> Result<()>1179     pub fn append_null(&mut self) -> Result<()> {
1180         self.append(false)
1181     }
1182 
1183     /// Builds the `StructArray` and reset this builder.
finish(&mut self) -> StructArray1184     pub fn finish(&mut self) -> StructArray {
1185         let mut child_data = Vec::with_capacity(self.field_builders.len());
1186         for f in &mut self.field_builders {
1187             let arr = f.finish();
1188             child_data.push(arr.data());
1189         }
1190 
1191         let null_bit_buffer = self.bitmap_builder.finish();
1192         let null_count = self.len - bit_util::count_set_bits(null_bit_buffer.data());
1193         let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone()))
1194             .len(self.len)
1195             .child_data(child_data);
1196         if null_count > 0 {
1197             builder = builder
1198                 .null_count(null_count)
1199                 .null_bit_buffer(null_bit_buffer);
1200         }
1201 
1202         self.len = 0;
1203 
1204         StructArray::from(builder.build())
1205     }
1206 }
1207 
1208 impl Drop for StructBuilder {
drop(&mut self)1209     fn drop(&mut self) {
1210         // To avoid double drop on the field array builders.
1211         let builders = std::mem::replace(&mut self.field_builders, Vec::new());
1212         std::mem::forget(builders);
1213     }
1214 }
1215 
/// Array builder for `DictionaryArray`. For example to map a set of byte indices
/// to f32 values. Note that the use of a `HashMap` here will not scale to very large
/// arrays or result in an ordered dictionary.
///
/// `K` is the primitive type of the keys (indices) and `V` the primitive type of the
/// dictionary values.
pub struct PrimitiveDictionaryBuilder<K, V>
where
    K: ArrowPrimitiveType,
    V: ArrowPrimitiveType,
{
    // One key per appended slot (or a null).
    keys_builder: PrimitiveBuilder<K>,
    // The distinct values; a key is the index of its value in this builder.
    values_builder: PrimitiveBuilder<V>,
    // Lookup from a value's byte representation to the key already assigned to it.
    map: HashMap<Box<[u8]>, K::Native>,
}
1228 
1229 impl<K, V> PrimitiveDictionaryBuilder<K, V>
1230 where
1231     K: ArrowPrimitiveType,
1232     V: ArrowPrimitiveType,
1233 {
1234     /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder.
new( keys_builder: PrimitiveBuilder<K>, values_builder: PrimitiveBuilder<V>, ) -> Self1235     pub fn new(
1236         keys_builder: PrimitiveBuilder<K>,
1237         values_builder: PrimitiveBuilder<V>,
1238     ) -> Self {
1239         Self {
1240             keys_builder,
1241             values_builder,
1242             map: HashMap::new(),
1243         }
1244     }
1245 }
1246 
1247 impl<K, V> ArrayBuilder for PrimitiveDictionaryBuilder<K, V>
1248 where
1249     K: ArrowPrimitiveType,
1250     V: ArrowPrimitiveType,
1251 {
1252     /// Returns the builder as an non-mutable `Any` reference.
as_any(&self) -> &Any1253     fn as_any(&self) -> &Any {
1254         self
1255     }
1256 
1257     /// Returns the builder as an mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any1258     fn as_any_mut(&mut self) -> &mut Any {
1259         self
1260     }
1261 
1262     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>1263     fn into_box_any(self: Box<Self>) -> Box<Any> {
1264         self
1265     }
1266 
1267     /// Returns the number of array slots in the builder
len(&self) -> usize1268     fn len(&self) -> usize {
1269         self.keys_builder.len()
1270     }
1271 
1272     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef1273     fn finish(&mut self) -> ArrayRef {
1274         Arc::new(self.finish())
1275     }
1276 }
1277 
1278 impl<K, V> PrimitiveDictionaryBuilder<K, V>
1279 where
1280     K: ArrowPrimitiveType,
1281     V: ArrowPrimitiveType,
1282 {
1283     /// Append a primitive value to the array. Return an existing index
1284     /// if already present in the values array or a new index if the
1285     /// value is appended to the values array.
append(&mut self, value: V::Native) -> Result<K::Native>1286     pub fn append(&mut self, value: V::Native) -> Result<K::Native> {
1287         if let Some(&key) = self.map.get(value.to_byte_slice()) {
1288             // Append existing value.
1289             self.keys_builder.append_value(key)?;
1290             Ok(key)
1291         } else {
1292             // Append new value.
1293             let key = K::Native::from_usize(self.values_builder.len())
1294                 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
1295             self.values_builder.append_value(value)?;
1296             self.keys_builder.append_value(key as K::Native)?;
1297             self.map.insert(value.to_byte_slice().into(), key);
1298             Ok(key)
1299         }
1300     }
1301 
append_null(&mut self) -> Result<()>1302     pub fn append_null(&mut self) -> Result<()> {
1303         self.keys_builder.append_null()
1304     }
1305 
1306     /// Builds the `DictionaryArray` and reset this builder.
finish(&mut self) -> DictionaryArray<K>1307     pub fn finish(&mut self) -> DictionaryArray<K> {
1308         self.map.clear();
1309         let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
1310         self.keys_builder.finish_dict(value_ref)
1311     }
1312 }
1313 
/// Array builder for `DictionaryArray` whose dictionary values are strings. For
/// example to map a set of integer keys to string values. Note that the use of a
/// `HashMap` here will not scale to very large arrays or result in an ordered
/// dictionary.
pub struct StringDictionaryBuilder<K>
where
    K: ArrowDictionaryKeyType,
{
    // One key per appended slot (or a null).
    keys_builder: PrimitiveBuilder<K>,
    // The distinct strings; a key is the index of its string in this builder.
    values_builder: StringBuilder,
    // Lookup from a string's bytes to the key already assigned to it.
    map: HashMap<Box<[u8]>, K::Native>,
}
1325 
1326 impl<K> StringDictionaryBuilder<K>
1327 where
1328     K: ArrowDictionaryKeyType,
1329 {
1330     /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder.
new(keys_builder: PrimitiveBuilder<K>, values_builder: StringBuilder) -> Self1331     pub fn new(keys_builder: PrimitiveBuilder<K>, values_builder: StringBuilder) -> Self {
1332         Self {
1333             keys_builder,
1334             values_builder,
1335             map: HashMap::new(),
1336         }
1337     }
1338 
1339     /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary
1340     /// which is initialized with the given values.
1341     /// The indices of those dictionary values are used as keys.
1342     ///
1343     /// # Example
1344     ///
1345     /// ```
1346     /// use arrow::datatypes::Int16Type;
1347     /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder};
1348     /// use std::convert::TryFrom;
1349     ///
1350     /// let dictionary_values = StringArray::try_from(vec![None, Some("abc"), Some("def")]).unwrap();
1351     ///
1352     /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::<Int16Type>::new(3), &dictionary_values).unwrap();
1353     /// builder.append("def").unwrap();
1354     /// builder.append_null().unwrap();
1355     /// builder.append("abc").unwrap();
1356     ///
1357     /// let dictionary_array = builder.finish();
1358     ///
1359     /// let keys: Vec<Option<i16>> = dictionary_array.keys().collect();
1360     ///
1361     /// assert_eq!(keys, vec![Some(2), None, Some(1)]);
1362     /// ```
new_with_dictionary( keys_builder: PrimitiveBuilder<K>, dictionary_values: &StringArray, ) -> Result<Self>1363     pub fn new_with_dictionary(
1364         keys_builder: PrimitiveBuilder<K>,
1365         dictionary_values: &StringArray,
1366     ) -> Result<Self> {
1367         let dict_len = dictionary_values.len();
1368         let mut values_builder =
1369             StringBuilder::with_capacity(dict_len, dictionary_values.value_data().len());
1370         let mut map: HashMap<Box<[u8]>, K::Native> = HashMap::with_capacity(dict_len);
1371         for i in 0..dict_len {
1372             if dictionary_values.is_valid(i) {
1373                 let value = dictionary_values.value(i);
1374                 map.insert(
1375                     value.as_bytes().into(),
1376                     K::Native::from_usize(i)
1377                         .ok_or(ArrowError::DictionaryKeyOverflowError)?,
1378                 );
1379                 values_builder.append_value(value)?;
1380             } else {
1381                 values_builder.append_null()?;
1382             }
1383         }
1384         Ok(Self {
1385             keys_builder,
1386             values_builder,
1387             map,
1388         })
1389     }
1390 }
1391 
1392 impl<K> ArrayBuilder for StringDictionaryBuilder<K>
1393 where
1394     K: ArrowDictionaryKeyType,
1395 {
1396     /// Returns the builder as an non-mutable `Any` reference.
as_any(&self) -> &Any1397     fn as_any(&self) -> &Any {
1398         self
1399     }
1400 
1401     /// Returns the builder as an mutable `Any` reference.
as_any_mut(&mut self) -> &mut Any1402     fn as_any_mut(&mut self) -> &mut Any {
1403         self
1404     }
1405 
1406     /// Returns the boxed builder as a box of `Any`.
into_box_any(self: Box<Self>) -> Box<Any>1407     fn into_box_any(self: Box<Self>) -> Box<Any> {
1408         self
1409     }
1410 
1411     /// Returns the number of array slots in the builder
len(&self) -> usize1412     fn len(&self) -> usize {
1413         self.keys_builder.len()
1414     }
1415 
1416     /// Builds the array and reset this builder.
finish(&mut self) -> ArrayRef1417     fn finish(&mut self) -> ArrayRef {
1418         Arc::new(self.finish())
1419     }
1420 }
1421 
1422 impl<K> StringDictionaryBuilder<K>
1423 where
1424     K: ArrowDictionaryKeyType,
1425 {
1426     /// Append a primitive value to the array. Return an existing index
1427     /// if already present in the values array or a new index if the
1428     /// value is appended to the values array.
append(&mut self, value: &str) -> Result<K::Native>1429     pub fn append(&mut self, value: &str) -> Result<K::Native> {
1430         if let Some(&key) = self.map.get(value.as_bytes()) {
1431             // Append existing value.
1432             self.keys_builder.append_value(key)?;
1433             Ok(key)
1434         } else {
1435             // Append new value.
1436             let key = K::Native::from_usize(self.values_builder.len())
1437                 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
1438             self.values_builder.append_value(value)?;
1439             self.keys_builder.append_value(key as K::Native)?;
1440             self.map.insert(value.as_bytes().into(), key);
1441             Ok(key)
1442         }
1443     }
1444 
append_null(&mut self) -> Result<()>1445     pub fn append_null(&mut self) -> Result<()> {
1446         self.keys_builder.append_null()
1447     }
1448 
1449     /// Builds the `DictionaryArray` and reset this builder.
finish(&mut self) -> DictionaryArray<K>1450     pub fn finish(&mut self) -> DictionaryArray<K> {
1451         self.map.clear();
1452         let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
1453         self.keys_builder.finish_dict(value_ref)
1454     }
1455 }
1456 
1457 #[cfg(test)]
1458 mod tests {
1459     use super::*;
1460 
1461     use crate::array::Array;
1462     use crate::bitmap::Bitmap;
1463     use std::convert::TryFrom;
1464 
1465     #[test]
test_builder_i32_empty()1466     fn test_builder_i32_empty() {
1467         let mut b = Int32BufferBuilder::new(5);
1468         assert_eq!(0, b.len());
1469         assert_eq!(16, b.capacity());
1470         let a = b.finish();
1471         assert_eq!(0, a.len());
1472     }
1473 
1474     #[test]
test_builder_i32_alloc_zero_bytes()1475     fn test_builder_i32_alloc_zero_bytes() {
1476         let mut b = Int32BufferBuilder::new(0);
1477         b.append(123).unwrap();
1478         let a = b.finish();
1479         assert_eq!(4, a.len());
1480     }
1481 
1482     #[test]
test_builder_i32()1483     fn test_builder_i32() {
1484         let mut b = Int32BufferBuilder::new(5);
1485         for i in 0..5 {
1486             b.append(i).unwrap();
1487         }
1488         assert_eq!(16, b.capacity());
1489         let a = b.finish();
1490         assert_eq!(20, a.len());
1491     }
1492 
1493     #[test]
test_builder_i32_grow_buffer()1494     fn test_builder_i32_grow_buffer() {
1495         let mut b = Int32BufferBuilder::new(2);
1496         assert_eq!(16, b.capacity());
1497         for i in 0..20 {
1498             b.append(i).unwrap();
1499         }
1500         assert_eq!(32, b.capacity());
1501         let a = b.finish();
1502         assert_eq!(80, a.len());
1503     }
1504 
1505     #[test]
test_builder_finish()1506     fn test_builder_finish() {
1507         let mut b = Int32BufferBuilder::new(5);
1508         assert_eq!(16, b.capacity());
1509         for i in 0..10 {
1510             b.append(i).unwrap();
1511         }
1512         let mut a = b.finish();
1513         assert_eq!(40, a.len());
1514         assert_eq!(0, b.len());
1515         assert_eq!(0, b.capacity());
1516 
1517         // Try build another buffer after cleaning up.
1518         for i in 0..20 {
1519             b.append(i).unwrap()
1520         }
1521         assert_eq!(32, b.capacity());
1522         a = b.finish();
1523         assert_eq!(80, a.len());
1524     }
1525 
1526     #[test]
test_reserve()1527     fn test_reserve() {
1528         let mut b = UInt8BufferBuilder::new(2);
1529         assert_eq!(64, b.capacity());
1530         b.reserve(64).unwrap();
1531         assert_eq!(64, b.capacity());
1532         b.reserve(65).unwrap();
1533         assert_eq!(128, b.capacity());
1534 
1535         let mut b = Int32BufferBuilder::new(2);
1536         assert_eq!(16, b.capacity());
1537         b.reserve(16).unwrap();
1538         assert_eq!(16, b.capacity());
1539         b.reserve(17).unwrap();
1540         assert_eq!(32, b.capacity());
1541     }
1542 
1543     #[test]
test_append_slice()1544     fn test_append_slice() {
1545         let mut b = UInt8BufferBuilder::new(0);
1546         b.append_slice("Hello, ".as_bytes()).unwrap();
1547         b.append_slice("World!".as_bytes()).unwrap();
1548         let buffer = b.finish();
1549         assert_eq!(13, buffer.len());
1550 
1551         let mut b = Int32BufferBuilder::new(0);
1552         b.append_slice(&[32, 54]).unwrap();
1553         let buffer = b.finish();
1554         assert_eq!(8, buffer.len());
1555     }
1556 
1557     #[test]
test_write_bytes()1558     fn test_write_bytes() {
1559         let mut b = BooleanBufferBuilder::new(4);
1560         b.append(false).unwrap();
1561         b.append(true).unwrap();
1562         b.append(false).unwrap();
1563         b.append(true).unwrap();
1564         assert_eq!(4, b.len());
1565         assert_eq!(512, b.capacity());
1566         let buffer = b.finish();
1567         assert_eq!(1, buffer.len());
1568 
1569         let mut b = BooleanBufferBuilder::new(4);
1570         b.append_slice(&[false, true, false, true]).unwrap();
1571         assert_eq!(4, b.len());
1572         assert_eq!(512, b.capacity());
1573         let buffer = b.finish();
1574         assert_eq!(1, buffer.len());
1575     }
1576 
1577     #[test]
test_write_bytes_i32()1578     fn test_write_bytes_i32() {
1579         let mut b = Int32BufferBuilder::new(4);
1580         let bytes = [8, 16, 32, 64].to_byte_slice();
1581         b.write_bytes(bytes, 4).unwrap();
1582         assert_eq!(4, b.len());
1583         assert_eq!(16, b.capacity());
1584         let buffer = b.finish();
1585         assert_eq!(16, buffer.len());
1586     }
1587 
1588     #[test]
1589     #[should_panic(expected = "Could not write to Buffer, not big enough")]
test_write_too_many_bytes()1590     fn test_write_too_many_bytes() {
1591         let mut b = Int32BufferBuilder::new(0);
1592         let bytes = [8, 16, 32, 64].to_byte_slice();
1593         b.write_bytes(bytes, 4).unwrap();
1594     }
1595 
1596     #[test]
test_boolean_array_builder_append_slice()1597     fn test_boolean_array_builder_append_slice() {
1598         let arr1 =
1599             BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
1600 
1601         let mut builder = BooleanArray::builder(0);
1602         builder.append_slice(&[true, false]).unwrap();
1603         builder.append_null().unwrap();
1604         builder.append_null().unwrap();
1605         builder.append_value(false).unwrap();
1606         let arr2 = builder.finish();
1607 
1608         assert_eq!(arr1.len(), arr2.len());
1609         assert_eq!(arr1.offset(), arr2.offset());
1610         assert_eq!(arr1.null_count(), arr2.null_count());
1611         for i in 0..5 {
1612             assert_eq!(arr1.is_null(i), arr2.is_null(i));
1613             assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
1614             if arr1.is_valid(i) {
1615                 assert_eq!(arr1.value(i), arr2.value(i));
1616             }
1617         }
1618     }
1619 
1620     #[test]
test_boolean_builder_increases_buffer_len()1621     fn test_boolean_builder_increases_buffer_len() {
1622         // 00000010 01001000
1623         let buf = Buffer::from([72_u8, 2_u8]);
1624         let mut builder = BooleanBufferBuilder::new(8);
1625 
1626         for i in 0..10 {
1627             if i == 3 || i == 6 || i == 9 {
1628                 builder.append(true).unwrap();
1629             } else {
1630                 builder.append(false).unwrap();
1631             }
1632         }
1633         let buf2 = builder.finish();
1634 
1635         assert_eq!(buf.len(), buf2.len());
1636         assert_eq!(buf.data(), buf2.data());
1637     }
1638 
1639     #[test]
test_primitive_array_builder_i32()1640     fn test_primitive_array_builder_i32() {
1641         let mut builder = Int32Array::builder(5);
1642         for i in 0..5 {
1643             builder.append_value(i).unwrap();
1644         }
1645         let arr = builder.finish();
1646         assert_eq!(5, arr.len());
1647         assert_eq!(0, arr.offset());
1648         assert_eq!(0, arr.null_count());
1649         for i in 0..5 {
1650             assert!(!arr.is_null(i));
1651             assert!(arr.is_valid(i));
1652             assert_eq!(i as i32, arr.value(i));
1653         }
1654     }
1655 
1656     #[test]
test_primitive_array_builder_date32()1657     fn test_primitive_array_builder_date32() {
1658         let mut builder = Date32Array::builder(5);
1659         for i in 0..5 {
1660             builder.append_value(i).unwrap();
1661         }
1662         let arr = builder.finish();
1663         assert_eq!(5, arr.len());
1664         assert_eq!(0, arr.offset());
1665         assert_eq!(0, arr.null_count());
1666         for i in 0..5 {
1667             assert!(!arr.is_null(i));
1668             assert!(arr.is_valid(i));
1669             assert_eq!(i as i32, arr.value(i));
1670         }
1671     }
1672 
1673     #[test]
test_primitive_array_builder_timestamp_second()1674     fn test_primitive_array_builder_timestamp_second() {
1675         let mut builder = TimestampSecondArray::builder(5);
1676         for i in 0..5 {
1677             builder.append_value(i).unwrap();
1678         }
1679         let arr = builder.finish();
1680         assert_eq!(5, arr.len());
1681         assert_eq!(0, arr.offset());
1682         assert_eq!(0, arr.null_count());
1683         for i in 0..5 {
1684             assert!(!arr.is_null(i));
1685             assert!(arr.is_valid(i));
1686             assert_eq!(i as i64, arr.value(i));
1687         }
1688     }
1689 
1690     #[test]
test_primitive_array_builder_bool()1691     fn test_primitive_array_builder_bool() {
1692         // 00000010 01001000
1693         let buf = Buffer::from([72_u8, 2_u8]);
1694         let mut builder = BooleanArray::builder(10);
1695         for i in 0..10 {
1696             if i == 3 || i == 6 || i == 9 {
1697                 builder.append_value(true).unwrap();
1698             } else {
1699                 builder.append_value(false).unwrap();
1700             }
1701         }
1702 
1703         let arr = builder.finish();
1704         assert_eq!(buf, arr.values());
1705         assert_eq!(10, arr.len());
1706         assert_eq!(0, arr.offset());
1707         assert_eq!(0, arr.null_count());
1708         for i in 0..10 {
1709             assert!(!arr.is_null(i));
1710             assert!(arr.is_valid(i));
1711             assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i)
1712         }
1713     }
1714 
1715     #[test]
test_primitive_array_builder_append_option()1716     fn test_primitive_array_builder_append_option() {
1717         let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
1718 
1719         let mut builder = Int32Array::builder(5);
1720         builder.append_option(Some(0)).unwrap();
1721         builder.append_option(None).unwrap();
1722         builder.append_option(Some(2)).unwrap();
1723         builder.append_option(None).unwrap();
1724         builder.append_option(Some(4)).unwrap();
1725         let arr2 = builder.finish();
1726 
1727         assert_eq!(arr1.len(), arr2.len());
1728         assert_eq!(arr1.offset(), arr2.offset());
1729         assert_eq!(arr1.null_count(), arr2.null_count());
1730         for i in 0..5 {
1731             assert_eq!(arr1.is_null(i), arr2.is_null(i));
1732             assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
1733             if arr1.is_valid(i) {
1734                 assert_eq!(arr1.value(i), arr2.value(i));
1735             }
1736         }
1737     }
1738 
1739     #[test]
test_primitive_array_builder_append_null()1740     fn test_primitive_array_builder_append_null() {
1741         let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);
1742 
1743         let mut builder = Int32Array::builder(5);
1744         builder.append_value(0).unwrap();
1745         builder.append_value(2).unwrap();
1746         builder.append_null().unwrap();
1747         builder.append_null().unwrap();
1748         builder.append_value(4).unwrap();
1749         let arr2 = builder.finish();
1750 
1751         assert_eq!(arr1.len(), arr2.len());
1752         assert_eq!(arr1.offset(), arr2.offset());
1753         assert_eq!(arr1.null_count(), arr2.null_count());
1754         for i in 0..5 {
1755             assert_eq!(arr1.is_null(i), arr2.is_null(i));
1756             assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
1757             if arr1.is_valid(i) {
1758                 assert_eq!(arr1.value(i), arr2.value(i));
1759             }
1760         }
1761     }
1762 
1763     #[test]
test_primitive_array_builder_append_slice()1764     fn test_primitive_array_builder_append_slice() {
1765         let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);
1766 
1767         let mut builder = Int32Array::builder(5);
1768         builder.append_slice(&[0, 2]).unwrap();
1769         builder.append_null().unwrap();
1770         builder.append_null().unwrap();
1771         builder.append_value(4).unwrap();
1772         let arr2 = builder.finish();
1773 
1774         assert_eq!(arr1.len(), arr2.len());
1775         assert_eq!(arr1.offset(), arr2.offset());
1776         assert_eq!(arr1.null_count(), arr2.null_count());
1777         for i in 0..5 {
1778             assert_eq!(arr1.is_null(i), arr2.is_null(i));
1779             assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
1780             if arr1.is_valid(i) {
1781                 assert_eq!(arr1.value(i), arr2.value(i));
1782             }
1783         }
1784     }
1785 
1786     #[test]
test_primitive_array_builder_finish()1787     fn test_primitive_array_builder_finish() {
1788         let mut builder = Int32Builder::new(5);
1789         builder.append_slice(&[2, 4, 6, 8]).unwrap();
1790         let mut arr = builder.finish();
1791         assert_eq!(4, arr.len());
1792         assert_eq!(0, builder.len());
1793 
1794         builder.append_slice(&[1, 3, 5, 7, 9]).unwrap();
1795         arr = builder.finish();
1796         assert_eq!(5, arr.len());
1797         assert_eq!(0, builder.len());
1798     }
1799 
1800     #[test]
test_list_array_builder()1801     fn test_list_array_builder() {
1802         let values_builder = Int32Builder::new(10);
1803         let mut builder = ListBuilder::new(values_builder);
1804 
1805         //  [[0, 1, 2], [3, 4, 5], [6, 7]]
1806         builder.values().append_value(0).unwrap();
1807         builder.values().append_value(1).unwrap();
1808         builder.values().append_value(2).unwrap();
1809         builder.append(true).unwrap();
1810         builder.values().append_value(3).unwrap();
1811         builder.values().append_value(4).unwrap();
1812         builder.values().append_value(5).unwrap();
1813         builder.append(true).unwrap();
1814         builder.values().append_value(6).unwrap();
1815         builder.values().append_value(7).unwrap();
1816         builder.append(true).unwrap();
1817         let list_array = builder.finish();
1818 
1819         let values = list_array.values().data().buffers()[0].clone();
1820         assert_eq!(
1821             Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()),
1822             values
1823         );
1824         assert_eq!(
1825             Buffer::from(&[0, 3, 6, 8].to_byte_slice()),
1826             list_array.data().buffers()[0].clone()
1827         );
1828         assert_eq!(DataType::Int32, list_array.value_type());
1829         assert_eq!(3, list_array.len());
1830         assert_eq!(0, list_array.null_count());
1831         assert_eq!(6, list_array.value_offset(2));
1832         assert_eq!(2, list_array.value_length(2));
1833         for i in 0..3 {
1834             assert!(list_array.is_valid(i));
1835             assert!(!list_array.is_null(i));
1836         }
1837     }
1838 
1839     #[test]
test_list_array_builder_nulls()1840     fn test_list_array_builder_nulls() {
1841         let values_builder = Int32Builder::new(10);
1842         let mut builder = ListBuilder::new(values_builder);
1843 
1844         //  [[0, 1, 2], null, [3, null, 5], [6, 7]]
1845         builder.values().append_value(0).unwrap();
1846         builder.values().append_value(1).unwrap();
1847         builder.values().append_value(2).unwrap();
1848         builder.append(true).unwrap();
1849         builder.append(false).unwrap();
1850         builder.values().append_value(3).unwrap();
1851         builder.values().append_null().unwrap();
1852         builder.values().append_value(5).unwrap();
1853         builder.append(true).unwrap();
1854         builder.values().append_value(6).unwrap();
1855         builder.values().append_value(7).unwrap();
1856         builder.append(true).unwrap();
1857         let list_array = builder.finish();
1858 
1859         assert_eq!(DataType::Int32, list_array.value_type());
1860         assert_eq!(4, list_array.len());
1861         assert_eq!(1, list_array.null_count());
1862         assert_eq!(3, list_array.value_offset(2));
1863         assert_eq!(3, list_array.value_length(2));
1864     }
1865 
1866     #[test]
test_fixed_size_list_array_builder()1867     fn test_fixed_size_list_array_builder() {
1868         let values_builder = Int32Builder::new(10);
1869         let mut builder = FixedSizeListBuilder::new(values_builder, 3);
1870 
1871         //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
1872         builder.values().append_value(0).unwrap();
1873         builder.values().append_value(1).unwrap();
1874         builder.values().append_value(2).unwrap();
1875         builder.append(true).unwrap();
1876         builder.values().append_null().unwrap();
1877         builder.values().append_null().unwrap();
1878         builder.values().append_null().unwrap();
1879         builder.append(false).unwrap();
1880         builder.values().append_value(3).unwrap();
1881         builder.values().append_null().unwrap();
1882         builder.values().append_value(5).unwrap();
1883         builder.append(true).unwrap();
1884         builder.values().append_value(6).unwrap();
1885         builder.values().append_value(7).unwrap();
1886         builder.values().append_null().unwrap();
1887         builder.append(true).unwrap();
1888         let list_array = builder.finish();
1889 
1890         assert_eq!(DataType::Int32, list_array.value_type());
1891         assert_eq!(4, list_array.len());
1892         assert_eq!(1, list_array.null_count());
1893         assert_eq!(6, list_array.value_offset(2));
1894         assert_eq!(3, list_array.value_length());
1895     }
1896 
1897     #[test]
test_list_array_builder_finish()1898     fn test_list_array_builder_finish() {
1899         let values_builder = Int32Array::builder(5);
1900         let mut builder = ListBuilder::new(values_builder);
1901 
1902         builder.values().append_slice(&[1, 2, 3]).unwrap();
1903         builder.append(true).unwrap();
1904         builder.values().append_slice(&[4, 5, 6]).unwrap();
1905         builder.append(true).unwrap();
1906 
1907         let mut arr = builder.finish();
1908         assert_eq!(2, arr.len());
1909         assert_eq!(0, builder.len());
1910 
1911         builder.values().append_slice(&[7, 8, 9]).unwrap();
1912         builder.append(true).unwrap();
1913         arr = builder.finish();
1914         assert_eq!(1, arr.len());
1915         assert_eq!(0, builder.len());
1916     }
1917 
1918     #[test]
test_fixed_size_list_array_builder_empty()1919     fn test_fixed_size_list_array_builder_empty() {
1920         let values_builder = Int32Array::builder(5);
1921         let mut builder = FixedSizeListBuilder::new(values_builder, 3);
1922 
1923         let arr = builder.finish();
1924         assert_eq!(0, arr.len());
1925         assert_eq!(0, builder.len());
1926     }
1927 
1928     #[test]
test_fixed_size_list_array_builder_finish()1929     fn test_fixed_size_list_array_builder_finish() {
1930         let values_builder = Int32Array::builder(5);
1931         let mut builder = FixedSizeListBuilder::new(values_builder, 3);
1932 
1933         builder.values().append_slice(&[1, 2, 3]).unwrap();
1934         builder.append(true).unwrap();
1935         builder.values().append_slice(&[4, 5, 6]).unwrap();
1936         builder.append(true).unwrap();
1937 
1938         let mut arr = builder.finish();
1939         assert_eq!(2, arr.len());
1940         assert_eq!(0, builder.len());
1941 
1942         builder.values().append_slice(&[7, 8, 9]).unwrap();
1943         builder.append(true).unwrap();
1944         arr = builder.finish();
1945         assert_eq!(1, arr.len());
1946         assert_eq!(0, builder.len());
1947     }
1948 
1949     #[test]
test_list_list_array_builder()1950     fn test_list_list_array_builder() {
1951         let primitive_builder = Int32Builder::new(10);
1952         let values_builder = ListBuilder::new(primitive_builder);
1953         let mut builder = ListBuilder::new(values_builder);
1954 
1955         //  [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
1956         builder.values().values().append_value(1).unwrap();
1957         builder.values().values().append_value(2).unwrap();
1958         builder.values().append(true).unwrap();
1959         builder.values().values().append_value(3).unwrap();
1960         builder.values().values().append_value(4).unwrap();
1961         builder.values().append(true).unwrap();
1962         builder.append(true).unwrap();
1963 
1964         builder.values().values().append_value(5).unwrap();
1965         builder.values().values().append_value(6).unwrap();
1966         builder.values().values().append_value(7).unwrap();
1967         builder.values().append(true).unwrap();
1968         builder.values().append(false).unwrap();
1969         builder.values().values().append_value(8).unwrap();
1970         builder.values().append(true).unwrap();
1971         builder.append(true).unwrap();
1972 
1973         builder.append(false).unwrap();
1974 
1975         builder.values().values().append_value(9).unwrap();
1976         builder.values().values().append_value(10).unwrap();
1977         builder.values().append(true).unwrap();
1978         builder.append(true).unwrap();
1979 
1980         let list_array = builder.finish();
1981 
1982         assert_eq!(4, list_array.len());
1983         assert_eq!(1, list_array.null_count());
1984         assert_eq!(
1985             Buffer::from(&[0, 2, 5, 5, 6].to_byte_slice()),
1986             list_array.data().buffers()[0].clone()
1987         );
1988 
1989         assert_eq!(6, list_array.values().data().len());
1990         assert_eq!(1, list_array.values().data().null_count());
1991         assert_eq!(
1992             Buffer::from(&[0, 2, 4, 7, 7, 8, 10].to_byte_slice()),
1993             list_array.values().data().buffers()[0].clone()
1994         );
1995 
1996         assert_eq!(10, list_array.values().data().child_data()[0].len());
1997         assert_eq!(0, list_array.values().data().child_data()[0].null_count());
1998         assert_eq!(
1999             Buffer::from(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].to_byte_slice()),
2000             list_array.values().data().child_data()[0].buffers()[0].clone()
2001         );
2002     }
2003 
2004     #[test]
test_binary_array_builder()2005     fn test_binary_array_builder() {
2006         let mut builder = BinaryBuilder::new(20);
2007 
2008         builder.append_byte(b'h').unwrap();
2009         builder.append_byte(b'e').unwrap();
2010         builder.append_byte(b'l').unwrap();
2011         builder.append_byte(b'l').unwrap();
2012         builder.append_byte(b'o').unwrap();
2013         builder.append(true).unwrap();
2014         builder.append(true).unwrap();
2015         builder.append_byte(b'w').unwrap();
2016         builder.append_byte(b'o').unwrap();
2017         builder.append_byte(b'r').unwrap();
2018         builder.append_byte(b'l').unwrap();
2019         builder.append_byte(b'd').unwrap();
2020         builder.append(true).unwrap();
2021 
2022         let array = builder.finish();
2023 
2024         let binary_array = BinaryArray::from(array);
2025 
2026         assert_eq!(3, binary_array.len());
2027         assert_eq!(0, binary_array.null_count());
2028         assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0));
2029         assert_eq!([] as [u8; 0], binary_array.value(1));
2030         assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2));
2031         assert_eq!(5, binary_array.value_offset(2));
2032         assert_eq!(5, binary_array.value_length(2));
2033     }
2034 
2035     #[test]
test_string_array_builder()2036     fn test_string_array_builder() {
2037         let mut builder = StringBuilder::new(20);
2038 
2039         builder.append_value("hello").unwrap();
2040         builder.append(true).unwrap();
2041         builder.append_value("world").unwrap();
2042 
2043         let array = builder.finish();
2044 
2045         let string_array = StringArray::from(array);
2046 
2047         assert_eq!(3, string_array.len());
2048         assert_eq!(0, string_array.null_count());
2049         assert_eq!("hello", string_array.value(0));
2050         assert_eq!("", string_array.value(1));
2051         assert_eq!("world", string_array.value(2));
2052         assert_eq!(5, string_array.value_offset(2));
2053         assert_eq!(5, string_array.value_length(2));
2054     }
2055 
2056     #[test]
test_fixed_size_binary_builder()2057     fn test_fixed_size_binary_builder() {
2058         let mut builder = FixedSizeBinaryBuilder::new(15, 5);
2059 
2060         //  [b"hello", null, "arrow"]
2061         builder.append_value(b"hello").unwrap();
2062         builder.append_null().unwrap();
2063         builder.append_value(b"arrow").unwrap();
2064         let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish();
2065 
2066         assert_eq!(
2067             &DataType::FixedSizeBinary(5),
2068             fixed_size_binary_array.data_type()
2069         );
2070         assert_eq!(3, fixed_size_binary_array.len());
2071         assert_eq!(1, fixed_size_binary_array.null_count());
2072         assert_eq!(10, fixed_size_binary_array.value_offset(2));
2073         assert_eq!(5, fixed_size_binary_array.value_length());
2074     }
2075 
2076     #[test]
test_string_array_builder_finish()2077     fn test_string_array_builder_finish() {
2078         let mut builder = StringBuilder::new(10);
2079 
2080         builder.append_value("hello").unwrap();
2081         builder.append_value("world").unwrap();
2082 
2083         let mut arr = builder.finish();
2084         assert_eq!(2, arr.len());
2085         assert_eq!(0, builder.len());
2086 
2087         builder.append_value("arrow").unwrap();
2088         arr = builder.finish();
2089         assert_eq!(1, arr.len());
2090         assert_eq!(0, builder.len());
2091     }
2092 
2093     #[test]
test_string_array_builder_append_string()2094     fn test_string_array_builder_append_string() {
2095         let mut builder = StringBuilder::new(20);
2096 
2097         let var = "hello".to_owned();
2098         builder.append_value(&var).unwrap();
2099         builder.append(true).unwrap();
2100         builder.append_value("world").unwrap();
2101 
2102         let array = builder.finish();
2103 
2104         let string_array = StringArray::from(array);
2105 
2106         assert_eq!(3, string_array.len());
2107         assert_eq!(0, string_array.null_count());
2108         assert_eq!("hello", string_array.value(0));
2109         assert_eq!("", string_array.value(1));
2110         assert_eq!("world", string_array.value(2));
2111         assert_eq!(5, string_array.value_offset(2));
2112         assert_eq!(5, string_array.value_length(2));
2113     }
2114 
2115     #[test]
test_struct_array_builder()2116     fn test_struct_array_builder() {
2117         let string_builder = StringBuilder::new(4);
2118         let int_builder = Int32Builder::new(4);
2119 
2120         let mut fields = Vec::new();
2121         let mut field_builders = Vec::new();
2122         fields.push(Field::new("f1", DataType::Utf8, false));
2123         field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
2124         fields.push(Field::new("f2", DataType::Int32, false));
2125         field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
2126 
2127         let mut builder = StructBuilder::new(fields, field_builders);
2128         assert_eq!(2, builder.num_fields());
2129 
2130         let string_builder = builder
2131             .field_builder::<StringBuilder>(0)
2132             .expect("builder at field 0 should be string builder");
2133         string_builder.append_value("joe").unwrap();
2134         string_builder.append_null().unwrap();
2135         string_builder.append_null().unwrap();
2136         string_builder.append_value("mark").unwrap();
2137 
2138         let int_builder = builder
2139             .field_builder::<Int32Builder>(1)
2140             .expect("builder at field 1 should be int builder");
2141         int_builder.append_value(1).unwrap();
2142         int_builder.append_value(2).unwrap();
2143         int_builder.append_null().unwrap();
2144         int_builder.append_value(4).unwrap();
2145 
2146         builder.append(true).unwrap();
2147         builder.append(true).unwrap();
2148         builder.append_null().unwrap();
2149         builder.append(true).unwrap();
2150 
2151         let arr = builder.finish();
2152 
2153         let struct_data = arr.data();
2154         assert_eq!(4, struct_data.len());
2155         assert_eq!(1, struct_data.null_count());
2156         assert_eq!(
2157             &Some(Bitmap::from(Buffer::from(&[11_u8]))),
2158             struct_data.null_bitmap()
2159         );
2160 
2161         let expected_string_data = ArrayData::builder(DataType::Utf8)
2162             .len(4)
2163             .null_count(2)
2164             .null_bit_buffer(Buffer::from(&[9_u8]))
2165             .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
2166             .add_buffer(Buffer::from("joemark".as_bytes()))
2167             .build();
2168 
2169         let expected_int_data = ArrayData::builder(DataType::Int32)
2170             .len(4)
2171             .null_count(1)
2172             .null_bit_buffer(Buffer::from(&[11_u8]))
2173             .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
2174             .build();
2175 
2176         assert_eq!(expected_string_data, arr.column(0).data());
2177 
2178         // TODO: implement equality for ArrayData
2179         assert_eq!(expected_int_data.len(), arr.column(1).data().len());
2180         assert_eq!(
2181             expected_int_data.null_count(),
2182             arr.column(1).data().null_count()
2183         );
2184         assert_eq!(
2185             expected_int_data.null_bitmap(),
2186             arr.column(1).data().null_bitmap()
2187         );
2188         let expected_value_buf = expected_int_data.buffers()[0].clone();
2189         let actual_value_buf = arr.column(1).data().buffers()[0].clone();
2190         for i in 0..expected_int_data.len() {
2191             if !expected_int_data.is_null(i) {
2192                 assert_eq!(
2193                     expected_value_buf.data()[i * 4..(i + 1) * 4],
2194                     actual_value_buf.data()[i * 4..(i + 1) * 4]
2195                 );
2196             }
2197         }
2198     }
2199 
2200     #[test]
test_struct_array_builder_finish()2201     fn test_struct_array_builder_finish() {
2202         let int_builder = Int32Builder::new(10);
2203         let bool_builder = BooleanBuilder::new(10);
2204 
2205         let mut fields = Vec::new();
2206         let mut field_builders = Vec::new();
2207         fields.push(Field::new("f1", DataType::Int32, false));
2208         field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
2209         fields.push(Field::new("f2", DataType::Boolean, false));
2210         field_builders.push(Box::new(bool_builder) as Box<ArrayBuilder>);
2211 
2212         let mut builder = StructBuilder::new(fields, field_builders);
2213         builder
2214             .field_builder::<Int32Builder>(0)
2215             .unwrap()
2216             .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
2217             .unwrap();
2218         builder
2219             .field_builder::<BooleanBuilder>(1)
2220             .unwrap()
2221             .append_slice(&[
2222                 false, true, false, true, false, true, false, true, false, true,
2223             ])
2224             .unwrap();
2225 
2226         // Append slot values - all are valid.
2227         for _ in 0..10 {
2228             assert!(builder.append(true).is_ok())
2229         }
2230 
2231         assert_eq!(10, builder.len());
2232 
2233         let arr = builder.finish();
2234 
2235         assert_eq!(10, arr.len());
2236         assert_eq!(0, builder.len());
2237 
2238         builder
2239             .field_builder::<Int32Builder>(0)
2240             .unwrap()
2241             .append_slice(&[1, 3, 5, 7, 9])
2242             .unwrap();
2243         builder
2244             .field_builder::<BooleanBuilder>(1)
2245             .unwrap()
2246             .append_slice(&[false, true, false, true, false])
2247             .unwrap();
2248 
2249         // Append slot values - all are valid.
2250         for _ in 0..5 {
2251             assert!(builder.append(true).is_ok())
2252         }
2253 
2254         assert_eq!(5, builder.len());
2255 
2256         let arr = builder.finish();
2257 
2258         assert_eq!(5, arr.len());
2259         assert_eq!(0, builder.len());
2260     }
2261 
2262     #[test]
test_struct_array_builder_from_schema()2263     fn test_struct_array_builder_from_schema() {
2264         let mut fields = Vec::new();
2265         fields.push(Field::new("f1", DataType::Float32, false));
2266         fields.push(Field::new("f2", DataType::Utf8, false));
2267         let mut sub_fields = Vec::new();
2268         sub_fields.push(Field::new("g1", DataType::Int32, false));
2269         sub_fields.push(Field::new("g2", DataType::Boolean, false));
2270         let struct_type = DataType::Struct(sub_fields);
2271         fields.push(Field::new("f3", struct_type, false));
2272 
2273         let mut builder = StructBuilder::from_schema(Schema::new(fields), 5);
2274         assert_eq!(3, builder.num_fields());
2275         assert!(builder.field_builder::<Float32Builder>(0).is_some());
2276         assert!(builder.field_builder::<StringBuilder>(1).is_some());
2277         assert!(builder.field_builder::<StructBuilder>(2).is_some());
2278     }
2279 
2280     #[test]
2281     #[should_panic(expected = "Data type List(Int64) is not currently supported")]
test_struct_array_builder_from_schema_unsupported_type()2282     fn test_struct_array_builder_from_schema_unsupported_type() {
2283         let mut fields = Vec::new();
2284         fields.push(Field::new("f1", DataType::Int16, false));
2285         let list_type = DataType::List(Box::new(DataType::Int64));
2286         fields.push(Field::new("f2", list_type, false));
2287 
2288         let _ = StructBuilder::from_schema(Schema::new(fields), 5);
2289     }
2290 
2291     #[test]
test_struct_array_builder_field_builder_type_mismatch()2292     fn test_struct_array_builder_field_builder_type_mismatch() {
2293         let int_builder = Int32Builder::new(10);
2294 
2295         let mut fields = Vec::new();
2296         let mut field_builders = Vec::new();
2297         fields.push(Field::new("f1", DataType::Int32, false));
2298         field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
2299 
2300         let mut builder = StructBuilder::new(fields, field_builders);
2301         assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
2302     }
2303 
2304     #[test]
test_primitive_dictionary_builder()2305     fn test_primitive_dictionary_builder() {
2306         let key_builder = PrimitiveBuilder::<UInt8Type>::new(3);
2307         let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
2308         let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
2309         builder.append(12345678).unwrap();
2310         builder.append_null().unwrap();
2311         builder.append(22345678).unwrap();
2312         let array = builder.finish();
2313 
2314         // Keys are strongly typed.
2315         let aks: Vec<_> = array.keys().collect();
2316 
2317         // Values are polymorphic and so require a downcast.
2318         let av = array.values();
2319         let ava: &UInt32Array = av.as_any().downcast_ref::<UInt32Array>().unwrap();
2320         let avs: &[u32] = ava.value_slice(0, array.values().len());
2321 
2322         assert_eq!(array.is_null(0), false);
2323         assert_eq!(array.is_null(1), true);
2324         assert_eq!(array.is_null(2), false);
2325 
2326         assert_eq!(aks, vec![Some(0), None, Some(1)]);
2327         assert_eq!(avs, &[12345678, 22345678]);
2328     }
2329 
2330     #[test]
test_string_dictionary_builder()2331     fn test_string_dictionary_builder() {
2332         let key_builder = PrimitiveBuilder::<Int8Type>::new(5);
2333         let value_builder = StringBuilder::new(2);
2334         let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
2335         builder.append("abc").unwrap();
2336         builder.append_null().unwrap();
2337         builder.append("def").unwrap();
2338         builder.append("def").unwrap();
2339         builder.append("abc").unwrap();
2340         let array = builder.finish();
2341 
2342         // Keys are strongly typed.
2343         let aks: Vec<_> = array.keys().collect();
2344 
2345         // Values are polymorphic and so require a downcast.
2346         let av = array.values();
2347         let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
2348 
2349         assert_eq!(aks, vec![Some(0), None, Some(1), Some(1), Some(0)]);
2350         assert_eq!(ava.value(0), "abc");
2351         assert_eq!(ava.value(1), "def");
2352     }
2353 
2354     #[test]
test_string_dictionary_builder_with_existing_dictionary()2355     fn test_string_dictionary_builder_with_existing_dictionary() {
2356         let dictionary =
2357             StringArray::try_from(vec![None, Some("def"), Some("abc")]).unwrap();
2358 
2359         let key_builder = PrimitiveBuilder::<Int8Type>::new(6);
2360         let mut builder =
2361             StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary)
2362                 .unwrap();
2363         builder.append("abc").unwrap();
2364         builder.append_null().unwrap();
2365         builder.append("def").unwrap();
2366         builder.append("def").unwrap();
2367         builder.append("abc").unwrap();
2368         builder.append("ghi").unwrap();
2369         let array = builder.finish();
2370 
2371         // Keys are strongly typed.
2372         let aks: Vec<_> = array.keys().collect();
2373 
2374         // Values are polymorphic and so require a downcast.
2375         let av = array.values();
2376         let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
2377 
2378         assert_eq!(aks, vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)]);
2379         assert_eq!(ava.is_valid(0), false);
2380         assert_eq!(ava.value(1), "def");
2381         assert_eq!(ava.value(2), "abc");
2382         assert_eq!(ava.value(3), "ghi");
2383     }
2384 
2385     #[test]
test_string_dictionary_builder_with_reserved_null_value()2386     fn test_string_dictionary_builder_with_reserved_null_value() {
2387         let dictionary = StringArray::try_from(vec![None]).unwrap();
2388 
2389         let key_builder = PrimitiveBuilder::<Int16Type>::new(4);
2390         let mut builder =
2391             StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary)
2392                 .unwrap();
2393         builder.append("abc").unwrap();
2394         builder.append_null().unwrap();
2395         builder.append("def").unwrap();
2396         builder.append("abc").unwrap();
2397         let array = builder.finish();
2398 
2399         assert_eq!(array.is_null(1), true);
2400         assert_eq!(array.is_valid(1), false);
2401 
2402         let keys: Int16Array = array.data().into();
2403 
2404         assert_eq!(keys.value(0), 1);
2405         assert_eq!(keys.is_null(1), true);
2406         // zero initialization is currently guaranteed by Buffer allocation and resizing
2407         assert_eq!(keys.value(1), 0);
2408         assert_eq!(keys.value(2), 2);
2409         assert_eq!(keys.value(3), 1);
2410     }
2411 
2412     #[test]
test_primitive_dictionary_overflow()2413     fn test_primitive_dictionary_overflow() {
2414         let key_builder = PrimitiveBuilder::<UInt8Type>::new(257);
2415         let value_builder = PrimitiveBuilder::<UInt32Type>::new(257);
2416         let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
2417         // 256 unique keys.
2418         for i in 0..256 {
2419             builder.append(i + 1000).unwrap();
2420         }
2421         // Special error if the key overflows (256th entry)
2422         assert_eq!(
2423             builder.append(1257),
2424             Err(ArrowError::DictionaryKeyOverflowError)
2425         );
2426     }
2427 }
2428