1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 //! The main type in the module is `Buffer`, a contiguous immutable memory region of
19 //! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can
20 //! be mutated and grown.
21 #[cfg(feature = "simd")]
22 use packed_simd::u8x64;
23 
24 use std::cmp;
25 use std::convert::AsRef;
26 use std::fmt::{Debug, Formatter};
27 use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write};
28 use std::mem;
29 use std::ops::{BitAnd, BitOr, Not};
30 use std::slice::{from_raw_parts, from_raw_parts_mut};
31 use std::sync::Arc;
32 
33 use crate::array::{BufferBuilderTrait, UInt8BufferBuilder};
34 use crate::datatypes::ArrowNativeType;
35 use crate::error::{ArrowError, Result};
36 use crate::memory;
37 use crate::util::bit_util;
38 
/// Buffer is a contiguous memory region of fixed size and is aligned at a 64-byte
/// boundary. Buffer is immutable.
///
/// Clones and slices of a `Buffer` share the same reference-counted `BufferData`;
/// a slice differs from its parent only in `offset`.
///
/// The derived `PartialEq` compares both the underlying `BufferData` and `offset`.
#[derive(PartialEq, Debug)]
pub struct Buffer {
    /// Reference-counted pointer to the internal byte buffer.
    data: Arc<BufferData>,

    /// The offset into the buffer, in bytes. The bytes visible through this `Buffer`
    /// are `[offset, data.len)`.
    offset: usize,
}
49 
/// Backing storage shared by every `Buffer` view of the same memory region: a raw
/// pointer plus the bookkeeping needed to read the region and, if owned, free it.
struct BufferData {
    /// The raw pointer into the buffer bytes
    ptr: *const u8,

    /// The length (num of bytes) of the buffer. The region `[0, len)` of the buffer
    /// is occupied with meaningful data, while the rest `[len, capacity)` is the
    /// unoccupied region.
    len: usize,

    /// Whether this piece of memory is owned by this object. Only owned memory is
    /// freed when this `BufferData` is dropped.
    owned: bool,

    /// The capacity (num of bytes) of the buffer
    /// Invariant: len <= capacity
    capacity: usize,
}
66 
67 impl PartialEq for BufferData {
eq(&self, other: &BufferData) -> bool68     fn eq(&self, other: &BufferData) -> bool {
69         if self.capacity != other.capacity {
70             return false;
71         }
72 
73         self.data() == other.data()
74     }
75 }
76 
77 /// Release the underlying memory when the current buffer goes out of scope
78 impl Drop for BufferData {
drop(&mut self)79     fn drop(&mut self) {
80         if !self.ptr.is_null() && self.owned {
81             unsafe { memory::free_aligned(self.ptr as *mut u8, self.capacity) };
82         }
83     }
84 }
85 
86 impl Debug for BufferData {
fmt(&self, f: &mut Formatter) -> std::fmt::Result87     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
88         write!(
89             f,
90             "BufferData {{ ptr: {:?}, len: {}, capacity: {}, data: ",
91             self.ptr, self.len, self.capacity
92         )?;
93 
94         f.debug_list().entries(self.data().iter()).finish()?;
95 
96         write!(f, " }}")
97     }
98 }
99 
100 impl BufferData {
data(&self) -> &[u8]101     fn data(&self) -> &[u8] {
102         if self.ptr.is_null() {
103             &[]
104         } else {
105             unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
106         }
107     }
108 }
109 
110 impl Buffer {
111     /// Creates a buffer from an existing memory region (must already be byte-aligned), this
112     /// `Buffer` will free this piece of memory when dropped.
113     ///
114     /// # Arguments
115     ///
116     /// * `ptr` - Pointer to raw parts
117     /// * `len` - Length of raw parts in **bytes**
118     /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes**
119     ///
120     /// # Safety
121     ///
122     /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
123     /// bytes.
from_raw_parts(ptr: *const u8, len: usize, capacity: usize) -> Self124     pub unsafe fn from_raw_parts(ptr: *const u8, len: usize, capacity: usize) -> Self {
125         Buffer::build_with_arguments(ptr, len, capacity, true)
126     }
127 
128     /// Creates a buffer from an existing memory region (must already be byte-aligned), this
129     /// `Buffer` **does not** free this piece of memory when dropped.
130     ///
131     /// # Arguments
132     ///
133     /// * `ptr` - Pointer to raw parts
134     /// * `len` - Length of raw parts in **bytes**
135     /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes**
136     ///
137     /// # Safety
138     ///
139     /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
140     /// bytes.
from_unowned(ptr: *const u8, len: usize, capacity: usize) -> Self141     pub unsafe fn from_unowned(ptr: *const u8, len: usize, capacity: usize) -> Self {
142         Buffer::build_with_arguments(ptr, len, capacity, false)
143     }
144 
145     /// Creates a buffer from an existing memory region (must already be byte-aligned).
146     ///
147     /// # Arguments
148     ///
149     /// * `ptr` - Pointer to raw parts
150     /// * `len` - Length of raw parts in bytes
151     /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes**
152     /// * `owned` - Whether the raw parts is owned by this `Buffer`. If true, this `Buffer` will
153     /// free this memory when dropped, otherwise it will skip freeing the raw parts.
154     ///
155     /// # Safety
156     ///
157     /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
158     /// bytes.
build_with_arguments( ptr: *const u8, len: usize, capacity: usize, owned: bool, ) -> Self159     unsafe fn build_with_arguments(
160         ptr: *const u8,
161         len: usize,
162         capacity: usize,
163         owned: bool,
164     ) -> Self {
165         assert!(
166             memory::is_aligned(ptr, memory::ALIGNMENT),
167             "memory not aligned"
168         );
169         let buf_data = BufferData {
170             ptr,
171             len,
172             capacity,
173             owned,
174         };
175         Buffer {
176             data: Arc::new(buf_data),
177             offset: 0,
178         }
179     }
180 
181     /// Returns the number of bytes in the buffer
len(&self) -> usize182     pub fn len(&self) -> usize {
183         self.data.len - self.offset
184     }
185 
186     /// Returns the capacity of this buffer
capacity(&self) -> usize187     pub fn capacity(&self) -> usize {
188         self.data.capacity
189     }
190 
191     /// Returns whether the buffer is empty.
is_empty(&self) -> bool192     pub fn is_empty(&self) -> bool {
193         self.data.len - self.offset == 0
194     }
195 
196     /// Returns the byte slice stored in this buffer
data(&self) -> &[u8]197     pub fn data(&self) -> &[u8] {
198         &self.data.data()[self.offset..]
199     }
200 
201     /// Returns a slice of this buffer, starting from `offset`.
slice(&self, offset: usize) -> Self202     pub fn slice(&self, offset: usize) -> Self {
203         assert!(
204             offset <= self.len(),
205             "the offset of the new Buffer cannot exceed the existing length"
206         );
207         Self {
208             data: self.data.clone(),
209             offset: self.offset + offset,
210         }
211     }
212 
213     /// Returns a raw pointer for this buffer.
214     ///
215     /// Note that this should be used cautiously, and the returned pointer should not be
216     /// stored anywhere, to avoid dangling pointers.
raw_data(&self) -> *const u8217     pub fn raw_data(&self) -> *const u8 {
218         unsafe { self.data.ptr.add(self.offset) }
219     }
220 
221     /// View buffer as typed slice.
222     ///
223     /// # Safety
224     ///
225     /// `ArrowNativeType` is public so that it can be used as a trait bound for other public
226     /// components, such as the `ToByteSlice` trait.  However, this means that it can be
227     /// implemented by user defined types, which it is not intended for.
228     ///
229     /// Also `typed_data::<bool>` is unsafe as `0x00` and `0x01` are the only valid values for
230     /// `bool` in Rust.  However, `bool` arrays in Arrow are bit-packed which breaks this condition.
typed_data<T: ArrowNativeType + num::Num>(&self) -> &[T]231     pub unsafe fn typed_data<T: ArrowNativeType + num::Num>(&self) -> &[T] {
232         assert_eq!(self.len() % mem::size_of::<T>(), 0);
233         assert!(memory::is_ptr_aligned::<T>(self.raw_data() as *const T));
234         from_raw_parts(
235             self.raw_data() as *const T,
236             self.len() / mem::size_of::<T>(),
237         )
238     }
239 
240     /// Returns an empty buffer.
empty() -> Self241     pub fn empty() -> Self {
242         unsafe { Self::from_raw_parts(::std::ptr::null(), 0, 0) }
243     }
244 }
245 
246 impl Clone for Buffer {
clone(&self) -> Buffer247     fn clone(&self) -> Buffer {
248         Buffer {
249             data: self.data.clone(),
250             offset: self.offset,
251         }
252     }
253 }
254 
255 /// Creating a `Buffer` instance by copying the memory from a `AsRef<[u8]>` into a newly
256 /// allocated memory region.
257 impl<T: AsRef<[u8]>> From<T> for Buffer {
from(p: T) -> Self258     fn from(p: T) -> Self {
259         // allocate aligned memory buffer
260         let slice = p.as_ref();
261         let len = slice.len() * mem::size_of::<u8>();
262         let capacity = bit_util::round_upto_multiple_of_64(len);
263         let buffer = memory::allocate_aligned(capacity);
264         unsafe {
265             memory::memcpy(buffer, slice.as_ptr(), len);
266             Buffer::from_raw_parts(buffer, len, capacity)
267         }
268     }
269 }
270 
/// Helper function for the SIMD `BitAnd` and `BitOr` implementations.
///
/// Walks `left` in steps of `u8x64::lanes()` bytes, hands the matching windows of
/// `left` and `right` to `bit_util::bitwise_bin_op_simd` together with `op`, and
/// collects the output in a new buffer whose length is `left.len()`.
///
/// The length of `right` is not checked here; the `BitAnd` / `BitOr` implementations
/// in this file compare both lengths before calling this helper.
///
/// NOTE(review): every step forms full `lanes`-byte windows, even when fewer than
/// `lanes` bytes remain before `left.len()`. This presumably relies on allocations
/// being padded to a multiple of 64 bytes — confirm this also holds for sliced
/// buffers (non-zero offset) and for buffers built via `from_raw_parts` /
/// `from_unowned`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn bitwise_bin_op_simd_helper<F>(left: &Buffer, right: &Buffer, op: F) -> Buffer
where
    F: Fn(u8x64, u8x64) -> u8x64,
{
    // Zero-filled output buffer with the same length as the inputs.
    let mut result = MutableBuffer::new(left.len()).with_bitset(left.len(), false);
    let lanes = u8x64::lanes();
    for i in (0..left.len()).step_by(lanes) {
        // Windows of `lanes` bytes starting at byte `i` of each operand.
        let left_data = unsafe { from_raw_parts(left.raw_data().add(i), lanes) };
        let right_data = unsafe { from_raw_parts(right.raw_data().add(i), lanes) };
        // Destination window of the same size in the output buffer.
        let result_slice: &mut [u8] = unsafe {
            from_raw_parts_mut((result.data_mut().as_mut_ptr() as *mut u8).add(i), lanes)
        };
        unsafe {
            bit_util::bitwise_bin_op_simd(&left_data, &right_data, result_slice, &op)
        };
    }
    result.freeze()
}
291 
292 impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
293     type Output = Result<Buffer>;
294 
bitand(self, rhs: &'b Buffer) -> Result<Buffer>295     fn bitand(self, rhs: &'b Buffer) -> Result<Buffer> {
296         if self.len() != rhs.len() {
297             return Err(ArrowError::ComputeError(
298                 "Buffers must be the same size to apply Bitwise AND.".to_string(),
299             ));
300         }
301 
302         // SIMD implementation if available
303         #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
304         {
305             return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b));
306         }
307 
308         // Default implementation
309         #[allow(unreachable_code)]
310         {
311             let mut builder = UInt8BufferBuilder::new(self.len());
312             for i in 0..self.len() {
313                 unsafe {
314                     builder
315                         .append(
316                             self.data().get_unchecked(i) & rhs.data().get_unchecked(i),
317                         )
318                         .unwrap();
319                 }
320             }
321             Ok(builder.finish())
322         }
323     }
324 }
325 
326 impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
327     type Output = Result<Buffer>;
328 
bitor(self, rhs: &'b Buffer) -> Result<Buffer>329     fn bitor(self, rhs: &'b Buffer) -> Result<Buffer> {
330         if self.len() != rhs.len() {
331             return Err(ArrowError::ComputeError(
332                 "Buffers must be the same size to apply Bitwise OR.".to_string(),
333             ));
334         }
335 
336         // SIMD implementation if available
337         #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
338         {
339             return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b));
340         }
341 
342         // Default implementation
343         #[allow(unreachable_code)]
344         {
345             let mut builder = UInt8BufferBuilder::new(self.len());
346             for i in 0..self.len() {
347                 unsafe {
348                     builder
349                         .append(
350                             self.data().get_unchecked(i) | rhs.data().get_unchecked(i),
351                         )
352                         .unwrap();
353                 }
354             }
355             Ok(builder.finish())
356         }
357     }
358 }
359 
360 impl Not for &Buffer {
361     type Output = Buffer;
362 
not(self) -> Buffer363     fn not(self) -> Buffer {
364         // SIMD implementation if available
365         #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
366         {
367             let mut result =
368                 MutableBuffer::new(self.len()).with_bitset(self.len(), false);
369             let lanes = u8x64::lanes();
370             for i in (0..self.len()).step_by(lanes) {
371                 unsafe {
372                     let data = from_raw_parts(self.raw_data().add(i), lanes);
373                     let data_simd = u8x64::from_slice_unaligned_unchecked(data);
374                     let simd_result = !data_simd;
375                     let result_slice: &mut [u8] = from_raw_parts_mut(
376                         (result.data_mut().as_mut_ptr() as *mut u8).add(i),
377                         lanes,
378                     );
379                     simd_result.write_to_slice_unaligned_unchecked(result_slice);
380                 }
381             }
382             return result.freeze();
383         }
384 
385         // Default implementation
386         #[allow(unreachable_code)]
387         {
388             let mut builder = UInt8BufferBuilder::new(self.len());
389             for i in 0..self.len() {
390                 unsafe {
391                     builder.append(!self.data().get_unchecked(i)).unwrap();
392                 }
393             }
394             builder.finish()
395         }
396     }
397 }
398 
// `BufferData` stores a raw pointer, so `Buffer` does not get `Sync`/`Send`
// automatically and they are asserted manually here.
// NOTE(review): soundness presumably rests on the bytes never being mutated while a
// `Buffer` exists — confirm this also holds for memory wrapped via `from_unowned`.
unsafe impl Sync for Buffer {}
unsafe impl Send for Buffer {}
401 
/// Similar to `Buffer`, but is growable and can be mutated. A mutable buffer can be
/// converted into an immutable buffer via the `freeze` method.
#[derive(Debug)]
pub struct MutableBuffer {
    /// Raw pointer to the start of the allocation backing this buffer.
    data: *mut u8,
    /// Number of bytes currently in use.
    len: usize,
    /// Size of the allocation, in bytes.
    capacity: usize,
}
410 
411 impl MutableBuffer {
412     /// Allocate a new mutable buffer with initial capacity to be `capacity`.
new(capacity: usize) -> Self413     pub fn new(capacity: usize) -> Self {
414         let new_capacity = bit_util::round_upto_multiple_of_64(capacity);
415         let ptr = memory::allocate_aligned(new_capacity);
416         Self {
417             data: ptr,
418             len: 0,
419             capacity: new_capacity,
420         }
421     }
422 
423     /// Set the bits in the range of `[0, end)` to 0 (if `val` is false), or 1 (if `val`
424     /// is true). Also extend the length of this buffer to be `end`.
425     ///
426     /// This is useful when one wants to clear (or set) the bits and then manipulate
427     /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
428     /// from `data_mut()`).
with_bitset(mut self, end: usize, val: bool) -> Self429     pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
430         assert!(end <= self.capacity);
431         let v = if val { 255 } else { 0 };
432         unsafe {
433             std::ptr::write_bytes(self.data, v, end);
434             self.len = end;
435         }
436         self
437     }
438 
439     /// Ensure that `count` bytes from `start` contain zero bits
440     ///
441     /// This is used to initialize the bits in a buffer, however, it has no impact on the
442     /// `len` of the buffer and so can be used to initialize the memory region from
443     /// `len` to `capacity`.
set_null_bits(&mut self, start: usize, count: usize)444     pub fn set_null_bits(&mut self, start: usize, count: usize) {
445         assert!(start + count <= self.capacity);
446         unsafe {
447             std::ptr::write_bytes(self.data.add(start), 0, count);
448         }
449     }
450 
451     /// Ensures that this buffer has at least `capacity` slots in this buffer. This will
452     /// also ensure the new capacity will be a multiple of 64 bytes.
453     ///
454     /// Returns the new capacity for this buffer.
reserve(&mut self, capacity: usize) -> Result<usize>455     pub fn reserve(&mut self, capacity: usize) -> Result<usize> {
456         if capacity > self.capacity {
457             let new_capacity = bit_util::round_upto_multiple_of_64(capacity);
458             let new_capacity = cmp::max(new_capacity, self.capacity * 2);
459             let new_data =
460                 unsafe { memory::reallocate(self.data, self.capacity, new_capacity) };
461             self.data = new_data as *mut u8;
462             self.capacity = new_capacity;
463         }
464         Ok(self.capacity)
465     }
466 
467     /// Resizes the buffer so that the `len` will equal to the `new_len`.
468     ///
469     /// If `new_len` is greater than `len`, the buffer's length is simply adjusted to be
470     /// the former, optionally extending the capacity. The data between `len` and
471     /// `new_len` will be zeroed out.
472     ///
473     /// If `new_len` is less than `len`, the buffer will be truncated.
resize(&mut self, new_len: usize) -> Result<()>474     pub fn resize(&mut self, new_len: usize) -> Result<()> {
475         if new_len > self.len {
476             self.reserve(new_len)?;
477         } else {
478             let new_capacity = bit_util::round_upto_multiple_of_64(new_len);
479             if new_capacity < self.capacity {
480                 let new_data =
481                     unsafe { memory::reallocate(self.data, self.capacity, new_capacity) };
482                 self.data = new_data as *mut u8;
483                 self.capacity = new_capacity;
484             }
485         }
486         self.len = new_len;
487         Ok(())
488     }
489 
490     /// Returns whether this buffer is empty or not.
is_empty(&self) -> bool491     pub fn is_empty(&self) -> bool {
492         self.len == 0
493     }
494 
495     /// Returns the length (the number of bytes written) in this buffer.
len(&self) -> usize496     pub fn len(&self) -> usize {
497         self.len
498     }
499 
500     /// Returns the total capacity in this buffer.
capacity(&self) -> usize501     pub fn capacity(&self) -> usize {
502         self.capacity
503     }
504 
505     /// Clear all existing data from this buffer.
clear(&mut self)506     pub fn clear(&mut self) {
507         self.len = 0
508     }
509 
510     /// Returns the data stored in this buffer as a slice.
data(&self) -> &[u8]511     pub fn data(&self) -> &[u8] {
512         if self.data.is_null() {
513             &[]
514         } else {
515             unsafe { std::slice::from_raw_parts(self.raw_data(), self.len()) }
516         }
517     }
518 
519     /// Returns the data stored in this buffer as a mutable slice.
data_mut(&mut self) -> &mut [u8]520     pub fn data_mut(&mut self) -> &mut [u8] {
521         if self.data.is_null() {
522             &mut []
523         } else {
524             unsafe { std::slice::from_raw_parts_mut(self.raw_data_mut(), self.len()) }
525         }
526     }
527 
528     /// Returns a raw pointer for this buffer.
529     ///
530     /// Note that this should be used cautiously, and the returned pointer should not be
531     /// stored anywhere, to avoid dangling pointers.
raw_data(&self) -> *const u8532     pub fn raw_data(&self) -> *const u8 {
533         self.data
534     }
535 
raw_data_mut(&mut self) -> *mut u8536     pub fn raw_data_mut(&mut self) -> *mut u8 {
537         self.data
538     }
539 
540     /// Freezes this buffer and return an immutable version of it.
freeze(self) -> Buffer541     pub fn freeze(self) -> Buffer {
542         let buffer_data = BufferData {
543             ptr: self.data,
544             len: self.len,
545             capacity: self.capacity,
546             owned: true,
547         };
548         std::mem::forget(self);
549         Buffer {
550             data: Arc::new(buffer_data),
551             offset: 0,
552         }
553     }
554 
555     /// View buffer as typed slice.
typed_data_mut<T: ArrowNativeType + num::Num>(&mut self) -> &mut [T]556     pub fn typed_data_mut<T: ArrowNativeType + num::Num>(&mut self) -> &mut [T] {
557         assert_eq!(self.len() % mem::size_of::<T>(), 0);
558         assert!(memory::is_ptr_aligned::<T>(self.raw_data() as *const T));
559         unsafe {
560             from_raw_parts_mut(
561                 self.raw_data() as *mut T,
562                 self.len() / mem::size_of::<T>(),
563             )
564         }
565     }
566 
567     /// Writes a byte slice to the underlying buffer and updates the `len`, i.e. the
568     /// number array elements in the buffer.  Also, converts the `io::Result`
569     /// required by the `Write` trait to the Arrow `Result` type.
write_bytes(&mut self, bytes: &[u8], len_added: usize) -> Result<()>570     pub fn write_bytes(&mut self, bytes: &[u8], len_added: usize) -> Result<()> {
571         let write_result = self.write(bytes);
572         // `io::Result` has many options one of which we use, so pattern matching is
573         // overkill here
574         if write_result.is_err() {
575             Err(ArrowError::IoError(
576                 "Could not write to Buffer, not big enough".to_string(),
577             ))
578         } else {
579             self.len += len_added;
580             Ok(())
581         }
582     }
583 }
584 
585 impl Drop for MutableBuffer {
drop(&mut self)586     fn drop(&mut self) {
587         if !self.data.is_null() {
588             unsafe { memory::free_aligned(self.data, self.capacity) };
589         }
590     }
591 }
592 
593 impl PartialEq for MutableBuffer {
eq(&self, other: &MutableBuffer) -> bool594     fn eq(&self, other: &MutableBuffer) -> bool {
595         if self.len != other.len {
596             return false;
597         }
598         if self.capacity != other.capacity {
599             return false;
600         }
601         unsafe { memory::memcmp(self.data, other.data, self.len) == 0 }
602     }
603 }
604 
605 impl Write for MutableBuffer {
write(&mut self, buf: &[u8]) -> IoResult<usize>606     fn write(&mut self, buf: &[u8]) -> IoResult<usize> {
607         let remaining_capacity = self.capacity - self.len;
608         if buf.len() > remaining_capacity {
609             return Err(IoError::new(ErrorKind::Other, "Buffer not big enough"));
610         }
611         unsafe {
612             memory::memcpy(self.data.add(self.len), buf.as_ptr(), buf.len());
613             self.len += buf.len();
614             Ok(buf.len())
615         }
616     }
617 
flush(&mut self) -> IoResult<()>618     fn flush(&mut self) -> IoResult<()> {
619         Ok(())
620     }
621 }
622 
// `MutableBuffer` stores a raw pointer, so it does not get `Sync`/`Send`
// automatically and they are asserted manually here.
// NOTE(review): mutation goes through `&mut self` methods in this file, which
// presumably is what makes sharing `&MutableBuffer` across threads sound — confirm.
unsafe impl Sync for MutableBuffer {}
unsafe impl Send for MutableBuffer {}
625 
#[cfg(test)]
mod tests {
    use crate::util::bit_util;
    use std::ptr::null_mut;
    use std::thread;

    use super::*;
    use crate::datatypes::ToByteSlice;

    /// Equality depends on the visible bytes and on the slice offset.
    #[test]
    fn test_buffer_data_equality() {
        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
        let mut buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(buf1, buf2);

        // slice with same offset should still preserve equality
        let buf3 = buf1.slice(2);
        assert_ne!(buf1, buf3);
        let buf4 = buf2.slice(2);
        assert_eq!(buf3, buf4);

        // unequal because of different elements
        buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
        assert_ne!(buf1, buf2);

        // unequal because of different length
        buf2 = Buffer::from(&[0, 1, 2, 3]);
        assert_ne!(buf1, buf2);
    }

    /// A null, zero-length region is accepted and yields an empty buffer.
    #[test]
    fn test_from_raw_parts() {
        let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0, 0) };
        assert_eq!(0, buf.len());
        assert_eq!(0, buf.data().len());
        assert_eq!(0, buf.capacity());
        assert!(buf.raw_data().is_null());

        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.raw_data().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.data());
    }

    /// Building a buffer from an owned `Vec<u8>` copies its contents.
    #[test]
    fn test_from_vec() {
        let buf = Buffer::from(vec![0u8, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.raw_data().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.data());
    }

    /// A clone exposes the same length, capacity and bytes as the original.
    #[test]
    fn test_copy() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = buf.clone();
        assert_eq!(5, buf2.len());
        assert_eq!(64, buf2.capacity());
        assert!(!buf2.raw_data().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf2.data());
    }

    /// Slices are relative to the buffer they are taken from and may be empty.
    #[test]
    fn test_slice() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        let buf2 = buf.slice(2);

        assert_eq!([6, 8, 10], buf2.data());
        assert_eq!(3, buf2.len());
        assert_eq!(unsafe { buf.raw_data().offset(2) }, buf2.raw_data());

        let buf3 = buf2.slice(1);
        assert_eq!([8, 10], buf3.data());
        assert_eq!(2, buf3.len());
        assert_eq!(unsafe { buf.raw_data().offset(3) }, buf3.raw_data());

        let buf4 = buf.slice(5);
        let empty_slice: [u8; 0] = [];
        assert_eq!(empty_slice, buf4.data());
        assert_eq!(0, buf4.len());
        assert!(buf4.is_empty());
        assert_eq!(buf2.slice(2).data(), &[10]);
    }

    /// Slicing past the end of the buffer must panic.
    #[test]
    #[should_panic(
        expected = "the offset of the new Buffer cannot exceed the existing length"
    )]
    fn test_slice_offset_out_of_bound() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        buf.slice(6);
    }

    /// `with_bitset` clears or sets every bit of the requested bytes.
    #[test]
    fn test_with_bitset() {
        let mut_buf = MutableBuffer::new(64).with_bitset(64, false);
        let buf = mut_buf.freeze();
        assert_eq!(0, bit_util::count_set_bits(buf.data()));

        let mut_buf = MutableBuffer::new(64).with_bitset(64, true);
        let buf = mut_buf.freeze();
        assert_eq!(512, bit_util::count_set_bits(buf.data()));
    }

    /// `set_null_bits` zeroes exactly the requested byte range.
    #[test]
    fn test_set_null_bits() {
        let mut mut_buf = MutableBuffer::new(64).with_bitset(64, true);
        mut_buf.set_null_bits(0, 64);
        let buf = mut_buf.freeze();
        assert_eq!(0, bit_util::count_set_bits(buf.data()));

        let mut mut_buf = MutableBuffer::new(64).with_bitset(64, true);
        mut_buf.set_null_bits(32, 32);
        let buf = mut_buf.freeze();
        assert_eq!(256, bit_util::count_set_bits(buf.data()));
    }

    #[test]
    fn test_bitwise_and() {
        let buf1 = Buffer::from([0b01101010]);
        let buf2 = Buffer::from([0b01001110]);
        assert_eq!(Buffer::from([0b01001010]), (&buf1 & &buf2).unwrap());
    }

    #[test]
    fn test_bitwise_or() {
        let buf1 = Buffer::from([0b01101010]);
        let buf2 = Buffer::from([0b01001110]);
        assert_eq!(Buffer::from([0b01101110]), (&buf1 | &buf2).unwrap());
    }

    #[test]
    fn test_bitwise_not() {
        let buf = Buffer::from([0b01101010]);
        assert_eq!(Buffer::from([0b10010101]), !&buf);
    }

    /// AND of buffers with different lengths is rejected.
    #[test]
    #[should_panic(expected = "Buffers must be the same size to apply Bitwise AND.")]
    fn test_buffer_bitand_different_sizes() {
        let buf1 = Buffer::from([1_u8, 1_u8]);
        let buf2 = Buffer::from([0b01001110]);
        let _buf3 = (&buf1 & &buf2).unwrap();
    }

    /// OR of buffers with different lengths is rejected.
    #[test]
    #[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")]
    fn test_buffer_bitor_different_sizes() {
        let buf1 = Buffer::from([1_u8, 1_u8]);
        let buf2 = Buffer::from([0b01001110]);
        let _buf3 = (&buf1 | &buf2).unwrap();
    }

    /// The requested capacity is rounded up to a multiple of 64 bytes.
    #[test]
    fn test_mutable_new() {
        let buf = MutableBuffer::new(63);
        assert_eq!(64, buf.capacity());
        assert_eq!(0, buf.len());
        assert!(buf.is_empty());
    }

    /// Writes append at the end; `clear` resets the write position.
    #[test]
    fn test_mutable_write() {
        let mut buf = MutableBuffer::new(100);
        buf.write("hello".as_bytes()).expect("Ok");
        assert_eq!(5, buf.len());
        assert_eq!("hello".as_bytes(), buf.data());

        buf.write(" world".as_bytes()).expect("Ok");
        assert_eq!(11, buf.len());
        assert_eq!("hello world".as_bytes(), buf.data());

        buf.clear();
        assert_eq!(0, buf.len());
        buf.write("hello arrow".as_bytes()).expect("Ok");
        assert_eq!(11, buf.len());
        assert_eq!("hello arrow".as_bytes(), buf.data());
    }

    /// Writing more than the capacity allows fails instead of growing the buffer.
    #[test]
    #[should_panic(expected = "Buffer not big enough")]
    fn test_mutable_write_overflow() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());
        for _ in 0..10 {
            buf.write(&[0, 0, 0, 0, 0, 0, 0, 0]).unwrap();
        }
    }

    #[test]
    fn test_mutable_reserve() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());

        // Reserving a smaller capacity should have no effect.
        let mut new_cap = buf.reserve(10).expect("reserve should be OK");
        assert_eq!(64, new_cap);
        assert_eq!(64, buf.capacity());

        new_cap = buf.reserve(100).expect("reserve should be OK");
        assert_eq!(128, new_cap);
        assert_eq!(128, buf.capacity());
    }

    /// Growing extends the capacity as needed; shrinking releases capacity again.
    #[test]
    fn test_mutable_resize() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());
        assert_eq!(0, buf.len());

        buf.resize(20).expect("resize should be OK");
        assert_eq!(64, buf.capacity());
        assert_eq!(20, buf.len());

        buf.resize(10).expect("resize should be OK");
        assert_eq!(64, buf.capacity());
        assert_eq!(10, buf.len());

        buf.resize(100).expect("resize should be OK");
        assert_eq!(128, buf.capacity());
        assert_eq!(100, buf.len());

        buf.resize(30).expect("resize should be OK");
        assert_eq!(64, buf.capacity());
        assert_eq!(30, buf.len());

        buf.resize(0).expect("resize should be OK");
        assert_eq!(0, buf.capacity());
        assert_eq!(0, buf.len());
    }

    /// Freezing keeps length, capacity and contents.
    #[test]
    fn test_mutable_freeze() {
        let mut buf = MutableBuffer::new(1);
        buf.write("aaaa bbbb cccc dddd".as_bytes())
            .expect("write should be OK");
        assert_eq!(19, buf.len());
        assert_eq!(64, buf.capacity());
        assert_eq!("aaaa bbbb cccc dddd".as_bytes(), buf.data());

        let immutable_buf = buf.freeze();
        assert_eq!(19, immutable_buf.len());
        assert_eq!(64, immutable_buf.capacity());
        assert_eq!("aaaa bbbb cccc dddd".as_bytes(), immutable_buf.data());
    }

    /// Equality of mutable buffers takes length, capacity and contents into account.
    #[test]
    fn test_mutable_equal() -> Result<()> {
        let mut buf = MutableBuffer::new(1);
        let mut buf2 = MutableBuffer::new(1);

        buf.write(&[0xaa])?;
        buf2.write(&[0xaa, 0xbb])?;
        assert!(buf != buf2);

        buf.write(&[0xbb])?;
        assert_eq!(buf, buf2);

        buf2.reserve(65)?;
        assert!(buf != buf2);

        Ok(())
    }

    /// A buffer can be moved to, and cloned on, another thread.
    #[test]
    fn test_access_concurrently() {
        let buffer = Buffer::from(vec![1, 2, 3, 4, 5]);
        let buffer2 = buffer.clone();
        assert_eq!([1, 2, 3, 4, 5], buffer.data());

        let buffer_copy = thread::spawn(move || {
            // access buffer in another thread.
            buffer.clone()
        })
        .join();

        assert!(buffer_copy.is_ok());
        assert_eq!(buffer2, buffer_copy.ok().unwrap());
    }

    /// Round-trips `$input` through a `Buffer` and reads it back as `$native_t`.
    macro_rules! check_as_typed_data {
        ($input: expr, $native_t: ty) => {{
            let buffer = Buffer::from($input.to_byte_slice());
            let slice: &[$native_t] = unsafe { buffer.typed_data::<$native_t>() };
            assert_eq!($input, slice);
        }};
    }

    #[test]
    fn test_as_typed_data() {
        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
    }
}
920