1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 //! Defines basic comparison kernels for [`PrimitiveArray`]s.
19 //!
//! These kernels can leverage SIMD if available on your system. Currently no runtime
//! detection is provided; you should enable the specific SIMD intrinsics yourself, for
//! example with `RUSTFLAGS="-C target-feature=+avx2"`. See the documentation
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
24 
25 use regex::Regex;
26 use std::collections::HashMap;
27 use std::sync::Arc;
28 
29 use crate::array::*;
30 use crate::buffer::{Buffer, MutableBuffer};
31 use crate::compute::util::combine_option_bitmap;
32 use crate::datatypes::{ArrowNumericType, DataType};
33 use crate::error::{ArrowError, Result};
34 use crate::util::bit_util;
35 
/// Applies a binary boolean predicate to each pair of slots taken from two arrays
/// and wraps the outcome in a [`BooleanArray`]. This version does not attempt to
/// use SIMD.
///
/// The expansion contains an early `return Err(..)` for arrays of different
/// length and uses `?`, so it can only be used inside a function returning
/// `Result<BooleanArray>`.
macro_rules! compare_op {
    ($left: expr, $right:expr, $op:expr) => {{
        let len = $left.len();
        if len != $right.len() {
            return Err(ArrowError::ComputeError(
                "Cannot perform comparison operation on arrays of different length"
                    .to_string(),
            ));
        }

        // The validity bitmap of the output is derived from both inputs.
        let validity = combine_option_bitmap($left.data_ref(), $right.data_ref(), len)?;

        // Evaluate the predicate slot by slot; the booleans are collected into
        // the single value buffer of the resulting array.
        let values = (0..len)
            .map(|idx| $op($left.value(idx), $right.value(idx)))
            .collect();

        Ok(BooleanArray::from(Arc::new(ArrayData::new(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![values],
            vec![],
        ))))
    }};
}
66 
/// Applies a binary boolean predicate to every slot of `$left` paired with the
/// scalar `$right` and wraps the outcome in a [`BooleanArray`]. This version does
/// not attempt to use SIMD.
///
/// The output array is always created with offset 0, so the validity bitmap of
/// `$left` is re-aligned with `bit_slice` before it is reused; otherwise a sliced
/// input (non-zero offset) would report the validity of the wrong slots.
macro_rules! compare_op_scalar {
    ($left: expr, $right:expr, $op:expr) => {{
        // Slice the validity bits so that bit 0 corresponds to logical slot 0 of
        // `$left`, matching the offset-0 layout of the array built below.
        let null_bit_buffer = $left
            .data_ref()
            .null_buffer()
            .map(|b| b.bit_slice($left.offset(), $left.len()));

        let buffer = (0..$left.len())
            .map(|i| $op($left.value(i), $right))
            .collect();

        let data = ArrayData::new(
            DataType::Boolean,
            $left.len(),
            None,
            null_bit_buffer,
            0,
            vec![buffer],
            vec![],
        );
        Ok(BooleanArray::from(Arc::new(data)))
    }};
}
87 
88 /// Evaluate `op(left, right)` for [`PrimitiveArray`]s using a specified
89 /// comparison function.
no_simd_compare_op<T, F>( left: &PrimitiveArray<T>, right: &PrimitiveArray<T>, op: F, ) -> Result<BooleanArray> where T: ArrowNumericType, F: Fn(T::Native, T::Native) -> bool,90 pub fn no_simd_compare_op<T, F>(
91     left: &PrimitiveArray<T>,
92     right: &PrimitiveArray<T>,
93     op: F,
94 ) -> Result<BooleanArray>
95 where
96     T: ArrowNumericType,
97     F: Fn(T::Native, T::Native) -> bool,
98 {
99     compare_op!(left, right, op)
100 }
101 
102 /// Evaluate `op(left, right)` for [`PrimitiveArray`] and scalar using
103 /// a specified comparison function.
no_simd_compare_op_scalar<T, F>( left: &PrimitiveArray<T>, right: T::Native, op: F, ) -> Result<BooleanArray> where T: ArrowNumericType, F: Fn(T::Native, T::Native) -> bool,104 pub fn no_simd_compare_op_scalar<T, F>(
105     left: &PrimitiveArray<T>,
106     right: T::Native,
107     op: F,
108 ) -> Result<BooleanArray>
109 where
110     T: ArrowNumericType,
111     F: Fn(T::Native, T::Native) -> bool,
112 {
113     compare_op_scalar!(left, right, op)
114 }
115 
116 /// Perform SQL `left LIKE right` operation on [`StringArray`] / [`LargeStringArray`].
117 ///
118 /// There are two wildcards supported with the LIKE operator:
119 ///
120 /// 1. `%` - The percent sign represents zero, one, or multiple characters
121 /// 2. `_` - The underscore represents a single character
122 ///
123 /// For example:
124 /// ```
125 /// use arrow::array::{StringArray, BooleanArray};
126 /// use arrow::compute::like_utf8;
127 ///
128 /// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
129 /// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A."]);
130 ///
131 /// let result = like_utf8(&strings, &patterns).unwrap();
132 /// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
133 /// ```
like_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>134 pub fn like_utf8<OffsetSize: StringOffsetSizeTrait>(
135     left: &GenericStringArray<OffsetSize>,
136     right: &GenericStringArray<OffsetSize>,
137 ) -> Result<BooleanArray> {
138     let mut map = HashMap::new();
139     if left.len() != right.len() {
140         return Err(ArrowError::ComputeError(
141             "Cannot perform comparison operation on arrays of different length"
142                 .to_string(),
143         ));
144     }
145 
146     let null_bit_buffer =
147         combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
148 
149     let mut result = BooleanBufferBuilder::new(left.len());
150     for i in 0..left.len() {
151         let haystack = left.value(i);
152         let pat = right.value(i);
153         let re = if let Some(ref regex) = map.get(pat) {
154             regex
155         } else {
156             let re_pattern = pat.replace("%", ".*").replace("_", ".");
157             let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
158                 ArrowError::ComputeError(format!(
159                     "Unable to build regex from LIKE pattern: {}",
160                     e
161                 ))
162             })?;
163             map.insert(pat, re);
164             map.get(pat).unwrap()
165         };
166 
167         result.append(re.is_match(haystack));
168     }
169 
170     let data = ArrayData::new(
171         DataType::Boolean,
172         left.len(),
173         None,
174         null_bit_buffer,
175         0,
176         vec![result.finish()],
177         vec![],
178     );
179     Ok(BooleanArray::from(Arc::new(data)))
180 }
181 
/// Returns `true` when `c` is one of the two LIKE wildcard characters
/// (`%` or `_`).
fn is_like_pattern(c: char) -> bool {
    match c {
        '%' | '_' => true,
        _ => false,
    }
}
185 
186 /// Perform SQL `left LIKE right` operation on [`StringArray`] /
187 /// [`LargeStringArray`] and a scalar.
188 ///
189 /// See the documentation on [`like_utf8`] for more details.
like_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>190 pub fn like_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
191     left: &GenericStringArray<OffsetSize>,
192     right: &str,
193 ) -> Result<BooleanArray> {
194     let null_bit_buffer = left.data().null_buffer().cloned();
195     let bytes = bit_util::ceil(left.len(), 8);
196     let mut bool_buf = MutableBuffer::from_len_zeroed(bytes);
197     let bool_slice = bool_buf.as_slice_mut();
198 
199     if !right.contains(is_like_pattern) {
200         // fast path, can use equals
201         for i in 0..left.len() {
202             if left.value(i) == right {
203                 bit_util::set_bit(bool_slice, i);
204             }
205         }
206     } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
207     {
208         // fast path, can use starts_with
209         let starts_with = &right[..right.len() - 1];
210         for i in 0..left.len() {
211             if left.value(i).starts_with(starts_with) {
212                 bit_util::set_bit(bool_slice, i);
213             }
214         }
215     } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
216         // fast path, can use ends_with
217         let ends_with = &right[1..];
218         for i in 0..left.len() {
219             if left.value(i).ends_with(ends_with) {
220                 bit_util::set_bit(bool_slice, i);
221             }
222         }
223     } else {
224         let re_pattern = right.replace("%", ".*").replace("_", ".");
225         let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
226             ArrowError::ComputeError(format!(
227                 "Unable to build regex from LIKE pattern: {}",
228                 e
229             ))
230         })?;
231 
232         for i in 0..left.len() {
233             let haystack = left.value(i);
234             if re.is_match(haystack) {
235                 bit_util::set_bit(bool_slice, i);
236             }
237         }
238     };
239 
240     let data = ArrayData::new(
241         DataType::Boolean,
242         left.len(),
243         None,
244         null_bit_buffer,
245         0,
246         vec![bool_buf.into()],
247         vec![],
248     );
249     Ok(BooleanArray::from(Arc::new(data)))
250 }
251 
252 /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
253 /// [`LargeStringArray`].
254 ///
255 /// See the documentation on [`like_utf8`] for more details.
nlike_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>256 pub fn nlike_utf8<OffsetSize: StringOffsetSizeTrait>(
257     left: &GenericStringArray<OffsetSize>,
258     right: &GenericStringArray<OffsetSize>,
259 ) -> Result<BooleanArray> {
260     let mut map = HashMap::new();
261     if left.len() != right.len() {
262         return Err(ArrowError::ComputeError(
263             "Cannot perform comparison operation on arrays of different length"
264                 .to_string(),
265         ));
266     }
267 
268     let null_bit_buffer =
269         combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
270 
271     let mut result = BooleanBufferBuilder::new(left.len());
272     for i in 0..left.len() {
273         let haystack = left.value(i);
274         let pat = right.value(i);
275         let re = if let Some(ref regex) = map.get(pat) {
276             regex
277         } else {
278             let re_pattern = pat.replace("%", ".*").replace("_", ".");
279             let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
280                 ArrowError::ComputeError(format!(
281                     "Unable to build regex from LIKE pattern: {}",
282                     e
283                 ))
284             })?;
285             map.insert(pat, re);
286             map.get(pat).unwrap()
287         };
288 
289         result.append(!re.is_match(haystack));
290     }
291 
292     let data = ArrayData::new(
293         DataType::Boolean,
294         left.len(),
295         None,
296         null_bit_buffer,
297         0,
298         vec![result.finish()],
299         vec![],
300     );
301     Ok(BooleanArray::from(Arc::new(data)))
302 }
303 
304 /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
305 /// [`LargeStringArray`] and a scalar.
306 ///
307 /// See the documentation on [`like_utf8`] for more details.
nlike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>308 pub fn nlike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
309     left: &GenericStringArray<OffsetSize>,
310     right: &str,
311 ) -> Result<BooleanArray> {
312     let null_bit_buffer = left.data().null_buffer().cloned();
313     let mut result = BooleanBufferBuilder::new(left.len());
314 
315     if !right.contains(is_like_pattern) {
316         // fast path, can use equals
317         for i in 0..left.len() {
318             result.append(left.value(i) != right);
319         }
320     } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
321     {
322         // fast path, can use ends_with
323         for i in 0..left.len() {
324             result.append(!left.value(i).starts_with(&right[..right.len() - 1]));
325         }
326     } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
327         // fast path, can use starts_with
328         for i in 0..left.len() {
329             result.append(!left.value(i).ends_with(&right[1..]));
330         }
331     } else {
332         let re_pattern = right.replace("%", ".*").replace("_", ".");
333         let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
334             ArrowError::ComputeError(format!(
335                 "Unable to build regex from LIKE pattern: {}",
336                 e
337             ))
338         })?;
339         for i in 0..left.len() {
340             let haystack = left.value(i);
341             result.append(!re.is_match(haystack));
342         }
343     }
344 
345     let data = ArrayData::new(
346         DataType::Boolean,
347         left.len(),
348         None,
349         null_bit_buffer,
350         0,
351         vec![result.finish()],
352         vec![],
353     );
354     Ok(BooleanArray::from(Arc::new(data)))
355 }
356 
357 /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`].
eq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>358 pub fn eq_utf8<OffsetSize: StringOffsetSizeTrait>(
359     left: &GenericStringArray<OffsetSize>,
360     right: &GenericStringArray<OffsetSize>,
361 ) -> Result<BooleanArray> {
362     compare_op!(left, right, |a, b| a == b)
363 }
364 
365 /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>366 pub fn eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
367     left: &GenericStringArray<OffsetSize>,
368     right: &str,
369 ) -> Result<BooleanArray> {
370     compare_op_scalar!(left, right, |a, b| a == b)
371 }
372 
373 /// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`].
neq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>374 pub fn neq_utf8<OffsetSize: StringOffsetSizeTrait>(
375     left: &GenericStringArray<OffsetSize>,
376     right: &GenericStringArray<OffsetSize>,
377 ) -> Result<BooleanArray> {
378     compare_op!(left, right, |a, b| a != b)
379 }
380 
381 /// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
neq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>382 pub fn neq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
383     left: &GenericStringArray<OffsetSize>,
384     right: &str,
385 ) -> Result<BooleanArray> {
386     compare_op_scalar!(left, right, |a, b| a != b)
387 }
388 
389 /// Perform `left < right` operation on [`StringArray`] / [`LargeStringArray`].
lt_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>390 pub fn lt_utf8<OffsetSize: StringOffsetSizeTrait>(
391     left: &GenericStringArray<OffsetSize>,
392     right: &GenericStringArray<OffsetSize>,
393 ) -> Result<BooleanArray> {
394     compare_op!(left, right, |a, b| a < b)
395 }
396 
397 /// Perform `left < right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
lt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>398 pub fn lt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
399     left: &GenericStringArray<OffsetSize>,
400     right: &str,
401 ) -> Result<BooleanArray> {
402     compare_op_scalar!(left, right, |a, b| a < b)
403 }
404 
405 /// Perform `left <= right` operation on [`StringArray`] / [`LargeStringArray`].
lt_eq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>406 pub fn lt_eq_utf8<OffsetSize: StringOffsetSizeTrait>(
407     left: &GenericStringArray<OffsetSize>,
408     right: &GenericStringArray<OffsetSize>,
409 ) -> Result<BooleanArray> {
410     compare_op!(left, right, |a, b| a <= b)
411 }
412 
413 /// Perform `left <= right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
lt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>414 pub fn lt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
415     left: &GenericStringArray<OffsetSize>,
416     right: &str,
417 ) -> Result<BooleanArray> {
418     compare_op_scalar!(left, right, |a, b| a <= b)
419 }
420 
421 /// Perform `left > right` operation on [`StringArray`] / [`LargeStringArray`].
gt_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>422 pub fn gt_utf8<OffsetSize: StringOffsetSizeTrait>(
423     left: &GenericStringArray<OffsetSize>,
424     right: &GenericStringArray<OffsetSize>,
425 ) -> Result<BooleanArray> {
426     compare_op!(left, right, |a, b| a > b)
427 }
428 
429 /// Perform `left > right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
gt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>430 pub fn gt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
431     left: &GenericStringArray<OffsetSize>,
432     right: &str,
433 ) -> Result<BooleanArray> {
434     compare_op_scalar!(left, right, |a, b| a > b)
435 }
436 
437 /// Perform `left >= right` operation on [`StringArray`] / [`LargeStringArray`].
gt_eq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>438 pub fn gt_eq_utf8<OffsetSize: StringOffsetSizeTrait>(
439     left: &GenericStringArray<OffsetSize>,
440     right: &GenericStringArray<OffsetSize>,
441 ) -> Result<BooleanArray> {
442     compare_op!(left, right, |a, b| a >= b)
443 }
444 
445 /// Perform `left >= right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
gt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>446 pub fn gt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
447     left: &GenericStringArray<OffsetSize>,
448     right: &str,
449 ) -> Result<BooleanArray> {
450     compare_op_scalar!(left, right, |a, b| a >= b)
451 }
452 
/// SIMD-backed comparison of two equally long [`PrimitiveArray`]s.
///
/// Full chunks of `T::lanes()` values are compared with `simd_op` and the
/// resulting mask is written as packed bits; the leftover values that do not
/// fill a whole chunk are compared one by one with `scalar_op`.
///
/// Returns a `ComputeError` when the arrays differ in length. Panics if the
/// lane count is not a multiple of 8.
#[cfg(simd)]
fn simd_compare_op<T, SIMD_OP, SCALAR_OP>(
    left: &PrimitiveArray<T>,
    right: &PrimitiveArray<T>,
    simd_op: SIMD_OP,
    scalar_op: SCALAR_OP,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
{
    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), len)?;

    let lanes = T::lanes();
    let buffer_size = bit_util::ceil(len, 8);
    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

    // this is currently the case for all our datatypes and allows us to always append full bytes
    assert!(
        lanes % 8 == 0,
        "Number of vector lanes must be multiple of 8"
    );
    // Number of output bytes produced by one full SIMD chunk.
    let bytes_per_chunk = lanes / 8;

    let mut left_chunks = left.values().chunks_exact(lanes);
    let mut right_chunks = right.values().chunks_exact(lanes);

    // `out` always points at the not-yet-written tail of the result buffer.
    let mut out: &mut [u8] = result.typed_data_mut();
    for (left_slice, right_slice) in left_chunks.by_ref().zip(right_chunks.by_ref()) {
        let simd_result = simd_op(T::load(left_slice), T::load(right_slice));
        let packed = T::mask_to_u64(&simd_result).to_le_bytes();

        let (head, rest) = std::mem::take(&mut out).split_at_mut(bytes_per_chunk);
        head.copy_from_slice(&packed[..bytes_per_chunk]);
        out = rest;
    }

    let left_tail = left_chunks.remainder();
    let right_tail = right_chunks.remainder();

    assert_eq!(left_tail.len(), right_tail.len());

    // Pack the scalar results for the leftover values, lowest index in bit 0.
    let mut tail_mask = 0_u64;
    for (bit_idx, (l, r)) in left_tail.iter().zip(right_tail.iter()).enumerate() {
        if scalar_op(*l, *r) {
            tail_mask |= 1_u64 << bit_idx;
        }
    }
    let tail_bytes = tail_mask.to_le_bytes();
    out.copy_from_slice(&tail_bytes[..bit_util::ceil(left_tail.len(), 8)]);

    Ok(BooleanArray::from(Arc::new(ArrayData::new(
        DataType::Boolean,
        len,
        None,
        null_bit_buffer,
        0,
        vec![result.into()],
        vec![],
    ))))
}
542 
/// SIMD-backed comparison of a [`PrimitiveArray`] against a single scalar.
///
/// The scalar is broadcast with `T::init`; full chunks of `T::lanes()` values
/// are compared with `simd_op` and written as packed bits, and the leftover
/// values are compared one by one with `scalar_op`. The validity bitmap of
/// `left` is bit-sliced to its offset and length and reused for the result.
///
/// Panics if the lane count is not a multiple of 8.
#[cfg(simd)]
fn simd_compare_op_scalar<T, SIMD_OP, SCALAR_OP>(
    left: &PrimitiveArray<T>,
    right: T::Native,
    simd_op: SIMD_OP,
    scalar_op: SCALAR_OP,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
{
    let len = left.len();

    let lanes = T::lanes();
    let buffer_size = bit_util::ceil(len, 8);
    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

    // this is currently the case for all our datatypes and allows us to always append full bytes
    assert!(
        lanes % 8 == 0,
        "Number of vector lanes must be multiple of 8"
    );
    // Number of output bytes produced by one full SIMD chunk.
    let bytes_per_chunk = lanes / 8;

    let mut left_chunks = left.values().chunks_exact(lanes);
    let simd_right = T::init(right);

    // `out` always points at the not-yet-written tail of the result buffer.
    let mut out: &mut [u8] = result.typed_data_mut();
    for left_slice in left_chunks.by_ref() {
        let simd_result = simd_op(T::load(left_slice), simd_right);
        let packed = T::mask_to_u64(&simd_result).to_le_bytes();

        let (head, rest) = std::mem::take(&mut out).split_at_mut(bytes_per_chunk);
        head.copy_from_slice(&packed[..bytes_per_chunk]);
        out = rest;
    }

    let left_tail = left_chunks.remainder();

    // Pack the scalar results for the leftover values, lowest index in bit 0.
    let mut tail_mask = 0_u64;
    for (bit_idx, value) in left_tail.iter().enumerate() {
        if scalar_op(*value, right) {
            tail_mask |= 1_u64 << bit_idx;
        }
    }
    let tail_bytes = tail_mask.to_le_bytes();
    out.copy_from_slice(&tail_bytes[..bit_util::ceil(left_tail.len(), 8)]);

    let null_bit_buffer = left
        .data_ref()
        .null_buffer()
        .map(|b| b.bit_slice(left.offset(), left.len()));

    // null count is the same as in the input since the right side of the scalar comparison cannot be null
    let null_count = left.null_count();

    Ok(BooleanArray::from(Arc::new(ArrayData::new(
        DataType::Boolean,
        len,
        Some(null_count),
        null_bit_buffer,
        0,
        vec![result.into()],
        vec![],
    ))))
}
625 
626 /// Perform `left == right` operation on two arrays.
eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,627 pub fn eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
628 where
629     T: ArrowNumericType,
630 {
631     #[cfg(simd)]
632     return simd_compare_op(left, right, T::eq, |a, b| a == b);
633     #[cfg(not(simd))]
634     return compare_op!(left, right, |a, b| a == b);
635 }
636 
637 /// Perform `left == right` operation on an array and a scalar value.
eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,638 pub fn eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
639 where
640     T: ArrowNumericType,
641 {
642     #[cfg(simd)]
643     return simd_compare_op_scalar(left, right, T::eq, |a, b| a == b);
644     #[cfg(not(simd))]
645     return compare_op_scalar!(left, right, |a, b| a == b);
646 }
647 
648 /// Perform `left != right` operation on two arrays.
neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,649 pub fn neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
650 where
651     T: ArrowNumericType,
652 {
653     #[cfg(simd)]
654     return simd_compare_op(left, right, T::ne, |a, b| a != b);
655     #[cfg(not(simd))]
656     return compare_op!(left, right, |a, b| a != b);
657 }
658 
659 /// Perform `left != right` operation on an array and a scalar value.
neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,660 pub fn neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
661 where
662     T: ArrowNumericType,
663 {
664     #[cfg(simd)]
665     return simd_compare_op_scalar(left, right, T::ne, |a, b| a != b);
666     #[cfg(not(simd))]
667     return compare_op_scalar!(left, right, |a, b| a != b);
668 }
669 
670 /// Perform `left < right` operation on two arrays. Null values are less than non-null
671 /// values.
lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,672 pub fn lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
673 where
674     T: ArrowNumericType,
675 {
676     #[cfg(simd)]
677     return simd_compare_op(left, right, T::lt, |a, b| a < b);
678     #[cfg(not(simd))]
679     return compare_op!(left, right, |a, b| a < b);
680 }
681 
682 /// Perform `left < right` operation on an array and a scalar value.
683 /// Null values are less than non-null values.
lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,684 pub fn lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
685 where
686     T: ArrowNumericType,
687 {
688     #[cfg(simd)]
689     return simd_compare_op_scalar(left, right, T::lt, |a, b| a < b);
690     #[cfg(not(simd))]
691     return compare_op_scalar!(left, right, |a, b| a < b);
692 }
693 
694 /// Perform `left <= right` operation on two arrays. Null values are less than non-null
695 /// values.
lt_eq<T>( left: &PrimitiveArray<T>, right: &PrimitiveArray<T>, ) -> Result<BooleanArray> where T: ArrowNumericType,696 pub fn lt_eq<T>(
697     left: &PrimitiveArray<T>,
698     right: &PrimitiveArray<T>,
699 ) -> Result<BooleanArray>
700 where
701     T: ArrowNumericType,
702 {
703     #[cfg(simd)]
704     return simd_compare_op(left, right, T::le, |a, b| a <= b);
705     #[cfg(not(simd))]
706     return compare_op!(left, right, |a, b| a <= b);
707 }
708 
709 /// Perform `left <= right` operation on an array and a scalar value.
710 /// Null values are less than non-null values.
lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,711 pub fn lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
712 where
713     T: ArrowNumericType,
714 {
715     #[cfg(simd)]
716     return simd_compare_op_scalar(left, right, T::le, |a, b| a <= b);
717     #[cfg(not(simd))]
718     return compare_op_scalar!(left, right, |a, b| a <= b);
719 }
720 
721 /// Perform `left > right` operation on two arrays. Non-null values are greater than null
722 /// values.
gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,723 pub fn gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
724 where
725     T: ArrowNumericType,
726 {
727     #[cfg(simd)]
728     return simd_compare_op(left, right, T::gt, |a, b| a > b);
729     #[cfg(not(simd))]
730     return compare_op!(left, right, |a, b| a > b);
731 }
732 
733 /// Perform `left > right` operation on an array and a scalar value.
734 /// Non-null values are greater than null values.
gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,735 pub fn gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
736 where
737     T: ArrowNumericType,
738 {
739     #[cfg(simd)]
740     return simd_compare_op_scalar(left, right, T::gt, |a, b| a > b);
741     #[cfg(not(simd))]
742     return compare_op_scalar!(left, right, |a, b| a > b);
743 }
744 
745 /// Perform `left >= right` operation on two arrays. Non-null values are greater than null
746 /// values.
gt_eq<T>( left: &PrimitiveArray<T>, right: &PrimitiveArray<T>, ) -> Result<BooleanArray> where T: ArrowNumericType,747 pub fn gt_eq<T>(
748     left: &PrimitiveArray<T>,
749     right: &PrimitiveArray<T>,
750 ) -> Result<BooleanArray>
751 where
752     T: ArrowNumericType,
753 {
754     #[cfg(simd)]
755     return simd_compare_op(left, right, T::ge, |a, b| a >= b);
756     #[cfg(not(simd))]
757     return compare_op!(left, right, |a, b| a >= b);
758 }
759 
/// Perform `left >= right` operation on an array and a scalar value.
///
/// Slot `i` of the result holds `left[i] >= right`. A null slot in `left` produces a null
/// result slot (this is what the scalar null-handling tests in this file assert), so
/// nulls are *not* ordered relative to non-null values.
pub fn gt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    // Exactly one of the two `return`s below is compiled in, selected by `cfg(simd)`.
    #[cfg(simd)]
    return simd_compare_op_scalar(left, right, T::ge, |a, b| a >= b);
    #[cfg(not(simd))]
    return compare_op_scalar!(left, right, |a, b| a >= b);
}
771 
772 /// Checks if a [`GenericListArray`] contains a value in the [`PrimitiveArray`]
contains<T, OffsetSize>( left: &PrimitiveArray<T>, right: &GenericListArray<OffsetSize>, ) -> Result<BooleanArray> where T: ArrowNumericType, OffsetSize: OffsetSizeTrait,773 pub fn contains<T, OffsetSize>(
774     left: &PrimitiveArray<T>,
775     right: &GenericListArray<OffsetSize>,
776 ) -> Result<BooleanArray>
777 where
778     T: ArrowNumericType,
779     OffsetSize: OffsetSizeTrait,
780 {
781     let left_len = left.len();
782     if left_len != right.len() {
783         return Err(ArrowError::ComputeError(
784             "Cannot perform comparison operation on arrays of different length"
785                 .to_string(),
786         ));
787     }
788 
789     let num_bytes = bit_util::ceil(left_len, 8);
790 
791     let not_both_null_bit_buffer =
792         match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
793             Some(buff) => buff,
794             None => new_all_set_buffer(num_bytes),
795         };
796     let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();
797 
798     let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
799     let bool_slice = bool_buf.as_slice_mut();
800 
801     // if both array slots are valid, check if list contains primitive
802     for i in 0..left_len {
803         if bit_util::get_bit(not_both_null_bitmap, i) {
804             let list = right.value(i);
805             let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
806 
807             for j in 0..list.len() {
808                 if list.is_valid(j) && (left.value(i) == list.value(j)) {
809                     bit_util::set_bit(bool_slice, i);
810                     continue;
811                 }
812             }
813         }
814     }
815 
816     let data = ArrayData::new(
817         DataType::Boolean,
818         left.len(),
819         None,
820         None,
821         0,
822         vec![bool_buf.into()],
823         vec![],
824     );
825     Ok(BooleanArray::from(Arc::new(data)))
826 }
827 
828 /// Checks if a [`GenericListArray`] contains a value in the [`GenericStringArray`]
contains_utf8<OffsetSize>( left: &GenericStringArray<OffsetSize>, right: &ListArray, ) -> Result<BooleanArray> where OffsetSize: StringOffsetSizeTrait,829 pub fn contains_utf8<OffsetSize>(
830     left: &GenericStringArray<OffsetSize>,
831     right: &ListArray,
832 ) -> Result<BooleanArray>
833 where
834     OffsetSize: StringOffsetSizeTrait,
835 {
836     let left_len = left.len();
837     if left_len != right.len() {
838         return Err(ArrowError::ComputeError(
839             "Cannot perform comparison operation on arrays of different length"
840                 .to_string(),
841         ));
842     }
843 
844     let num_bytes = bit_util::ceil(left_len, 8);
845 
846     let not_both_null_bit_buffer =
847         match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
848             Some(buff) => buff,
849             None => new_all_set_buffer(num_bytes),
850         };
851     let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();
852 
853     let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
854     let bool_slice = &mut bool_buf;
855 
856     for i in 0..left_len {
857         // contains(null, null) = false
858         if bit_util::get_bit(not_both_null_bitmap, i) {
859             let list = right.value(i);
860             let list = list
861                 .as_any()
862                 .downcast_ref::<GenericStringArray<OffsetSize>>()
863                 .unwrap();
864 
865             for j in 0..list.len() {
866                 if list.is_valid(j) && (left.value(i) == list.value(j)) {
867                     bit_util::set_bit(bool_slice, i);
868                     continue;
869                 }
870             }
871         }
872     }
873 
874     let data = ArrayData::new(
875         DataType::Boolean,
876         left.len(),
877         None,
878         None,
879         0,
880         vec![bool_buf.into()],
881         vec![],
882     );
883     Ok(BooleanArray::from(Arc::new(data)))
884 }
885 
886 // create a buffer and fill it with valid bits
887 #[inline]
new_all_set_buffer(len: usize) -> Buffer888 fn new_all_set_buffer(len: usize) -> Buffer {
889     let buffer = MutableBuffer::new(len);
890     let buffer = buffer.with_bitset(len, true);
891 
892     buffer.into()
893 }
894 
// disable wrapping inside literal vectors used for test data and assertions
#[rustfmt::skip::macros(vec)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::datatypes::Int8Type;
    use crate::{array::Int32Array, array::Int64Array, datatypes::Field};

    /// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
    /// `A_VEC` and `B_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
    /// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
    /// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
    macro_rules! cmp_i64 {
        ($KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
            let a = Int64Array::from($A_VEC);
            let b = Int64Array::from($B_VEC);
            let c = $KERNEL(&a, &b).unwrap();
            assert_eq!(BooleanArray::from($EXPECTED), c);
        };
    }

    /// Evaluate `KERNEL` with one vector and one scalar as inputs and assert against the expected output.
    /// `A_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
    /// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
    /// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
    macro_rules! cmp_i64_scalar {
        ($KERNEL:ident, $A_VEC:expr, $B:literal, $EXPECTED:expr) => {
            let a = Int64Array::from($A_VEC);
            let c = $KERNEL(&a, $B).unwrap();
            assert_eq!(BooleanArray::from($EXPECTED), c);
        };
    }

    #[test]
    fn test_primitive_array_eq() {
        cmp_i64!(
            eq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![false, false, true, false, false, false, false, true, false, false]
        );
    }

    #[test]
    fn test_primitive_array_eq_scalar() {
        cmp_i64_scalar!(
            eq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![false, false, true, false, false, false, false, true, false, false]
        );
    }

    // `eq` must honour the offset of a sliced input: `b_slice` covers [6, 7, 8, 9, 10].
    #[test]
    fn test_primitive_array_eq_with_slice() {
        let a = Int32Array::from(vec![6, 7, 8, 8, 10]);
        let b = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
        let b_slice = b.slice(5, 5);
        let c = b_slice.as_any().downcast_ref().unwrap();
        let d = eq(&c, &a).unwrap();
        assert_eq!(true, d.value(0));
        assert_eq!(true, d.value(1));
        assert_eq!(true, d.value(2));
        assert_eq!(false, d.value(3));
        assert_eq!(true, d.value(4));
    }

    #[test]
    fn test_primitive_array_neq() {
        cmp_i64!(
            neq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![true, true, false, true, true, true, true, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_neq_scalar() {
        cmp_i64_scalar!(
            neq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![true, true, false, true, true, true, true, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_lt() {
        cmp_i64!(
            lt,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![false, false, false, true, true, false, false, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_lt_scalar() {
        cmp_i64_scalar!(
            lt_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![true, true, false, false, false, true, true, false, false, false]
        );
    }

    // A null on either side yields a null result slot.
    #[test]
    fn test_primitive_array_lt_nulls() {
        cmp_i64!(
            lt,
            vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
            vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
            vec![None, None, None, Some(false), None, None, None, Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_lt_scalar_nulls() {
        cmp_i64_scalar!(
            lt_scalar,
            vec![None, Some(1), Some(2), Some(3), None, Some(1), Some(2), Some(3), Some(2), None],
            2,
            vec![None, Some(true), Some(false), Some(false), None, Some(true), Some(false), Some(false), Some(false), None]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq() {
        cmp_i64!(
            lt_eq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![false, false, true, true, true, false, false, true, true, true]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq_scalar() {
        cmp_i64_scalar!(
            lt_eq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![true, true, true, false, false, true, true, true, false, false]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq_nulls() {
        cmp_i64!(
            lt_eq,
            vec![None, None, Some(1), None, None, Some(1), None, None, Some(1)],
            vec![None, Some(1), Some(0), None, Some(1), Some(2), None, None, Some(3)],
            vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq_scalar_nulls() {
        cmp_i64_scalar!(
            lt_eq_scalar,
            vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
            1,
            vec![None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false)]
        );
    }

    #[test]
    fn test_primitive_array_gt() {
        cmp_i64!(
            gt,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![true, true, false, false, false, true, true, false, false, false]
        );
    }

    #[test]
    fn test_primitive_array_gt_scalar() {
        cmp_i64_scalar!(
            gt_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![false, false, false, true, true, false, false, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_gt_nulls() {
        cmp_i64!(
            gt,
            vec![None, None, Some(1), None, None, Some(2), None, None, Some(3)],
            vec![None, Some(1), Some(1), None, Some(1), Some(1), None, Some(1), Some(1)],
            vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_gt_scalar_nulls() {
        cmp_i64_scalar!(
            gt_scalar,
            vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
            1,
            vec![None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false), Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq() {
        cmp_i64!(
            gt_eq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![true, true, true, false, false, true, true, true, false, false]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq_scalar() {
        cmp_i64_scalar!(
            gt_eq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![false, false, true, true, true, false, false, true, true, true]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq_nulls() {
        cmp_i64!(
            gt_eq,
            vec![None, None, Some(1), None, Some(1), Some(2), None, None, Some(1)],
            vec![None, Some(1), None, None, Some(1), Some(1), None, Some(2), Some(2)],
            vec![None, None, None, None, Some(true), Some(true), None, None, Some(false)]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq_scalar_nulls() {
        cmp_i64_scalar!(
            gt_eq_scalar,
            vec![None, Some(1), Some(2), None, Some(2), Some(3), None, Some(3), Some(4)],
            2,
            vec![None, Some(false), Some(true), None, Some(true), Some(true), None, Some(true), Some(true)]
        );
    }

    // Both inputs are slices with a non-zero offset: `a` covers 50..100 and `b` covers
    // 150..200, so every `a[i] < b[i]`.
    #[test]
    fn test_primitive_array_compare_slice() {
        let a: Int32Array = (0..100).map(Some).collect();
        let a = a.slice(50, 50);
        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
        let b: Int32Array = (100..200).map(Some).collect();
        let b = b.slice(50, 50);
        let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
        let actual = lt(&a, &b).unwrap();
        let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
        assert_eq!(expected, actual);
    }

    // Scalar variant of the slice test: `a` covers 50..100, all of which are below 200.
    #[test]
    fn test_primitive_array_compare_scalar_slice() {
        let a: Int32Array = (0..100).map(Some).collect();
        let a = a.slice(50, 50);
        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
        let actual = lt_scalar(&a, 200).unwrap();
        let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
        assert_eq!(expected, actual);
    }

    // The result's value buffer must have the same byte length as that of a
    // `BooleanArray` built directly from `item_count` booleans.
    #[test]
    fn test_length_of_result_buffer() {
        // `item_count` is chosen to not be a multiple of the number of SIMD lanes for this
        // type (`Int8Type`), 64.
        let item_count = 130;

        let select_mask: BooleanArray = vec![true; item_count].into();

        let array_a: PrimitiveArray<Int8Type> = vec![1; item_count].into();
        let array_b: PrimitiveArray<Int8Type> = vec![2; item_count].into();
        let result_mask = gt_eq(&array_a, &array_b).unwrap();

        assert_eq!(
            result_mask.data().buffers()[0].len(),
            select_mask.data().buffers()[0].len()
        );
    }

    // Expected behaviour:
    // contains(1, [1, 2, null]) = true
    // contains(3, [1, 2, null]) = false
    // contains(null, [1, 2, null]) = false
    // contains(null, null) = false
    #[test]
    fn test_contains() {
        let value_data = Int32Array::from(vec![
            Some(0),
            Some(1),
            Some(2),
            Some(3),
            Some(4),
            Some(5),
            Some(6),
            None,
            Some(7),
        ])
        .data();
        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 6, 9]);
        let list_data_type =
            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
        // Validity bits 0, 1 and 3 are set, so the third list entry is null.
        let list_data = ArrayData::builder(list_data_type)
            .len(4)
            .add_buffer(value_offsets)
            .add_child_data(value_data)
            .null_bit_buffer(Buffer::from([0b00001011]))
            .build();

        //  [[0, 1, 2], [3, 4, 5], null, [6, null, 7]]
        let list_array = LargeListArray::from(list_data);

        let nulls = Int32Array::from(vec![None, None, None, None]);
        let nulls_result = contains(&nulls, &list_array).unwrap();
        assert_eq!(
            nulls_result
                .as_any()
                .downcast_ref::<BooleanArray>()
                .unwrap(),
            &BooleanArray::from(vec![false, false, false, false]),
        );

        let values = Int32Array::from(vec![Some(0), Some(0), Some(0), Some(0)]);
        let values_result = contains(&values, &list_array).unwrap();
        assert_eq!(
            values_result
                .as_any()
                .downcast_ref::<BooleanArray>()
                .unwrap(),
            &BooleanArray::from(vec![true, false, false, false]),
        );
    }

    // Expected behaviour:
    // contains("ab", ["ab", "cd", null]) = true
    // contains("ef", ["ab", "cd", null]) = false
    // contains(null, ["ab", "cd", null]) = false
    // contains(null, null) = false
    #[test]
    fn test_contains_utf8() {
        let values_builder = StringBuilder::new(10);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_value("Lorem").unwrap();
        builder.values().append_value("ipsum").unwrap();
        builder.values().append_null().unwrap();
        builder.append(true).unwrap();
        builder.values().append_value("sit").unwrap();
        builder.values().append_value("amet").unwrap();
        builder.values().append_value("Lorem").unwrap();
        builder.append(true).unwrap();
        builder.append(false).unwrap();
        builder.values().append_value("ipsum").unwrap();
        builder.append(true).unwrap();

        //  [["Lorem", "ipsum", null], ["sit", "amet", "Lorem"], null, ["ipsum"]]
        // value_offsets = [0, 3, 6, 6, 7]
        let list_array = builder.finish();

        let nulls = StringArray::from(vec![None, None, None, None]);
        let nulls_result = contains_utf8(&nulls, &list_array).unwrap();
        assert_eq!(
            nulls_result
                .as_any()
                .downcast_ref::<BooleanArray>()
                .unwrap(),
            &BooleanArray::from(vec![false, false, false, false]),
        );

        let values = StringArray::from(vec![
            Some("Lorem"),
            Some("Lorem"),
            Some("Lorem"),
            Some("Lorem"),
        ]);
        let values_result = contains_utf8(&values, &list_array).unwrap();
        assert_eq!(
            values_result
                .as_any()
                .downcast_ref::<BooleanArray>()
                .unwrap(),
            &BooleanArray::from(vec![true, true, false, false]),
        );
    }

    /// Generates a `#[test]` named `$test_name` that builds two `StringArray`s from `$left`
    /// and `$right`, applies the array kernel `$op` to them, and compares every result
    /// value against `$expected` (a `Vec<bool>` of the same length).
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(v, expected[i]);
                }
            }
        };
    }

    /// Generates a `#[test]` named `$test_name` that applies the scalar kernel `$op` to an
    /// array built from `$left` and the scalar `$right`, comparing every result value
    /// against `$expected`. The check runs twice: once with a `StringArray` and once with
    /// a `LargeStringArray`.
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = StringArray::from($left);
                let res = $op(&left, $right).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(
                        v,
                        expected[i],
                        "unexpected result when comparing {} at position {} to {} ",
                        left.value(i),
                        i,
                        $right
                    );
                }

                let left = LargeStringArray::from($left);
                let res = $op(&left, $right).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(
                        v,
                        expected[i],
                        "unexpected result when comparing {} at position {} to {} ",
                        left.value(i),
                        i,
                        $right
                    );
                }
            }
        };
    }

    test_utf8!(
        test_utf8_array_like,
        vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        like_utf8,
        vec![true, true, true, false, false, true, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "%ar%",
        like_utf8_scalar,
        vec![true, true, false, false]
    );
    test_utf8_scalar!(
        test_utf8_array_like_scalar_start,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow%",
        like_utf8_scalar,
        vec![true, false, true, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar_end,
        vec!["arrow", "parrow", "arrows", "arr"],
        "%arrow",
        like_utf8_scalar,
        vec![true, true, false, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar_equals,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow",
        like_utf8_scalar,
        vec![true, false, false, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar_one,
        vec!["arrow", "arrows", "parrow", "arr"],
        "arrow_",
        like_utf8_scalar,
        vec![false, true, false, false]
    );

    test_utf8!(
        test_utf8_array_nlike,
        vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        nlike_utf8,
        vec![false, false, false, true, true, false, true]
    );
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "%ar%",
        nlike_utf8_scalar,
        vec![false, false, true, true]
    );

    test_utf8!(
        test_utf8_array_eq,
        vec!["arrow", "arrow", "arrow", "arrow"],
        vec!["arrow", "parquet", "datafusion", "flight"],
        eq_utf8,
        vec![true, false, false, false]
    );
    test_utf8_scalar!(
        test_utf8_array_eq_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "arrow",
        eq_utf8_scalar,
        vec![true, false, false, false]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_start,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow%",
        nlike_utf8_scalar,
        vec![false, true, false, true]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_end,
        vec!["arrow", "parrow", "arrows", "arr"],
        "%arrow",
        nlike_utf8_scalar,
        vec![false, false, true, true]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_equals,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow",
        nlike_utf8_scalar,
        vec![false, true, true, true]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_one,
        vec!["arrow", "arrows", "parrow", "arr"],
        "arrow_",
        nlike_utf8_scalar,
        vec![true, false, true, true]
    );

    test_utf8!(
        test_utf8_array_neq,
        vec!["arrow", "arrow", "arrow", "arrow"],
        vec!["arrow", "parquet", "datafusion", "flight"],
        neq_utf8,
        vec![false, true, true, true]
    );
    test_utf8_scalar!(
        test_utf8_array_neq_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "arrow",
        neq_utf8_scalar,
        vec![false, true, true, true]
    );

    test_utf8!(
        test_utf8_array_lt,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        lt_utf8,
        vec![true, true, false, false]
    );
    test_utf8_scalar!(
        test_utf8_array_lt_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        lt_utf8_scalar,
        vec![true, true, false, false]
    );

    test_utf8!(
        test_utf8_array_lt_eq,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        lt_eq_utf8,
        vec![true, true, true, false]
    );
    test_utf8_scalar!(
        test_utf8_array_lt_eq_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        lt_eq_utf8_scalar,
        vec![true, true, true, false]
    );

    test_utf8!(
        test_utf8_array_gt,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        gt_utf8,
        vec![false, false, false, true]
    );
    test_utf8_scalar!(
        test_utf8_array_gt_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        gt_utf8_scalar,
        vec![false, false, false, true]
    );

    test_utf8!(
        test_utf8_array_gt_eq,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        gt_eq_utf8,
        vec![false, false, true, true]
    );
    test_utf8_scalar!(
        test_utf8_array_gt_eq_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        gt_eq_utf8_scalar,
        vec![false, false, true, true]
    );
}
1528