1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 //! Defines basic comparison kernels for [`PrimitiveArray`]s.
19 //!
20 //! These kernels can leverage SIMD if available on your system. Currently no runtime
21 //! detection is provided, you should enable the specific SIMD intrinsics using
22 //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation
23 //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
24
25 use regex::Regex;
26 use std::collections::HashMap;
27 use std::sync::Arc;
28
29 use crate::array::*;
30 use crate::buffer::{Buffer, MutableBuffer};
31 use crate::compute::util::combine_option_bitmap;
32 use crate::datatypes::{ArrowNumericType, DataType};
33 use crate::error::{ArrowError, Result};
34 use crate::util::bit_util;
35
/// Applies a boolean-valued binary closure to every pair of values taken from two
/// arrays and packs the outcomes into a [`BooleanArray`]. This is the plain code
/// path that makes no attempt to use SIMD.
///
/// The expansion contains an early `return Err(..)` for inputs of different length
/// and a `?`, so it can only be used inside functions returning
/// `Result<BooleanArray>`.
macro_rules! compare_op {
    ($left: expr, $right:expr, $op:expr) => {{
        let len = $left.len();
        if len != $right.len() {
            return Err(ArrowError::ComputeError(
                "Cannot perform comparison operation on arrays of different length"
                    .to_string(),
            ));
        }

        // validity of the output is computed from the validity of both inputs
        let validity = combine_option_bitmap($left.data_ref(), $right.data_ref(), len)?;

        let values = (0..len)
            .map(|idx| $op($left.value(idx), $right.value(idx)))
            .collect();

        Ok(BooleanArray::from(Arc::new(ArrayData::new(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![values],
            vec![],
        ))))
    }};
}
66
/// Applies a boolean-valued binary closure to every value of an array and a single
/// scalar, packing the outcomes into a [`BooleanArray`] (non-SIMD code path).
///
/// The result is always built with offset `0`, so the validity bitmap of `$left`
/// is re-sliced by `$left.offset()`; cloning the raw buffer would attach the wrong
/// validity bits when `$left` is a slice of a larger array.
macro_rules! compare_op_scalar {
    ($left: expr, $right:expr, $op:expr) => {{
        // the scalar can never be null, so validity comes from the array alone
        let null_bit_buffer = $left
            .data_ref()
            .null_buffer()
            .map(|b| b.bit_slice($left.offset(), $left.len()));

        let buffer = (0..$left.len())
            .map(|i| $op($left.value(i), $right))
            .collect();

        let data = ArrayData::new(
            DataType::Boolean,
            $left.len(),
            None,
            null_bit_buffer,
            0,
            vec![buffer],
            vec![],
        );
        Ok(BooleanArray::from(Arc::new(data)))
    }};
}
87
88 /// Evaluate `op(left, right)` for [`PrimitiveArray`]s using a specified
89 /// comparison function.
no_simd_compare_op<T, F>( left: &PrimitiveArray<T>, right: &PrimitiveArray<T>, op: F, ) -> Result<BooleanArray> where T: ArrowNumericType, F: Fn(T::Native, T::Native) -> bool,90 pub fn no_simd_compare_op<T, F>(
91 left: &PrimitiveArray<T>,
92 right: &PrimitiveArray<T>,
93 op: F,
94 ) -> Result<BooleanArray>
95 where
96 T: ArrowNumericType,
97 F: Fn(T::Native, T::Native) -> bool,
98 {
99 compare_op!(left, right, op)
100 }
101
102 /// Evaluate `op(left, right)` for [`PrimitiveArray`] and scalar using
103 /// a specified comparison function.
no_simd_compare_op_scalar<T, F>( left: &PrimitiveArray<T>, right: T::Native, op: F, ) -> Result<BooleanArray> where T: ArrowNumericType, F: Fn(T::Native, T::Native) -> bool,104 pub fn no_simd_compare_op_scalar<T, F>(
105 left: &PrimitiveArray<T>,
106 right: T::Native,
107 op: F,
108 ) -> Result<BooleanArray>
109 where
110 T: ArrowNumericType,
111 F: Fn(T::Native, T::Native) -> bool,
112 {
113 compare_op_scalar!(left, right, op)
114 }
115
116 /// Perform SQL `left LIKE right` operation on [`StringArray`] / [`LargeStringArray`].
117 ///
118 /// There are two wildcards supported with the LIKE operator:
119 ///
120 /// 1. `%` - The percent sign represents zero, one, or multiple characters
121 /// 2. `_` - The underscore represents a single character
122 ///
123 /// For example:
124 /// ```
125 /// use arrow::array::{StringArray, BooleanArray};
126 /// use arrow::compute::like_utf8;
127 ///
128 /// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
129 /// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A."]);
130 ///
131 /// let result = like_utf8(&strings, &patterns).unwrap();
132 /// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
133 /// ```
like_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>134 pub fn like_utf8<OffsetSize: StringOffsetSizeTrait>(
135 left: &GenericStringArray<OffsetSize>,
136 right: &GenericStringArray<OffsetSize>,
137 ) -> Result<BooleanArray> {
138 let mut map = HashMap::new();
139 if left.len() != right.len() {
140 return Err(ArrowError::ComputeError(
141 "Cannot perform comparison operation on arrays of different length"
142 .to_string(),
143 ));
144 }
145
146 let null_bit_buffer =
147 combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
148
149 let mut result = BooleanBufferBuilder::new(left.len());
150 for i in 0..left.len() {
151 let haystack = left.value(i);
152 let pat = right.value(i);
153 let re = if let Some(ref regex) = map.get(pat) {
154 regex
155 } else {
156 let re_pattern = pat.replace("%", ".*").replace("_", ".");
157 let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
158 ArrowError::ComputeError(format!(
159 "Unable to build regex from LIKE pattern: {}",
160 e
161 ))
162 })?;
163 map.insert(pat, re);
164 map.get(pat).unwrap()
165 };
166
167 result.append(re.is_match(haystack));
168 }
169
170 let data = ArrayData::new(
171 DataType::Boolean,
172 left.len(),
173 None,
174 null_bit_buffer,
175 0,
176 vec![result.finish()],
177 vec![],
178 );
179 Ok(BooleanArray::from(Arc::new(data)))
180 }
181
/// Returns `true` when `c` is one of the two LIKE wildcard characters (`%` or `_`).
fn is_like_pattern(c: char) -> bool {
    matches!(c, '%' | '_')
}
185
186 /// Perform SQL `left LIKE right` operation on [`StringArray`] /
187 /// [`LargeStringArray`] and a scalar.
188 ///
189 /// See the documentation on [`like_utf8`] for more details.
like_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>190 pub fn like_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
191 left: &GenericStringArray<OffsetSize>,
192 right: &str,
193 ) -> Result<BooleanArray> {
194 let null_bit_buffer = left.data().null_buffer().cloned();
195 let bytes = bit_util::ceil(left.len(), 8);
196 let mut bool_buf = MutableBuffer::from_len_zeroed(bytes);
197 let bool_slice = bool_buf.as_slice_mut();
198
199 if !right.contains(is_like_pattern) {
200 // fast path, can use equals
201 for i in 0..left.len() {
202 if left.value(i) == right {
203 bit_util::set_bit(bool_slice, i);
204 }
205 }
206 } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
207 {
208 // fast path, can use starts_with
209 let starts_with = &right[..right.len() - 1];
210 for i in 0..left.len() {
211 if left.value(i).starts_with(starts_with) {
212 bit_util::set_bit(bool_slice, i);
213 }
214 }
215 } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
216 // fast path, can use ends_with
217 let ends_with = &right[1..];
218 for i in 0..left.len() {
219 if left.value(i).ends_with(ends_with) {
220 bit_util::set_bit(bool_slice, i);
221 }
222 }
223 } else {
224 let re_pattern = right.replace("%", ".*").replace("_", ".");
225 let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
226 ArrowError::ComputeError(format!(
227 "Unable to build regex from LIKE pattern: {}",
228 e
229 ))
230 })?;
231
232 for i in 0..left.len() {
233 let haystack = left.value(i);
234 if re.is_match(haystack) {
235 bit_util::set_bit(bool_slice, i);
236 }
237 }
238 };
239
240 let data = ArrayData::new(
241 DataType::Boolean,
242 left.len(),
243 None,
244 null_bit_buffer,
245 0,
246 vec![bool_buf.into()],
247 vec![],
248 );
249 Ok(BooleanArray::from(Arc::new(data)))
250 }
251
252 /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
253 /// [`LargeStringArray`].
254 ///
255 /// See the documentation on [`like_utf8`] for more details.
nlike_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>256 pub fn nlike_utf8<OffsetSize: StringOffsetSizeTrait>(
257 left: &GenericStringArray<OffsetSize>,
258 right: &GenericStringArray<OffsetSize>,
259 ) -> Result<BooleanArray> {
260 let mut map = HashMap::new();
261 if left.len() != right.len() {
262 return Err(ArrowError::ComputeError(
263 "Cannot perform comparison operation on arrays of different length"
264 .to_string(),
265 ));
266 }
267
268 let null_bit_buffer =
269 combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
270
271 let mut result = BooleanBufferBuilder::new(left.len());
272 for i in 0..left.len() {
273 let haystack = left.value(i);
274 let pat = right.value(i);
275 let re = if let Some(ref regex) = map.get(pat) {
276 regex
277 } else {
278 let re_pattern = pat.replace("%", ".*").replace("_", ".");
279 let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
280 ArrowError::ComputeError(format!(
281 "Unable to build regex from LIKE pattern: {}",
282 e
283 ))
284 })?;
285 map.insert(pat, re);
286 map.get(pat).unwrap()
287 };
288
289 result.append(!re.is_match(haystack));
290 }
291
292 let data = ArrayData::new(
293 DataType::Boolean,
294 left.len(),
295 None,
296 null_bit_buffer,
297 0,
298 vec![result.finish()],
299 vec![],
300 );
301 Ok(BooleanArray::from(Arc::new(data)))
302 }
303
304 /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
305 /// [`LargeStringArray`] and a scalar.
306 ///
307 /// See the documentation on [`like_utf8`] for more details.
nlike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>308 pub fn nlike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
309 left: &GenericStringArray<OffsetSize>,
310 right: &str,
311 ) -> Result<BooleanArray> {
312 let null_bit_buffer = left.data().null_buffer().cloned();
313 let mut result = BooleanBufferBuilder::new(left.len());
314
315 if !right.contains(is_like_pattern) {
316 // fast path, can use equals
317 for i in 0..left.len() {
318 result.append(left.value(i) != right);
319 }
320 } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
321 {
322 // fast path, can use ends_with
323 for i in 0..left.len() {
324 result.append(!left.value(i).starts_with(&right[..right.len() - 1]));
325 }
326 } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
327 // fast path, can use starts_with
328 for i in 0..left.len() {
329 result.append(!left.value(i).ends_with(&right[1..]));
330 }
331 } else {
332 let re_pattern = right.replace("%", ".*").replace("_", ".");
333 let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
334 ArrowError::ComputeError(format!(
335 "Unable to build regex from LIKE pattern: {}",
336 e
337 ))
338 })?;
339 for i in 0..left.len() {
340 let haystack = left.value(i);
341 result.append(!re.is_match(haystack));
342 }
343 }
344
345 let data = ArrayData::new(
346 DataType::Boolean,
347 left.len(),
348 None,
349 null_bit_buffer,
350 0,
351 vec![result.finish()],
352 vec![],
353 );
354 Ok(BooleanArray::from(Arc::new(data)))
355 }
356
357 /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`].
eq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>358 pub fn eq_utf8<OffsetSize: StringOffsetSizeTrait>(
359 left: &GenericStringArray<OffsetSize>,
360 right: &GenericStringArray<OffsetSize>,
361 ) -> Result<BooleanArray> {
362 compare_op!(left, right, |a, b| a == b)
363 }
364
365 /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>366 pub fn eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
367 left: &GenericStringArray<OffsetSize>,
368 right: &str,
369 ) -> Result<BooleanArray> {
370 compare_op_scalar!(left, right, |a, b| a == b)
371 }
372
373 /// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`].
neq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>374 pub fn neq_utf8<OffsetSize: StringOffsetSizeTrait>(
375 left: &GenericStringArray<OffsetSize>,
376 right: &GenericStringArray<OffsetSize>,
377 ) -> Result<BooleanArray> {
378 compare_op!(left, right, |a, b| a != b)
379 }
380
381 /// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
neq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>382 pub fn neq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
383 left: &GenericStringArray<OffsetSize>,
384 right: &str,
385 ) -> Result<BooleanArray> {
386 compare_op_scalar!(left, right, |a, b| a != b)
387 }
388
389 /// Perform `left < right` operation on [`StringArray`] / [`LargeStringArray`].
lt_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>390 pub fn lt_utf8<OffsetSize: StringOffsetSizeTrait>(
391 left: &GenericStringArray<OffsetSize>,
392 right: &GenericStringArray<OffsetSize>,
393 ) -> Result<BooleanArray> {
394 compare_op!(left, right, |a, b| a < b)
395 }
396
397 /// Perform `left < right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
lt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>398 pub fn lt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
399 left: &GenericStringArray<OffsetSize>,
400 right: &str,
401 ) -> Result<BooleanArray> {
402 compare_op_scalar!(left, right, |a, b| a < b)
403 }
404
405 /// Perform `left <= right` operation on [`StringArray`] / [`LargeStringArray`].
lt_eq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>406 pub fn lt_eq_utf8<OffsetSize: StringOffsetSizeTrait>(
407 left: &GenericStringArray<OffsetSize>,
408 right: &GenericStringArray<OffsetSize>,
409 ) -> Result<BooleanArray> {
410 compare_op!(left, right, |a, b| a <= b)
411 }
412
413 /// Perform `left <= right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
lt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>414 pub fn lt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
415 left: &GenericStringArray<OffsetSize>,
416 right: &str,
417 ) -> Result<BooleanArray> {
418 compare_op_scalar!(left, right, |a, b| a <= b)
419 }
420
421 /// Perform `left > right` operation on [`StringArray`] / [`LargeStringArray`].
gt_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>422 pub fn gt_utf8<OffsetSize: StringOffsetSizeTrait>(
423 left: &GenericStringArray<OffsetSize>,
424 right: &GenericStringArray<OffsetSize>,
425 ) -> Result<BooleanArray> {
426 compare_op!(left, right, |a, b| a > b)
427 }
428
429 /// Perform `left > right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
gt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>430 pub fn gt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
431 left: &GenericStringArray<OffsetSize>,
432 right: &str,
433 ) -> Result<BooleanArray> {
434 compare_op_scalar!(left, right, |a, b| a > b)
435 }
436
437 /// Perform `left >= right` operation on [`StringArray`] / [`LargeStringArray`].
gt_eq_utf8<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &GenericStringArray<OffsetSize>, ) -> Result<BooleanArray>438 pub fn gt_eq_utf8<OffsetSize: StringOffsetSizeTrait>(
439 left: &GenericStringArray<OffsetSize>,
440 right: &GenericStringArray<OffsetSize>,
441 ) -> Result<BooleanArray> {
442 compare_op!(left, right, |a, b| a >= b)
443 }
444
445 /// Perform `left >= right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
gt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>( left: &GenericStringArray<OffsetSize>, right: &str, ) -> Result<BooleanArray>446 pub fn gt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
447 left: &GenericStringArray<OffsetSize>,
448 right: &str,
449 ) -> Result<BooleanArray> {
450 compare_op_scalar!(left, right, |a, b| a >= b)
451 }
452
/// Helper function to perform boolean lambda function on values from two arrays using
/// SIMD.
///
/// Full chunks of `T::lanes()` values are compared with `simd_op` and their mask is
/// written to the output as whole bytes; the values left over after the last full
/// chunk are compared one by one with `scalar_op`.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the arrays differ in length.
///
/// # Panics
///
/// Panics if `T::lanes()` is not a multiple of 8.
#[cfg(simd)]
fn simd_compare_op<T, SIMD_OP, SCALAR_OP>(
    left: &PrimitiveArray<T>,
    right: &PrimitiveArray<T>,
    simd_op: SIMD_OP,
    scalar_op: SCALAR_OP,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
{
    use std::borrow::BorrowMut;

    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), len)?;

    let lanes = T::lanes();
    let buffer_size = bit_util::ceil(len, 8);
    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

    // this is currently the case for all our datatypes and allows us to always append full bytes
    assert!(
        lanes % 8 == 0,
        "Number of vector lanes must be multiple of 8"
    );
    let mut left_chunks = left.values().chunks_exact(lanes);
    let mut right_chunks = right.values().chunks_exact(lanes);

    // the fold threads the not-yet-written tail of the output through the chunks;
    // what it returns is the part of the output reserved for the remainder
    let result_remainder = left_chunks
        .borrow_mut()
        .zip(right_chunks.borrow_mut())
        .fold(
            result.typed_data_mut(),
            |result_slice, (left_slice, right_slice)| {
                let simd_left = T::load(left_slice);
                let simd_right = T::load(right_slice);
                let simd_result = simd_op(simd_left, simd_right);

                let bitmask = T::mask_to_u64(&simd_result);
                let bytes = bitmask.to_le_bytes();
                result_slice[0..lanes / 8].copy_from_slice(&bytes[0..lanes / 8]);

                &mut result_slice[lanes / 8..]
            },
        );

    let left_remainder = left_chunks.remainder();
    let right_remainder = right_chunks.remainder();

    assert_eq!(left_remainder.len(), right_remainder.len());

    // bit `i` of the mask holds the comparison result of the i-th leftover pair
    let remainder_bitmask = left_remainder
        .iter()
        .zip(right_remainder.iter())
        .enumerate()
        .fold(0_u64, |mask, (i, (scalar_left, scalar_right))| {
            mask | (u64::from(scalar_op(*scalar_left, *scalar_right)) << i)
        });
    let remainder_mask_as_bytes =
        &remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
    result_remainder.copy_from_slice(remainder_mask_as_bytes);

    let data = ArrayData::new(
        DataType::Boolean,
        len,
        None,
        null_bit_buffer,
        0,
        vec![result.into()],
        vec![],
    );
    Ok(BooleanArray::from(Arc::new(data)))
}
542
/// Helper function to perform boolean lambda function on values from an array and a scalar value using
/// SIMD.
///
/// Full chunks of `T::lanes()` values are compared against a vector initialised
/// from `right` with `simd_op`; the values left over after the last full chunk are
/// compared one by one with `scalar_op`.
///
/// # Panics
///
/// Panics if `T::lanes()` is not a multiple of 8.
#[cfg(simd)]
fn simd_compare_op_scalar<T, SIMD_OP, SCALAR_OP>(
    left: &PrimitiveArray<T>,
    right: T::Native,
    simd_op: SIMD_OP,
    scalar_op: SCALAR_OP,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
{
    use std::borrow::BorrowMut;

    let len = left.len();

    let lanes = T::lanes();
    let buffer_size = bit_util::ceil(len, 8);
    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

    // this is currently the case for all our datatypes and allows us to always append full bytes
    assert!(
        lanes % 8 == 0,
        "Number of vector lanes must be multiple of 8"
    );
    let mut left_chunks = left.values().chunks_exact(lanes);
    let simd_right = T::init(right);

    // the fold threads the not-yet-written tail of the output through the chunks;
    // what it returns is the part of the output reserved for the remainder
    let result_remainder = left_chunks.borrow_mut().fold(
        result.typed_data_mut(),
        |result_slice, left_slice| {
            let simd_left = T::load(left_slice);
            let simd_result = simd_op(simd_left, simd_right);

            let bitmask = T::mask_to_u64(&simd_result);
            let bytes = bitmask.to_le_bytes();
            result_slice[0..lanes / 8].copy_from_slice(&bytes[0..lanes / 8]);

            &mut result_slice[lanes / 8..]
        },
    );

    let left_remainder = left_chunks.remainder();

    // bit `i` of the mask holds the comparison result of the i-th leftover value
    let remainder_bitmask = left_remainder
        .iter()
        .enumerate()
        .fold(0_u64, |mask, (i, scalar_left)| {
            mask | (u64::from(scalar_op(*scalar_left, right)) << i)
        });
    let remainder_mask_as_bytes =
        &remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
    result_remainder.copy_from_slice(remainder_mask_as_bytes);

    // the result has offset 0, so the validity bitmap is re-sliced by the input offset
    let null_bit_buffer = left
        .data_ref()
        .null_buffer()
        .map(|b| b.bit_slice(left.offset(), left.len()));

    // null count is the same as in the input since the right side of the scalar comparison cannot be null
    let null_count = left.null_count();

    let data = ArrayData::new(
        DataType::Boolean,
        len,
        Some(null_count),
        null_bit_buffer,
        0,
        vec![result.into()],
        vec![],
    );
    Ok(BooleanArray::from(Arc::new(data)))
}
625
626 /// Perform `left == right` operation on two arrays.
eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,627 pub fn eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
628 where
629 T: ArrowNumericType,
630 {
631 #[cfg(simd)]
632 return simd_compare_op(left, right, T::eq, |a, b| a == b);
633 #[cfg(not(simd))]
634 return compare_op!(left, right, |a, b| a == b);
635 }
636
637 /// Perform `left == right` operation on an array and a scalar value.
eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,638 pub fn eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
639 where
640 T: ArrowNumericType,
641 {
642 #[cfg(simd)]
643 return simd_compare_op_scalar(left, right, T::eq, |a, b| a == b);
644 #[cfg(not(simd))]
645 return compare_op_scalar!(left, right, |a, b| a == b);
646 }
647
648 /// Perform `left != right` operation on two arrays.
neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,649 pub fn neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
650 where
651 T: ArrowNumericType,
652 {
653 #[cfg(simd)]
654 return simd_compare_op(left, right, T::ne, |a, b| a != b);
655 #[cfg(not(simd))]
656 return compare_op!(left, right, |a, b| a != b);
657 }
658
659 /// Perform `left != right` operation on an array and a scalar value.
neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,660 pub fn neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
661 where
662 T: ArrowNumericType,
663 {
664 #[cfg(simd)]
665 return simd_compare_op_scalar(left, right, T::ne, |a, b| a != b);
666 #[cfg(not(simd))]
667 return compare_op_scalar!(left, right, |a, b| a != b);
668 }
669
670 /// Perform `left < right` operation on two arrays. Null values are less than non-null
671 /// values.
lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,672 pub fn lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
673 where
674 T: ArrowNumericType,
675 {
676 #[cfg(simd)]
677 return simd_compare_op(left, right, T::lt, |a, b| a < b);
678 #[cfg(not(simd))]
679 return compare_op!(left, right, |a, b| a < b);
680 }
681
682 /// Perform `left < right` operation on an array and a scalar value.
683 /// Null values are less than non-null values.
lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,684 pub fn lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
685 where
686 T: ArrowNumericType,
687 {
688 #[cfg(simd)]
689 return simd_compare_op_scalar(left, right, T::lt, |a, b| a < b);
690 #[cfg(not(simd))]
691 return compare_op_scalar!(left, right, |a, b| a < b);
692 }
693
694 /// Perform `left <= right` operation on two arrays. Null values are less than non-null
695 /// values.
lt_eq<T>( left: &PrimitiveArray<T>, right: &PrimitiveArray<T>, ) -> Result<BooleanArray> where T: ArrowNumericType,696 pub fn lt_eq<T>(
697 left: &PrimitiveArray<T>,
698 right: &PrimitiveArray<T>,
699 ) -> Result<BooleanArray>
700 where
701 T: ArrowNumericType,
702 {
703 #[cfg(simd)]
704 return simd_compare_op(left, right, T::le, |a, b| a <= b);
705 #[cfg(not(simd))]
706 return compare_op!(left, right, |a, b| a <= b);
707 }
708
709 /// Perform `left <= right` operation on an array and a scalar value.
710 /// Null values are less than non-null values.
lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,711 pub fn lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
712 where
713 T: ArrowNumericType,
714 {
715 #[cfg(simd)]
716 return simd_compare_op_scalar(left, right, T::le, |a, b| a <= b);
717 #[cfg(not(simd))]
718 return compare_op_scalar!(left, right, |a, b| a <= b);
719 }
720
721 /// Perform `left > right` operation on two arrays. Non-null values are greater than null
722 /// values.
gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray> where T: ArrowNumericType,723 pub fn gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
724 where
725 T: ArrowNumericType,
726 {
727 #[cfg(simd)]
728 return simd_compare_op(left, right, T::gt, |a, b| a > b);
729 #[cfg(not(simd))]
730 return compare_op!(left, right, |a, b| a > b);
731 }
732
733 /// Perform `left > right` operation on an array and a scalar value.
734 /// Non-null values are greater than null values.
gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,735 pub fn gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
736 where
737 T: ArrowNumericType,
738 {
739 #[cfg(simd)]
740 return simd_compare_op_scalar(left, right, T::gt, |a, b| a > b);
741 #[cfg(not(simd))]
742 return compare_op_scalar!(left, right, |a, b| a > b);
743 }
744
745 /// Perform `left >= right` operation on two arrays. Non-null values are greater than null
746 /// values.
gt_eq<T>( left: &PrimitiveArray<T>, right: &PrimitiveArray<T>, ) -> Result<BooleanArray> where T: ArrowNumericType,747 pub fn gt_eq<T>(
748 left: &PrimitiveArray<T>,
749 right: &PrimitiveArray<T>,
750 ) -> Result<BooleanArray>
751 where
752 T: ArrowNumericType,
753 {
754 #[cfg(simd)]
755 return simd_compare_op(left, right, T::ge, |a, b| a >= b);
756 #[cfg(not(simd))]
757 return compare_op!(left, right, |a, b| a >= b);
758 }
759
760 /// Perform `left >= right` operation on an array and a scalar value.
761 /// Non-null values are greater than null values.
gt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray> where T: ArrowNumericType,762 pub fn gt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
763 where
764 T: ArrowNumericType,
765 {
766 #[cfg(simd)]
767 return simd_compare_op_scalar(left, right, T::ge, |a, b| a >= b);
768 #[cfg(not(simd))]
769 return compare_op_scalar!(left, right, |a, b| a >= b);
770 }
771
772 /// Checks if a [`GenericListArray`] contains a value in the [`PrimitiveArray`]
contains<T, OffsetSize>( left: &PrimitiveArray<T>, right: &GenericListArray<OffsetSize>, ) -> Result<BooleanArray> where T: ArrowNumericType, OffsetSize: OffsetSizeTrait,773 pub fn contains<T, OffsetSize>(
774 left: &PrimitiveArray<T>,
775 right: &GenericListArray<OffsetSize>,
776 ) -> Result<BooleanArray>
777 where
778 T: ArrowNumericType,
779 OffsetSize: OffsetSizeTrait,
780 {
781 let left_len = left.len();
782 if left_len != right.len() {
783 return Err(ArrowError::ComputeError(
784 "Cannot perform comparison operation on arrays of different length"
785 .to_string(),
786 ));
787 }
788
789 let num_bytes = bit_util::ceil(left_len, 8);
790
791 let not_both_null_bit_buffer =
792 match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
793 Some(buff) => buff,
794 None => new_all_set_buffer(num_bytes),
795 };
796 let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();
797
798 let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
799 let bool_slice = bool_buf.as_slice_mut();
800
801 // if both array slots are valid, check if list contains primitive
802 for i in 0..left_len {
803 if bit_util::get_bit(not_both_null_bitmap, i) {
804 let list = right.value(i);
805 let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
806
807 for j in 0..list.len() {
808 if list.is_valid(j) && (left.value(i) == list.value(j)) {
809 bit_util::set_bit(bool_slice, i);
810 continue;
811 }
812 }
813 }
814 }
815
816 let data = ArrayData::new(
817 DataType::Boolean,
818 left.len(),
819 None,
820 None,
821 0,
822 vec![bool_buf.into()],
823 vec![],
824 );
825 Ok(BooleanArray::from(Arc::new(data)))
826 }
827
828 /// Checks if a [`GenericListArray`] contains a value in the [`GenericStringArray`]
contains_utf8<OffsetSize>( left: &GenericStringArray<OffsetSize>, right: &ListArray, ) -> Result<BooleanArray> where OffsetSize: StringOffsetSizeTrait,829 pub fn contains_utf8<OffsetSize>(
830 left: &GenericStringArray<OffsetSize>,
831 right: &ListArray,
832 ) -> Result<BooleanArray>
833 where
834 OffsetSize: StringOffsetSizeTrait,
835 {
836 let left_len = left.len();
837 if left_len != right.len() {
838 return Err(ArrowError::ComputeError(
839 "Cannot perform comparison operation on arrays of different length"
840 .to_string(),
841 ));
842 }
843
844 let num_bytes = bit_util::ceil(left_len, 8);
845
846 let not_both_null_bit_buffer =
847 match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
848 Some(buff) => buff,
849 None => new_all_set_buffer(num_bytes),
850 };
851 let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();
852
853 let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
854 let bool_slice = &mut bool_buf;
855
856 for i in 0..left_len {
857 // contains(null, null) = false
858 if bit_util::get_bit(not_both_null_bitmap, i) {
859 let list = right.value(i);
860 let list = list
861 .as_any()
862 .downcast_ref::<GenericStringArray<OffsetSize>>()
863 .unwrap();
864
865 for j in 0..list.len() {
866 if list.is_valid(j) && (left.value(i) == list.value(j)) {
867 bit_util::set_bit(bool_slice, i);
868 continue;
869 }
870 }
871 }
872 }
873
874 let data = ArrayData::new(
875 DataType::Boolean,
876 left.len(),
877 None,
878 None,
879 0,
880 vec![bool_buf.into()],
881 vec![],
882 );
883 Ok(BooleanArray::from(Arc::new(data)))
884 }
885
886 // create a buffer and fill it with valid bits
887 #[inline]
new_all_set_buffer(len: usize) -> Buffer888 fn new_all_set_buffer(len: usize) -> Buffer {
889 let buffer = MutableBuffer::new(len);
890 let buffer = buffer.with_bitset(len, true);
891
892 buffer.into()
893 }
894
// disable wrapping inside literal vectors used for test data and assertions
#[rustfmt::skip::macros(vec)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::datatypes::Int8Type;
    use crate::{array::Int32Array, array::Int64Array, datatypes::Field};

    /// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
    /// `A_VEC` and `B_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
    /// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
    /// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
    macro_rules! cmp_i64 {
        ($KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
            let a = Int64Array::from($A_VEC);
            let b = Int64Array::from($B_VEC);
            let c = $KERNEL(&a, &b).unwrap();
            assert_eq!(BooleanArray::from($EXPECTED), c);
        };
    }

    /// Evaluate `KERNEL` with one vector and one scalar as inputs and assert against the expected output.
    /// `A_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
    /// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
    /// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
    macro_rules! cmp_i64_scalar {
        ($KERNEL:ident, $A_VEC:expr, $B:literal, $EXPECTED:expr) => {
            let a = Int64Array::from($A_VEC);
            let c = $KERNEL(&a, $B).unwrap();
            assert_eq!(BooleanArray::from($EXPECTED), c);
        };
    }

    #[test]
    fn test_primitive_array_eq() {
        cmp_i64!(
            eq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![false, false, true, false, false, false, false, true, false, false]
        );
    }

    #[test]
    fn test_primitive_array_eq_scalar() {
        cmp_i64_scalar!(
            eq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![false, false, true, false, false, false, false, true, false, false]
        );
    }

    /// `eq` on a sliced array (offset 5, length 5) compared with an unsliced array.
    #[test]
    fn test_primitive_array_eq_with_slice() {
        let a = Int32Array::from(vec![6, 7, 8, 8, 10]);
        let b = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
        let b_slice = b.slice(5, 5);
        let c = b_slice.as_any().downcast_ref().unwrap();
        let d = eq(&c, &a).unwrap();
        assert!(d.value(0));
        assert!(d.value(1));
        assert!(d.value(2));
        assert!(!d.value(3));
        assert!(d.value(4));
    }

    #[test]
    fn test_primitive_array_neq() {
        cmp_i64!(
            neq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![true, true, false, true, true, true, true, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_neq_scalar() {
        cmp_i64_scalar!(
            neq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![true, true, false, true, true, true, true, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_lt() {
        cmp_i64!(
            lt,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![false, false, false, true, true, false, false, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_lt_scalar() {
        cmp_i64_scalar!(
            lt_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![true, true, false, false, false, true, true, false, false, false]
        );
    }

    #[test]
    fn test_primitive_array_lt_nulls() {
        cmp_i64!(
            lt,
            vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
            vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
            vec![None, None, None, Some(false), None, None, None, Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_lt_scalar_nulls() {
        cmp_i64_scalar!(
            lt_scalar,
            vec![None, Some(1), Some(2), Some(3), None, Some(1), Some(2), Some(3), Some(2), None],
            2,
            vec![None, Some(true), Some(false), Some(false), None, Some(true), Some(false), Some(false), Some(false), None]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq() {
        cmp_i64!(
            lt_eq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![false, false, true, true, true, false, false, true, true, true]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq_scalar() {
        cmp_i64_scalar!(
            lt_eq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![true, true, true, false, false, true, true, true, false, false]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq_nulls() {
        cmp_i64!(
            lt_eq,
            vec![None, None, Some(1), None, None, Some(1), None, None, Some(1)],
            vec![None, Some(1), Some(0), None, Some(1), Some(2), None, None, Some(3)],
            vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_lt_eq_scalar_nulls() {
        cmp_i64_scalar!(
            lt_eq_scalar,
            vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
            1,
            vec![None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false)]
        );
    }

    #[test]
    fn test_primitive_array_gt() {
        cmp_i64!(
            gt,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![true, true, false, false, false, true, true, false, false, false]
        );
    }

    #[test]
    fn test_primitive_array_gt_scalar() {
        cmp_i64_scalar!(
            gt_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![false, false, false, true, true, false, false, false, true, true]
        );
    }

    #[test]
    fn test_primitive_array_gt_nulls() {
        cmp_i64!(
            gt,
            vec![None, None, Some(1), None, None, Some(2), None, None, Some(3)],
            vec![None, Some(1), Some(1), None, Some(1), Some(1), None, Some(1), Some(1)],
            vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_gt_scalar_nulls() {
        cmp_i64_scalar!(
            gt_scalar,
            vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
            1,
            vec![None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false), Some(true)]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq() {
        cmp_i64!(
            gt_eq,
            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            vec![true, true, true, false, false, true, true, true, false, false]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq_scalar() {
        cmp_i64_scalar!(
            gt_eq_scalar,
            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
            8,
            vec![false, false, true, true, true, false, false, true, true, true]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq_nulls() {
        cmp_i64!(
            gt_eq,
            vec![None, None, Some(1), None, Some(1), Some(2), None, None, Some(1)],
            vec![None, Some(1), None, None, Some(1), Some(1), None, Some(2), Some(2)],
            vec![None, None, None, None, Some(true), Some(true), None, None, Some(false)]
        );
    }

    #[test]
    fn test_primitive_array_gt_eq_scalar_nulls() {
        cmp_i64_scalar!(
            gt_eq_scalar,
            vec![None, Some(1), Some(2), None, Some(2), Some(3), None, Some(3), Some(4)],
            2,
            vec![None, Some(false), Some(true), None, Some(true), Some(true), None, Some(true), Some(true)]
        );
    }

    /// `lt` on two arrays that are both slices (offset 50, length 50) of longer arrays.
    #[test]
    fn test_primitive_array_compare_slice() {
        let a: Int32Array = (0..100).map(Some).collect();
        let a = a.slice(50, 50);
        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
        let b: Int32Array = (100..200).map(Some).collect();
        let b = b.slice(50, 50);
        let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
        let actual = lt(a, b).unwrap();
        let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
        assert_eq!(expected, actual);
    }

    /// `lt_scalar` on an array that is a slice (offset 50, length 50) of a longer array.
    #[test]
    fn test_primitive_array_compare_scalar_slice() {
        let a: Int32Array = (0..100).map(Some).collect();
        let a = a.slice(50, 50);
        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
        let actual = lt_scalar(a, 200).unwrap();
        let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
        assert_eq!(expected, actual);
    }

    /// The values buffer of a kernel result must have the same byte length as the values
    /// buffer of a `BooleanArray` built directly from a `Vec<bool>` of the same size.
    #[test]
    fn test_length_of_result_buffer() {
        // `item_count` is chosen to not be a multiple of the number of SIMD lanes for this
        // type (`Int8Type`), 64.
        let item_count = 130;

        let select_mask: BooleanArray = vec![true; item_count].into();

        let array_a: PrimitiveArray<Int8Type> = vec![1; item_count].into();
        let array_b: PrimitiveArray<Int8Type> = vec![2; item_count].into();
        let result_mask = gt_eq(&array_a, &array_b).unwrap();

        assert_eq!(
            result_mask.data().buffers()[0].len(),
            select_mask.data().buffers()[0].len()
        );
    }

    // Expected behaviour:
    // contains(1, [1, 2, null]) = true
    // contains(3, [1, 2, null]) = false
    // contains(null, [1, 2, null]) = false
    // contains(null, null) = false
    #[test]
    fn test_contains() {
        let value_data = Int32Array::from(vec![
            Some(0),
            Some(1),
            Some(2),
            Some(3),
            Some(4),
            Some(5),
            Some(6),
            None,
            Some(7),
        ])
        .data();
        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 6, 9]);
        let list_data_type =
            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
        let list_data = ArrayData::builder(list_data_type)
            .len(4)
            .add_buffer(value_offsets)
            .add_child_data(value_data)
            .null_bit_buffer(Buffer::from([0b00001011]))
            .build();

        //  [[0, 1, 2], [3, 4, 5], null, [6, null, 7]]
        let list_array = LargeListArray::from(list_data);

        // A null needle never matches, whatever the list holds.
        let nulls = Int32Array::from(vec![None, None, None, None]);
        let nulls_result = contains(&nulls, &list_array).unwrap();
        assert_eq!(
            nulls_result,
            BooleanArray::from(vec![false, false, false, false]),
        );

        // `0` only occurs in the first list.
        let values = Int32Array::from(vec![Some(0), Some(0), Some(0), Some(0)]);
        let values_result = contains(&values, &list_array).unwrap();
        assert_eq!(
            values_result,
            BooleanArray::from(vec![true, false, false, false]),
        );
    }

    // Expected behaviour:
    // contains("ab", ["ab", "cd", null]) = true
    // contains("ef", ["ab", "cd", null]) = false
    // contains(null, ["ab", "cd", null]) = false
    // contains(null, null) = false
    #[test]
    fn test_contains_utf8() {
        let values_builder = StringBuilder::new(10);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_value("Lorem").unwrap();
        builder.values().append_value("ipsum").unwrap();
        builder.values().append_null().unwrap();
        builder.append(true).unwrap();
        builder.values().append_value("sit").unwrap();
        builder.values().append_value("amet").unwrap();
        builder.values().append_value("Lorem").unwrap();
        builder.append(true).unwrap();
        builder.append(false).unwrap();
        builder.values().append_value("ipsum").unwrap();
        builder.append(true).unwrap();

        //  [["Lorem", "ipsum", null], ["sit", "amet", "Lorem"], null, ["ipsum"]]
        //  value_offsets = [0, 3, 6, 6, 7]
        let list_array = builder.finish();

        // A null needle never matches, whatever the list holds.
        let nulls = StringArray::from(vec![None, None, None, None]);
        let nulls_result = contains_utf8(&nulls, &list_array).unwrap();
        assert_eq!(
            nulls_result,
            BooleanArray::from(vec![false, false, false, false]),
        );

        // "Lorem" occurs in the first two lists only.
        let values = StringArray::from(vec![
            Some("Lorem"),
            Some("Lorem"),
            Some("Lorem"),
            Some("Lorem"),
        ]);
        let values_result = contains_utf8(&values, &list_array).unwrap();
        assert_eq!(
            values_result,
            BooleanArray::from(vec![true, true, false, false]),
        );
    }

    /// Generates a `#[test]` named `$test_name` that applies the array/array kernel `$op`
    /// to two `StringArray`s built from `$left` and `$right` and compares every result
    /// value with `$expected`.
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(v, expected[i]);
                }
            }
        };
    }

    /// Generates a `#[test]` named `$test_name` that applies the array/scalar kernel `$op`
    /// to `$left` and the scalar `$right`, once as a `StringArray` and once as a
    /// `LargeStringArray`, and compares every result value with `$expected`.
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = StringArray::from($left);
                let res = $op(&left, $right).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(
                        v,
                        expected[i],
                        "unexpected result when comparing {} at position {} to {} ",
                        left.value(i),
                        i,
                        $right
                    );
                }

                let left = LargeStringArray::from($left);
                let res = $op(&left, $right).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(
                        v,
                        expected[i],
                        "unexpected result when comparing {} at position {} to {} ",
                        left.value(i),
                        i,
                        $right
                    );
                }
            }
        };
    }

    test_utf8!(
        test_utf8_array_like,
        vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        like_utf8,
        vec![true, true, true, false, false, true, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "%ar%",
        like_utf8_scalar,
        vec![true, true, false, false]
    );
    test_utf8_scalar!(
        test_utf8_array_like_scalar_start,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow%",
        like_utf8_scalar,
        vec![true, false, true, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar_end,
        vec!["arrow", "parrow", "arrows", "arr"],
        "%arrow",
        like_utf8_scalar,
        vec![true, true, false, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar_equals,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow",
        like_utf8_scalar,
        vec![true, false, false, false]
    );

    test_utf8_scalar!(
        test_utf8_array_like_scalar_one,
        vec!["arrow", "arrows", "parrow", "arr"],
        "arrow_",
        like_utf8_scalar,
        vec![false, true, false, false]
    );

    test_utf8!(
        test_utf8_array_nlike,
        vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        nlike_utf8,
        vec![false, false, false, true, true, false, true]
    );
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "%ar%",
        nlike_utf8_scalar,
        vec![false, false, true, true]
    );

    test_utf8!(
        test_utf8_array_eq,
        vec!["arrow", "arrow", "arrow", "arrow"],
        vec!["arrow", "parquet", "datafusion", "flight"],
        eq_utf8,
        vec![true, false, false, false]
    );
    test_utf8_scalar!(
        test_utf8_array_eq_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "arrow",
        eq_utf8_scalar,
        vec![true, false, false, false]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_start,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow%",
        nlike_utf8_scalar,
        vec![false, true, false, true]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_end,
        vec!["arrow", "parrow", "arrows", "arr"],
        "%arrow",
        nlike_utf8_scalar,
        vec![false, false, true, true]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_equals,
        vec!["arrow", "parrow", "arrows", "arr"],
        "arrow",
        nlike_utf8_scalar,
        vec![false, true, true, true]
    );

    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_one,
        vec!["arrow", "arrows", "parrow", "arr"],
        "arrow_",
        nlike_utf8_scalar,
        vec![true, false, true, true]
    );

    test_utf8!(
        test_utf8_array_neq,
        vec!["arrow", "arrow", "arrow", "arrow"],
        vec!["arrow", "parquet", "datafusion", "flight"],
        neq_utf8,
        vec![false, true, true, true]
    );
    test_utf8_scalar!(
        test_utf8_array_neq_scalar,
        vec!["arrow", "parquet", "datafusion", "flight"],
        "arrow",
        neq_utf8_scalar,
        vec![false, true, true, true]
    );

    test_utf8!(
        test_utf8_array_lt,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        lt_utf8,
        vec![true, true, false, false]
    );
    test_utf8_scalar!(
        test_utf8_array_lt_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        lt_utf8_scalar,
        vec![true, true, false, false]
    );

    test_utf8!(
        test_utf8_array_lt_eq,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        lt_eq_utf8,
        vec![true, true, true, false]
    );
    test_utf8_scalar!(
        test_utf8_array_lt_eq_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        lt_eq_utf8_scalar,
        vec![true, true, true, false]
    );

    test_utf8!(
        test_utf8_array_gt,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        gt_utf8,
        vec![false, false, false, true]
    );
    test_utf8_scalar!(
        test_utf8_array_gt_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        gt_utf8_scalar,
        vec![false, false, false, true]
    );

    test_utf8!(
        test_utf8_array_gt_eq,
        vec!["arrow", "datafusion", "flight", "parquet"],
        vec!["flight", "flight", "flight", "flight"],
        gt_eq_utf8,
        vec![false, false, true, true]
    );
    test_utf8_scalar!(
        test_utf8_array_gt_eq_scalar,
        vec!["arrow", "datafusion", "flight", "parquet"],
        "flight",
        gt_eq_utf8_scalar,
        vec![false, false, true, true]
    );
}
1528