1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 //! Module containing functionality to compute array equality.
19 //! This module uses [ArrayData] and does not
20 //! depend on dynamic casting of `Array`.
21
22 use super::{
23 Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray,
24 FixedSizeBinaryArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
25 GenericStringArray, NullArray, OffsetSizeTrait, PrimitiveArray,
26 StringOffsetSizeTrait, StructArray,
27 };
28
29 use crate::{
30 buffer::Buffer,
31 datatypes::{ArrowPrimitiveType, DataType, IntervalUnit},
32 };
33
34 mod boolean;
35 mod decimal;
36 mod dictionary;
37 mod fixed_binary;
38 mod fixed_list;
39 mod list;
40 mod null;
41 mod primitive;
42 mod structure;
43 mod utils;
44 mod variable_size;
45
46 // these methods assume the same type, len and null count.
47 // For this reason, they are not exposed and are instead used
48 // to build the generic functions below (`equal_range` and `equal`).
49 use boolean::boolean_equal;
50 use decimal::decimal_equal;
51 use dictionary::dictionary_equal;
52 use fixed_binary::fixed_binary_equal;
53 use fixed_list::fixed_list_equal;
54 use list::list_equal;
55 use null::null_equal;
56 use primitive::primitive_equal;
57 use structure::struct_equal;
58 use variable_size::variable_sized_equal;
59
60 impl PartialEq for dyn Array {
eq(&self, other: &Self) -> bool61 fn eq(&self, other: &Self) -> bool {
62 equal(self.data().as_ref(), other.data().as_ref())
63 }
64 }
65
66 impl<T: Array> PartialEq<T> for dyn Array {
eq(&self, other: &T) -> bool67 fn eq(&self, other: &T) -> bool {
68 equal(self.data().as_ref(), other.data().as_ref())
69 }
70 }
71
72 impl PartialEq for NullArray {
eq(&self, other: &NullArray) -> bool73 fn eq(&self, other: &NullArray) -> bool {
74 equal(self.data().as_ref(), other.data().as_ref())
75 }
76 }
77
78 impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
eq(&self, other: &PrimitiveArray<T>) -> bool79 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
80 equal(self.data().as_ref(), other.data().as_ref())
81 }
82 }
83
84 impl PartialEq for BooleanArray {
eq(&self, other: &BooleanArray) -> bool85 fn eq(&self, other: &BooleanArray) -> bool {
86 equal(self.data().as_ref(), other.data().as_ref())
87 }
88 }
89
90 impl<OffsetSize: StringOffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
eq(&self, other: &Self) -> bool91 fn eq(&self, other: &Self) -> bool {
92 equal(self.data().as_ref(), other.data().as_ref())
93 }
94 }
95
96 impl<OffsetSize: BinaryOffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
eq(&self, other: &Self) -> bool97 fn eq(&self, other: &Self) -> bool {
98 equal(self.data().as_ref(), other.data().as_ref())
99 }
100 }
101
102 impl PartialEq for FixedSizeBinaryArray {
eq(&self, other: &Self) -> bool103 fn eq(&self, other: &Self) -> bool {
104 equal(self.data().as_ref(), other.data().as_ref())
105 }
106 }
107
108 impl PartialEq for DecimalArray {
eq(&self, other: &Self) -> bool109 fn eq(&self, other: &Self) -> bool {
110 equal(self.data().as_ref(), other.data().as_ref())
111 }
112 }
113
114 impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
eq(&self, other: &Self) -> bool115 fn eq(&self, other: &Self) -> bool {
116 equal(self.data().as_ref(), other.data().as_ref())
117 }
118 }
119
120 impl PartialEq for FixedSizeListArray {
eq(&self, other: &Self) -> bool121 fn eq(&self, other: &Self) -> bool {
122 equal(self.data().as_ref(), other.data().as_ref())
123 }
124 }
125
126 impl PartialEq for StructArray {
eq(&self, other: &Self) -> bool127 fn eq(&self, other: &Self) -> bool {
128 equal(self.data().as_ref(), other.data().as_ref())
129 }
130 }
131
132 /// Compares the values of two [ArrayData] starting at `lhs_start` and `rhs_start` respectively
133 /// for `len` slots. The null buffers `lhs_nulls` and `rhs_nulls` inherit parent nullability.
134 ///
135 /// If an array is a child of a struct or list, the array's nulls have to be merged with the parent.
136 /// This then affects the null count of the array, thus the merged nulls are passed separately
137 /// as `lhs_nulls` and `rhs_nulls` variables to functions.
138 /// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary.
139 #[inline]
equal_values( lhs: &ArrayData, rhs: &ArrayData, lhs_nulls: Option<&Buffer>, rhs_nulls: Option<&Buffer>, lhs_start: usize, rhs_start: usize, len: usize, ) -> bool140 fn equal_values(
141 lhs: &ArrayData,
142 rhs: &ArrayData,
143 lhs_nulls: Option<&Buffer>,
144 rhs_nulls: Option<&Buffer>,
145 lhs_start: usize,
146 rhs_start: usize,
147 len: usize,
148 ) -> bool {
149 match lhs.data_type() {
150 DataType::Null => null_equal(lhs, rhs, lhs_start, rhs_start, len),
151 DataType::Boolean => {
152 boolean_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
153 }
154 DataType::UInt8 => primitive_equal::<u8>(
155 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
156 ),
157 DataType::UInt16 => primitive_equal::<u16>(
158 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
159 ),
160 DataType::UInt32 => primitive_equal::<u32>(
161 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
162 ),
163 DataType::UInt64 => primitive_equal::<u64>(
164 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
165 ),
166 DataType::Int8 => primitive_equal::<i8>(
167 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
168 ),
169 DataType::Int16 => primitive_equal::<i16>(
170 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
171 ),
172 DataType::Int32 => primitive_equal::<i32>(
173 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
174 ),
175 DataType::Int64 => primitive_equal::<i64>(
176 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
177 ),
178 DataType::Float32 => primitive_equal::<f32>(
179 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
180 ),
181 DataType::Float64 => primitive_equal::<f64>(
182 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
183 ),
184 DataType::Date32
185 | DataType::Time32(_)
186 | DataType::Interval(IntervalUnit::YearMonth) => primitive_equal::<i32>(
187 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
188 ),
189 DataType::Date64
190 | DataType::Interval(IntervalUnit::DayTime)
191 | DataType::Time64(_)
192 | DataType::Timestamp(_, _)
193 | DataType::Duration(_) => primitive_equal::<i64>(
194 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
195 ),
196 DataType::Utf8 | DataType::Binary => variable_sized_equal::<i32>(
197 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
198 ),
199 DataType::LargeUtf8 | DataType::LargeBinary => variable_sized_equal::<i64>(
200 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
201 ),
202 DataType::FixedSizeBinary(_) => {
203 fixed_binary_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
204 }
205 DataType::Decimal(_, _) => {
206 decimal_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
207 }
208 DataType::List(_) => {
209 list_equal::<i32>(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
210 }
211 DataType::LargeList(_) => {
212 list_equal::<i64>(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
213 }
214 DataType::FixedSizeList(_, _) => {
215 fixed_list_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
216 }
217 DataType::Struct(_) => {
218 struct_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
219 }
220 DataType::Union(_) => unimplemented!("See ARROW-8576"),
221 DataType::Dictionary(data_type, _) => match data_type.as_ref() {
222 DataType::Int8 => dictionary_equal::<i8>(
223 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
224 ),
225 DataType::Int16 => dictionary_equal::<i16>(
226 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
227 ),
228 DataType::Int32 => dictionary_equal::<i32>(
229 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
230 ),
231 DataType::Int64 => dictionary_equal::<i64>(
232 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
233 ),
234 DataType::UInt8 => dictionary_equal::<u8>(
235 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
236 ),
237 DataType::UInt16 => dictionary_equal::<u16>(
238 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
239 ),
240 DataType::UInt32 => dictionary_equal::<u32>(
241 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
242 ),
243 DataType::UInt64 => dictionary_equal::<u64>(
244 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
245 ),
246 _ => unreachable!(),
247 },
248 DataType::Float16 => unreachable!(),
249 }
250 }
251
equal_range( lhs: &ArrayData, rhs: &ArrayData, lhs_nulls: Option<&Buffer>, rhs_nulls: Option<&Buffer>, lhs_start: usize, rhs_start: usize, len: usize, ) -> bool252 fn equal_range(
253 lhs: &ArrayData,
254 rhs: &ArrayData,
255 lhs_nulls: Option<&Buffer>,
256 rhs_nulls: Option<&Buffer>,
257 lhs_start: usize,
258 rhs_start: usize,
259 len: usize,
260 ) -> bool {
261 utils::base_equal(lhs, rhs)
262 && utils::equal_nulls(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
263 && equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
264 }
265
266 /// Logically compares two [ArrayData].
267 /// Two arrays are logically equal if and only if:
268 /// * their data types are equal
269 /// * their lengths are equal
270 /// * their null counts are equal
271 /// * their null bitmaps are equal
272 /// * each of their items are equal
273 /// two items are equal when their in-memory representation is physically equal (i.e. same bit content).
274 /// The physical comparison depend on the data type.
275 /// # Panics
276 /// This function may panic whenever any of the [ArrayData] does not follow the Arrow specification.
277 /// (e.g. wrong number of buffers, buffer `len` does not correspond to the declared `len`)
equal(lhs: &ArrayData, rhs: &ArrayData) -> bool278 pub fn equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
279 let lhs_nulls = lhs.null_buffer();
280 let rhs_nulls = rhs.null_buffer();
281 utils::base_equal(lhs, rhs)
282 && lhs.null_count() == rhs.null_count()
283 && utils::equal_nulls(lhs, rhs, lhs_nulls, rhs_nulls, 0, 0, lhs.len())
284 && equal_values(lhs, rhs, lhs_nulls, rhs_nulls, 0, 0, lhs.len())
285 }
286
287 #[cfg(test)]
288 mod tests {
289 use std::convert::TryFrom;
290 use std::sync::Arc;
291
292 use crate::array::{
293 array::Array, ArrayDataBuilder, ArrayDataRef, ArrayRef, BinaryOffsetSizeTrait,
294 BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
295 GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder,
296 StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
297 };
298 use crate::array::{GenericStringArray, Int32Array};
299 use crate::buffer::Buffer;
300 use crate::datatypes::{Field, Int16Type, ToByteSlice};
301
302 use super::*;
303
#[test]
fn test_null_equal() {
    let twelve = NullArray::new(12).data();
    let twelve_again = NullArray::new(12).data();
    let ten = NullArray::new(10).data();

    // Same length is expected to be equal, different lengths are not.
    test_equal(&twelve, &twelve_again, true);
    test_equal(&twelve, &ten, false);

    // Test the case where offset != 0: slices of length 3 vs 3, then 4 vs 3.
    test_equal(&twelve.slice(2, 3), &ten.slice(1, 3), true);
    test_equal(&twelve.slice(5, 4), &ten.slice(3, 3), false);
}
323
#[test]
fn test_boolean_equal() {
    let reference = BooleanArray::from(vec![false, false, true]).data();

    let identical = BooleanArray::from(vec![false, false, true]).data();
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // Only the last value differs.
    let different = BooleanArray::from(vec![false, false, false]).data();
    test_equal(reference.as_ref(), different.as_ref(), false);
}
333
#[test]
fn test_boolean_equal_null() {
    let reference = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();

    // Each case is compared against `reference`.
    let cases = vec![
        // identical content
        (vec![Some(false), None, None, Some(true)], true),
        // first slot is null instead of valid
        (vec![None, None, None, Some(true)], false),
        // first slot is valid but holds a different value
        (vec![Some(true), None, None, Some(true)], false),
    ];

    for (values, expected) in cases {
        let other = BooleanArray::from(values).data();
        test_equal(reference.as_ref(), other.as_ref(), expected);
    }
}
346
#[test]
fn test_boolean_equal_offset() {
    let a =
        BooleanArray::from(vec![false, true, false, true, false, false, true]).data();
    let b =
        BooleanArray::from(vec![true, false, false, false, true, false, true, true])
            .data();
    // Different lengths and contents: unequal in both directions.
    assert!(!equal(a.as_ref(), b.as_ref()));
    assert!(!equal(b.as_ref(), a.as_ref()));

    // Both slices are [false, true, false].
    let a_slice = a.slice(2, 3);
    let b_slice = b.slice(3, 3);
    assert!(equal(&a_slice, &b_slice));
    assert!(equal(&b_slice, &a_slice));

    // [true, false, false, true] vs [true, false, true, true].
    let a_slice = a.slice(3, 4);
    let b_slice = b.slice(4, 4);
    assert!(!equal(&a_slice, &b_slice));
    assert!(!equal(&b_slice, &a_slice));

    // Test the optimization cases where null_count == 0 and starts at 0 and len >= size_of(u8)

    // Elements fill in `u8`'s exactly.
    let mut vector = vec![false, false, true, true, true, true, true, true];
    let a = BooleanArray::from(vector.clone()).data();
    let b = BooleanArray::from(vector.clone()).data();
    test_equal(a.as_ref(), b.as_ref(), true);

    // Elements fill in `u8`s + suffix bits.
    vector.push(true);
    let a = BooleanArray::from(vector.clone()).data();
    let b = BooleanArray::from(vector).data();
    test_equal(a.as_ref(), b.as_ref(), true);
}
381
#[test]
fn test_primitive() {
    // Builds both sides as `Int32Array`s and checks the expected outcome.
    let check = |lhs: Vec<Option<i32>>, rhs: Vec<Option<i32>>, expected: bool| {
        let lhs = Int32Array::from(lhs).data();
        let rhs = Int32Array::from(rhs).data();
        test_equal(&lhs, &rhs, expected);
    };

    // identical, no nulls
    check(
        vec![Some(1), Some(2), Some(3)],
        vec![Some(1), Some(2), Some(3)],
        true,
    );
    // last value differs
    check(
        vec![Some(1), Some(2), Some(3)],
        vec![Some(1), Some(2), Some(4)],
        false,
    );
    // identical, trailing null
    check(
        vec![Some(1), Some(2), None],
        vec![Some(1), Some(2), None],
        true,
    );
    // same null count, nulls in different slots
    check(
        vec![Some(1), None, Some(3)],
        vec![Some(1), Some(2), None],
        false,
    );
    // different null counts
    check(
        vec![Some(1), None, None],
        vec![Some(1), Some(2), None],
        false,
    );
}
418
#[test]
fn test_primitive_slice() {
    // Each case: (lhs values, lhs (offset, len), rhs values, rhs (offset, len), expected).
    let cases = vec![
        (
            vec![Some(1), Some(2), Some(3)],
            (0, 1),
            vec![Some(1), Some(2), Some(3)],
            (0, 1),
            true,
        ),
        (
            vec![Some(1), Some(2), Some(3)],
            (1, 1),
            vec![Some(1), Some(2), Some(3)],
            (2, 1),
            false,
        ),
        (
            vec![Some(1), Some(2), None],
            (1, 1),
            vec![Some(1), None, Some(2)],
            (2, 1),
            true,
        ),
        (
            vec![None, Some(2), None],
            (1, 1),
            vec![None, None, Some(2)],
            (2, 1),
            true,
        ),
        (
            vec![Some(1), None, Some(2), None, Some(3)],
            (2, 2),
            vec![None, Some(2), None, Some(3)],
            (1, 2),
            true,
        ),
    ];

    for (left, (left_offset, left_len), right, (right_offset, right_len), expected) in cases {
        let left = Int32Array::from(left).data().slice(left_offset, left_len);
        let right = Int32Array::from(right).data().slice(right_offset, right_len);

        test_equal(&left, &right, expected);
    }
}
468
test_equal(lhs: &ArrayData, rhs: &ArrayData, expected: bool)469 fn test_equal(lhs: &ArrayData, rhs: &ArrayData, expected: bool) {
470 // equality is symmetric
471 assert_eq!(equal(lhs, lhs), true, "\n{:?}\n{:?}", lhs, lhs);
472 assert_eq!(equal(rhs, rhs), true, "\n{:?}\n{:?}", rhs, rhs);
473
474 assert_eq!(equal(lhs, rhs), expected, "\n{:?}\n{:?}", lhs, rhs);
475 assert_eq!(equal(rhs, lhs), expected, "\n{:?}\n{:?}", rhs, lhs);
476 }
477
/// Returns `(lhs, rhs, expected)` triples shared by the string and binary equality tests.
fn binary_cases() -> Vec<(Vec<Option<String>>, Vec<Option<String>>, bool)> {
    // Turns borrowed optional strings into owned ones.
    let owned = |items: &[Option<&str>]| -> Vec<Option<String>> {
        items
            .iter()
            .map(|item| item.map(|text| text.to_owned()))
            .collect()
    };

    let hello_world = owned(&[Some("hello"), Some("world")]);
    let hello_arrow = owned(&[Some("hello"), Some("arrow")]);
    let base = owned(&[Some("hello"), None, None, Some("world"), None, None]);
    // Same as `base`, except that the second slot is valid.
    let not_base = owned(&[Some("hello"), Some("foo"), None, Some("world"), None, None]);

    vec![
        (hello_world.clone(), hello_world.clone(), true),
        (hello_world, hello_arrow, false),
        (base.clone(), base.clone(), true),
        (base, not_base, false),
    ]
}
510
test_generic_string_equal<OffsetSize: StringOffsetSizeTrait>()511 fn test_generic_string_equal<OffsetSize: StringOffsetSizeTrait>() {
512 let cases = binary_cases();
513
514 for (lhs, rhs, expected) in cases {
515 let lhs = lhs.iter().map(|x| x.as_deref()).collect();
516 let rhs = rhs.iter().map(|x| x.as_deref()).collect();
517 let lhs = GenericStringArray::<OffsetSize>::from_opt_vec(lhs).data();
518 let rhs = GenericStringArray::<OffsetSize>::from_opt_vec(rhs).data();
519 test_equal(lhs.as_ref(), rhs.as_ref(), expected);
520 }
521 }
522
// String equality with `i32` offsets.
#[test]
fn test_string_equal() {
    test_generic_string_equal::<i32>();
}
527
// String equality with `i64` offsets.
#[test]
fn test_large_string_equal() {
    test_generic_string_equal::<i64>();
}
532
test_generic_binary_equal<OffsetSize: BinaryOffsetSizeTrait>()533 fn test_generic_binary_equal<OffsetSize: BinaryOffsetSizeTrait>() {
534 let cases = binary_cases();
535
536 for (lhs, rhs, expected) in cases {
537 let lhs = lhs
538 .iter()
539 .map(|x| x.as_deref().map(|x| x.as_bytes()))
540 .collect();
541 let rhs = rhs
542 .iter()
543 .map(|x| x.as_deref().map(|x| x.as_bytes()))
544 .collect();
545 let lhs = GenericBinaryArray::<OffsetSize>::from_opt_vec(lhs).data();
546 let rhs = GenericBinaryArray::<OffsetSize>::from_opt_vec(rhs).data();
547 test_equal(lhs.as_ref(), rhs.as_ref(), expected);
548 }
549 }
550
// Binary equality with `i32` offsets.
#[test]
fn test_binary_equal() {
    test_generic_binary_equal::<i32>();
}
555
// Binary equality with `i64` offsets.
#[test]
fn test_large_binary_equal() {
    test_generic_binary_equal::<i64>();
}
560
#[test]
fn test_string_offset() {
    // The last slot of a three-element array, taken as a slice of length one ...
    let sliced = StringArray::from(vec![Some("a"), None, Some("b")])
        .data()
        .slice(2, 1);
    // ... is compared with an unsliced array holding the same single value.
    let single = StringArray::from(vec![Some("b")]).data();

    test_equal(&sliced, single.as_ref(), true);
}
569
#[test]
fn test_string_offset_larger() {
    let left =
        StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data();
    let right = StringArray::from(vec![None, Some("b"), None, Some("c")]).data();

    // [Some("b"), None] from the left side, compared against three windows of the right side.
    let window = left.slice(2, 2);
    test_equal(&window, &right.slice(0, 2), false);
    test_equal(&window, &right.slice(1, 2), true);
    test_equal(&window, &right.slice(2, 2), false);
}
580
#[test]
fn test_null() {
    let reference = NullArray::new(2).data();

    // (length of the other null array, expected equality with `reference`)
    for (other_len, expected) in vec![(2, true), (1, false)] {
        let other = NullArray::new(other_len).data();
        test_equal(reference.as_ref(), other.as_ref(), expected);
    }
}
590
create_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>( data: T, ) -> ArrayDataRef591 fn create_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
592 data: T,
593 ) -> ArrayDataRef {
594 let mut builder = ListBuilder::new(Int32Builder::new(10));
595 for d in data.as_ref() {
596 if let Some(v) = d {
597 builder.values().append_slice(v.as_ref()).unwrap();
598 builder.append(true).unwrap()
599 } else {
600 builder.append(false).unwrap()
601 }
602 }
603 builder.finish().data()
604 }
605
#[test]
fn test_list_equal() {
    let reference = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);

    let identical = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // The last value of the second list differs.
    let different = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 7])]);
    test_equal(reference.as_ref(), different.as_ref(), false);
}
615
// Test the case where null_count > 0
#[test]
fn test_list_null() {
    let reference =
        create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);

    let identical =
        create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // The third list is valid instead of null.
    let extra_valid = create_list_array(&[
        Some(&[1, 2]),
        None,
        Some(&[5, 6]),
        Some(&[3, 4]),
        None,
        None,
    ]);
    test_equal(reference.as_ref(), extra_valid.as_ref(), false);

    // Same validity, but a value inside the fourth list differs.
    let different_value =
        create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);
    test_equal(reference.as_ref(), different_value.as_ref(), false);

    // a list where the nullness of values is determined by the list's bitmap
    //
    // Both arrays below share offsets and list-level null bitmap; only the child values
    // differ, so the builder chain is written once.
    let build_list = |values: Int32Array| {
        ArrayDataBuilder::new(DataType::List(Box::new(Field::new(
            "item",
            DataType::Int32,
            true,
        ))))
        .len(6)
        .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice()))
        .add_child_data(values.data())
        .null_bit_buffer(Buffer::from(vec![0b00001001]))
        .build()
    };

    // Child values without any child-level nulls ...
    let with_placeholders = build_list(Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]));
    // ... and child values where the same positions are null instead.
    let with_nulls = build_list(Int32Array::from(vec![
        Some(1),
        Some(2),
        None,
        None,
        Some(3),
        Some(4),
        None,
        None,
    ]));
    test_equal(with_placeholders.as_ref(), with_nulls.as_ref(), true);
}
674
// Test the case where offset != 0
#[test]
fn test_list_offsets() {
    // The two arrays differ only inside the fourth list ([3, 4] vs [3, 5]).
    let left =
        create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
    let right =
        create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);

    // (offset, len, expected): the same window is taken from both arrays.
    for (offset, len, expected) in vec![(0, 3, true), (0, 5, false), (4, 1, true)] {
        let left_slice = left.slice(offset, len);
        let right_slice = right.slice(offset, len);
        test_equal(&left_slice, &right_slice, expected);
    }
}
695
create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>( data: T, ) -> ArrayDataRef696 fn create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>(
697 data: T,
698 ) -> ArrayDataRef {
699 let mut builder = FixedSizeBinaryBuilder::new(15, 5);
700
701 for d in data.as_ref() {
702 if let Some(v) = d {
703 builder.append_value(v.as_ref()).unwrap();
704 } else {
705 builder.append_null().unwrap();
706 }
707 }
708 builder.finish().data()
709 }
710
#[test]
fn test_fixed_size_binary_equal() {
    let reference = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world")]);

    let identical = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world")]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // The second value differs.
    let different = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"arrow")]);
    test_equal(reference.as_ref(), different.as_ref(), false);
}
720
// Test the case where null_count > 0
#[test]
fn test_fixed_size_binary_null() {
    let reference =
        create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"world")]);

    let identical =
        create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"world")]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // Same values, but the null sits in a different slot.
    let null_moved =
        create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world"), None]);
    test_equal(reference.as_ref(), null_moved.as_ref(), false);

    // Same validity, but the last value differs.
    let different_value =
        create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"arrow")]);
    test_equal(reference.as_ref(), different_value.as_ref(), false);
}
734
#[test]
fn test_fixed_size_binary_offsets() {
    // Test the case where offset != 0
    // The two arrays differ only in the fourth slot ("world" vs "arrow").
    let left = create_fixed_size_binary_array(&[
        Some(b"hello"),
        None,
        None,
        Some(b"world"),
        None,
        None,
    ]);
    let right = create_fixed_size_binary_array(&[
        Some(b"hello"),
        None,
        None,
        Some(b"arrow"),
        None,
        None,
    ]);

    // (offset, len, expected): the same window is taken from both arrays.
    let windows = vec![(0, 3, true), (0, 5, false), (4, 1, true), (3, 1, false)];
    for (offset, len, expected) in windows {
        let left_slice = left.slice(offset, len);
        let right_slice = right.slice(offset, len);
        test_equal(&left_slice, &right_slice, expected);
    }
}
771
create_decimal_array(data: &[Option<i128>]) -> ArrayDataRef772 fn create_decimal_array(data: &[Option<i128>]) -> ArrayDataRef {
773 let mut builder = DecimalBuilder::new(20, 23, 6);
774
775 for d in data {
776 if let Some(v) = d {
777 builder.append_value(*v).unwrap();
778 } else {
779 builder.append_null().unwrap();
780 }
781 }
782 builder.finish().data()
783 }
784
#[test]
fn test_decimal_equal() {
    let reference = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]);

    let identical = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // The first value differs.
    let different = create_decimal_array(&[Some(15_887_000_000), Some(-8_887_000_000)]);
    test_equal(reference.as_ref(), different.as_ref(), false);
}
794
// Test the case where null_count > 0
#[test]
fn test_decimal_null() {
    let reference =
        create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]);

    let identical =
        create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // Same values, but the null sits in a different slot.
    let null_moved =
        create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000), None]);
    test_equal(reference.as_ref(), null_moved.as_ref(), false);

    // Same validity, but the first value differs.
    let different_value =
        create_decimal_array(&[Some(15_887_000_000), None, Some(-8_887_000_000)]);
    test_equal(reference.as_ref(), different_value.as_ref(), false);
}
808
#[test]
fn test_decimal_offsets() {
    // Test the case where offset != 0
    let left = create_decimal_array(&[
        Some(8_887_000_000),
        None,
        None,
        Some(-8_887_000_000),
        None,
        None,
    ]);
    // Shifted by one leading null relative to `left`, with a different second value.
    let right = create_decimal_array(&[
        None,
        Some(8_887_000_000),
        None,
        None,
        Some(15_887_000_000),
        None,
        None,
    ]);

    // (left offset, right offset, len, expected)
    let windows = vec![
        (0, 1, 3, true),
        (0, 1, 5, false),
        (4, 5, 1, true),
        (3, 4, 3, false),
        (1, 2, 3, false),
    ];
    for (left_offset, right_offset, len, expected) in windows {
        let left_slice = left.slice(left_offset, len);
        let right_slice = right.slice(right_offset, len);
        test_equal(&left_slice, &right_slice, expected);
    }

    // Slots 1..4 are [None, None, Some(-8_887_000_000)] on both sides.
    let right = create_decimal_array(&[
        None,
        None,
        None,
        Some(-8_887_000_000),
        Some(-3_000),
        None,
    ]);
    let left_slice = left.slice(1, 3);
    let right_slice = right.slice(1, 3);
    test_equal(&left_slice, &right_slice, true);
}
862
863 /// Create a fixed size list of 2 value lengths
create_fixed_size_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>( data: T, ) -> ArrayDataRef864 fn create_fixed_size_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
865 data: T,
866 ) -> ArrayDataRef {
867 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(10), 3);
868
869 for d in data.as_ref() {
870 if let Some(v) = d {
871 builder.values().append_slice(v.as_ref()).unwrap();
872 builder.append(true).unwrap()
873 } else {
874 for _ in 0..builder.value_length() {
875 builder.values().append_null().unwrap();
876 }
877 builder.append(false).unwrap()
878 }
879 }
880 builder.finish().data()
881 }
882
#[test]
fn test_fixed_size_list_equal() {
    let reference = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);

    let identical = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // The last value of the second list differs.
    let different = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 7])]);
    test_equal(reference.as_ref(), different.as_ref(), false);
}
892
// Test the case where null_count > 0
#[test]
fn test_fixed_list_null() {
    let reference = create_fixed_size_list_array(&[
        Some(&[1, 2, 3]),
        None,
        None,
        Some(&[4, 5, 6]),
        None,
        None,
    ]);

    let identical = create_fixed_size_list_array(&[
        Some(&[1, 2, 3]),
        None,
        None,
        Some(&[4, 5, 6]),
        None,
        None,
    ]);
    test_equal(reference.as_ref(), identical.as_ref(), true);

    // The third list is valid instead of null.
    let extra_valid = create_fixed_size_list_array(&[
        Some(&[1, 2, 3]),
        None,
        Some(&[7, 8, 9]),
        Some(&[4, 5, 6]),
        None,
        None,
    ]);
    test_equal(reference.as_ref(), extra_valid.as_ref(), false);

    // Same validity, but the fourth list holds different values.
    let different_values = create_fixed_size_list_array(&[
        Some(&[1, 2, 3]),
        None,
        None,
        Some(&[3, 6, 9]),
        None,
        None,
    ]);
    test_equal(reference.as_ref(), different_values.as_ref(), false);
}
934
#[test]
fn test_fixed_list_offsets() {
    // Test the case where offset != 0
    // The two arrays differ only in the fourth list ([4, 5, 6] vs [3, 6, 9]).
    let left = create_fixed_size_list_array(&[
        Some(&[1, 2, 3]),
        None,
        None,
        Some(&[4, 5, 6]),
        None,
        None,
    ]);
    let right = create_fixed_size_list_array(&[
        Some(&[1, 2, 3]),
        None,
        None,
        Some(&[3, 6, 9]),
        None,
        None,
    ]);

    // (offset, len, expected): the same window is taken from both arrays.
    for (offset, len, expected) in vec![(0, 3, true), (0, 5, false), (4, 1, true)] {
        let left_slice = left.slice(offset, len);
        let right_slice = right.slice(offset, len);
        test_equal(&left_slice, &right_slice, expected);
    }
}
967
#[test]
fn test_struct_equal() {
    let strings: ArrayRef = Arc::new(StringArray::from(vec![
        Some("joe"),
        None,
        None,
        Some("mark"),
        Some("doe"),
    ]));
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(1),
        Some(2),
        None,
        Some(4),
        Some(5),
    ]));

    // Both struct arrays are built from the same two child arrays.
    let make_struct = || {
        StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
            .unwrap()
            .data()
    };
    let left = make_struct();
    let right = make_struct();

    test_equal(left.as_ref(), right.as_ref(), true);
}
996
/// Struct equality must ignore child values that sit under a null struct
/// slot, both for a flat struct and for a struct nested inside another one.
#[test]
fn test_struct_equal_null() {
    let names: ArrayRef = Arc::new(StringArray::from(vec![
        Some("joe"),
        None,
        None,
        Some("mark"),
        Some("doe"),
    ]));
    let nullable_ints: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(1),
        Some(2),
        None,
        Some(4),
        Some(5),
    ]));
    let ints_non_null: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 0]));

    // Layout shared by every inner struct built below.
    let inner_type = DataType::Struct(vec![
        Field::new("f1", DataType::Utf8, true),
        Field::new("f2", DataType::Int32, true),
    ]);

    // Inner struct of length 5 over the given children. The validity byte
    // 0b00001011 has bits 0, 1 and 3 set, i.e. slots 2 and 4 are null.
    let inner_struct = |f1: &ArrayRef, f2: &ArrayRef| {
        let data = ArrayData::builder(inner_type.clone())
            .null_bit_buffer(Buffer::from(vec![0b00001011]))
            .len(5)
            .add_child_data(f1.data_ref().clone())
            .add_child_data(f2.data_ref().clone())
            .build();
        crate::array::make_array(data)
    };

    // Wraps an inner struct as the single field "f3" of an outer struct.
    // The validity byte 0b00011110 leaves bit 0 unset: the first slot is null.
    let nest = |inner: &ArrayRef| {
        let field = Field::new("f3", inner.data_type().clone(), true);
        let data = ArrayData::builder(DataType::Struct(vec![field]))
            .null_bit_buffer(Buffer::from(vec![0b00011110]))
            .len(5)
            .add_child_data(inner.data_ref().clone())
            .build();
        crate::array::make_array(data)
    };

    // The integer children only disagree in slots 2 and 4, which are null.
    let a = inner_struct(&names, &nullable_ints);
    let b = inner_struct(&names, &ints_non_null);
    test_equal(a.data_ref(), b.data_ref(), true);

    // test with arrays that are not equal: slot 3 is valid and differs
    let c_ints_non_null: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 0, 4]));
    let c = inner_struct(&names, &c_ints_non_null);
    test_equal(a.data_ref(), c.data_ref(), false);

    // test a nested struct
    let nested_a = nest(&a);

    // Rebuild the other side with a different string in the first slot,
    // which is null in the outer struct.
    let names_changed: ArrayRef = Arc::new(StringArray::from(vec![
        Some("joanne"), // difference
        None,
        None,
        Some("mark"),
        Some("doe"),
    ]));
    let nested_b = nest(&inner_struct(&names_changed, &ints_non_null));

    test_equal(nested_a.data_ref(), nested_b.data_ref(), true);
}
1097
/// Struct equality over a variable-size (string) child must ignore string
/// values located under null struct slots.
#[test]
fn test_struct_equal_null_variable_size() {
    // Single-field struct of length 5 over `child`, using `validity` as the
    // one-byte null bitmap.
    let single_field_struct = |child: &ArrayRef, validity: u8| {
        let field = Field::new("f1", DataType::Utf8, true);
        let data = ArrayData::builder(DataType::Struct(vec![field]))
            .null_bit_buffer(Buffer::from(vec![validity]))
            .len(5)
            .add_child_data(child.data_ref().clone())
            .build();
        crate::array::make_array(data)
    };

    // The two string children differ in slots 0 and 4 only; with validity
    // 0b00001010 (bits 1 and 3 set) both of those struct slots are null.
    let strings1: ArrayRef = Arc::new(StringArray::from(vec![
        Some("joe"),
        None,
        None,
        Some("mark"),
        Some("doel"),
    ]));
    let strings2: ArrayRef = Arc::new(StringArray::from(vec![
        Some("joel"),
        None,
        None,
        Some("mark"),
        Some("doe"),
    ]));

    let a = single_field_struct(&strings1, 0b00001010);
    let b = single_field_struct(&strings2, 0b00001010);
    test_equal(a.data_ref(), b.data_ref(), true);

    // test with arrays that are not equal
    let strings3: ArrayRef = Arc::new(StringArray::from(vec![
        Some("mark"),
        None,
        None,
        Some("doe"),
        Some("joe"),
    ]));
    let c = single_field_struct(&strings3, 0b00001011);
    test_equal(a.data_ref(), c.data_ref(), false);
}
1161
create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayDataRef1162 fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayDataRef {
1163 let values = StringArray::from(values.to_vec());
1164 let mut builder = StringDictionaryBuilder::new_with_dictionary(
1165 PrimitiveBuilder::<Int16Type>::new(3),
1166 &values,
1167 )
1168 .unwrap();
1169 for key in keys {
1170 if let Some(v) = key {
1171 builder.append(v).unwrap();
1172 } else {
1173 builder.append_null().unwrap()
1174 }
1175 }
1176 builder.finish().data()
1177 }
1178
/// Dictionary arrays are compared by the values they decode to, not by the
/// physical arrangement of their dictionary.
#[test]
fn test_dictionary_equal() {
    // Dictionary (a, b, c); logical content (a, b, a, c).
    let reference = create_dictionary_array(
        &["a", "b", "c"],
        &[Some("a"), Some("b"), Some("a"), Some("c")],
    );

    // (dictionary values, appended entries, expected equality with `reference`)
    let cases: [(&[&str], &[Option<&str>], bool); 4] = [
        // different representation (values and keys are swapped), same result
        (
            &["a", "c", "b"],
            &[Some("a"), Some("b"), Some("a"), Some("c")],
            true,
        ),
        // different len
        (&["a", "c", "b"], &[Some("a"), Some("b"), Some("a")], false),
        // different key
        (
            &["a", "c", "b"],
            &[Some("a"), Some("b"), Some("a"), Some("a")],
            false,
        ),
        // different values, same keys
        (
            &["a", "b", "d"],
            &[Some("a"), Some("b"), Some("a"), Some("d")],
            false,
        ),
    ];

    for &(values, keys, expected) in cases.iter() {
        let candidate = create_dictionary_array(values, keys);
        test_equal(reference.as_ref(), candidate.as_ref(), expected);
    }
}
1212
/// Same checks as the non-null dictionary test, but with a null entry in the
/// compared arrays.
#[test]
fn test_dictionary_equal_null() {
    // Dictionary (a, b, c); logical content (a, null, a, c).
    let reference = create_dictionary_array(
        &["a", "b", "c"],
        &[Some("a"), None, Some("a"), Some("c")],
    );

    // equal to self
    test_equal(reference.as_ref(), reference.as_ref(), true);

    // (dictionary values, appended entries, expected equality with `reference`)
    let cases: [(&[&str], &[Option<&str>], bool); 4] = [
        // different representation (values and keys are swapped), same result
        (
            &["a", "c", "b"],
            &[Some("a"), None, Some("a"), Some("c")],
            true,
        ),
        // different null position
        (
            &["a", "c", "b"],
            &[Some("a"), Some("b"), Some("a"), None],
            false,
        ),
        // different key
        (
            &["a", "c", "b"],
            &[Some("a"), None, Some("a"), Some("a")],
            false,
        ),
        // different values, same keys
        (
            &["a", "b", "d"],
            &[Some("a"), None, Some("a"), Some("d")],
            false,
        ),
    ];

    for &(values, keys, expected) in cases.iter() {
        let candidate = create_dictionary_array(values, keys);
        test_equal(reference.as_ref(), candidate.as_ref(), expected);
    }
}
1252 }
1253