1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "arrow/compute/exec/key_compare.h"
19 
20 #include <memory.h>
21 
22 #include <algorithm>
23 #include <cstdint>
24 
25 #include "arrow/compute/exec/util.h"
26 #include "arrow/util/bit_util.h"
27 #include "arrow/util/ubsan.h"
28 
29 namespace arrow {
30 namespace compute {
31 
32 template <bool use_selection>
NullUpdateColumnToRow(uint32_t id_col,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector)33 void KeyCompare::NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare,
34                                        const uint16_t* sel_left_maybe_null,
35                                        const uint32_t* left_to_right_map,
36                                        KeyEncoder::KeyEncoderContext* ctx,
37                                        const KeyEncoder::KeyColumnArray& col,
38                                        const KeyEncoder::KeyRowArray& rows,
39                                        uint8_t* match_bytevector) {
40   if (!rows.has_any_nulls(ctx) && !col.data(0)) {
41     return;
42   }
43   uint32_t num_processed = 0;
44 #if defined(ARROW_HAVE_AVX2)
45   if (ctx->has_avx2()) {
46     num_processed = NullUpdateColumnToRow_avx2(use_selection, id_col, num_rows_to_compare,
47                                                sel_left_maybe_null, left_to_right_map,
48                                                ctx, col, rows, match_bytevector);
49   }
50 #endif
51 
52   if (!col.data(0)) {
53     // Remove rows from the result for which the column value is a null
54     const uint8_t* null_masks = rows.null_masks();
55     uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row;
56     for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) {
57       uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
58       uint32_t irow_right = left_to_right_map[irow_left];
59       int64_t bitid = irow_right * null_mask_num_bytes * 8 + id_col;
60       match_bytevector[i] &= (BitUtil::GetBit(null_masks, bitid) ? 0 : 0xff);
61     }
62   } else if (!rows.has_any_nulls(ctx)) {
63     // Remove rows from the result for which the column value on left side is null
64     const uint8_t* non_nulls = col.data(0);
65     ARROW_DCHECK(non_nulls);
66     for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) {
67       uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
68       match_bytevector[i] &=
69           BitUtil::GetBit(non_nulls, irow_left + col.bit_offset(0)) ? 0xff : 0;
70     }
71   } else {
72     const uint8_t* null_masks = rows.null_masks();
73     uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row;
74     const uint8_t* non_nulls = col.data(0);
75     ARROW_DCHECK(non_nulls);
76     for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) {
77       uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
78       uint32_t irow_right = left_to_right_map[irow_left];
79       int64_t bitid_right = irow_right * null_mask_num_bytes * 8 + id_col;
80       int right_null = BitUtil::GetBit(null_masks, bitid_right) ? 0xff : 0;
81       int left_null =
82           BitUtil::GetBit(non_nulls, irow_left + col.bit_offset(0)) ? 0 : 0xff;
83       match_bytevector[i] |= left_null & right_null;
84       match_bytevector[i] &= ~(left_null ^ right_null);
85     }
86   }
87 }
88 
89 template <bool use_selection, class COMPARE_FN>
CompareBinaryColumnToRowHelper(uint32_t offset_within_row,uint32_t first_row_to_compare,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector,COMPARE_FN compare_fn)90 void KeyCompare::CompareBinaryColumnToRowHelper(
91     uint32_t offset_within_row, uint32_t first_row_to_compare,
92     uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
93     const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
94     const KeyEncoder::KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
95     uint8_t* match_bytevector, COMPARE_FN compare_fn) {
96   bool is_fixed_length = rows.metadata().is_fixed_length;
97   if (is_fixed_length) {
98     uint32_t fixed_length = rows.metadata().fixed_length;
99     const uint8_t* rows_left = col.data(1);
100     const uint8_t* rows_right = rows.data(1);
101     for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) {
102       uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
103       uint32_t irow_right = left_to_right_map[irow_left];
104       uint32_t offset_right = irow_right * fixed_length + offset_within_row;
105       match_bytevector[i] = compare_fn(rows_left, rows_right, irow_left, offset_right);
106     }
107   } else {
108     const uint8_t* rows_left = col.data(1);
109     const uint32_t* offsets_right = rows.offsets();
110     const uint8_t* rows_right = rows.data(2);
111     for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) {
112       uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
113       uint32_t irow_right = left_to_right_map[irow_left];
114       uint32_t offset_right = offsets_right[irow_right] + offset_within_row;
115       match_bytevector[i] = compare_fn(rows_left, rows_right, irow_left, offset_right);
116     }
117   }
118 }
119 
120 template <bool use_selection>
CompareBinaryColumnToRow(uint32_t offset_within_row,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector)121 void KeyCompare::CompareBinaryColumnToRow(
122     uint32_t offset_within_row, uint32_t num_rows_to_compare,
123     const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
124     KeyEncoder::KeyEncoderContext* ctx, const KeyEncoder::KeyColumnArray& col,
125     const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector) {
126   uint32_t num_processed = 0;
127 #if defined(ARROW_HAVE_AVX2)
128   if (ctx->has_avx2()) {
129     num_processed = CompareBinaryColumnToRow_avx2(
130         use_selection, offset_within_row, num_rows_to_compare, sel_left_maybe_null,
131         left_to_right_map, ctx, col, rows, match_bytevector);
132   }
133 #endif
134 
135   uint32_t col_width = col.metadata().fixed_length;
136   if (col_width == 0) {
137     int bit_offset = col.bit_offset(1);
138     CompareBinaryColumnToRowHelper<use_selection>(
139         offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
140         left_to_right_map, ctx, col, rows, match_bytevector,
141         [bit_offset](const uint8_t* left_base, const uint8_t* right_base,
142                      uint32_t irow_left, uint32_t offset_right) {
143           uint8_t left = BitUtil::GetBit(left_base, irow_left + bit_offset) ? 0xff : 0x00;
144           uint8_t right = right_base[offset_right];
145           return left == right ? 0xff : 0;
146         });
147   } else if (col_width == 1) {
148     CompareBinaryColumnToRowHelper<use_selection>(
149         offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
150         left_to_right_map, ctx, col, rows, match_bytevector,
151         [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
152            uint32_t offset_right) {
153           uint8_t left = left_base[irow_left];
154           uint8_t right = right_base[offset_right];
155           return left == right ? 0xff : 0;
156         });
157   } else if (col_width == 2) {
158     CompareBinaryColumnToRowHelper<use_selection>(
159         offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
160         left_to_right_map, ctx, col, rows, match_bytevector,
161         [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
162            uint32_t offset_right) {
163           util::CheckAlignment<uint16_t>(left_base);
164           util::CheckAlignment<uint16_t>(right_base + offset_right);
165           uint16_t left = reinterpret_cast<const uint16_t*>(left_base)[irow_left];
166           uint16_t right = *reinterpret_cast<const uint16_t*>(right_base + offset_right);
167           return left == right ? 0xff : 0;
168         });
169   } else if (col_width == 4) {
170     CompareBinaryColumnToRowHelper<use_selection>(
171         offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
172         left_to_right_map, ctx, col, rows, match_bytevector,
173         [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
174            uint32_t offset_right) {
175           util::CheckAlignment<uint32_t>(left_base);
176           util::CheckAlignment<uint32_t>(right_base + offset_right);
177           uint32_t left = reinterpret_cast<const uint32_t*>(left_base)[irow_left];
178           uint32_t right = *reinterpret_cast<const uint32_t*>(right_base + offset_right);
179           return left == right ? 0xff : 0;
180         });
181   } else if (col_width == 8) {
182     CompareBinaryColumnToRowHelper<use_selection>(
183         offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
184         left_to_right_map, ctx, col, rows, match_bytevector,
185         [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
186            uint32_t offset_right) {
187           util::CheckAlignment<uint64_t>(left_base);
188           util::CheckAlignment<uint64_t>(right_base + offset_right);
189           uint64_t left = reinterpret_cast<const uint64_t*>(left_base)[irow_left];
190           uint64_t right = *reinterpret_cast<const uint64_t*>(right_base + offset_right);
191           return left == right ? 0xff : 0;
192         });
193   } else {
194     CompareBinaryColumnToRowHelper<use_selection>(
195         offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
196         left_to_right_map, ctx, col, rows, match_bytevector,
197         [&col](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
198                uint32_t offset_right) {
199           uint32_t length = col.metadata().fixed_length;
200 
201           // Non-zero length guarantees no underflow
202           int32_t num_loops_less_one =
203               static_cast<int32_t>(BitUtil::CeilDiv(length, 8)) - 1;
204 
205           uint64_t tail_mask = ~0ULL >> (64 - 8 * (length - num_loops_less_one * 8));
206 
207           const uint64_t* key_left_ptr =
208               reinterpret_cast<const uint64_t*>(left_base + irow_left * length);
209           util::CheckAlignment<uint64_t>(right_base + offset_right);
210           const uint64_t* key_right_ptr =
211               reinterpret_cast<const uint64_t*>(right_base + offset_right);
212           uint64_t result_or = 0;
213           int32_t i;
214           // length cannot be zero
215           for (i = 0; i < num_loops_less_one; ++i) {
216             uint64_t key_left = util::SafeLoad(key_left_ptr + i);
217             uint64_t key_right = key_right_ptr[i];
218             result_or |= key_left ^ key_right;
219           }
220           uint64_t key_left = util::SafeLoad(key_left_ptr + i);
221           uint64_t key_right = key_right_ptr[i];
222           result_or |= tail_mask & (key_left ^ key_right);
223           return result_or == 0 ? 0xff : 0;
224         });
225   }
226 }
227 
228 // Overwrites the match_bytevector instead of updating it
229 template <bool use_selection, bool is_first_varbinary_col>
CompareVarBinaryColumnToRow(uint32_t id_varbinary_col,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector)230 void KeyCompare::CompareVarBinaryColumnToRow(
231     uint32_t id_varbinary_col, uint32_t num_rows_to_compare,
232     const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
233     KeyEncoder::KeyEncoderContext* ctx, const KeyEncoder::KeyColumnArray& col,
234     const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector) {
235 #if defined(ARROW_HAVE_AVX2)
236   if (ctx->has_avx2()) {
237     CompareVarBinaryColumnToRow_avx2(
238         use_selection, is_first_varbinary_col, id_varbinary_col, num_rows_to_compare,
239         sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector);
240     return;
241   }
242 #endif
243 
244   const uint32_t* offsets_left = col.offsets();
245   const uint32_t* offsets_right = rows.offsets();
246   const uint8_t* rows_left = col.data(2);
247   const uint8_t* rows_right = rows.data(2);
248   for (uint32_t i = 0; i < num_rows_to_compare; ++i) {
249     uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
250     uint32_t irow_right = left_to_right_map[irow_left];
251     uint32_t begin_left = offsets_left[irow_left];
252     uint32_t length_left = offsets_left[irow_left + 1] - begin_left;
253     uint32_t begin_right = offsets_right[irow_right];
254     uint32_t length_right;
255     uint32_t offset_within_row;
256     if (!is_first_varbinary_col) {
257       rows.metadata().nth_varbinary_offset_and_length(
258           rows_right + begin_right, id_varbinary_col, &offset_within_row, &length_right);
259     } else {
260       rows.metadata().first_varbinary_offset_and_length(
261           rows_right + begin_right, &offset_within_row, &length_right);
262     }
263     begin_right += offset_within_row;
264     uint32_t length = std::min(length_left, length_right);
265     const uint64_t* key_left_ptr =
266         reinterpret_cast<const uint64_t*>(rows_left + begin_left);
267     util::CheckAlignment<uint64_t>(rows_right + begin_right);
268     const uint64_t* key_right_ptr =
269         reinterpret_cast<const uint64_t*>(rows_right + begin_right);
270     uint64_t result_or = 0;
271     if (length > 0) {
272       int32_t j;
273       // length can be zero
274       for (j = 0; j < static_cast<int32_t>(BitUtil::CeilDiv(length, 8)) - 1; ++j) {
275         uint64_t key_left = util::SafeLoad(key_left_ptr + j);
276         uint64_t key_right = key_right_ptr[j];
277         result_or |= key_left ^ key_right;
278       }
279       uint64_t tail_mask = ~0ULL >> (64 - 8 * (length - j * 8));
280       uint64_t key_left = util::SafeLoad(key_left_ptr + j);
281       uint64_t key_right = key_right_ptr[j];
282       result_or |= tail_mask & (key_left ^ key_right);
283     }
284     int result = result_or == 0 ? 0xff : 0;
285     result *= (length_left == length_right ? 1 : 0);
286     match_bytevector[i] = result;
287   }
288 }
289 
AndByteVectors(KeyEncoder::KeyEncoderContext * ctx,uint32_t num_elements,uint8_t * bytevector_A,const uint8_t * bytevector_B)290 void KeyCompare::AndByteVectors(KeyEncoder::KeyEncoderContext* ctx, uint32_t num_elements,
291                                 uint8_t* bytevector_A, const uint8_t* bytevector_B) {
292   uint32_t num_processed = 0;
293 #if defined(ARROW_HAVE_AVX2)
294   if (ctx->has_avx2()) {
295     num_processed = AndByteVectors_avx2(num_elements, bytevector_A, bytevector_B);
296   }
297 #endif
298 
299   for (uint32_t i = num_processed / 8; i < BitUtil::CeilDiv(num_elements, 8); ++i) {
300     uint64_t* a = reinterpret_cast<uint64_t*>(bytevector_A);
301     const uint64_t* b = reinterpret_cast<const uint64_t*>(bytevector_B);
302     a[i] &= b[i];
303   }
304 }
305 
CompareColumnsToRows(uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,uint32_t * out_num_rows,uint16_t * out_sel_left_maybe_same,const std::vector<KeyEncoder::KeyColumnArray> & cols,const KeyEncoder::KeyRowArray & rows)306 void KeyCompare::CompareColumnsToRows(uint32_t num_rows_to_compare,
307                                       const uint16_t* sel_left_maybe_null,
308                                       const uint32_t* left_to_right_map,
309                                       KeyEncoder::KeyEncoderContext* ctx,
310                                       uint32_t* out_num_rows,
311                                       uint16_t* out_sel_left_maybe_same,
312                                       const std::vector<KeyEncoder::KeyColumnArray>& cols,
313                                       const KeyEncoder::KeyRowArray& rows) {
314   if (num_rows_to_compare == 0) {
315     *out_num_rows = 0;
316     return;
317   }
318 
319   // Allocate temporary byte and bit vectors
320   auto bytevector_A_holder =
321       util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
322   auto bytevector_B_holder =
323       util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
324   auto bitvector_holder =
325       util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
326 
327   uint8_t* match_bytevector_A = bytevector_A_holder.mutable_data();
328   uint8_t* match_bytevector_B = bytevector_B_holder.mutable_data();
329   uint8_t* match_bitvector = bitvector_holder.mutable_data();
330 
331   bool is_first_column = true;
332   for (size_t icol = 0; icol < cols.size(); ++icol) {
333     const KeyEncoder::KeyColumnArray& col = cols[icol];
334     uint32_t offset_within_row =
335         rows.metadata().encoded_field_offset(static_cast<uint32_t>(icol));
336     if (col.metadata().is_fixed_length) {
337       if (sel_left_maybe_null) {
338         CompareBinaryColumnToRow<true>(
339             offset_within_row, num_rows_to_compare, sel_left_maybe_null,
340             left_to_right_map, ctx, col, rows,
341             is_first_column ? match_bytevector_A : match_bytevector_B);
342         NullUpdateColumnToRow<true>(
343             static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
344             left_to_right_map, ctx, col, rows,
345             is_first_column ? match_bytevector_A : match_bytevector_B);
346       } else {
347         // Version without using selection vector
348         CompareBinaryColumnToRow<false>(
349             offset_within_row, num_rows_to_compare, sel_left_maybe_null,
350             left_to_right_map, ctx, col, rows,
351             is_first_column ? match_bytevector_A : match_bytevector_B);
352         NullUpdateColumnToRow<false>(
353             static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
354             left_to_right_map, ctx, col, rows,
355             is_first_column ? match_bytevector_A : match_bytevector_B);
356       }
357       if (!is_first_column) {
358         AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
359       }
360       is_first_column = false;
361     }
362   }
363 
364   uint32_t ivarbinary = 0;
365   for (size_t icol = 0; icol < cols.size(); ++icol) {
366     const KeyEncoder::KeyColumnArray& col = cols[icol];
367     if (!col.metadata().is_fixed_length) {
368       // Process varbinary and nulls
369       if (sel_left_maybe_null) {
370         if (ivarbinary == 0) {
371           CompareVarBinaryColumnToRow<true, true>(
372               ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
373               ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
374         } else {
375           CompareVarBinaryColumnToRow<true, false>(ivarbinary, num_rows_to_compare,
376                                                    sel_left_maybe_null, left_to_right_map,
377                                                    ctx, col, rows, match_bytevector_B);
378         }
379         NullUpdateColumnToRow<true>(
380             static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
381             left_to_right_map, ctx, col, rows,
382             is_first_column ? match_bytevector_A : match_bytevector_B);
383       } else {
384         if (ivarbinary == 0) {
385           CompareVarBinaryColumnToRow<false, true>(
386               ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
387               ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
388         } else {
389           CompareVarBinaryColumnToRow<false, false>(
390               ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
391               ctx, col, rows, match_bytevector_B);
392         }
393         NullUpdateColumnToRow<false>(
394             static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
395             left_to_right_map, ctx, col, rows,
396             is_first_column ? match_bytevector_A : match_bytevector_B);
397       }
398       if (!is_first_column) {
399         AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
400       }
401       is_first_column = false;
402       ++ivarbinary;
403     }
404   }
405 
406   util::BitUtil::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare,
407                                match_bytevector_A, match_bitvector);
408   if (sel_left_maybe_null) {
409     int out_num_rows_int;
410     util::BitUtil::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
411                                        match_bitvector, sel_left_maybe_null,
412                                        &out_num_rows_int, out_sel_left_maybe_same);
413     *out_num_rows = out_num_rows_int;
414   } else {
415     int out_num_rows_int;
416     util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
417                                    match_bitvector, &out_num_rows_int,
418                                    out_sel_left_maybe_same);
419     *out_num_rows = out_num_rows_int;
420   }
421 }
422 
423 }  // namespace compute
424 }  // namespace arrow
425