1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/compute/exec/key_compare.h"
19
20 #include <memory.h>
21
22 #include <algorithm>
23 #include <cstdint>
24
25 #include "arrow/compute/exec/util.h"
26 #include "arrow/util/bit_util.h"
27 #include "arrow/util/ubsan.h"
28
29 namespace arrow {
30 namespace compute {
31
32 template <bool use_selection>
NullUpdateColumnToRow(uint32_t id_col,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector)33 void KeyCompare::NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare,
34 const uint16_t* sel_left_maybe_null,
35 const uint32_t* left_to_right_map,
36 KeyEncoder::KeyEncoderContext* ctx,
37 const KeyEncoder::KeyColumnArray& col,
38 const KeyEncoder::KeyRowArray& rows,
39 uint8_t* match_bytevector) {
40 if (!rows.has_any_nulls(ctx) && !col.data(0)) {
41 return;
42 }
43 uint32_t num_processed = 0;
44 #if defined(ARROW_HAVE_AVX2)
45 if (ctx->has_avx2()) {
46 num_processed = NullUpdateColumnToRow_avx2(use_selection, id_col, num_rows_to_compare,
47 sel_left_maybe_null, left_to_right_map,
48 ctx, col, rows, match_bytevector);
49 }
50 #endif
51
52 if (!col.data(0)) {
53 // Remove rows from the result for which the column value is a null
54 const uint8_t* null_masks = rows.null_masks();
55 uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row;
56 for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) {
57 uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
58 uint32_t irow_right = left_to_right_map[irow_left];
59 int64_t bitid = irow_right * null_mask_num_bytes * 8 + id_col;
60 match_bytevector[i] &= (BitUtil::GetBit(null_masks, bitid) ? 0 : 0xff);
61 }
62 } else if (!rows.has_any_nulls(ctx)) {
63 // Remove rows from the result for which the column value on left side is null
64 const uint8_t* non_nulls = col.data(0);
65 ARROW_DCHECK(non_nulls);
66 for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) {
67 uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
68 match_bytevector[i] &=
69 BitUtil::GetBit(non_nulls, irow_left + col.bit_offset(0)) ? 0xff : 0;
70 }
71 } else {
72 const uint8_t* null_masks = rows.null_masks();
73 uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row;
74 const uint8_t* non_nulls = col.data(0);
75 ARROW_DCHECK(non_nulls);
76 for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) {
77 uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
78 uint32_t irow_right = left_to_right_map[irow_left];
79 int64_t bitid_right = irow_right * null_mask_num_bytes * 8 + id_col;
80 int right_null = BitUtil::GetBit(null_masks, bitid_right) ? 0xff : 0;
81 int left_null =
82 BitUtil::GetBit(non_nulls, irow_left + col.bit_offset(0)) ? 0 : 0xff;
83 match_bytevector[i] |= left_null & right_null;
84 match_bytevector[i] &= ~(left_null ^ right_null);
85 }
86 }
87 }
88
89 template <bool use_selection, class COMPARE_FN>
CompareBinaryColumnToRowHelper(uint32_t offset_within_row,uint32_t first_row_to_compare,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector,COMPARE_FN compare_fn)90 void KeyCompare::CompareBinaryColumnToRowHelper(
91 uint32_t offset_within_row, uint32_t first_row_to_compare,
92 uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
93 const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
94 const KeyEncoder::KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
95 uint8_t* match_bytevector, COMPARE_FN compare_fn) {
96 bool is_fixed_length = rows.metadata().is_fixed_length;
97 if (is_fixed_length) {
98 uint32_t fixed_length = rows.metadata().fixed_length;
99 const uint8_t* rows_left = col.data(1);
100 const uint8_t* rows_right = rows.data(1);
101 for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) {
102 uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
103 uint32_t irow_right = left_to_right_map[irow_left];
104 uint32_t offset_right = irow_right * fixed_length + offset_within_row;
105 match_bytevector[i] = compare_fn(rows_left, rows_right, irow_left, offset_right);
106 }
107 } else {
108 const uint8_t* rows_left = col.data(1);
109 const uint32_t* offsets_right = rows.offsets();
110 const uint8_t* rows_right = rows.data(2);
111 for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) {
112 uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
113 uint32_t irow_right = left_to_right_map[irow_left];
114 uint32_t offset_right = offsets_right[irow_right] + offset_within_row;
115 match_bytevector[i] = compare_fn(rows_left, rows_right, irow_left, offset_right);
116 }
117 }
118 }
119
120 template <bool use_selection>
CompareBinaryColumnToRow(uint32_t offset_within_row,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector)121 void KeyCompare::CompareBinaryColumnToRow(
122 uint32_t offset_within_row, uint32_t num_rows_to_compare,
123 const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
124 KeyEncoder::KeyEncoderContext* ctx, const KeyEncoder::KeyColumnArray& col,
125 const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector) {
126 uint32_t num_processed = 0;
127 #if defined(ARROW_HAVE_AVX2)
128 if (ctx->has_avx2()) {
129 num_processed = CompareBinaryColumnToRow_avx2(
130 use_selection, offset_within_row, num_rows_to_compare, sel_left_maybe_null,
131 left_to_right_map, ctx, col, rows, match_bytevector);
132 }
133 #endif
134
135 uint32_t col_width = col.metadata().fixed_length;
136 if (col_width == 0) {
137 int bit_offset = col.bit_offset(1);
138 CompareBinaryColumnToRowHelper<use_selection>(
139 offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
140 left_to_right_map, ctx, col, rows, match_bytevector,
141 [bit_offset](const uint8_t* left_base, const uint8_t* right_base,
142 uint32_t irow_left, uint32_t offset_right) {
143 uint8_t left = BitUtil::GetBit(left_base, irow_left + bit_offset) ? 0xff : 0x00;
144 uint8_t right = right_base[offset_right];
145 return left == right ? 0xff : 0;
146 });
147 } else if (col_width == 1) {
148 CompareBinaryColumnToRowHelper<use_selection>(
149 offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
150 left_to_right_map, ctx, col, rows, match_bytevector,
151 [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
152 uint32_t offset_right) {
153 uint8_t left = left_base[irow_left];
154 uint8_t right = right_base[offset_right];
155 return left == right ? 0xff : 0;
156 });
157 } else if (col_width == 2) {
158 CompareBinaryColumnToRowHelper<use_selection>(
159 offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
160 left_to_right_map, ctx, col, rows, match_bytevector,
161 [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
162 uint32_t offset_right) {
163 util::CheckAlignment<uint16_t>(left_base);
164 util::CheckAlignment<uint16_t>(right_base + offset_right);
165 uint16_t left = reinterpret_cast<const uint16_t*>(left_base)[irow_left];
166 uint16_t right = *reinterpret_cast<const uint16_t*>(right_base + offset_right);
167 return left == right ? 0xff : 0;
168 });
169 } else if (col_width == 4) {
170 CompareBinaryColumnToRowHelper<use_selection>(
171 offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
172 left_to_right_map, ctx, col, rows, match_bytevector,
173 [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
174 uint32_t offset_right) {
175 util::CheckAlignment<uint32_t>(left_base);
176 util::CheckAlignment<uint32_t>(right_base + offset_right);
177 uint32_t left = reinterpret_cast<const uint32_t*>(left_base)[irow_left];
178 uint32_t right = *reinterpret_cast<const uint32_t*>(right_base + offset_right);
179 return left == right ? 0xff : 0;
180 });
181 } else if (col_width == 8) {
182 CompareBinaryColumnToRowHelper<use_selection>(
183 offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
184 left_to_right_map, ctx, col, rows, match_bytevector,
185 [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
186 uint32_t offset_right) {
187 util::CheckAlignment<uint64_t>(left_base);
188 util::CheckAlignment<uint64_t>(right_base + offset_right);
189 uint64_t left = reinterpret_cast<const uint64_t*>(left_base)[irow_left];
190 uint64_t right = *reinterpret_cast<const uint64_t*>(right_base + offset_right);
191 return left == right ? 0xff : 0;
192 });
193 } else {
194 CompareBinaryColumnToRowHelper<use_selection>(
195 offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null,
196 left_to_right_map, ctx, col, rows, match_bytevector,
197 [&col](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left,
198 uint32_t offset_right) {
199 uint32_t length = col.metadata().fixed_length;
200
201 // Non-zero length guarantees no underflow
202 int32_t num_loops_less_one =
203 static_cast<int32_t>(BitUtil::CeilDiv(length, 8)) - 1;
204
205 uint64_t tail_mask = ~0ULL >> (64 - 8 * (length - num_loops_less_one * 8));
206
207 const uint64_t* key_left_ptr =
208 reinterpret_cast<const uint64_t*>(left_base + irow_left * length);
209 util::CheckAlignment<uint64_t>(right_base + offset_right);
210 const uint64_t* key_right_ptr =
211 reinterpret_cast<const uint64_t*>(right_base + offset_right);
212 uint64_t result_or = 0;
213 int32_t i;
214 // length cannot be zero
215 for (i = 0; i < num_loops_less_one; ++i) {
216 uint64_t key_left = util::SafeLoad(key_left_ptr + i);
217 uint64_t key_right = key_right_ptr[i];
218 result_or |= key_left ^ key_right;
219 }
220 uint64_t key_left = util::SafeLoad(key_left_ptr + i);
221 uint64_t key_right = key_right_ptr[i];
222 result_or |= tail_mask & (key_left ^ key_right);
223 return result_or == 0 ? 0xff : 0;
224 });
225 }
226 }
227
228 // Overwrites the match_bytevector instead of updating it
229 template <bool use_selection, bool is_first_varbinary_col>
CompareVarBinaryColumnToRow(uint32_t id_varbinary_col,uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,const KeyEncoder::KeyColumnArray & col,const KeyEncoder::KeyRowArray & rows,uint8_t * match_bytevector)230 void KeyCompare::CompareVarBinaryColumnToRow(
231 uint32_t id_varbinary_col, uint32_t num_rows_to_compare,
232 const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
233 KeyEncoder::KeyEncoderContext* ctx, const KeyEncoder::KeyColumnArray& col,
234 const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector) {
235 #if defined(ARROW_HAVE_AVX2)
236 if (ctx->has_avx2()) {
237 CompareVarBinaryColumnToRow_avx2(
238 use_selection, is_first_varbinary_col, id_varbinary_col, num_rows_to_compare,
239 sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector);
240 return;
241 }
242 #endif
243
244 const uint32_t* offsets_left = col.offsets();
245 const uint32_t* offsets_right = rows.offsets();
246 const uint8_t* rows_left = col.data(2);
247 const uint8_t* rows_right = rows.data(2);
248 for (uint32_t i = 0; i < num_rows_to_compare; ++i) {
249 uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
250 uint32_t irow_right = left_to_right_map[irow_left];
251 uint32_t begin_left = offsets_left[irow_left];
252 uint32_t length_left = offsets_left[irow_left + 1] - begin_left;
253 uint32_t begin_right = offsets_right[irow_right];
254 uint32_t length_right;
255 uint32_t offset_within_row;
256 if (!is_first_varbinary_col) {
257 rows.metadata().nth_varbinary_offset_and_length(
258 rows_right + begin_right, id_varbinary_col, &offset_within_row, &length_right);
259 } else {
260 rows.metadata().first_varbinary_offset_and_length(
261 rows_right + begin_right, &offset_within_row, &length_right);
262 }
263 begin_right += offset_within_row;
264 uint32_t length = std::min(length_left, length_right);
265 const uint64_t* key_left_ptr =
266 reinterpret_cast<const uint64_t*>(rows_left + begin_left);
267 util::CheckAlignment<uint64_t>(rows_right + begin_right);
268 const uint64_t* key_right_ptr =
269 reinterpret_cast<const uint64_t*>(rows_right + begin_right);
270 uint64_t result_or = 0;
271 if (length > 0) {
272 int32_t j;
273 // length can be zero
274 for (j = 0; j < static_cast<int32_t>(BitUtil::CeilDiv(length, 8)) - 1; ++j) {
275 uint64_t key_left = util::SafeLoad(key_left_ptr + j);
276 uint64_t key_right = key_right_ptr[j];
277 result_or |= key_left ^ key_right;
278 }
279 uint64_t tail_mask = ~0ULL >> (64 - 8 * (length - j * 8));
280 uint64_t key_left = util::SafeLoad(key_left_ptr + j);
281 uint64_t key_right = key_right_ptr[j];
282 result_or |= tail_mask & (key_left ^ key_right);
283 }
284 int result = result_or == 0 ? 0xff : 0;
285 result *= (length_left == length_right ? 1 : 0);
286 match_bytevector[i] = result;
287 }
288 }
289
AndByteVectors(KeyEncoder::KeyEncoderContext * ctx,uint32_t num_elements,uint8_t * bytevector_A,const uint8_t * bytevector_B)290 void KeyCompare::AndByteVectors(KeyEncoder::KeyEncoderContext* ctx, uint32_t num_elements,
291 uint8_t* bytevector_A, const uint8_t* bytevector_B) {
292 uint32_t num_processed = 0;
293 #if defined(ARROW_HAVE_AVX2)
294 if (ctx->has_avx2()) {
295 num_processed = AndByteVectors_avx2(num_elements, bytevector_A, bytevector_B);
296 }
297 #endif
298
299 for (uint32_t i = num_processed / 8; i < BitUtil::CeilDiv(num_elements, 8); ++i) {
300 uint64_t* a = reinterpret_cast<uint64_t*>(bytevector_A);
301 const uint64_t* b = reinterpret_cast<const uint64_t*>(bytevector_B);
302 a[i] &= b[i];
303 }
304 }
305
CompareColumnsToRows(uint32_t num_rows_to_compare,const uint16_t * sel_left_maybe_null,const uint32_t * left_to_right_map,KeyEncoder::KeyEncoderContext * ctx,uint32_t * out_num_rows,uint16_t * out_sel_left_maybe_same,const std::vector<KeyEncoder::KeyColumnArray> & cols,const KeyEncoder::KeyRowArray & rows)306 void KeyCompare::CompareColumnsToRows(uint32_t num_rows_to_compare,
307 const uint16_t* sel_left_maybe_null,
308 const uint32_t* left_to_right_map,
309 KeyEncoder::KeyEncoderContext* ctx,
310 uint32_t* out_num_rows,
311 uint16_t* out_sel_left_maybe_same,
312 const std::vector<KeyEncoder::KeyColumnArray>& cols,
313 const KeyEncoder::KeyRowArray& rows) {
314 if (num_rows_to_compare == 0) {
315 *out_num_rows = 0;
316 return;
317 }
318
319 // Allocate temporary byte and bit vectors
320 auto bytevector_A_holder =
321 util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
322 auto bytevector_B_holder =
323 util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
324 auto bitvector_holder =
325 util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
326
327 uint8_t* match_bytevector_A = bytevector_A_holder.mutable_data();
328 uint8_t* match_bytevector_B = bytevector_B_holder.mutable_data();
329 uint8_t* match_bitvector = bitvector_holder.mutable_data();
330
331 bool is_first_column = true;
332 for (size_t icol = 0; icol < cols.size(); ++icol) {
333 const KeyEncoder::KeyColumnArray& col = cols[icol];
334 uint32_t offset_within_row =
335 rows.metadata().encoded_field_offset(static_cast<uint32_t>(icol));
336 if (col.metadata().is_fixed_length) {
337 if (sel_left_maybe_null) {
338 CompareBinaryColumnToRow<true>(
339 offset_within_row, num_rows_to_compare, sel_left_maybe_null,
340 left_to_right_map, ctx, col, rows,
341 is_first_column ? match_bytevector_A : match_bytevector_B);
342 NullUpdateColumnToRow<true>(
343 static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
344 left_to_right_map, ctx, col, rows,
345 is_first_column ? match_bytevector_A : match_bytevector_B);
346 } else {
347 // Version without using selection vector
348 CompareBinaryColumnToRow<false>(
349 offset_within_row, num_rows_to_compare, sel_left_maybe_null,
350 left_to_right_map, ctx, col, rows,
351 is_first_column ? match_bytevector_A : match_bytevector_B);
352 NullUpdateColumnToRow<false>(
353 static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
354 left_to_right_map, ctx, col, rows,
355 is_first_column ? match_bytevector_A : match_bytevector_B);
356 }
357 if (!is_first_column) {
358 AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
359 }
360 is_first_column = false;
361 }
362 }
363
364 uint32_t ivarbinary = 0;
365 for (size_t icol = 0; icol < cols.size(); ++icol) {
366 const KeyEncoder::KeyColumnArray& col = cols[icol];
367 if (!col.metadata().is_fixed_length) {
368 // Process varbinary and nulls
369 if (sel_left_maybe_null) {
370 if (ivarbinary == 0) {
371 CompareVarBinaryColumnToRow<true, true>(
372 ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
373 ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
374 } else {
375 CompareVarBinaryColumnToRow<true, false>(ivarbinary, num_rows_to_compare,
376 sel_left_maybe_null, left_to_right_map,
377 ctx, col, rows, match_bytevector_B);
378 }
379 NullUpdateColumnToRow<true>(
380 static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
381 left_to_right_map, ctx, col, rows,
382 is_first_column ? match_bytevector_A : match_bytevector_B);
383 } else {
384 if (ivarbinary == 0) {
385 CompareVarBinaryColumnToRow<false, true>(
386 ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
387 ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
388 } else {
389 CompareVarBinaryColumnToRow<false, false>(
390 ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
391 ctx, col, rows, match_bytevector_B);
392 }
393 NullUpdateColumnToRow<false>(
394 static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
395 left_to_right_map, ctx, col, rows,
396 is_first_column ? match_bytevector_A : match_bytevector_B);
397 }
398 if (!is_first_column) {
399 AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
400 }
401 is_first_column = false;
402 ++ivarbinary;
403 }
404 }
405
406 util::BitUtil::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare,
407 match_bytevector_A, match_bitvector);
408 if (sel_left_maybe_null) {
409 int out_num_rows_int;
410 util::BitUtil::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
411 match_bitvector, sel_left_maybe_null,
412 &out_num_rows_int, out_sel_left_maybe_same);
413 *out_num_rows = out_num_rows_int;
414 } else {
415 int out_num_rows_int;
416 util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
417 match_bitvector, &out_num_rows_int,
418 out_sel_left_maybe_same);
419 *out_num_rows = out_num_rows_int;
420 }
421 }
422
423 } // namespace compute
424 } // namespace arrow
425