1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "benchmark/benchmark.h"
19
20 #include <vector>
21
22 #include "arrow/compute/api.h"
23 #include "arrow/testing/gtest_util.h"
24 #include "arrow/testing/random.h"
25 #include "arrow/util/benchmark_util.h"
26 #include "arrow/util/bit_util.h"
27 #include "arrow/util/bitmap_reader.h"
28
29 namespace arrow {
30 namespace compute {
31
32 #include <cassert>
33 #include <cmath>
34 #include <iostream>
35 #include <random>
36
37 #ifdef ARROW_WITH_BENCHMARKS_REFERENCE
38
39 namespace BitUtil = arrow::BitUtil;
40 using arrow::internal::BitmapReader;
41
42 template <typename T>
43 struct SumState {
44 using ValueType = T;
45
SumStatearrow::compute::SumState46 SumState() : total(0), valid_count(0) {}
47
48 T total = 0;
49 int64_t valid_count = 0;
50 };
51
52 template <typename T>
53 struct Traits {};
54
55 template <>
56 struct Traits<int64_t> {
57 using ArrayType = typename CTypeTraits<int64_t>::ArrayType;
58 static constexpr int64_t null_sentinel = std::numeric_limits<int64_t>::lowest();
59
FixSentinelarrow::compute::Traits60 static void FixSentinel(std::shared_ptr<ArrayType>& array) {
61 auto data = array->data();
62 for (int64_t i = 0; i < array->length(); i++)
63 if (array->IsNull(i)) {
64 int64_t* val_ptr = data->GetMutableValues<int64_t>(1, i);
65 *val_ptr = null_sentinel;
66 }
67 }
68
IsNullarrow::compute::Traits69 static inline bool IsNull(int64_t val) { return val == null_sentinel; }
70
NotNullarrow::compute::Traits71 static inline bool NotNull(int64_t val) { return val != null_sentinel; }
72 };
73
74 template <typename T>
75 struct Summer {
76 public:
77 using ValueType = T;
78 using ArrowType = typename CTypeTraits<T>::ArrowType;
79 };
80
81 template <typename T>
82 struct SumNoNulls : public Summer<T> {
83 using ArrayType = typename CTypeTraits<T>::ArrayType;
84
Sumarrow::compute::SumNoNulls85 static void Sum(const ArrayType& array, SumState<T>* state) {
86 SumState<T> local;
87
88 const auto values = array.raw_values();
89 for (int64_t i = 0; i < array.length(); ++i) {
90 local.total += values[i];
91 }
92
93 local.valid_count = array.length();
94 *state = local;
95 }
96 };
97
98 template <typename T>
99 struct SumNoNullsUnrolled : public Summer<T> {
100 using ArrayType = typename CTypeTraits<T>::ArrayType;
101
Sumarrow::compute::SumNoNullsUnrolled102 static void Sum(const ArrayType& array, SumState<T>* state) {
103 SumState<T> local;
104
105 const auto values = array.raw_values();
106 const auto length = array.length();
107 const int64_t length_rounded = BitUtil::RoundDown(length, 8);
108 for (int64_t i = 0; i < length_rounded; i += 8) {
109 local.total += values[i + 0] + values[i + 1] + values[i + 2] + values[i + 3] +
110 values[i + 4] + values[i + 5] + values[i + 6] + values[i + 7];
111 }
112
113 for (int64_t i = length_rounded; i < length; ++i) {
114 local.total += values[i];
115 }
116
117 local.valid_count = length;
118
119 *state = local;
120 }
121 };
122
123 template <typename T>
124 struct SumSentinel : public Summer<T> {
125 using ArrayType = typename CTypeTraits<T>::ArrayType;
126
Sumarrow::compute::SumSentinel127 static void Sum(const ArrayType& array, SumState<T>* state) {
128 SumState<T> local;
129
130 const auto values = array.raw_values();
131 const auto length = array.length();
132 for (int64_t i = 0; i < length; i++) {
133 // NaN is not equal to itself
134 local.total += values[i] * Traits<T>::NotNull(values[i]);
135 local.valid_count++;
136 }
137
138 *state = local;
139 }
140 };
141
142 template <typename T>
143 struct SumSentinelUnrolled : public Summer<T> {
144 using ArrayType = typename CTypeTraits<T>::ArrayType;
145
Sumarrow::compute::SumSentinelUnrolled146 static void Sum(const ArrayType& array, SumState<T>* state) {
147 SumState<T> local;
148
149 #define SUM_NOT_NULL(ITEM) \
150 do { \
151 local.total += values[i + ITEM] * Traits<T>::NotNull(values[i + ITEM]); \
152 local.valid_count++; \
153 } while (0)
154
155 const auto values = array.raw_values();
156 const auto length = array.length();
157 const int64_t length_rounded = BitUtil::RoundDown(length, 8);
158 for (int64_t i = 0; i < length_rounded; i += 8) {
159 SUM_NOT_NULL(0);
160 SUM_NOT_NULL(1);
161 SUM_NOT_NULL(2);
162 SUM_NOT_NULL(3);
163 SUM_NOT_NULL(4);
164 SUM_NOT_NULL(5);
165 SUM_NOT_NULL(6);
166 SUM_NOT_NULL(7);
167 }
168
169 #undef SUM_NOT_NULL
170
171 for (int64_t i = length_rounded * 8; i < length; ++i) {
172 local.total += values[i] * Traits<T>::NotNull(values[i]);
173 ++local.valid_count;
174 }
175
176 *state = local;
177 }
178 };
179
180 template <typename T>
181 struct SumBitmapNaive : public Summer<T> {
182 using ArrayType = typename CTypeTraits<T>::ArrayType;
183
Sumarrow::compute::SumBitmapNaive184 static void Sum(const ArrayType& array, SumState<T>* state) {
185 SumState<T> local;
186
187 const auto values = array.raw_values();
188 const auto bitmap = array.null_bitmap_data();
189 const auto length = array.length();
190
191 for (int64_t i = 0; i < length; ++i) {
192 if (BitUtil::GetBit(bitmap, i)) {
193 local.total += values[i];
194 ++local.valid_count;
195 }
196 }
197
198 *state = local;
199 }
200 };
201
202 template <typename T>
203 struct SumBitmapReader : public Summer<T> {
204 using ArrayType = typename CTypeTraits<T>::ArrayType;
205
Sumarrow::compute::SumBitmapReader206 static void Sum(const ArrayType& array, SumState<T>* state) {
207 SumState<T> local;
208
209 const auto values = array.raw_values();
210 const auto bitmap = array.null_bitmap_data();
211 const auto length = array.length();
212 BitmapReader bit_reader(bitmap, 0, length);
213 for (int64_t i = 0; i < length; ++i) {
214 if (bit_reader.IsSet()) {
215 local.total += values[i];
216 ++local.valid_count;
217 }
218
219 bit_reader.Next();
220 }
221
222 *state = local;
223 }
224 };
225
226 template <typename T>
227 struct SumBitmapVectorizeUnroll : public Summer<T> {
228 using ArrayType = typename CTypeTraits<T>::ArrayType;
229
Sumarrow::compute::SumBitmapVectorizeUnroll230 static void Sum(const ArrayType& array, SumState<T>* state) {
231 SumState<T> local;
232
233 const auto values = array.raw_values();
234 const auto bitmap = array.null_bitmap_data();
235 const auto length = array.length();
236 const int64_t length_rounded = BitUtil::RoundDown(length, 8);
237 for (int64_t i = 0; i < length_rounded; i += 8) {
238 const uint8_t valid_byte = bitmap[i / 8];
239
240 #define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1))
241
242 if (valid_byte < 0xFF) {
243 // Some nulls
244 local.total += SUM_SHIFT(0);
245 local.total += SUM_SHIFT(1);
246 local.total += SUM_SHIFT(2);
247 local.total += SUM_SHIFT(3);
248 local.total += SUM_SHIFT(4);
249 local.total += SUM_SHIFT(5);
250 local.total += SUM_SHIFT(6);
251 local.total += SUM_SHIFT(7);
252 local.valid_count += BitUtil::kBytePopcount[valid_byte];
253 } else {
254 // No nulls
255 local.total += values[i + 0] + values[i + 1] + values[i + 2] + values[i + 3] +
256 values[i + 4] + values[i + 5] + values[i + 6] + values[i + 7];
257 local.valid_count += 8;
258 }
259 }
260
261 #undef SUM_SHIFT
262
263 for (int64_t i = length_rounded; i < length; ++i) {
264 if (BitUtil::GetBit(bitmap, i)) {
265 local.total = values[i];
266 ++local.valid_count;
267 }
268 }
269
270 *state = local;
271 }
272 };
273
274 template <typename Functor>
ReferenceSum(benchmark::State & state)275 void ReferenceSum(benchmark::State& state) {
276 using T = typename Functor::ValueType;
277
278 RegressionArgs args(state);
279 const int64_t array_size = args.size / sizeof(int64_t);
280 auto rand = random::RandomArrayGenerator(1923);
281 auto array = std::static_pointer_cast<NumericArray<Int64Type>>(
282 rand.Int64(array_size, -100, 100, args.null_proportion));
283
284 Traits<T>::FixSentinel(array);
285
286 for (auto _ : state) {
287 SumState<T> sum_state;
288 Functor::Sum(*array, &sum_state);
289 benchmark::DoNotOptimize(sum_state);
290 }
291 }
292
293 BENCHMARK_TEMPLATE(ReferenceSum, SumNoNulls<int64_t>)->Apply(BenchmarkSetArgs);
294 BENCHMARK_TEMPLATE(ReferenceSum, SumNoNullsUnrolled<int64_t>)->Apply(BenchmarkSetArgs);
295 BENCHMARK_TEMPLATE(ReferenceSum, SumSentinel<int64_t>)->Apply(BenchmarkSetArgs);
296 BENCHMARK_TEMPLATE(ReferenceSum, SumSentinelUnrolled<int64_t>)->Apply(BenchmarkSetArgs);
297 BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapNaive<int64_t>)->Apply(BenchmarkSetArgs);
298 BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapReader<int64_t>)->Apply(BenchmarkSetArgs);
299 BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll<int64_t>)
300 ->Apply(BenchmarkSetArgs);
301 #endif // ARROW_WITH_BENCHMARKS_REFERENCE
302
303 //
304 // GroupBy
305 //
306
BenchmarkGroupBy(benchmark::State & state,std::vector<internal::Aggregate> aggregates,std::vector<Datum> arguments,std::vector<Datum> keys)307 static void BenchmarkGroupBy(benchmark::State& state,
308 std::vector<internal::Aggregate> aggregates,
309 std::vector<Datum> arguments, std::vector<Datum> keys) {
310 for (auto _ : state) {
311 ABORT_NOT_OK(GroupBy(arguments, keys, aggregates).status());
312 }
313 }
314
315 #define GROUP_BY_BENCHMARK(Name, Impl) \
316 static void Name(benchmark::State& state) { \
317 RegressionArgs args(state, false); \
318 auto rng = random::RandomArrayGenerator(1923); \
319 (Impl)(); \
320 } \
321 BENCHMARK(Name)->Apply([](benchmark::internal::Benchmark* bench) { \
322 BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024}); \
323 })
324
__anon3485dbfb0102null325 GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyStringSet, [&] {
326 auto summand = rng.Float64(args.size,
327 /*min=*/0.0,
328 /*max=*/1.0e14,
329 /*null_probability=*/args.null_proportion,
330 /*nan_probability=*/args.null_proportion / 10);
331
332 auto key = rng.StringWithRepeats(args.size,
333 /*unique=*/16,
334 /*min_length=*/3,
335 /*max_length=*/32);
336
337 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
338 });
339
__anon3485dbfb0202null340 GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallStringSet, [&] {
341 auto summand = rng.Float64(args.size,
342 /*min=*/0.0,
343 /*max=*/1.0e14,
344 /*null_probability=*/args.null_proportion,
345 /*nan_probability=*/args.null_proportion / 10);
346
347 auto key = rng.StringWithRepeats(args.size,
348 /*unique=*/256,
349 /*min_length=*/3,
350 /*max_length=*/32);
351
352 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
353 });
354
__anon3485dbfb0302null355 GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumStringSet, [&] {
356 auto summand = rng.Float64(args.size,
357 /*min=*/0.0,
358 /*max=*/1.0e14,
359 /*null_probability=*/args.null_proportion,
360 /*nan_probability=*/args.null_proportion / 10);
361
362 auto key = rng.StringWithRepeats(args.size,
363 /*unique=*/4096,
364 /*min_length=*/3,
365 /*max_length=*/32);
366
367 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
368 });
369
__anon3485dbfb0402null370 GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntegerSet, [&] {
371 auto summand = rng.Float64(args.size,
372 /*min=*/0.0,
373 /*max=*/1.0e14,
374 /*null_probability=*/args.null_proportion,
375 /*nan_probability=*/args.null_proportion / 10);
376
377 auto key = rng.Int64(args.size,
378 /*min=*/0,
379 /*max=*/15);
380
381 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
382 });
383
__anon3485dbfb0502null384 GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntegerSet, [&] {
385 auto summand = rng.Float64(args.size,
386 /*min=*/0.0,
387 /*max=*/1.0e14,
388 /*null_probability=*/args.null_proportion,
389 /*nan_probability=*/args.null_proportion / 10);
390
391 auto key = rng.Int64(args.size,
392 /*min=*/0,
393 /*max=*/255);
394
395 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
396 });
397
__anon3485dbfb0602null398 GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntegerSet, [&] {
399 auto summand = rng.Float64(args.size,
400 /*min=*/0.0,
401 /*max=*/1.0e14,
402 /*null_probability=*/args.null_proportion,
403 /*nan_probability=*/args.null_proportion / 10);
404
405 auto key = rng.Int64(args.size,
406 /*min=*/0,
407 /*max=*/4095);
408
409 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
410 });
411
__anon3485dbfb0702null412 GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntStringPairSet, [&] {
413 auto summand = rng.Float64(args.size,
414 /*min=*/0.0,
415 /*max=*/1.0e14,
416 /*null_probability=*/args.null_proportion,
417 /*nan_probability=*/args.null_proportion / 10);
418
419 auto int_key = rng.Int64(args.size,
420 /*min=*/0,
421 /*max=*/4);
422 auto str_key = rng.StringWithRepeats(args.size,
423 /*unique=*/4,
424 /*min_length=*/3,
425 /*max_length=*/32);
426
427 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {int_key, str_key});
428 });
429
__anon3485dbfb0802null430 GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntStringPairSet, [&] {
431 auto summand = rng.Float64(args.size,
432 /*min=*/0.0,
433 /*max=*/1.0e14,
434 /*null_probability=*/args.null_proportion,
435 /*nan_probability=*/args.null_proportion / 10);
436
437 auto int_key = rng.Int64(args.size,
438 /*min=*/0,
439 /*max=*/15);
440 auto str_key = rng.StringWithRepeats(args.size,
441 /*unique=*/16,
442 /*min_length=*/3,
443 /*max_length=*/32);
444
445 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {int_key, str_key});
446 });
447
__anon3485dbfb0902null448 GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntStringPairSet, [&] {
449 auto summand = rng.Float64(args.size,
450 /*min=*/0.0,
451 /*max=*/1.0e14,
452 /*null_probability=*/args.null_proportion,
453 /*nan_probability=*/args.null_proportion / 10);
454
455 auto int_key = rng.Int64(args.size,
456 /*min=*/0,
457 /*max=*/63);
458 auto str_key = rng.StringWithRepeats(args.size,
459 /*unique=*/64,
460 /*min_length=*/3,
461 /*max_length=*/32);
462
463 BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {int_key, str_key});
464 });
465
466 //
467 // Sum
468 //
469
470 template <typename ArrowType>
SumKernel(benchmark::State & state)471 static void SumKernel(benchmark::State& state) {
472 using CType = typename TypeTraits<ArrowType>::CType;
473
474 RegressionArgs args(state);
475 const int64_t array_size = args.size / sizeof(CType);
476 auto rand = random::RandomArrayGenerator(1923);
477 auto array = rand.Numeric<ArrowType>(array_size, -100, 100, args.null_proportion);
478
479 for (auto _ : state) {
480 ABORT_NOT_OK(Sum(array).status());
481 }
482 }
483
SumKernelArgs(benchmark::internal::Benchmark * bench)484 static void SumKernelArgs(benchmark::internal::Benchmark* bench) {
485 BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024}); // 1M
486 }
487
488 #define SUM_KERNEL_BENCHMARK(FuncName, Type) \
489 static void FuncName(benchmark::State& state) { SumKernel<Type>(state); } \
490 BENCHMARK(FuncName)->Apply(SumKernelArgs)
491
492 SUM_KERNEL_BENCHMARK(SumKernelFloat, FloatType);
493 SUM_KERNEL_BENCHMARK(SumKernelDouble, DoubleType);
494 SUM_KERNEL_BENCHMARK(SumKernelInt8, Int8Type);
495 SUM_KERNEL_BENCHMARK(SumKernelInt16, Int16Type);
496 SUM_KERNEL_BENCHMARK(SumKernelInt32, Int32Type);
497 SUM_KERNEL_BENCHMARK(SumKernelInt64, Int64Type);
498
499 //
500 // Mode
501 //
502
503 template <typename ArrowType>
ModeKernel(benchmark::State & state,int min,int max)504 void ModeKernel(benchmark::State& state, int min, int max) {
505 using CType = typename TypeTraits<ArrowType>::CType;
506
507 RegressionArgs args(state);
508 const int64_t array_size = args.size / sizeof(CType);
509 auto rand = random::RandomArrayGenerator(1924);
510 auto array = rand.Numeric<ArrowType>(array_size, min, max, args.null_proportion);
511
512 for (auto _ : state) {
513 ABORT_NOT_OK(Mode(array).status());
514 }
515 }
516
517 template <typename ArrowType>
ModeKernelNarrow(benchmark::State & state)518 void ModeKernelNarrow(benchmark::State& state) {
519 ModeKernel<ArrowType>(state, -5000, 8000); // max - min < 16384
520 }
521
522 template <>
ModeKernelNarrow(benchmark::State & state)523 void ModeKernelNarrow<Int8Type>(benchmark::State& state) {
524 ModeKernel<Int8Type>(state, -128, 127);
525 }
526
527 template <>
ModeKernelNarrow(benchmark::State & state)528 void ModeKernelNarrow<BooleanType>(benchmark::State& state) {
529 RegressionArgs args(state);
530 auto rand = random::RandomArrayGenerator(1924);
531 auto array = rand.Boolean(args.size * 8, 0.5, args.null_proportion);
532
533 for (auto _ : state) {
534 ABORT_NOT_OK(Mode(array).status());
535 }
536 }
537
538 template <typename ArrowType>
ModeKernelWide(benchmark::State & state)539 void ModeKernelWide(benchmark::State& state) {
540 ModeKernel<ArrowType>(state, -1234567, 7654321);
541 }
542
ModeKernelArgs(benchmark::internal::Benchmark * bench)543 static void ModeKernelArgs(benchmark::internal::Benchmark* bench) {
544 BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024}); // 1M
545 }
546
547 BENCHMARK_TEMPLATE(ModeKernelNarrow, BooleanType)->Apply(ModeKernelArgs);
548 BENCHMARK_TEMPLATE(ModeKernelNarrow, Int8Type)->Apply(ModeKernelArgs);
549 BENCHMARK_TEMPLATE(ModeKernelNarrow, Int32Type)->Apply(ModeKernelArgs);
550 BENCHMARK_TEMPLATE(ModeKernelNarrow, Int64Type)->Apply(ModeKernelArgs);
551 BENCHMARK_TEMPLATE(ModeKernelWide, Int32Type)->Apply(ModeKernelArgs);
552 BENCHMARK_TEMPLATE(ModeKernelWide, Int64Type)->Apply(ModeKernelArgs);
553 BENCHMARK_TEMPLATE(ModeKernelWide, FloatType)->Apply(ModeKernelArgs);
554 BENCHMARK_TEMPLATE(ModeKernelWide, DoubleType)->Apply(ModeKernelArgs);
555
556 //
557 // MinMax
558 //
559
560 template <typename ArrowType>
MinMaxKernelBench(benchmark::State & state)561 static void MinMaxKernelBench(benchmark::State& state) {
562 using CType = typename TypeTraits<ArrowType>::CType;
563
564 RegressionArgs args(state);
565 const int64_t array_size = args.size / sizeof(CType);
566 auto rand = random::RandomArrayGenerator(1923);
567 auto array = rand.Numeric<ArrowType>(array_size, -100, 100, args.null_proportion);
568
569 for (auto _ : state) {
570 ABORT_NOT_OK(MinMax(array).status());
571 }
572 }
573
MinMaxKernelBenchArgs(benchmark::internal::Benchmark * bench)574 static void MinMaxKernelBenchArgs(benchmark::internal::Benchmark* bench) {
575 BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024}); // 1M
576 }
577
578 #define MINMAX_KERNEL_BENCHMARK(FuncName, Type) \
579 static void FuncName(benchmark::State& state) { MinMaxKernelBench<Type>(state); } \
580 BENCHMARK(FuncName)->Apply(MinMaxKernelBenchArgs)
581
582 MINMAX_KERNEL_BENCHMARK(MinMaxKernelFloat, FloatType);
583 MINMAX_KERNEL_BENCHMARK(MinMaxKernelDouble, DoubleType);
584 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt8, Int8Type);
585 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt16, Int16Type);
586 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt32, Int32Type);
587 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt64, Int64Type);
588
589 //
590 // Count
591 //
592
CountKernelBenchInt64(benchmark::State & state)593 static void CountKernelBenchInt64(benchmark::State& state) {
594 RegressionArgs args(state);
595 const int64_t array_size = args.size / sizeof(int64_t);
596 auto rand = random::RandomArrayGenerator(1923);
597 auto array = rand.Numeric<Int64Type>(array_size, -100, 100, args.null_proportion);
598
599 for (auto _ : state) {
600 ABORT_NOT_OK(Count(array->Slice(1, array_size)).status());
601 }
602 }
603 BENCHMARK(CountKernelBenchInt64)->Args({1 * 1024 * 1024, 2}); // 1M with 50% null.
604
605 //
606 // Variance
607 //
608
609 template <typename ArrowType>
VarianceKernelBench(benchmark::State & state)610 void VarianceKernelBench(benchmark::State& state) {
611 using CType = typename TypeTraits<ArrowType>::CType;
612
613 VarianceOptions options;
614 RegressionArgs args(state);
615 const int64_t array_size = args.size / sizeof(CType);
616 auto rand = random::RandomArrayGenerator(1925);
617 auto array = rand.Numeric<ArrowType>(array_size, -100000, 100000, args.null_proportion);
618
619 for (auto _ : state) {
620 ABORT_NOT_OK(Variance(array, options).status());
621 }
622 }
623
VarianceKernelBenchArgs(benchmark::internal::Benchmark * bench)624 static void VarianceKernelBenchArgs(benchmark::internal::Benchmark* bench) {
625 BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});
626 }
627
628 #define VARIANCE_KERNEL_BENCHMARK(FuncName, Type) \
629 static void FuncName(benchmark::State& state) { VarianceKernelBench<Type>(state); } \
630 BENCHMARK(FuncName)->Apply(VarianceKernelBenchArgs)
631
632 VARIANCE_KERNEL_BENCHMARK(VarianceKernelInt32, Int32Type);
633 VARIANCE_KERNEL_BENCHMARK(VarianceKernelInt64, Int64Type);
634 VARIANCE_KERNEL_BENCHMARK(VarianceKernelFloat, FloatType);
635 VARIANCE_KERNEL_BENCHMARK(VarianceKernelDouble, DoubleType);
636
637 //
638 // Quantile
639 //
640
deciles()641 static std::vector<double> deciles() {
642 return {0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0};
643 }
644
centiles()645 static std::vector<double> centiles() {
646 std::vector<double> q(101);
647 for (int i = 0; i <= 100; ++i) {
648 q[i] = i / 100.0;
649 }
650 return q;
651 }
652
653 template <typename ArrowType>
QuantileKernel(benchmark::State & state,int min,int max,std::vector<double> q)654 void QuantileKernel(benchmark::State& state, int min, int max, std::vector<double> q) {
655 using CType = typename TypeTraits<ArrowType>::CType;
656
657 QuantileOptions options(std::move(q));
658 RegressionArgs args(state);
659 const int64_t array_size = args.size / sizeof(CType);
660 auto rand = random::RandomArrayGenerator(1926);
661 auto array = rand.Numeric<ArrowType>(array_size, min, max, args.null_proportion);
662
663 for (auto _ : state) {
664 ABORT_NOT_OK(Quantile(array, options).status());
665 }
666 state.SetItemsProcessed(state.iterations() * array_size);
667 }
668
669 template <typename ArrowType>
QuantileKernelMedian(benchmark::State & state,int min,int max)670 void QuantileKernelMedian(benchmark::State& state, int min, int max) {
671 QuantileKernel<ArrowType>(state, min, max, {0.5});
672 }
673
674 template <typename ArrowType>
QuantileKernelMedianWide(benchmark::State & state)675 void QuantileKernelMedianWide(benchmark::State& state) {
676 QuantileKernel<ArrowType>(state, 0, 1 << 24, {0.5});
677 }
678
679 template <typename ArrowType>
QuantileKernelMedianNarrow(benchmark::State & state)680 void QuantileKernelMedianNarrow(benchmark::State& state) {
681 QuantileKernel<ArrowType>(state, -30000, 30000, {0.5});
682 }
683
684 template <typename ArrowType>
QuantileKernelDecilesWide(benchmark::State & state)685 void QuantileKernelDecilesWide(benchmark::State& state) {
686 QuantileKernel<ArrowType>(state, 0, 1 << 24, deciles());
687 }
688
689 template <typename ArrowType>
QuantileKernelDecilesNarrow(benchmark::State & state)690 void QuantileKernelDecilesNarrow(benchmark::State& state) {
691 QuantileKernel<ArrowType>(state, -30000, 30000, deciles());
692 }
693
694 template <typename ArrowType>
QuantileKernelCentilesWide(benchmark::State & state)695 void QuantileKernelCentilesWide(benchmark::State& state) {
696 QuantileKernel<ArrowType>(state, 0, 1 << 24, centiles());
697 }
698
699 template <typename ArrowType>
QuantileKernelCentilesNarrow(benchmark::State & state)700 void QuantileKernelCentilesNarrow(benchmark::State& state) {
701 QuantileKernel<ArrowType>(state, -30000, 30000, centiles());
702 }
703
QuantileKernelArgs(benchmark::internal::Benchmark * bench)704 static void QuantileKernelArgs(benchmark::internal::Benchmark* bench) {
705 BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});
706 }
707
708 BENCHMARK_TEMPLATE(QuantileKernelMedianNarrow, Int32Type)->Apply(QuantileKernelArgs);
709 BENCHMARK_TEMPLATE(QuantileKernelMedianWide, Int32Type)->Apply(QuantileKernelArgs);
710 BENCHMARK_TEMPLATE(QuantileKernelMedianNarrow, Int64Type)->Apply(QuantileKernelArgs);
711 BENCHMARK_TEMPLATE(QuantileKernelMedianWide, Int64Type)->Apply(QuantileKernelArgs);
712 BENCHMARK_TEMPLATE(QuantileKernelMedianWide, DoubleType)->Apply(QuantileKernelArgs);
713
714 BENCHMARK_TEMPLATE(QuantileKernelDecilesNarrow, Int32Type)->Apply(QuantileKernelArgs);
715 BENCHMARK_TEMPLATE(QuantileKernelDecilesWide, Int32Type)->Apply(QuantileKernelArgs);
716 BENCHMARK_TEMPLATE(QuantileKernelDecilesWide, DoubleType)->Apply(QuantileKernelArgs);
717
718 BENCHMARK_TEMPLATE(QuantileKernelCentilesNarrow, Int32Type)->Apply(QuantileKernelArgs);
719 BENCHMARK_TEMPLATE(QuantileKernelCentilesWide, Int32Type)->Apply(QuantileKernelArgs);
720 BENCHMARK_TEMPLATE(QuantileKernelCentilesWide, DoubleType)->Apply(QuantileKernelArgs);
721
TDigestKernelDouble(benchmark::State & state,std::vector<double> q)722 static void TDigestKernelDouble(benchmark::State& state, std::vector<double> q) {
723 TDigestOptions options{std::move(q)};
724 RegressionArgs args(state);
725 const int64_t array_size = args.size / sizeof(double);
726 auto rand = random::RandomArrayGenerator(1926);
727 auto array = rand.Numeric<DoubleType>(array_size, 0, 1 << 24, args.null_proportion);
728
729 for (auto _ : state) {
730 ABORT_NOT_OK(TDigest(array, options).status());
731 }
732 state.SetItemsProcessed(state.iterations() * array_size);
733 }
734
TDigestKernelDoubleMedian(benchmark::State & state)735 static void TDigestKernelDoubleMedian(benchmark::State& state) {
736 TDigestKernelDouble(state, {0.5});
737 }
738
TDigestKernelDoubleDeciles(benchmark::State & state)739 static void TDigestKernelDoubleDeciles(benchmark::State& state) {
740 TDigestKernelDouble(state, deciles());
741 }
742
TDigestKernelDoubleCentiles(benchmark::State & state)743 static void TDigestKernelDoubleCentiles(benchmark::State& state) {
744 TDigestKernelDouble(state, centiles());
745 }
746
747 BENCHMARK(TDigestKernelDoubleMedian)->Apply(QuantileKernelArgs);
748 BENCHMARK(TDigestKernelDoubleDeciles)->Apply(QuantileKernelArgs);
749 BENCHMARK(TDigestKernelDoubleCentiles)->Apply(QuantileKernelArgs);
750
751 } // namespace compute
752 } // namespace arrow
753