1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "benchmark/benchmark.h"
19 
20 #include <vector>
21 
22 #include "arrow/compute/api.h"
23 #include "arrow/testing/gtest_util.h"
24 #include "arrow/testing/random.h"
25 #include "arrow/util/benchmark_util.h"
26 #include "arrow/util/bit_util.h"
27 #include "arrow/util/bitmap_reader.h"
28 
29 namespace arrow {
30 namespace compute {
31 
32 #include <cassert>
33 #include <cmath>
34 #include <iostream>
35 #include <random>
36 
37 #ifdef ARROW_WITH_BENCHMARKS_REFERENCE
38 
39 namespace BitUtil = arrow::BitUtil;
40 using arrow::internal::BitmapReader;
41 
42 template <typename T>
43 struct SumState {
44   using ValueType = T;
45 
SumStatearrow::compute::SumState46   SumState() : total(0), valid_count(0) {}
47 
48   T total = 0;
49   int64_t valid_count = 0;
50 };
51 
52 template <typename T>
53 struct Traits {};
54 
55 template <>
56 struct Traits<int64_t> {
57   using ArrayType = typename CTypeTraits<int64_t>::ArrayType;
58   static constexpr int64_t null_sentinel = std::numeric_limits<int64_t>::lowest();
59 
FixSentinelarrow::compute::Traits60   static void FixSentinel(std::shared_ptr<ArrayType>& array) {
61     auto data = array->data();
62     for (int64_t i = 0; i < array->length(); i++)
63       if (array->IsNull(i)) {
64         int64_t* val_ptr = data->GetMutableValues<int64_t>(1, i);
65         *val_ptr = null_sentinel;
66       }
67   }
68 
IsNullarrow::compute::Traits69   static inline bool IsNull(int64_t val) { return val == null_sentinel; }
70 
NotNullarrow::compute::Traits71   static inline bool NotNull(int64_t val) { return val != null_sentinel; }
72 };
73 
74 template <typename T>
75 struct Summer {
76  public:
77   using ValueType = T;
78   using ArrowType = typename CTypeTraits<T>::ArrowType;
79 };
80 
81 template <typename T>
82 struct SumNoNulls : public Summer<T> {
83   using ArrayType = typename CTypeTraits<T>::ArrayType;
84 
Sumarrow::compute::SumNoNulls85   static void Sum(const ArrayType& array, SumState<T>* state) {
86     SumState<T> local;
87 
88     const auto values = array.raw_values();
89     for (int64_t i = 0; i < array.length(); ++i) {
90       local.total += values[i];
91     }
92 
93     local.valid_count = array.length();
94     *state = local;
95   }
96 };
97 
98 template <typename T>
99 struct SumNoNullsUnrolled : public Summer<T> {
100   using ArrayType = typename CTypeTraits<T>::ArrayType;
101 
Sumarrow::compute::SumNoNullsUnrolled102   static void Sum(const ArrayType& array, SumState<T>* state) {
103     SumState<T> local;
104 
105     const auto values = array.raw_values();
106     const auto length = array.length();
107     const int64_t length_rounded = BitUtil::RoundDown(length, 8);
108     for (int64_t i = 0; i < length_rounded; i += 8) {
109       local.total += values[i + 0] + values[i + 1] + values[i + 2] + values[i + 3] +
110                      values[i + 4] + values[i + 5] + values[i + 6] + values[i + 7];
111     }
112 
113     for (int64_t i = length_rounded; i < length; ++i) {
114       local.total += values[i];
115     }
116 
117     local.valid_count = length;
118 
119     *state = local;
120   }
121 };
122 
123 template <typename T>
124 struct SumSentinel : public Summer<T> {
125   using ArrayType = typename CTypeTraits<T>::ArrayType;
126 
Sumarrow::compute::SumSentinel127   static void Sum(const ArrayType& array, SumState<T>* state) {
128     SumState<T> local;
129 
130     const auto values = array.raw_values();
131     const auto length = array.length();
132     for (int64_t i = 0; i < length; i++) {
133       // NaN is not equal to itself
134       local.total += values[i] * Traits<T>::NotNull(values[i]);
135       local.valid_count++;
136     }
137 
138     *state = local;
139   }
140 };
141 
142 template <typename T>
143 struct SumSentinelUnrolled : public Summer<T> {
144   using ArrayType = typename CTypeTraits<T>::ArrayType;
145 
Sumarrow::compute::SumSentinelUnrolled146   static void Sum(const ArrayType& array, SumState<T>* state) {
147     SumState<T> local;
148 
149 #define SUM_NOT_NULL(ITEM)                                                  \
150   do {                                                                      \
151     local.total += values[i + ITEM] * Traits<T>::NotNull(values[i + ITEM]); \
152     local.valid_count++;                                                    \
153   } while (0)
154 
155     const auto values = array.raw_values();
156     const auto length = array.length();
157     const int64_t length_rounded = BitUtil::RoundDown(length, 8);
158     for (int64_t i = 0; i < length_rounded; i += 8) {
159       SUM_NOT_NULL(0);
160       SUM_NOT_NULL(1);
161       SUM_NOT_NULL(2);
162       SUM_NOT_NULL(3);
163       SUM_NOT_NULL(4);
164       SUM_NOT_NULL(5);
165       SUM_NOT_NULL(6);
166       SUM_NOT_NULL(7);
167     }
168 
169 #undef SUM_NOT_NULL
170 
171     for (int64_t i = length_rounded * 8; i < length; ++i) {
172       local.total += values[i] * Traits<T>::NotNull(values[i]);
173       ++local.valid_count;
174     }
175 
176     *state = local;
177   }
178 };
179 
180 template <typename T>
181 struct SumBitmapNaive : public Summer<T> {
182   using ArrayType = typename CTypeTraits<T>::ArrayType;
183 
Sumarrow::compute::SumBitmapNaive184   static void Sum(const ArrayType& array, SumState<T>* state) {
185     SumState<T> local;
186 
187     const auto values = array.raw_values();
188     const auto bitmap = array.null_bitmap_data();
189     const auto length = array.length();
190 
191     for (int64_t i = 0; i < length; ++i) {
192       if (BitUtil::GetBit(bitmap, i)) {
193         local.total += values[i];
194         ++local.valid_count;
195       }
196     }
197 
198     *state = local;
199   }
200 };
201 
202 template <typename T>
203 struct SumBitmapReader : public Summer<T> {
204   using ArrayType = typename CTypeTraits<T>::ArrayType;
205 
Sumarrow::compute::SumBitmapReader206   static void Sum(const ArrayType& array, SumState<T>* state) {
207     SumState<T> local;
208 
209     const auto values = array.raw_values();
210     const auto bitmap = array.null_bitmap_data();
211     const auto length = array.length();
212     BitmapReader bit_reader(bitmap, 0, length);
213     for (int64_t i = 0; i < length; ++i) {
214       if (bit_reader.IsSet()) {
215         local.total += values[i];
216         ++local.valid_count;
217       }
218 
219       bit_reader.Next();
220     }
221 
222     *state = local;
223   }
224 };
225 
226 template <typename T>
227 struct SumBitmapVectorizeUnroll : public Summer<T> {
228   using ArrayType = typename CTypeTraits<T>::ArrayType;
229 
Sumarrow::compute::SumBitmapVectorizeUnroll230   static void Sum(const ArrayType& array, SumState<T>* state) {
231     SumState<T> local;
232 
233     const auto values = array.raw_values();
234     const auto bitmap = array.null_bitmap_data();
235     const auto length = array.length();
236     const int64_t length_rounded = BitUtil::RoundDown(length, 8);
237     for (int64_t i = 0; i < length_rounded; i += 8) {
238       const uint8_t valid_byte = bitmap[i / 8];
239 
240 #define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1))
241 
242       if (valid_byte < 0xFF) {
243         // Some nulls
244         local.total += SUM_SHIFT(0);
245         local.total += SUM_SHIFT(1);
246         local.total += SUM_SHIFT(2);
247         local.total += SUM_SHIFT(3);
248         local.total += SUM_SHIFT(4);
249         local.total += SUM_SHIFT(5);
250         local.total += SUM_SHIFT(6);
251         local.total += SUM_SHIFT(7);
252         local.valid_count += BitUtil::kBytePopcount[valid_byte];
253       } else {
254         // No nulls
255         local.total += values[i + 0] + values[i + 1] + values[i + 2] + values[i + 3] +
256                        values[i + 4] + values[i + 5] + values[i + 6] + values[i + 7];
257         local.valid_count += 8;
258       }
259     }
260 
261 #undef SUM_SHIFT
262 
263     for (int64_t i = length_rounded; i < length; ++i) {
264       if (BitUtil::GetBit(bitmap, i)) {
265         local.total = values[i];
266         ++local.valid_count;
267       }
268     }
269 
270     *state = local;
271   }
272 };
273 
274 template <typename Functor>
ReferenceSum(benchmark::State & state)275 void ReferenceSum(benchmark::State& state) {
276   using T = typename Functor::ValueType;
277 
278   RegressionArgs args(state);
279   const int64_t array_size = args.size / sizeof(int64_t);
280   auto rand = random::RandomArrayGenerator(1923);
281   auto array = std::static_pointer_cast<NumericArray<Int64Type>>(
282       rand.Int64(array_size, -100, 100, args.null_proportion));
283 
284   Traits<T>::FixSentinel(array);
285 
286   for (auto _ : state) {
287     SumState<T> sum_state;
288     Functor::Sum(*array, &sum_state);
289     benchmark::DoNotOptimize(sum_state);
290   }
291 }
292 
293 BENCHMARK_TEMPLATE(ReferenceSum, SumNoNulls<int64_t>)->Apply(BenchmarkSetArgs);
294 BENCHMARK_TEMPLATE(ReferenceSum, SumNoNullsUnrolled<int64_t>)->Apply(BenchmarkSetArgs);
295 BENCHMARK_TEMPLATE(ReferenceSum, SumSentinel<int64_t>)->Apply(BenchmarkSetArgs);
296 BENCHMARK_TEMPLATE(ReferenceSum, SumSentinelUnrolled<int64_t>)->Apply(BenchmarkSetArgs);
297 BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapNaive<int64_t>)->Apply(BenchmarkSetArgs);
298 BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapReader<int64_t>)->Apply(BenchmarkSetArgs);
299 BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll<int64_t>)
300     ->Apply(BenchmarkSetArgs);
301 #endif  // ARROW_WITH_BENCHMARKS_REFERENCE
302 
303 //
304 // GroupBy
305 //
306 
BenchmarkGroupBy(benchmark::State & state,std::vector<internal::Aggregate> aggregates,std::vector<Datum> arguments,std::vector<Datum> keys)307 static void BenchmarkGroupBy(benchmark::State& state,
308                              std::vector<internal::Aggregate> aggregates,
309                              std::vector<Datum> arguments, std::vector<Datum> keys) {
310   for (auto _ : state) {
311     ABORT_NOT_OK(GroupBy(arguments, keys, aggregates).status());
312   }
313 }
314 
315 #define GROUP_BY_BENCHMARK(Name, Impl)                               \
316   static void Name(benchmark::State& state) {                        \
317     RegressionArgs args(state, false);                               \
318     auto rng = random::RandomArrayGenerator(1923);                   \
319     (Impl)();                                                        \
320   }                                                                  \
321   BENCHMARK(Name)->Apply([](benchmark::internal::Benchmark* bench) { \
322     BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});             \
323   })
324 
__anon3485dbfb0102null325 GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyStringSet, [&] {
326   auto summand = rng.Float64(args.size,
327                              /*min=*/0.0,
328                              /*max=*/1.0e14,
329                              /*null_probability=*/args.null_proportion,
330                              /*nan_probability=*/args.null_proportion / 10);
331 
332   auto key = rng.StringWithRepeats(args.size,
333                                    /*unique=*/16,
334                                    /*min_length=*/3,
335                                    /*max_length=*/32);
336 
337   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
338 });
339 
__anon3485dbfb0202null340 GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallStringSet, [&] {
341   auto summand = rng.Float64(args.size,
342                              /*min=*/0.0,
343                              /*max=*/1.0e14,
344                              /*null_probability=*/args.null_proportion,
345                              /*nan_probability=*/args.null_proportion / 10);
346 
347   auto key = rng.StringWithRepeats(args.size,
348                                    /*unique=*/256,
349                                    /*min_length=*/3,
350                                    /*max_length=*/32);
351 
352   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
353 });
354 
__anon3485dbfb0302null355 GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumStringSet, [&] {
356   auto summand = rng.Float64(args.size,
357                              /*min=*/0.0,
358                              /*max=*/1.0e14,
359                              /*null_probability=*/args.null_proportion,
360                              /*nan_probability=*/args.null_proportion / 10);
361 
362   auto key = rng.StringWithRepeats(args.size,
363                                    /*unique=*/4096,
364                                    /*min_length=*/3,
365                                    /*max_length=*/32);
366 
367   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
368 });
369 
__anon3485dbfb0402null370 GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntegerSet, [&] {
371   auto summand = rng.Float64(args.size,
372                              /*min=*/0.0,
373                              /*max=*/1.0e14,
374                              /*null_probability=*/args.null_proportion,
375                              /*nan_probability=*/args.null_proportion / 10);
376 
377   auto key = rng.Int64(args.size,
378                        /*min=*/0,
379                        /*max=*/15);
380 
381   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
382 });
383 
__anon3485dbfb0502null384 GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntegerSet, [&] {
385   auto summand = rng.Float64(args.size,
386                              /*min=*/0.0,
387                              /*max=*/1.0e14,
388                              /*null_probability=*/args.null_proportion,
389                              /*nan_probability=*/args.null_proportion / 10);
390 
391   auto key = rng.Int64(args.size,
392                        /*min=*/0,
393                        /*max=*/255);
394 
395   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
396 });
397 
__anon3485dbfb0602null398 GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntegerSet, [&] {
399   auto summand = rng.Float64(args.size,
400                              /*min=*/0.0,
401                              /*max=*/1.0e14,
402                              /*null_probability=*/args.null_proportion,
403                              /*nan_probability=*/args.null_proportion / 10);
404 
405   auto key = rng.Int64(args.size,
406                        /*min=*/0,
407                        /*max=*/4095);
408 
409   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {key});
410 });
411 
__anon3485dbfb0702null412 GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntStringPairSet, [&] {
413   auto summand = rng.Float64(args.size,
414                              /*min=*/0.0,
415                              /*max=*/1.0e14,
416                              /*null_probability=*/args.null_proportion,
417                              /*nan_probability=*/args.null_proportion / 10);
418 
419   auto int_key = rng.Int64(args.size,
420                            /*min=*/0,
421                            /*max=*/4);
422   auto str_key = rng.StringWithRepeats(args.size,
423                                        /*unique=*/4,
424                                        /*min_length=*/3,
425                                        /*max_length=*/32);
426 
427   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {int_key, str_key});
428 });
429 
__anon3485dbfb0802null430 GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntStringPairSet, [&] {
431   auto summand = rng.Float64(args.size,
432                              /*min=*/0.0,
433                              /*max=*/1.0e14,
434                              /*null_probability=*/args.null_proportion,
435                              /*nan_probability=*/args.null_proportion / 10);
436 
437   auto int_key = rng.Int64(args.size,
438                            /*min=*/0,
439                            /*max=*/15);
440   auto str_key = rng.StringWithRepeats(args.size,
441                                        /*unique=*/16,
442                                        /*min_length=*/3,
443                                        /*max_length=*/32);
444 
445   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {int_key, str_key});
446 });
447 
__anon3485dbfb0902null448 GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntStringPairSet, [&] {
449   auto summand = rng.Float64(args.size,
450                              /*min=*/0.0,
451                              /*max=*/1.0e14,
452                              /*null_probability=*/args.null_proportion,
453                              /*nan_probability=*/args.null_proportion / 10);
454 
455   auto int_key = rng.Int64(args.size,
456                            /*min=*/0,
457                            /*max=*/63);
458   auto str_key = rng.StringWithRepeats(args.size,
459                                        /*unique=*/64,
460                                        /*min_length=*/3,
461                                        /*max_length=*/32);
462 
463   BenchmarkGroupBy(state, {{"hash_sum", NULLPTR}}, {summand}, {int_key, str_key});
464 });
465 
466 //
467 // Sum
468 //
469 
470 template <typename ArrowType>
SumKernel(benchmark::State & state)471 static void SumKernel(benchmark::State& state) {
472   using CType = typename TypeTraits<ArrowType>::CType;
473 
474   RegressionArgs args(state);
475   const int64_t array_size = args.size / sizeof(CType);
476   auto rand = random::RandomArrayGenerator(1923);
477   auto array = rand.Numeric<ArrowType>(array_size, -100, 100, args.null_proportion);
478 
479   for (auto _ : state) {
480     ABORT_NOT_OK(Sum(array).status());
481   }
482 }
483 
SumKernelArgs(benchmark::internal::Benchmark * bench)484 static void SumKernelArgs(benchmark::internal::Benchmark* bench) {
485   BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});  // 1M
486 }
487 
488 #define SUM_KERNEL_BENCHMARK(FuncName, Type)                                \
489   static void FuncName(benchmark::State& state) { SumKernel<Type>(state); } \
490   BENCHMARK(FuncName)->Apply(SumKernelArgs)
491 
492 SUM_KERNEL_BENCHMARK(SumKernelFloat, FloatType);
493 SUM_KERNEL_BENCHMARK(SumKernelDouble, DoubleType);
494 SUM_KERNEL_BENCHMARK(SumKernelInt8, Int8Type);
495 SUM_KERNEL_BENCHMARK(SumKernelInt16, Int16Type);
496 SUM_KERNEL_BENCHMARK(SumKernelInt32, Int32Type);
497 SUM_KERNEL_BENCHMARK(SumKernelInt64, Int64Type);
498 
499 //
500 // Mode
501 //
502 
503 template <typename ArrowType>
ModeKernel(benchmark::State & state,int min,int max)504 void ModeKernel(benchmark::State& state, int min, int max) {
505   using CType = typename TypeTraits<ArrowType>::CType;
506 
507   RegressionArgs args(state);
508   const int64_t array_size = args.size / sizeof(CType);
509   auto rand = random::RandomArrayGenerator(1924);
510   auto array = rand.Numeric<ArrowType>(array_size, min, max, args.null_proportion);
511 
512   for (auto _ : state) {
513     ABORT_NOT_OK(Mode(array).status());
514   }
515 }
516 
517 template <typename ArrowType>
ModeKernelNarrow(benchmark::State & state)518 void ModeKernelNarrow(benchmark::State& state) {
519   ModeKernel<ArrowType>(state, -5000, 8000);  // max - min < 16384
520 }
521 
522 template <>
ModeKernelNarrow(benchmark::State & state)523 void ModeKernelNarrow<Int8Type>(benchmark::State& state) {
524   ModeKernel<Int8Type>(state, -128, 127);
525 }
526 
527 template <>
ModeKernelNarrow(benchmark::State & state)528 void ModeKernelNarrow<BooleanType>(benchmark::State& state) {
529   RegressionArgs args(state);
530   auto rand = random::RandomArrayGenerator(1924);
531   auto array = rand.Boolean(args.size * 8, 0.5, args.null_proportion);
532 
533   for (auto _ : state) {
534     ABORT_NOT_OK(Mode(array).status());
535   }
536 }
537 
538 template <typename ArrowType>
ModeKernelWide(benchmark::State & state)539 void ModeKernelWide(benchmark::State& state) {
540   ModeKernel<ArrowType>(state, -1234567, 7654321);
541 }
542 
ModeKernelArgs(benchmark::internal::Benchmark * bench)543 static void ModeKernelArgs(benchmark::internal::Benchmark* bench) {
544   BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});  // 1M
545 }
546 
547 BENCHMARK_TEMPLATE(ModeKernelNarrow, BooleanType)->Apply(ModeKernelArgs);
548 BENCHMARK_TEMPLATE(ModeKernelNarrow, Int8Type)->Apply(ModeKernelArgs);
549 BENCHMARK_TEMPLATE(ModeKernelNarrow, Int32Type)->Apply(ModeKernelArgs);
550 BENCHMARK_TEMPLATE(ModeKernelNarrow, Int64Type)->Apply(ModeKernelArgs);
551 BENCHMARK_TEMPLATE(ModeKernelWide, Int32Type)->Apply(ModeKernelArgs);
552 BENCHMARK_TEMPLATE(ModeKernelWide, Int64Type)->Apply(ModeKernelArgs);
553 BENCHMARK_TEMPLATE(ModeKernelWide, FloatType)->Apply(ModeKernelArgs);
554 BENCHMARK_TEMPLATE(ModeKernelWide, DoubleType)->Apply(ModeKernelArgs);
555 
556 //
557 // MinMax
558 //
559 
560 template <typename ArrowType>
MinMaxKernelBench(benchmark::State & state)561 static void MinMaxKernelBench(benchmark::State& state) {
562   using CType = typename TypeTraits<ArrowType>::CType;
563 
564   RegressionArgs args(state);
565   const int64_t array_size = args.size / sizeof(CType);
566   auto rand = random::RandomArrayGenerator(1923);
567   auto array = rand.Numeric<ArrowType>(array_size, -100, 100, args.null_proportion);
568 
569   for (auto _ : state) {
570     ABORT_NOT_OK(MinMax(array).status());
571   }
572 }
573 
MinMaxKernelBenchArgs(benchmark::internal::Benchmark * bench)574 static void MinMaxKernelBenchArgs(benchmark::internal::Benchmark* bench) {
575   BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});  // 1M
576 }
577 
578 #define MINMAX_KERNEL_BENCHMARK(FuncName, Type)                                     \
579   static void FuncName(benchmark::State& state) { MinMaxKernelBench<Type>(state); } \
580   BENCHMARK(FuncName)->Apply(MinMaxKernelBenchArgs)
581 
582 MINMAX_KERNEL_BENCHMARK(MinMaxKernelFloat, FloatType);
583 MINMAX_KERNEL_BENCHMARK(MinMaxKernelDouble, DoubleType);
584 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt8, Int8Type);
585 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt16, Int16Type);
586 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt32, Int32Type);
587 MINMAX_KERNEL_BENCHMARK(MinMaxKernelInt64, Int64Type);
588 
589 //
590 // Count
591 //
592 
CountKernelBenchInt64(benchmark::State & state)593 static void CountKernelBenchInt64(benchmark::State& state) {
594   RegressionArgs args(state);
595   const int64_t array_size = args.size / sizeof(int64_t);
596   auto rand = random::RandomArrayGenerator(1923);
597   auto array = rand.Numeric<Int64Type>(array_size, -100, 100, args.null_proportion);
598 
599   for (auto _ : state) {
600     ABORT_NOT_OK(Count(array->Slice(1, array_size)).status());
601   }
602 }
603 BENCHMARK(CountKernelBenchInt64)->Args({1 * 1024 * 1024, 2});  // 1M with 50% null.
604 
605 //
606 // Variance
607 //
608 
609 template <typename ArrowType>
VarianceKernelBench(benchmark::State & state)610 void VarianceKernelBench(benchmark::State& state) {
611   using CType = typename TypeTraits<ArrowType>::CType;
612 
613   VarianceOptions options;
614   RegressionArgs args(state);
615   const int64_t array_size = args.size / sizeof(CType);
616   auto rand = random::RandomArrayGenerator(1925);
617   auto array = rand.Numeric<ArrowType>(array_size, -100000, 100000, args.null_proportion);
618 
619   for (auto _ : state) {
620     ABORT_NOT_OK(Variance(array, options).status());
621   }
622 }
623 
VarianceKernelBenchArgs(benchmark::internal::Benchmark * bench)624 static void VarianceKernelBenchArgs(benchmark::internal::Benchmark* bench) {
625   BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});
626 }
627 
628 #define VARIANCE_KERNEL_BENCHMARK(FuncName, Type)                                     \
629   static void FuncName(benchmark::State& state) { VarianceKernelBench<Type>(state); } \
630   BENCHMARK(FuncName)->Apply(VarianceKernelBenchArgs)
631 
632 VARIANCE_KERNEL_BENCHMARK(VarianceKernelInt32, Int32Type);
633 VARIANCE_KERNEL_BENCHMARK(VarianceKernelInt64, Int64Type);
634 VARIANCE_KERNEL_BENCHMARK(VarianceKernelFloat, FloatType);
635 VARIANCE_KERNEL_BENCHMARK(VarianceKernelDouble, DoubleType);
636 
637 //
638 // Quantile
639 //
640 
deciles()641 static std::vector<double> deciles() {
642   return {0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0};
643 }
644 
centiles()645 static std::vector<double> centiles() {
646   std::vector<double> q(101);
647   for (int i = 0; i <= 100; ++i) {
648     q[i] = i / 100.0;
649   }
650   return q;
651 }
652 
653 template <typename ArrowType>
QuantileKernel(benchmark::State & state,int min,int max,std::vector<double> q)654 void QuantileKernel(benchmark::State& state, int min, int max, std::vector<double> q) {
655   using CType = typename TypeTraits<ArrowType>::CType;
656 
657   QuantileOptions options(std::move(q));
658   RegressionArgs args(state);
659   const int64_t array_size = args.size / sizeof(CType);
660   auto rand = random::RandomArrayGenerator(1926);
661   auto array = rand.Numeric<ArrowType>(array_size, min, max, args.null_proportion);
662 
663   for (auto _ : state) {
664     ABORT_NOT_OK(Quantile(array, options).status());
665   }
666   state.SetItemsProcessed(state.iterations() * array_size);
667 }
668 
669 template <typename ArrowType>
QuantileKernelMedian(benchmark::State & state,int min,int max)670 void QuantileKernelMedian(benchmark::State& state, int min, int max) {
671   QuantileKernel<ArrowType>(state, min, max, {0.5});
672 }
673 
674 template <typename ArrowType>
QuantileKernelMedianWide(benchmark::State & state)675 void QuantileKernelMedianWide(benchmark::State& state) {
676   QuantileKernel<ArrowType>(state, 0, 1 << 24, {0.5});
677 }
678 
679 template <typename ArrowType>
QuantileKernelMedianNarrow(benchmark::State & state)680 void QuantileKernelMedianNarrow(benchmark::State& state) {
681   QuantileKernel<ArrowType>(state, -30000, 30000, {0.5});
682 }
683 
684 template <typename ArrowType>
QuantileKernelDecilesWide(benchmark::State & state)685 void QuantileKernelDecilesWide(benchmark::State& state) {
686   QuantileKernel<ArrowType>(state, 0, 1 << 24, deciles());
687 }
688 
689 template <typename ArrowType>
QuantileKernelDecilesNarrow(benchmark::State & state)690 void QuantileKernelDecilesNarrow(benchmark::State& state) {
691   QuantileKernel<ArrowType>(state, -30000, 30000, deciles());
692 }
693 
694 template <typename ArrowType>
QuantileKernelCentilesWide(benchmark::State & state)695 void QuantileKernelCentilesWide(benchmark::State& state) {
696   QuantileKernel<ArrowType>(state, 0, 1 << 24, centiles());
697 }
698 
699 template <typename ArrowType>
QuantileKernelCentilesNarrow(benchmark::State & state)700 void QuantileKernelCentilesNarrow(benchmark::State& state) {
701   QuantileKernel<ArrowType>(state, -30000, 30000, centiles());
702 }
703 
QuantileKernelArgs(benchmark::internal::Benchmark * bench)704 static void QuantileKernelArgs(benchmark::internal::Benchmark* bench) {
705   BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});
706 }
707 
708 BENCHMARK_TEMPLATE(QuantileKernelMedianNarrow, Int32Type)->Apply(QuantileKernelArgs);
709 BENCHMARK_TEMPLATE(QuantileKernelMedianWide, Int32Type)->Apply(QuantileKernelArgs);
710 BENCHMARK_TEMPLATE(QuantileKernelMedianNarrow, Int64Type)->Apply(QuantileKernelArgs);
711 BENCHMARK_TEMPLATE(QuantileKernelMedianWide, Int64Type)->Apply(QuantileKernelArgs);
712 BENCHMARK_TEMPLATE(QuantileKernelMedianWide, DoubleType)->Apply(QuantileKernelArgs);
713 
714 BENCHMARK_TEMPLATE(QuantileKernelDecilesNarrow, Int32Type)->Apply(QuantileKernelArgs);
715 BENCHMARK_TEMPLATE(QuantileKernelDecilesWide, Int32Type)->Apply(QuantileKernelArgs);
716 BENCHMARK_TEMPLATE(QuantileKernelDecilesWide, DoubleType)->Apply(QuantileKernelArgs);
717 
718 BENCHMARK_TEMPLATE(QuantileKernelCentilesNarrow, Int32Type)->Apply(QuantileKernelArgs);
719 BENCHMARK_TEMPLATE(QuantileKernelCentilesWide, Int32Type)->Apply(QuantileKernelArgs);
720 BENCHMARK_TEMPLATE(QuantileKernelCentilesWide, DoubleType)->Apply(QuantileKernelArgs);
721 
TDigestKernelDouble(benchmark::State & state,std::vector<double> q)722 static void TDigestKernelDouble(benchmark::State& state, std::vector<double> q) {
723   TDigestOptions options{std::move(q)};
724   RegressionArgs args(state);
725   const int64_t array_size = args.size / sizeof(double);
726   auto rand = random::RandomArrayGenerator(1926);
727   auto array = rand.Numeric<DoubleType>(array_size, 0, 1 << 24, args.null_proportion);
728 
729   for (auto _ : state) {
730     ABORT_NOT_OK(TDigest(array, options).status());
731   }
732   state.SetItemsProcessed(state.iterations() * array_size);
733 }
734 
TDigestKernelDoubleMedian(benchmark::State & state)735 static void TDigestKernelDoubleMedian(benchmark::State& state) {
736   TDigestKernelDouble(state, {0.5});
737 }
738 
TDigestKernelDoubleDeciles(benchmark::State & state)739 static void TDigestKernelDoubleDeciles(benchmark::State& state) {
740   TDigestKernelDouble(state, deciles());
741 }
742 
TDigestKernelDoubleCentiles(benchmark::State & state)743 static void TDigestKernelDoubleCentiles(benchmark::State& state) {
744   TDigestKernelDouble(state, centiles());
745 }
746 
747 BENCHMARK(TDigestKernelDoubleMedian)->Apply(QuantileKernelArgs);
748 BENCHMARK(TDigestKernelDoubleDeciles)->Apply(QuantileKernelArgs);
749 BENCHMARK(TDigestKernelDoubleCentiles)->Apply(QuantileKernelArgs);
750 
751 }  // namespace compute
752 }  // namespace arrow
753