1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "benchmark/benchmark.h"
19 
20 #include "arrow/compute/api.h"
21 
22 #include "arrow/compute/benchmark_util.h"
23 #include "arrow/compute/test_util.h"
24 #include "arrow/testing/gtest_util.h"
25 #include "arrow/testing/random.h"
26 
27 namespace arrow {
28 namespace compute {
29 
// Seed handed to every random::RandomArrayGenerator constructed in this file;
// presumably fixed so that the generated inputs are the same from run to run.
constexpr auto kSeed = 0x0ff1ce;
31 
TakeBenchmark(benchmark::State & state,const std::shared_ptr<Array> & values,const std::shared_ptr<Array> & indices)32 static void TakeBenchmark(benchmark::State& state, const std::shared_ptr<Array>& values,
33                           const std::shared_ptr<Array>& indices) {
34   for (auto _ : state) {
35     ABORT_NOT_OK(Take(values, indices).status());
36   }
37 }
38 
TakeInt64(benchmark::State & state)39 static void TakeInt64(benchmark::State& state) {
40   RegressionArgs args(state);
41 
42   const int64_t array_size = args.size / sizeof(int64_t);
43   auto rand = random::RandomArrayGenerator(kSeed);
44 
45   auto values = rand.Int64(array_size, -100, 100, args.null_proportion);
46 
47   auto indices = rand.Int32(static_cast<int32_t>(array_size), 0,
48                             static_cast<int32_t>(array_size - 1), args.null_proportion);
49 
50   TakeBenchmark(state, values, indices);
51 }
52 
TakeFixedSizeList1Int64(benchmark::State & state)53 static void TakeFixedSizeList1Int64(benchmark::State& state) {
54   RegressionArgs args(state);
55 
56   const int64_t array_size = args.size / sizeof(int64_t);
57   auto rand = random::RandomArrayGenerator(kSeed);
58 
59   auto int_array = rand.Int64(array_size, -100, 100, args.null_proportion);
60   auto values = std::make_shared<FixedSizeListArray>(
61       fixed_size_list(int64(), 1), array_size, int_array, int_array->null_bitmap(),
62       int_array->null_count());
63 
64   auto indices = rand.Int32(static_cast<int32_t>(array_size), 0,
65                             static_cast<int32_t>(array_size - 1), args.null_proportion);
66 
67   TakeBenchmark(state, values, indices);
68 }
69 
TakeInt64VsFilter(benchmark::State & state)70 static void TakeInt64VsFilter(benchmark::State& state) {
71   RegressionArgs args(state);
72 
73   const int64_t array_size = args.size / sizeof(int64_t);
74   auto rand = random::RandomArrayGenerator(kSeed);
75 
76   auto values = rand.Int64(array_size, -100, 100, args.null_proportion);
77 
78   auto filter = std::static_pointer_cast<BooleanArray>(
79       rand.Boolean(array_size, 0.75, args.null_proportion));
80 
81   Int32Builder indices_builder;
82   ABORT_NOT_OK(indices_builder.Resize(array_size));
83 
84   for (int64_t i = 0; i < array_size; ++i) {
85     if (filter->IsNull(i)) {
86       indices_builder.UnsafeAppendNull();
87     } else if (filter->Value(i)) {
88       indices_builder.UnsafeAppend(static_cast<int32_t>(i));
89     }
90   }
91 
92   std::shared_ptr<Array> indices;
93   ABORT_NOT_OK(indices_builder.Finish(&indices));
94   TakeBenchmark(state, values, indices);
95 }
96 
TakeString(benchmark::State & state)97 static void TakeString(benchmark::State& state) {
98   RegressionArgs args(state);
99 
100   int32_t string_min_length = 0, string_max_length = 128;
101   int32_t string_mean_length = (string_max_length + string_min_length) / 2;
102   // for an array of 50% null strings, we need to generate twice as many strings
103   // to ensure that they have an average of args.size total characters
104   auto array_size =
105       static_cast<int64_t>(args.size / string_mean_length / (1 - args.null_proportion));
106 
107   auto rand = random::RandomArrayGenerator(kSeed);
108   auto values = std::static_pointer_cast<StringArray>(rand.String(
109       array_size, string_min_length, string_max_length, args.null_proportion));
110 
111   auto indices = rand.Int32(static_cast<int32_t>(array_size), 0,
112                             static_cast<int32_t>(array_size - 1), args.null_proportion);
113 
114   TakeBenchmark(state, values, indices);
115 }
116 
// Benchmark registrations. Every benchmark below is configured the same way:
//   - Apply(RegressionSetArgs) lets RegressionSetArgs (declared outside this
//     file) install its own argument sets on the benchmark;
//   - two further explicit argument pairs are appended with Args();
//   - MinTime(1.0) sets the minimum measuring time per run (seconds, per the
//     Google Benchmark documentation);
//   - Unit(kNanosecond) selects nanoseconds as the reported time unit.
// NOTE(review): each argument pair is presumably read by RegressionArgs as
// (size, null-proportion parameter) - confirm in arrow/compute/benchmark_util.h.

// Take on a plain Int64 array.
BENCHMARK(TakeInt64)
    ->Apply(RegressionSetArgs)
    ->Args({1 << 20, 1})
    ->Args({1 << 23, 1})
    ->MinTime(1.0)
    ->Unit(benchmark::TimeUnit::kNanosecond);

// Take on a fixed-size list array with one int64 per list.
BENCHMARK(TakeFixedSizeList1Int64)
    ->Apply(RegressionSetArgs)
    ->Args({1 << 20, 1})
    ->Args({1 << 23, 1})
    ->MinTime(1.0)
    ->Unit(benchmark::TimeUnit::kNanosecond);

// Take on an Int64 array with indices built from a boolean filter.
BENCHMARK(TakeInt64VsFilter)
    ->Apply(RegressionSetArgs)
    ->Args({1 << 20, 1})
    ->Args({1 << 23, 1})
    ->MinTime(1.0)
    ->Unit(benchmark::TimeUnit::kNanosecond);

// Take on a string array.
BENCHMARK(TakeString)
    ->Apply(RegressionSetArgs)
    ->Args({1 << 20, 1})
    ->Args({1 << 23, 1})
    ->MinTime(1.0)
    ->Unit(benchmark::TimeUnit::kNanosecond);
144 
145 }  // namespace compute
146 }  // namespace arrow
147