1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "benchmark/benchmark.h"
19 
20 #include <array>
21 #include <iostream>
22 #include <random>
23 
24 #include "parquet/arrow/reader.h"
25 #include "parquet/arrow/writer.h"
26 #include "parquet/column_reader.h"
27 #include "parquet/column_writer.h"
28 #include "parquet/file_reader.h"
29 #include "parquet/file_writer.h"
30 #include "parquet/platform.h"
31 
32 #include "arrow/array.h"
33 #include "arrow/array/builder_primitive.h"
34 #include "arrow/io/memory.h"
35 #include "arrow/table.h"
36 #include "arrow/testing/random.h"
37 #include "arrow/util/bitmap_ops.h"
38 #include "arrow/util/logging.h"
39 
40 using arrow::Array;
41 using arrow::ArrayVector;
42 using arrow::BooleanBuilder;
43 using arrow::FieldVector;
44 using arrow::NumericBuilder;
45 
// Evaluates `s` exactly once as an ::arrow::Status. When the status is not OK,
// its string form is printed to stdout (prefixed with "Exiting: ") and the
// process terminates with EXIT_FAILURE. The do/while wrapper makes the macro
// usable wherever a single statement is expected.
#define EXIT_NOT_OK(s)                                                     \
  do {                                                                     \
    ::arrow::Status _exit_status = (s);                                    \
    if (ARROW_PREDICT_FALSE(!_exit_status.ok())) {                         \
      std::cout << "Exiting: " << _exit_status.ToString() << std::endl;    \
      exit(EXIT_FAILURE);                                                  \
    }                                                                      \
  } while (0)
54 
55 namespace parquet {
56 
57 using arrow::FileReader;
58 using arrow::WriteTable;
59 using schema::PrimitiveNode;
60 
61 namespace benchmark {
62 
// Number of values per benchmarked column (10 Mi). This should result in
// multiple pages for most primitive types.
constexpr int64_t BENCHMARK_SIZE = int64_t{10} * 1024 * 1024;
65 
66 template <typename ParquetType>
67 struct benchmark_traits {};
68 
69 template <>
70 struct benchmark_traits<Int32Type> {
71   using arrow_type = ::arrow::Int32Type;
72 };
73 
74 template <>
75 struct benchmark_traits<Int64Type> {
76   using arrow_type = ::arrow::Int64Type;
77 };
78 
79 template <>
80 struct benchmark_traits<DoubleType> {
81   using arrow_type = ::arrow::DoubleType;
82 };
83 
84 template <>
85 struct benchmark_traits<BooleanType> {
86   using arrow_type = ::arrow::BooleanType;
87 };
88 
89 template <typename ParquetType>
90 using ArrowType = typename benchmark_traits<ParquetType>::arrow_type;
91 
92 template <typename ParquetType>
MakeSchema(Repetition::type repetition)93 std::shared_ptr<ColumnDescriptor> MakeSchema(Repetition::type repetition) {
94   auto node = PrimitiveNode::Make("int64", repetition, ParquetType::type_num);
95   return std::make_shared<ColumnDescriptor>(node, repetition != Repetition::REQUIRED,
96                                             repetition == Repetition::REPEATED);
97 }
98 
99 template <bool nullable, typename ParquetType>
SetBytesProcessed(::benchmark::State & state,int64_t num_values=BENCHMARK_SIZE)100 void SetBytesProcessed(::benchmark::State& state, int64_t num_values = BENCHMARK_SIZE) {
101   const int64_t items_processed = state.iterations() * num_values;
102   const int64_t bytes_processed = items_processed * sizeof(typename ParquetType::c_type);
103 
104   state.SetItemsProcessed(bytes_processed);
105   state.SetBytesProcessed(bytes_processed);
106 }
107 
// Sentinel percentage: requests the alternating 0/1 pattern from RandomVector,
// and doubles as "not applicable" for callers that have no percentage to give.
constexpr int64_t kAlternatingOrNa = -1;

// Produces `vector_size` elements of type T.
//
// - true_percentage == kAlternatingOrNa: the elements alternate 0, 1, 0, 1, ...
//   (converted to T); `sample_values` and `seed` are ignored.
// - otherwise: each element is drawn independently; with probability
//   true_percentage / 100 it is sample_values[1], else sample_values[0].
//   The generator is seeded with `seed`, so a given seed always yields the
//   same sequence on a given standard library.
template <typename T>
std::vector<T> RandomVector(int64_t true_percentage, int64_t vector_size,
                            const std::array<T, 2>& sample_values, int seed = 500) {
  std::vector<T> result(vector_size, T{});

  if (true_percentage != kAlternatingOrNa) {
    std::default_random_engine engine(seed);
    std::bernoulli_distribution pick_second(static_cast<double>(true_percentage) /
                                            100.0);
    // auto&& so that the proxy references of std::vector<bool> bind as well.
    for (auto&& slot : result) {
      slot = sample_values[pick_second(engine)];
    }
    return result;
  }

  int counter = 0;
  for (auto&& slot : result) {
    slot = counter++ % 2;
  }
  return result;
}
125 
126 template <typename ParquetType>
TableFromVector(const std::vector<typename ParquetType::c_type> & vec,bool nullable,int64_t null_percentage=kAlternatingOrNa)127 std::shared_ptr<::arrow::Table> TableFromVector(
128     const std::vector<typename ParquetType::c_type>& vec, bool nullable,
129     int64_t null_percentage = kAlternatingOrNa) {
130   if (!nullable) {
131     ARROW_CHECK_EQ(null_percentage, kAlternatingOrNa);
132   }
133   std::shared_ptr<::arrow::DataType> type = std::make_shared<ArrowType<ParquetType>>();
134   NumericBuilder<ArrowType<ParquetType>> builder;
135   if (nullable) {
136     // Note true values select index 1 of sample_values
137     auto valid_bytes = RandomVector<uint8_t>(/*true_percentage=*/null_percentage,
138                                              vec.size(), /*sample_values=*/{1, 0});
139     EXIT_NOT_OK(builder.AppendValues(vec.data(), vec.size(), valid_bytes.data()));
140   } else {
141     EXIT_NOT_OK(builder.AppendValues(vec.data(), vec.size(), nullptr));
142   }
143   std::shared_ptr<::arrow::Array> array;
144   EXIT_NOT_OK(builder.Finish(&array));
145 
146   auto field = ::arrow::field("column", type, nullable);
147   auto schema = ::arrow::schema({field});
148   return ::arrow::Table::Make(schema, {array});
149 }
150 
151 template <>
TableFromVector(const std::vector<bool> & vec,bool nullable,int64_t null_percentage)152 std::shared_ptr<::arrow::Table> TableFromVector<BooleanType>(const std::vector<bool>& vec,
153                                                              bool nullable,
154                                                              int64_t null_percentage) {
155   BooleanBuilder builder;
156   if (nullable) {
157     auto valid_bytes = RandomVector<bool>(/*true_percentage=*/null_percentage, vec.size(),
158                                           {true, false});
159     EXIT_NOT_OK(builder.AppendValues(vec, valid_bytes));
160   } else {
161     EXIT_NOT_OK(builder.AppendValues(vec));
162   }
163   std::shared_ptr<::arrow::Array> array;
164   EXIT_NOT_OK(builder.Finish(&array));
165 
166   auto field = ::arrow::field("column", ::arrow::boolean(), nullable);
167   auto schema = std::make_shared<::arrow::Schema>(
168       std::vector<std::shared_ptr<::arrow::Field>>({field}));
169   return ::arrow::Table::Make(schema, {array});
170 }
171 
172 template <bool nullable, typename ParquetType>
BM_WriteColumn(::benchmark::State & state)173 static void BM_WriteColumn(::benchmark::State& state) {
174   using T = typename ParquetType::c_type;
175   std::vector<T> values(BENCHMARK_SIZE, static_cast<T>(128));
176   std::shared_ptr<::arrow::Table> table = TableFromVector<ParquetType>(values, nullable);
177 
178   while (state.KeepRunning()) {
179     auto output = CreateOutputStream();
180     EXIT_NOT_OK(
181         WriteTable(*table, ::arrow::default_memory_pool(), output, BENCHMARK_SIZE));
182   }
183   SetBytesProcessed<nullable, ParquetType>(state);
184 }
185 
186 BENCHMARK_TEMPLATE2(BM_WriteColumn, false, Int32Type);
187 BENCHMARK_TEMPLATE2(BM_WriteColumn, true, Int32Type);
188 
189 BENCHMARK_TEMPLATE2(BM_WriteColumn, false, Int64Type);
190 BENCHMARK_TEMPLATE2(BM_WriteColumn, true, Int64Type);
191 
192 BENCHMARK_TEMPLATE2(BM_WriteColumn, false, DoubleType);
193 BENCHMARK_TEMPLATE2(BM_WriteColumn, true, DoubleType);
194 
195 BENCHMARK_TEMPLATE2(BM_WriteColumn, false, BooleanType);
196 BENCHMARK_TEMPLATE2(BM_WriteColumn, true, BooleanType);
197 
// The pair of candidate values a generated column is drawn from.
// Numeric types use {127, 128}.
template <typename T>
struct Examples {
  static constexpr std::array<T, 2> values() { return std::array<T, 2>{{127, 128}}; }
};

// Booleans only have two values to choose from: {false, true}.
template <>
struct Examples<bool> {
  static constexpr std::array<bool, 2> values() {
    return std::array<bool, 2>{{false, true}};
  }
};
207 
BenchmarkReadTable(::benchmark::State & state,const::arrow::Table & table,int64_t num_values=-1,int64_t bytes_per_value=-1)208 static void BenchmarkReadTable(::benchmark::State& state, const ::arrow::Table& table,
209                                int64_t num_values = -1, int64_t bytes_per_value = -1) {
210   auto output = CreateOutputStream();
211   EXIT_NOT_OK(
212       WriteTable(table, ::arrow::default_memory_pool(), output, table.num_rows()));
213   PARQUET_ASSIGN_OR_THROW(auto buffer, output->Finish());
214 
215   while (state.KeepRunning()) {
216     auto reader =
217         ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
218     std::unique_ptr<FileReader> arrow_reader;
219     EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
220                                  &arrow_reader));
221     std::shared_ptr<::arrow::Table> table;
222     EXIT_NOT_OK(arrow_reader->ReadTable(&table));
223   }
224 
225   if (num_values == -1) {
226     num_values = table.num_rows();
227   }
228   state.SetItemsProcessed(num_values * state.iterations());
229   if (bytes_per_value != -1) {
230     state.SetBytesProcessed(num_values * state.iterations() * bytes_per_value);
231   }
232 }
233 
BenchmarkReadArray(::benchmark::State & state,const std::shared_ptr<Array> & array,bool nullable,int64_t num_values=-1,int64_t bytes_per_value=-1)234 static void BenchmarkReadArray(::benchmark::State& state,
235                                const std::shared_ptr<Array>& array, bool nullable,
236                                int64_t num_values = -1, int64_t bytes_per_value = -1) {
237   auto schema = ::arrow::schema({field("s", array->type(), nullable)});
238   auto table = ::arrow::Table::Make(schema, {array}, array->length());
239 
240   EXIT_NOT_OK(table->Validate());
241 
242   BenchmarkReadTable(state, *table, num_values, bytes_per_value);
243 }
244 
245 //
246 // Benchmark reading a primitive column
247 //
248 
249 template <bool nullable, typename ParquetType>
BM_ReadColumn(::benchmark::State & state)250 static void BM_ReadColumn(::benchmark::State& state) {
251   using T = typename ParquetType::c_type;
252 
253   auto values = RandomVector<T>(/*percentage=*/state.range(1), BENCHMARK_SIZE,
254                                 Examples<T>::values());
255 
256   std::shared_ptr<::arrow::Table> table =
257       TableFromVector<ParquetType>(values, nullable, state.range(0));
258 
259   BenchmarkReadTable(state, *table, table->num_rows(),
260                      sizeof(typename ParquetType::c_type));
261 }
262 
263 // There are two parameters here that cover different data distributions.
264 // null_percentage governs distribution and therefore runs of null values.
265 // first_value_percentage governs distribution of values (we select from 1 of 2)
266 // so when 0 or 100 RLE is triggered all the time.  When a value in the range (0, 100)
267 // there will be some percentage of RLE encoded values and some percentage of literal
268 // encoded values (RLE is much less likely with percentages close to 50).
269 BENCHMARK_TEMPLATE2(BM_ReadColumn, false, Int32Type)
270     ->Args({/*null_percentage=*/kAlternatingOrNa, 1})
271     ->Args({/*null_percentage=*/kAlternatingOrNa, 10})
272     ->Args({/*null_percentage=*/kAlternatingOrNa, 50});
273 
274 BENCHMARK_TEMPLATE2(BM_ReadColumn, true, Int32Type)
275     ->Args({/*null_percentage=*/kAlternatingOrNa, /*first_value_percentage=*/0})
276     ->Args({/*null_percentage=*/1, /*first_value_percentage=*/1})
277     ->Args({/*null_percentage=*/10, /*first_value_percentage=*/10})
278     ->Args({/*null_percentage=*/25, /*first_value_percentage=*/5})
279     ->Args({/*null_percentage=*/50, /*first_value_percentage=*/50})
280     ->Args({/*null_percentage=*/50, /*first_value_percentage=*/0})
281     ->Args({/*null_percentage=*/99, /*first_value_percentage=*/50})
282     ->Args({/*null_percentage=*/99, /*first_value_percentage=*/0});
283 
284 BENCHMARK_TEMPLATE2(BM_ReadColumn, false, Int64Type)
285     ->Args({/*null_percentage=*/kAlternatingOrNa, 1})
286     ->Args({/*null_percentage=*/kAlternatingOrNa, 10})
287     ->Args({/*null_percentage=*/kAlternatingOrNa, 50});
288 BENCHMARK_TEMPLATE2(BM_ReadColumn, true, Int64Type)
289     ->Args({/*null_percentage=*/kAlternatingOrNa, /*first_value_percentage=*/0})
290     ->Args({/*null_percentage=*/1, /*first_value_percentage=*/1})
291     ->Args({/*null_percentage=*/5, /*first_value_percentage=*/5})
292     ->Args({/*null_percentage=*/10, /*first_value_percentage=*/5})
293     ->Args({/*null_percentage=*/25, /*first_value_percentage=*/10})
294     ->Args({/*null_percentage=*/30, /*first_value_percentage=*/10})
295     ->Args({/*null_percentage=*/35, /*first_value_percentage=*/10})
296     ->Args({/*null_percentage=*/45, /*first_value_percentage=*/25})
297     ->Args({/*null_percentage=*/50, /*first_value_percentage=*/50})
298     ->Args({/*null_percentage=*/50, /*first_value_percentage=*/1})
299     ->Args({/*null_percentage=*/75, /*first_value_percentage=*/1})
300     ->Args({/*null_percentage=*/99, /*first_value_percentage=*/50})
301     ->Args({/*null_percentage=*/99, /*first_value_percentage=*/0});
302 
303 BENCHMARK_TEMPLATE2(BM_ReadColumn, false, DoubleType)
304     ->Args({kAlternatingOrNa, 0})
305     ->Args({kAlternatingOrNa, 20});
306 // Less coverage because int64_t should be pretty good representation for nullability and
307 // repeating values.
308 BENCHMARK_TEMPLATE2(BM_ReadColumn, true, DoubleType)
309     ->Args({/*null_percentage=*/kAlternatingOrNa, /*first_value_percentage=*/0})
310     ->Args({/*null_percentage=*/10, /*first_value_percentage=*/50})
311     ->Args({/*null_percentage=*/25, /*first_value_percentage=*/25});
312 
313 BENCHMARK_TEMPLATE2(BM_ReadColumn, false, BooleanType)
314     ->Args({kAlternatingOrNa, 0})
315     ->Args({1, 20});
316 BENCHMARK_TEMPLATE2(BM_ReadColumn, true, BooleanType)
317     ->Args({kAlternatingOrNa, 1})
318     ->Args({5, 10});
319 
320 //
321 // Benchmark reading a nested column
322 //
323 
324 const std::vector<int64_t> kNestedNullPercents = {0, 1, 50, 99};
325 
326 // XXX We can use ArgsProduct() starting from Benchmark 1.5.2
NestedReadArguments(::benchmark::internal::Benchmark * b)327 static void NestedReadArguments(::benchmark::internal::Benchmark* b) {
328   for (const auto null_percentage : kNestedNullPercents) {
329     b->Arg(null_percentage);
330   }
331 }
332 
MakeStructArray(::arrow::random::RandomArrayGenerator * rng,const ArrayVector & children,double null_probability,bool propagate_validity=false)333 static std::shared_ptr<Array> MakeStructArray(::arrow::random::RandomArrayGenerator* rng,
334                                               const ArrayVector& children,
335                                               double null_probability,
336                                               bool propagate_validity = false) {
337   ARROW_CHECK_GT(children.size(), 0);
338   const int64_t length = children[0]->length();
339 
340   std::shared_ptr<::arrow::Buffer> null_bitmap;
341   if (null_probability > 0.0) {
342     null_bitmap = rng->NullBitmap(length, null_probability);
343     if (propagate_validity) {
344       // HACK: the Parquet writer currently doesn't allow non-empty list
345       // entries where a parent node is null (for instance, a struct-of-list
346       // where the outer struct is marked null but the inner list value is
347       // non-empty).
348       for (const auto& child : children) {
349         null_bitmap = *::arrow::internal::BitmapOr(
350             ::arrow::default_memory_pool(), null_bitmap->data(), 0,
351             child->null_bitmap_data(), 0, length, 0);
352       }
353     }
354   }
355   FieldVector fields(children.size());
356   char field_name = 'a';
357   for (size_t i = 0; i < children.size(); ++i) {
358     fields[i] = field(std::string{field_name++}, children[i]->type(),
359                       /*nullable=*/null_probability > 0.0);
360   }
361   return *::arrow::StructArray::Make(children, std::move(fields), null_bitmap);
362 }
363 
364 // Make a (int32, int64) struct array
MakeStructArray(::arrow::random::RandomArrayGenerator * rng,int64_t size,double null_probability)365 static std::shared_ptr<Array> MakeStructArray(::arrow::random::RandomArrayGenerator* rng,
366                                               int64_t size, double null_probability) {
367   auto values1 = rng->Int32(size, -5, 5, null_probability);
368   auto values2 = rng->Int64(size, -12345678912345LL, 12345678912345LL, null_probability);
369   return MakeStructArray(rng, {values1, values2}, null_probability);
370 }
371 
BM_ReadStructColumn(::benchmark::State & state)372 static void BM_ReadStructColumn(::benchmark::State& state) {
373   constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
374   const double null_probability = static_cast<double>(state.range(0)) / 100.0;
375   const bool nullable = (null_probability != 0.0);
376 
377   ARROW_CHECK_GE(null_probability, 0.0);
378 
379   const int64_t kBytesPerValue = sizeof(int32_t) + sizeof(int64_t);
380 
381   ::arrow::random::RandomArrayGenerator rng(42);
382   auto array = MakeStructArray(&rng, kNumValues, null_probability);
383 
384   BenchmarkReadArray(state, array, nullable, kNumValues, kBytesPerValue);
385 }
386 
387 BENCHMARK(BM_ReadStructColumn)->Apply(NestedReadArguments);
388 
BM_ReadStructOfStructColumn(::benchmark::State & state)389 static void BM_ReadStructOfStructColumn(::benchmark::State& state) {
390   constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
391   const double null_probability = static_cast<double>(state.range(0)) / 100.0;
392   const bool nullable = (null_probability != 0.0);
393 
394   ARROW_CHECK_GE(null_probability, 0.0);
395 
396   const int64_t kBytesPerValue = 2 * (sizeof(int32_t) + sizeof(int64_t));
397 
398   ::arrow::random::RandomArrayGenerator rng(42);
399   auto values1 = MakeStructArray(&rng, kNumValues, null_probability);
400   auto values2 = MakeStructArray(&rng, kNumValues, null_probability);
401   auto array = MakeStructArray(&rng, {values1, values2}, null_probability);
402 
403   BenchmarkReadArray(state, array, nullable, kNumValues, kBytesPerValue);
404 }
405 
406 BENCHMARK(BM_ReadStructOfStructColumn)->Apply(NestedReadArguments);
407 
BM_ReadStructOfListColumn(::benchmark::State & state)408 static void BM_ReadStructOfListColumn(::benchmark::State& state) {
409   constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
410   const double null_probability = static_cast<double>(state.range(0)) / 100.0;
411   const bool nullable = (null_probability != 0.0);
412 
413   ARROW_CHECK_GE(null_probability, 0.0);
414 
415   ::arrow::random::RandomArrayGenerator rng(42);
416 
417   const int64_t kBytesPerValue = sizeof(int32_t) + sizeof(int64_t);
418 
419   auto values1 = rng.Int32(kNumValues, -5, 5, null_probability);
420   auto values2 =
421       rng.Int64(kNumValues, -12345678912345LL, 12345678912345LL, null_probability);
422   auto list1 = rng.List(*values1, kNumValues / 10, null_probability);
423   auto list2 = rng.List(*values2, kNumValues / 10, null_probability);
424   auto array = MakeStructArray(&rng, {list1, list2}, null_probability,
425                                /*propagate_validity =*/true);
426 
427   BenchmarkReadArray(state, array, nullable, kNumValues, kBytesPerValue);
428 }
429 
430 BENCHMARK(BM_ReadStructOfListColumn)->Apply(NestedReadArguments);
431 
BM_ReadListColumn(::benchmark::State & state)432 static void BM_ReadListColumn(::benchmark::State& state) {
433   constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
434   const double null_probability = static_cast<double>(state.range(0)) / 100.0;
435   const bool nullable = (null_probability != 0.0);
436 
437   ARROW_CHECK_GE(null_probability, 0.0);
438 
439   ::arrow::random::RandomArrayGenerator rng(42);
440 
441   auto values = rng.Int64(kNumValues, /*min=*/-5, /*max=*/5, null_probability);
442   const int64_t kBytesPerValue = sizeof(int64_t);
443 
444   auto array = rng.List(*values, kNumValues / 10, null_probability);
445 
446   BenchmarkReadArray(state, array, nullable, kNumValues, kBytesPerValue);
447 }
448 
449 BENCHMARK(BM_ReadListColumn)->Apply(NestedReadArguments);
450 
BM_ReadListOfStructColumn(::benchmark::State & state)451 static void BM_ReadListOfStructColumn(::benchmark::State& state) {
452   constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
453   const double null_probability = static_cast<double>(state.range(0)) / 100.0;
454   const bool nullable = (null_probability != 0.0);
455 
456   ARROW_CHECK_GE(null_probability, 0.0);
457 
458   ::arrow::random::RandomArrayGenerator rng(42);
459 
460   auto values = MakeStructArray(&rng, kNumValues, null_probability);
461   const int64_t kBytesPerValue = sizeof(int32_t) + sizeof(int64_t);
462 
463   auto array = rng.List(*values, kNumValues / 10, null_probability);
464 
465   BenchmarkReadArray(state, array, nullable, kNumValues, kBytesPerValue);
466 }
467 
468 BENCHMARK(BM_ReadListOfStructColumn)->Apply(NestedReadArguments);
469 
BM_ReadListOfListColumn(::benchmark::State & state)470 static void BM_ReadListOfListColumn(::benchmark::State& state) {
471   constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
472   const double null_probability = static_cast<double>(state.range(0)) / 100.0;
473   const bool nullable = (null_probability != 0.0);
474 
475   ARROW_CHECK_GE(null_probability, 0.0);
476 
477   ::arrow::random::RandomArrayGenerator rng(42);
478 
479   auto values = rng.Int64(kNumValues, /*min=*/-5, /*max=*/5, null_probability);
480   const int64_t kBytesPerValue = sizeof(int64_t);
481 
482   auto inner = rng.List(*values, kNumValues / 10, null_probability);
483   auto array = rng.List(*inner, kNumValues / 100, null_probability);
484 
485   BenchmarkReadArray(state, array, nullable, kNumValues, kBytesPerValue);
486 }
487 
488 BENCHMARK(BM_ReadListOfListColumn)->Apply(NestedReadArguments);
489 
490 //
491 // Benchmark different ways of reading select row groups
492 //
493 
BM_ReadIndividualRowGroups(::benchmark::State & state)494 static void BM_ReadIndividualRowGroups(::benchmark::State& state) {
495   std::vector<int64_t> values(BENCHMARK_SIZE, 128);
496   std::shared_ptr<::arrow::Table> table = TableFromVector<Int64Type>(values, true);
497   auto output = CreateOutputStream();
498   // This writes 10 RowGroups
499   EXIT_NOT_OK(
500       WriteTable(*table, ::arrow::default_memory_pool(), output, BENCHMARK_SIZE / 10));
501 
502   PARQUET_ASSIGN_OR_THROW(auto buffer, output->Finish());
503 
504   while (state.KeepRunning()) {
505     auto reader =
506         ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
507     std::unique_ptr<FileReader> arrow_reader;
508     EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
509                                  &arrow_reader));
510 
511     std::vector<std::shared_ptr<::arrow::Table>> tables;
512     for (int i = 0; i < arrow_reader->num_row_groups(); i++) {
513       // Only read the even numbered RowGroups
514       if ((i % 2) == 0) {
515         std::shared_ptr<::arrow::Table> table;
516         EXIT_NOT_OK(arrow_reader->RowGroup(i)->ReadTable(&table));
517         tables.push_back(table);
518       }
519     }
520 
521     std::shared_ptr<::arrow::Table> final_table;
522     PARQUET_ASSIGN_OR_THROW(final_table, ConcatenateTables(tables));
523   }
524   SetBytesProcessed<true, Int64Type>(state);
525 }
526 
527 BENCHMARK(BM_ReadIndividualRowGroups);
528 
BM_ReadMultipleRowGroups(::benchmark::State & state)529 static void BM_ReadMultipleRowGroups(::benchmark::State& state) {
530   std::vector<int64_t> values(BENCHMARK_SIZE, 128);
531   std::shared_ptr<::arrow::Table> table = TableFromVector<Int64Type>(values, true);
532   auto output = CreateOutputStream();
533   // This writes 10 RowGroups
534   EXIT_NOT_OK(
535       WriteTable(*table, ::arrow::default_memory_pool(), output, BENCHMARK_SIZE / 10));
536   PARQUET_ASSIGN_OR_THROW(auto buffer, output->Finish());
537 
538   while (state.KeepRunning()) {
539     auto reader =
540         ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
541     std::unique_ptr<FileReader> arrow_reader;
542     EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
543                                  &arrow_reader));
544 
545     std::vector<std::shared_ptr<::arrow::Table>> tables;
546     std::vector<int> rgs;
547     for (int i = 0; i < arrow_reader->num_row_groups(); i++) {
548       // Only read the even numbered RowGroups
549       if ((i % 2) == 0) {
550         rgs.push_back(i);
551       }
552     }
553 
554     std::shared_ptr<::arrow::Table> table;
555     EXIT_NOT_OK(arrow_reader->ReadRowGroups(rgs, &table));
556   }
557   SetBytesProcessed<true, Int64Type>(state);
558 }
559 
560 BENCHMARK(BM_ReadMultipleRowGroups);
561 
562 }  // namespace benchmark
563 
564 }  // namespace parquet
565