1 // Copyright 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "google/cloud/bigtable/benchmarks/benchmark.h"
16 #include <chrono>
17 #include <future>
18 #include <iomanip>
19 #include <iostream>
20 #include <sstream>
21 
22 /**
23  * @file
24  *
25  * Measure the throughput of `bigtable::Table::ReadRows()`.
26  *
27  * This benchmark measures the throughput of `bigtable::Table::ReadRows()` on a
28  * "typical" table used for serving data.  The benchmark:
29  * - Creates a table with 10,000,000 rows, each row with a single column family,
30  *   but with 10 columns.
31  * - The name of the table starts with `scant`, followed by random characters.
32  * - If there is a collision on the table name the benchmark aborts immediately.
33  * - The benchmark populates the table during an initial phase.  The benchmark
34  *   uses `BulkApply()` to populate the table, multiple threads to populate
35  *   in parallel, and provides an initial split hint when creating the table.
36  * - The benchmark reports the throughput of this bulk upload phase.
37  *
38  * After successfully uploading the initial data, the main phase of the
39  * benchmark starts. During this phase the benchmark will:
40  *
41  * - Execute the following block with different scan sizes:
42  *   - Execute the following loop for S seconds:
43  *     - Pick one of the 10,000,000 keys at random, with uniform probability.
 *     - Scan the selected number of rows starting at the key picked above.
45  *     - Go back and pick a new random key.
46  *
 * The benchmark reports the throughput in rows per second for scans of
 * 100, 1,000, and 10,000 rows.
49  *
50  * Using a command-line parameter the benchmark can be configured to create a
51  * local gRPC server that implements the Cloud Bigtable APIs used by the
52  * benchmark.  If this parameter is not used, the benchmark uses the default
53  * configuration, that is, a production instance of Cloud Bigtable unless the
54  * CLOUD_BIGTABLE_EMULATOR environment variable is set.
55  */
56 
57 /// Helper functions and types for the scan_throughput_benchmark.
58 namespace {
59 namespace bigtable = google::cloud::bigtable;
60 using bigtable::benchmarks::Benchmark;
61 using bigtable::benchmarks::BenchmarkResult;
62 using bigtable::benchmarks::FormatDuration;
63 using bigtable::benchmarks::kColumnFamily;
64 
65 constexpr int kScanSizes[] = {100, 1000, 10000};
66 
67 /// Run an iteration of the test.
68 BenchmarkResult RunBenchmark(bigtable::benchmarks::Benchmark const& benchmark,
69                              std::shared_ptr<bigtable::DataClient> data_client,
70                              long table_size,  // NOLINT(google-runtime-int)
71                              std::string app_profile_id,
72                              std::string const& table_id,
73                              long scan_size,  // NOLINT(google-runtime-int)
74                              std::chrono::seconds test_duration);
75 }  // anonymous namespace
76 
main(int argc,char * argv[])77 int main(int argc, char* argv[]) {
78   auto setup = bigtable::benchmarks::MakeBenchmarkSetup("scant", argc, argv);
79   if (!setup) {
80     std::cerr << setup.status() << "\n";
81     return -1;
82   }
83 
84   Benchmark benchmark(*setup);
85 
86   // Create and populate the table for the benchmark.
87   benchmark.CreateTable();
88   auto populate_results = benchmark.PopulateTable();
89   Benchmark::PrintThroughputResult(std::cout, "scant", "Upload",
90                                    *populate_results);
91 
92   auto data_client = benchmark.MakeDataClient();
93   std::map<std::string, BenchmarkResult> results_by_size;
94   for (auto scan_size : kScanSizes) {
95     std::cout << "# Running benchmark [" << scan_size << "] " << std::flush;
96     auto start = std::chrono::steady_clock::now();
97     auto combined = RunBenchmark(benchmark, data_client, setup->table_size(),
98                                  setup->app_profile_id(), setup->table_id(),
99                                  scan_size, setup->test_duration());
100     using std::chrono::duration_cast;
101     combined.elapsed = duration_cast<std::chrono::milliseconds>(
102         std::chrono::steady_clock::now() - start);
103     std::cout << " DONE. Elapsed=" << FormatDuration(combined.elapsed)
104               << ", Ops=" << combined.operations.size()
105               << ", Rows=" << combined.row_count << "\n";
106     auto op_name = "Scan(" + std::to_string(scan_size) + ")";
107     Benchmark::PrintLatencyResult(std::cout, "scant", op_name, combined);
108     results_by_size[op_name] = std::move(combined);
109   }
110 
111   std::cout << bigtable::benchmarks::Benchmark::ResultsCsvHeader() << "\n";
112   benchmark.PrintResultCsv(std::cout, "scant", "BulkApply()", "Latency",
113                            *populate_results);
114   for (auto& kv : results_by_size) {
115     benchmark.PrintResultCsv(std::cout, "scant", kv.first, "IterationTime",
116                              kv.second);
117   }
118 
119   benchmark.DeleteTable();
120 
121   return 0;
122 }
123 
124 namespace {
125 
RunBenchmark(bigtable::benchmarks::Benchmark const & benchmark,std::shared_ptr<bigtable::DataClient> data_client,long table_size,std::string app_profile_id,std::string const & table_id,long scan_size,std::chrono::seconds test_duration)126 BenchmarkResult RunBenchmark(bigtable::benchmarks::Benchmark const& benchmark,
127                              std::shared_ptr<bigtable::DataClient> data_client,
128                              long table_size,  // NOLINT(google-runtime-int)
129                              std::string app_profile_id,
130                              std::string const& table_id,
131                              long scan_size,  // NOLINT(google-runtime-int)
132                              std::chrono::seconds test_duration) {
133   BenchmarkResult result = {};
134 
135   bigtable::Table table(std::move(data_client), std::move(app_profile_id),
136                         table_id);
137 
138   auto generator = google::cloud::internal::MakeDefaultPRNG();
139   // NOLINTNEXTLINE(google-runtime-int)
140   std::uniform_int_distribution<long> prng(0, table_size - scan_size - 1);
141 
142   auto test_start = std::chrono::steady_clock::now();
143   while (std::chrono::steady_clock::now() < test_start + test_duration) {
144     auto range =
145         bigtable::RowRange::StartingAt(benchmark.MakeKey(prng(generator)));
146 
147     long count = 0;  // NOLINT(google-runtime-int)
148     auto op = [&count, &table, &scan_size, &range]() -> google::cloud::Status {
149       auto reader =
150           table.ReadRows(bigtable::RowSet(std::move(range)), scan_size,
151                          bigtable::Filter::ColumnRangeClosed(
152                              kColumnFamily, "field0", "field9"));
153       for (auto& row : reader) {
154         if (!row) {
155           return row.status();
156         }
157         ++count;
158       }
159       return google::cloud::Status{};
160     };
161     result.operations.push_back(Benchmark::TimeOperation(op));
162     result.row_count += count;
163   }
164   return result;
165 }
166 
167 }  // anonymous namespace
168