// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "google/cloud/bigtable/benchmarks/benchmark.h"
#include <chrono>
#include <future>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>

/**
 * @file
 *
 * Measure the throughput of `bigtable::Table::ReadRows()`.
 *
 * This benchmark measures the throughput of `bigtable::Table::ReadRows()` on a
 * "typical" table used for serving data. The benchmark:
 * - Creates a table with 10,000,000 rows, each row with a single column family,
 *   but with 10 columns.
 * - The name of the table starts with `scant`, followed by random characters.
 * - If there is a collision on the table name the benchmark aborts immediately.
 * - The benchmark populates the table during an initial phase. The benchmark
 *   uses `BulkApply()` to populate the table, multiple threads to populate
 *   in parallel, and provides an initial split hint when creating the table.
 * - The benchmark reports the throughput of this bulk upload phase.
 *
 * After successfully uploading the initial data, the main phase of the
 * benchmark starts. During this phase the benchmark will:
 *
 * - Execute the following block with different scan sizes:
 *   - Execute the following loop for S seconds:
 *     - Pick one of the 10,000,000 keys at random, with uniform probability.
 *     - Scan that number of rows starting at the key selected above.
 *     - Go back and pick a new random key.
 *
 * The benchmark will report the throughput in rows per second for scans of
 * 100, 1,000, and 10,000 rows.
 *
 * Using a command-line parameter the benchmark can be configured to create a
 * local gRPC server that implements the Cloud Bigtable APIs used by the
 * benchmark. If this parameter is not used, the benchmark uses the default
 * configuration, that is, a production instance of Cloud Bigtable unless the
 * CLOUD_BIGTABLE_EMULATOR environment variable is set.
 */

57 /// Helper functions and types for the scan_throughput_benchmark.
58 namespace {
59 namespace bigtable = google::cloud::bigtable;
60 using bigtable::benchmarks::Benchmark;
61 using bigtable::benchmarks::BenchmarkResult;
62 using bigtable::benchmarks::FormatDuration;
63 using bigtable::benchmarks::kColumnFamily;
64
65 constexpr int kScanSizes[] = {100, 1000, 10000};
66
67 /// Run an iteration of the test.
68 BenchmarkResult RunBenchmark(bigtable::benchmarks::Benchmark const& benchmark,
69 std::shared_ptr<bigtable::DataClient> data_client,
70 long table_size, // NOLINT(google-runtime-int)
71 std::string app_profile_id,
72 std::string const& table_id,
73 long scan_size, // NOLINT(google-runtime-int)
74 std::chrono::seconds test_duration);
75 } // anonymous namespace
76
main(int argc,char * argv[])77 int main(int argc, char* argv[]) {
78 auto setup = bigtable::benchmarks::MakeBenchmarkSetup("scant", argc, argv);
79 if (!setup) {
80 std::cerr << setup.status() << "\n";
81 return -1;
82 }
83
84 Benchmark benchmark(*setup);
85
86 // Create and populate the table for the benchmark.
87 benchmark.CreateTable();
88 auto populate_results = benchmark.PopulateTable();
89 Benchmark::PrintThroughputResult(std::cout, "scant", "Upload",
90 *populate_results);
91
92 auto data_client = benchmark.MakeDataClient();
93 std::map<std::string, BenchmarkResult> results_by_size;
94 for (auto scan_size : kScanSizes) {
95 std::cout << "# Running benchmark [" << scan_size << "] " << std::flush;
96 auto start = std::chrono::steady_clock::now();
97 auto combined = RunBenchmark(benchmark, data_client, setup->table_size(),
98 setup->app_profile_id(), setup->table_id(),
99 scan_size, setup->test_duration());
100 using std::chrono::duration_cast;
101 combined.elapsed = duration_cast<std::chrono::milliseconds>(
102 std::chrono::steady_clock::now() - start);
103 std::cout << " DONE. Elapsed=" << FormatDuration(combined.elapsed)
104 << ", Ops=" << combined.operations.size()
105 << ", Rows=" << combined.row_count << "\n";
106 auto op_name = "Scan(" + std::to_string(scan_size) + ")";
107 Benchmark::PrintLatencyResult(std::cout, "scant", op_name, combined);
108 results_by_size[op_name] = std::move(combined);
109 }
110
111 std::cout << bigtable::benchmarks::Benchmark::ResultsCsvHeader() << "\n";
112 benchmark.PrintResultCsv(std::cout, "scant", "BulkApply()", "Latency",
113 *populate_results);
114 for (auto& kv : results_by_size) {
115 benchmark.PrintResultCsv(std::cout, "scant", kv.first, "IterationTime",
116 kv.second);
117 }
118
119 benchmark.DeleteTable();
120
121 return 0;
122 }
123
124 namespace {
125
RunBenchmark(bigtable::benchmarks::Benchmark const & benchmark,std::shared_ptr<bigtable::DataClient> data_client,long table_size,std::string app_profile_id,std::string const & table_id,long scan_size,std::chrono::seconds test_duration)126 BenchmarkResult RunBenchmark(bigtable::benchmarks::Benchmark const& benchmark,
127 std::shared_ptr<bigtable::DataClient> data_client,
128 long table_size, // NOLINT(google-runtime-int)
129 std::string app_profile_id,
130 std::string const& table_id,
131 long scan_size, // NOLINT(google-runtime-int)
132 std::chrono::seconds test_duration) {
133 BenchmarkResult result = {};
134
135 bigtable::Table table(std::move(data_client), std::move(app_profile_id),
136 table_id);
137
138 auto generator = google::cloud::internal::MakeDefaultPRNG();
139 // NOLINTNEXTLINE(google-runtime-int)
140 std::uniform_int_distribution<long> prng(0, table_size - scan_size - 1);
141
142 auto test_start = std::chrono::steady_clock::now();
143 while (std::chrono::steady_clock::now() < test_start + test_duration) {
144 auto range =
145 bigtable::RowRange::StartingAt(benchmark.MakeKey(prng(generator)));
146
147 long count = 0; // NOLINT(google-runtime-int)
148 auto op = [&count, &table, &scan_size, &range]() -> google::cloud::Status {
149 auto reader =
150 table.ReadRows(bigtable::RowSet(std::move(range)), scan_size,
151 bigtable::Filter::ColumnRangeClosed(
152 kColumnFamily, "field0", "field9"));
153 for (auto& row : reader) {
154 if (!row) {
155 return row.status();
156 }
157 ++count;
158 }
159 return google::cloud::Status{};
160 };
161 result.operations.push_back(Benchmark::TimeOperation(op));
162 result.row_count += count;
163 }
164 return result;
165 }
166
167 } // anonymous namespace
168