1 // This source code is licensed under both the GPLv2 (found in the
2 // COPYING file in the root directory) and Apache 2.0 License
3 // (found in the LICENSE.Apache file in the root directory).
4 //
5 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 //
7 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10
11 #include "db/internal_stats.h"
12
13 #include <algorithm>
14 #include <cinttypes>
15 #include <cstddef>
16 #include <limits>
17 #include <sstream>
18 #include <string>
19 #include <utility>
20 #include <vector>
21
22 #include "cache/cache_entry_roles.h"
23 #include "cache/cache_entry_stats.h"
24 #include "db/column_family.h"
25 #include "db/db_impl/db_impl.h"
26 #include "port/port.h"
27 #include "rocksdb/system_clock.h"
28 #include "rocksdb/table.h"
29 #include "table/block_based/cachable_entry.h"
30 #include "util/string_util.h"
31
32 namespace ROCKSDB_NAMESPACE {
33
34 #ifndef ROCKSDB_LITE
35
// One entry per per-level compaction stat. Each LevelStat pairs an internal
// property name with the column header text (header_name) that
// PrintLevelStatsHeader() prints for that column.
const std::map<LevelStatType, LevelStat> InternalStats::compaction_level_stats =
    {
        {LevelStatType::NUM_FILES, LevelStat{"NumFiles", "Files"}},
        {LevelStatType::COMPACTED_FILES,
         LevelStat{"CompactedFiles", "CompactedFiles"}},
        {LevelStatType::SIZE_BYTES, LevelStat{"SizeBytes", "Size"}},
        {LevelStatType::SCORE, LevelStat{"Score", "Score"}},
        {LevelStatType::READ_GB, LevelStat{"ReadGB", "Read(GB)"}},
        {LevelStatType::RN_GB, LevelStat{"RnGB", "Rn(GB)"}},
        {LevelStatType::RNP1_GB, LevelStat{"Rnp1GB", "Rnp1(GB)"}},
        {LevelStatType::WRITE_GB, LevelStat{"WriteGB", "Write(GB)"}},
        {LevelStatType::W_NEW_GB, LevelStat{"WnewGB", "Wnew(GB)"}},
        {LevelStatType::MOVED_GB, LevelStat{"MovedGB", "Moved(GB)"}},
        {LevelStatType::WRITE_AMP, LevelStat{"WriteAmp", "W-Amp"}},
        {LevelStatType::READ_MBPS, LevelStat{"ReadMBps", "Rd(MB/s)"}},
        {LevelStatType::WRITE_MBPS, LevelStat{"WriteMBps", "Wr(MB/s)"}},
        {LevelStatType::COMP_SEC, LevelStat{"CompSec", "Comp(sec)"}},
        {LevelStatType::COMP_CPU_SEC,
         LevelStat{"CompMergeCPU", "CompMergeCPU(sec)"}},
        {LevelStatType::COMP_COUNT, LevelStat{"CompCount", "Comp(cnt)"}},
        {LevelStatType::AVG_SEC, LevelStat{"AvgSec", "Avg(sec)"}},
        {LevelStatType::KEY_IN, LevelStat{"KeyIn", "KeyIn"}},
        {LevelStatType::KEY_DROP, LevelStat{"KeyDrop", "KeyDrop"}},
        {LevelStatType::R_BLOB_GB, LevelStat{"RblobGB", "Rblob(GB)"}},
        {LevelStatType::W_BLOB_GB, LevelStat{"WblobGB", "Wblob(GB)"}},
};
62
// Maps each DB-wide internal stat counter to the property name used when the
// stats are reported in map form (see HandleDBMapStats in ppt_name_to_info).
const std::map<InternalStats::InternalDBStatsType, DBStatInfo>
    InternalStats::db_stats_type_to_info = {
        {InternalStats::kIntStatsWalFileBytes,
         DBStatInfo{"db.wal_bytes_written"}},
        {InternalStats::kIntStatsWalFileSynced, DBStatInfo{"db.wal_syncs"}},
        {InternalStats::kIntStatsBytesWritten,
         DBStatInfo{"db.user_bytes_written"}},
        {InternalStats::kIntStatsNumKeysWritten,
         DBStatInfo{"db.user_keys_written"}},
        {InternalStats::kIntStatsWriteDoneByOther,
         DBStatInfo{"db.user_writes_by_other"}},
        {InternalStats::kIntStatsWriteDoneBySelf,
         DBStatInfo{"db.user_writes_by_self"}},
        {InternalStats::kIntStatsWriteWithWal,
         DBStatInfo{"db.user_writes_with_wal"}},
        {InternalStats::kIntStatsWriteStallMicros,
         DBStatInfo{"db.user_write_stall_micros"}},
};
81
82 namespace {
const double kMB = 1048576.0;           // bytes per MiB (2^20)
const double kGB = kMB * 1024;          // bytes per GiB (2^30)
const double kMicrosInSec = 1000000.0;  // microseconds per second
86
PrintLevelStatsHeader(char * buf,size_t len,const std::string & cf_name,const std::string & group_by)87 void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name,
88 const std::string& group_by) {
89 int written_size =
90 snprintf(buf, len, "\n** Compaction Stats [%s] **\n", cf_name.c_str());
91 written_size = std::min(written_size, static_cast<int>(len));
92 auto hdr = [](LevelStatType t) {
93 return InternalStats::compaction_level_stats.at(t).header_name.c_str();
94 };
95 int line_size = snprintf(
96 buf + written_size, len - written_size,
97 "%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s "
98 "%s\n",
99 // Note that we skip COMPACTED_FILES and merge it with Files column
100 group_by.c_str(), hdr(LevelStatType::NUM_FILES),
101 hdr(LevelStatType::SIZE_BYTES), hdr(LevelStatType::SCORE),
102 hdr(LevelStatType::READ_GB), hdr(LevelStatType::RN_GB),
103 hdr(LevelStatType::RNP1_GB), hdr(LevelStatType::WRITE_GB),
104 hdr(LevelStatType::W_NEW_GB), hdr(LevelStatType::MOVED_GB),
105 hdr(LevelStatType::WRITE_AMP), hdr(LevelStatType::READ_MBPS),
106 hdr(LevelStatType::WRITE_MBPS), hdr(LevelStatType::COMP_SEC),
107 hdr(LevelStatType::COMP_CPU_SEC), hdr(LevelStatType::COMP_COUNT),
108 hdr(LevelStatType::AVG_SEC), hdr(LevelStatType::KEY_IN),
109 hdr(LevelStatType::KEY_DROP), hdr(LevelStatType::R_BLOB_GB),
110 hdr(LevelStatType::W_BLOB_GB));
111
112 written_size += line_size;
113 written_size = std::min(written_size, static_cast<int>(len));
114 snprintf(buf + written_size, len - written_size, "%s\n",
115 std::string(line_size, '-').c_str());
116 }
117
PrepareLevelStats(std::map<LevelStatType,double> * level_stats,int num_files,int being_compacted,double total_file_size,double score,double w_amp,const InternalStats::CompactionStats & stats)118 void PrepareLevelStats(std::map<LevelStatType, double>* level_stats,
119 int num_files, int being_compacted,
120 double total_file_size, double score, double w_amp,
121 const InternalStats::CompactionStats& stats) {
122 const uint64_t bytes_read = stats.bytes_read_non_output_levels +
123 stats.bytes_read_output_level +
124 stats.bytes_read_blob;
125 const uint64_t bytes_written = stats.bytes_written + stats.bytes_written_blob;
126 const int64_t bytes_new = stats.bytes_written - stats.bytes_read_output_level;
127 const double elapsed = (stats.micros + 1) / kMicrosInSec;
128
129 (*level_stats)[LevelStatType::NUM_FILES] = num_files;
130 (*level_stats)[LevelStatType::COMPACTED_FILES] = being_compacted;
131 (*level_stats)[LevelStatType::SIZE_BYTES] = total_file_size;
132 (*level_stats)[LevelStatType::SCORE] = score;
133 (*level_stats)[LevelStatType::READ_GB] = bytes_read / kGB;
134 (*level_stats)[LevelStatType::RN_GB] =
135 stats.bytes_read_non_output_levels / kGB;
136 (*level_stats)[LevelStatType::RNP1_GB] = stats.bytes_read_output_level / kGB;
137 (*level_stats)[LevelStatType::WRITE_GB] = stats.bytes_written / kGB;
138 (*level_stats)[LevelStatType::W_NEW_GB] = bytes_new / kGB;
139 (*level_stats)[LevelStatType::MOVED_GB] = stats.bytes_moved / kGB;
140 (*level_stats)[LevelStatType::WRITE_AMP] = w_amp;
141 (*level_stats)[LevelStatType::READ_MBPS] = bytes_read / kMB / elapsed;
142 (*level_stats)[LevelStatType::WRITE_MBPS] = bytes_written / kMB / elapsed;
143 (*level_stats)[LevelStatType::COMP_SEC] = stats.micros / kMicrosInSec;
144 (*level_stats)[LevelStatType::COMP_CPU_SEC] = stats.cpu_micros / kMicrosInSec;
145 (*level_stats)[LevelStatType::COMP_COUNT] = stats.count;
146 (*level_stats)[LevelStatType::AVG_SEC] =
147 stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count;
148 (*level_stats)[LevelStatType::KEY_IN] =
149 static_cast<double>(stats.num_input_records);
150 (*level_stats)[LevelStatType::KEY_DROP] =
151 static_cast<double>(stats.num_dropped_records);
152 (*level_stats)[LevelStatType::R_BLOB_GB] = stats.bytes_read_blob / kGB;
153 (*level_stats)[LevelStatType::W_BLOB_GB] = stats.bytes_written_blob / kGB;
154 }
155
PrintLevelStats(char * buf,size_t len,const std::string & name,const std::map<LevelStatType,double> & stat_value)156 void PrintLevelStats(char* buf, size_t len, const std::string& name,
157 const std::map<LevelStatType, double>& stat_value) {
158 snprintf(
159 buf, len,
160 "%4s " /* Level */
161 "%6d/%-3d " /* Files */
162 "%8s " /* Size */
163 "%5.1f " /* Score */
164 "%8.1f " /* Read(GB) */
165 "%7.1f " /* Rn(GB) */
166 "%8.1f " /* Rnp1(GB) */
167 "%9.1f " /* Write(GB) */
168 "%8.1f " /* Wnew(GB) */
169 "%9.1f " /* Moved(GB) */
170 "%5.1f " /* W-Amp */
171 "%8.1f " /* Rd(MB/s) */
172 "%8.1f " /* Wr(MB/s) */
173 "%9.2f " /* Comp(sec) */
174 "%17.2f " /* CompMergeCPU(sec) */
175 "%9d " /* Comp(cnt) */
176 "%8.3f " /* Avg(sec) */
177 "%7s " /* KeyIn */
178 "%6s " /* KeyDrop */
179 "%9.1f " /* Rblob(GB) */
180 "%9.1f\n", /* Wblob(GB) */
181 name.c_str(), static_cast<int>(stat_value.at(LevelStatType::NUM_FILES)),
182 static_cast<int>(stat_value.at(LevelStatType::COMPACTED_FILES)),
183 BytesToHumanString(
184 static_cast<uint64_t>(stat_value.at(LevelStatType::SIZE_BYTES)))
185 .c_str(),
186 stat_value.at(LevelStatType::SCORE),
187 stat_value.at(LevelStatType::READ_GB),
188 stat_value.at(LevelStatType::RN_GB),
189 stat_value.at(LevelStatType::RNP1_GB),
190 stat_value.at(LevelStatType::WRITE_GB),
191 stat_value.at(LevelStatType::W_NEW_GB),
192 stat_value.at(LevelStatType::MOVED_GB),
193 stat_value.at(LevelStatType::WRITE_AMP),
194 stat_value.at(LevelStatType::READ_MBPS),
195 stat_value.at(LevelStatType::WRITE_MBPS),
196 stat_value.at(LevelStatType::COMP_SEC),
197 stat_value.at(LevelStatType::COMP_CPU_SEC),
198 static_cast<int>(stat_value.at(LevelStatType::COMP_COUNT)),
199 stat_value.at(LevelStatType::AVG_SEC),
200 NumberToHumanString(
201 static_cast<std::int64_t>(stat_value.at(LevelStatType::KEY_IN)))
202 .c_str(),
203 NumberToHumanString(
204 static_cast<std::int64_t>(stat_value.at(LevelStatType::KEY_DROP)))
205 .c_str(),
206 stat_value.at(LevelStatType::R_BLOB_GB),
207 stat_value.at(LevelStatType::W_BLOB_GB));
208 }
209
PrintLevelStats(char * buf,size_t len,const std::string & name,int num_files,int being_compacted,double total_file_size,double score,double w_amp,const InternalStats::CompactionStats & stats)210 void PrintLevelStats(char* buf, size_t len, const std::string& name,
211 int num_files, int being_compacted, double total_file_size,
212 double score, double w_amp,
213 const InternalStats::CompactionStats& stats) {
214 std::map<LevelStatType, double> level_stats;
215 PrepareLevelStats(&level_stats, num_files, being_compacted, total_file_size,
216 score, w_amp, stats);
217 PrintLevelStats(buf, len, name, level_stats);
218 }
219
220 // Assumes that trailing numbers represent an optional argument. This requires
221 // property names to not end with numbers.
GetPropertyNameAndArg(const Slice & property)222 std::pair<Slice, Slice> GetPropertyNameAndArg(const Slice& property) {
223 Slice name = property, arg = property;
224 size_t sfx_len = 0;
225 while (sfx_len < property.size() &&
226 isdigit(property[property.size() - sfx_len - 1])) {
227 ++sfx_len;
228 }
229 name.remove_suffix(sfx_len);
230 arg.remove_prefix(property.size() - sfx_len);
231 return {name, arg};
232 }
233 } // anonymous namespace
234
// Building blocks for the public DB property names: a shared "rocksdb."
// prefix plus one suffix per property. The full names are assembled in the
// DB::Properties definitions below.
static const std::string rocksdb_prefix = "rocksdb.";

static const std::string num_files_at_level_prefix = "num-files-at-level";
static const std::string compression_ratio_at_level_prefix =
    "compression-ratio-at-level";
static const std::string allstats = "stats";
static const std::string sstables = "sstables";
static const std::string cfstats = "cfstats";
static const std::string cfstats_no_file_histogram =
    "cfstats-no-file-histogram";
static const std::string cf_file_histogram = "cf-file-histogram";
static const std::string dbstats = "dbstats";
static const std::string levelstats = "levelstats";
static const std::string block_cache_entry_stats = "block-cache-entry-stats";
static const std::string num_immutable_mem_table = "num-immutable-mem-table";
static const std::string num_immutable_mem_table_flushed =
    "num-immutable-mem-table-flushed";
static const std::string mem_table_flush_pending = "mem-table-flush-pending";
static const std::string compaction_pending = "compaction-pending";
static const std::string background_errors = "background-errors";
static const std::string cur_size_active_mem_table =
    "cur-size-active-mem-table";
static const std::string cur_size_all_mem_tables = "cur-size-all-mem-tables";
static const std::string size_all_mem_tables = "size-all-mem-tables";
static const std::string num_entries_active_mem_table =
    "num-entries-active-mem-table";
static const std::string num_entries_imm_mem_tables =
    "num-entries-imm-mem-tables";
static const std::string num_deletes_active_mem_table =
    "num-deletes-active-mem-table";
static const std::string num_deletes_imm_mem_tables =
    "num-deletes-imm-mem-tables";
static const std::string estimate_num_keys = "estimate-num-keys";
static const std::string estimate_table_readers_mem =
    "estimate-table-readers-mem";
static const std::string is_file_deletions_enabled =
    "is-file-deletions-enabled";
static const std::string num_snapshots = "num-snapshots";
static const std::string oldest_snapshot_time = "oldest-snapshot-time";
static const std::string oldest_snapshot_sequence = "oldest-snapshot-sequence";
static const std::string num_live_versions = "num-live-versions";
static const std::string current_version_number =
    "current-super-version-number";
static const std::string estimate_live_data_size = "estimate-live-data-size";
static const std::string min_log_number_to_keep_str = "min-log-number-to-keep";
static const std::string min_obsolete_sst_number_to_keep_str =
    "min-obsolete-sst-number-to-keep";
static const std::string base_level_str = "base-level";
static const std::string total_sst_files_size = "total-sst-files-size";
static const std::string live_sst_files_size = "live-sst-files-size";
static const std::string live_sst_files_size_at_temperature =
    "live-sst-files-size-at-temperature";
static const std::string estimate_pending_comp_bytes =
    "estimate-pending-compaction-bytes";
static const std::string aggregated_table_properties =
    "aggregated-table-properties";
static const std::string aggregated_table_properties_at_level =
    aggregated_table_properties + "-at-level";
static const std::string num_running_compactions = "num-running-compactions";
static const std::string num_running_flushes = "num-running-flushes";
static const std::string actual_delayed_write_rate =
    "actual-delayed-write-rate";
static const std::string is_write_stopped = "is-write-stopped";
static const std::string estimate_oldest_key_time = "estimate-oldest-key-time";
static const std::string block_cache_capacity = "block-cache-capacity";
static const std::string block_cache_usage = "block-cache-usage";
static const std::string block_cache_pinned_usage = "block-cache-pinned-usage";
static const std::string options_statistics = "options-statistics";
static const std::string num_blob_files = "num-blob-files";
static const std::string blob_stats = "blob-stats";
static const std::string total_blob_file_size = "total-blob-file-size";
static const std::string live_blob_file_size = "live-blob-file-size";
307
// Definitions of the DB::Properties constants (declared elsewhere): each full
// property name is the shared "rocksdb." prefix concatenated with one of the
// suffix strings defined above.
const std::string DB::Properties::kNumFilesAtLevelPrefix =
    rocksdb_prefix + num_files_at_level_prefix;
const std::string DB::Properties::kCompressionRatioAtLevelPrefix =
    rocksdb_prefix + compression_ratio_at_level_prefix;
const std::string DB::Properties::kStats = rocksdb_prefix + allstats;
const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables;
const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats;
const std::string DB::Properties::kCFStatsNoFileHistogram =
    rocksdb_prefix + cfstats_no_file_histogram;
const std::string DB::Properties::kCFFileHistogram =
    rocksdb_prefix + cf_file_histogram;
const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats;
const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats;
const std::string DB::Properties::kBlockCacheEntryStats =
    rocksdb_prefix + block_cache_entry_stats;
const std::string DB::Properties::kNumImmutableMemTable =
    rocksdb_prefix + num_immutable_mem_table;
const std::string DB::Properties::kNumImmutableMemTableFlushed =
    rocksdb_prefix + num_immutable_mem_table_flushed;
const std::string DB::Properties::kMemTableFlushPending =
    rocksdb_prefix + mem_table_flush_pending;
const std::string DB::Properties::kCompactionPending =
    rocksdb_prefix + compaction_pending;
const std::string DB::Properties::kNumRunningCompactions =
    rocksdb_prefix + num_running_compactions;
const std::string DB::Properties::kNumRunningFlushes =
    rocksdb_prefix + num_running_flushes;
const std::string DB::Properties::kBackgroundErrors =
    rocksdb_prefix + background_errors;
const std::string DB::Properties::kCurSizeActiveMemTable =
    rocksdb_prefix + cur_size_active_mem_table;
const std::string DB::Properties::kCurSizeAllMemTables =
    rocksdb_prefix + cur_size_all_mem_tables;
const std::string DB::Properties::kSizeAllMemTables =
    rocksdb_prefix + size_all_mem_tables;
const std::string DB::Properties::kNumEntriesActiveMemTable =
    rocksdb_prefix + num_entries_active_mem_table;
const std::string DB::Properties::kNumEntriesImmMemTables =
    rocksdb_prefix + num_entries_imm_mem_tables;
const std::string DB::Properties::kNumDeletesActiveMemTable =
    rocksdb_prefix + num_deletes_active_mem_table;
const std::string DB::Properties::kNumDeletesImmMemTables =
    rocksdb_prefix + num_deletes_imm_mem_tables;
const std::string DB::Properties::kEstimateNumKeys =
    rocksdb_prefix + estimate_num_keys;
const std::string DB::Properties::kEstimateTableReadersMem =
    rocksdb_prefix + estimate_table_readers_mem;
const std::string DB::Properties::kIsFileDeletionsEnabled =
    rocksdb_prefix + is_file_deletions_enabled;
const std::string DB::Properties::kNumSnapshots =
    rocksdb_prefix + num_snapshots;
const std::string DB::Properties::kOldestSnapshotTime =
    rocksdb_prefix + oldest_snapshot_time;
const std::string DB::Properties::kOldestSnapshotSequence =
    rocksdb_prefix + oldest_snapshot_sequence;
const std::string DB::Properties::kNumLiveVersions =
    rocksdb_prefix + num_live_versions;
const std::string DB::Properties::kCurrentSuperVersionNumber =
    rocksdb_prefix + current_version_number;
const std::string DB::Properties::kEstimateLiveDataSize =
    rocksdb_prefix + estimate_live_data_size;
const std::string DB::Properties::kMinLogNumberToKeep =
    rocksdb_prefix + min_log_number_to_keep_str;
const std::string DB::Properties::kMinObsoleteSstNumberToKeep =
    rocksdb_prefix + min_obsolete_sst_number_to_keep_str;
const std::string DB::Properties::kTotalSstFilesSize =
    rocksdb_prefix + total_sst_files_size;
const std::string DB::Properties::kLiveSstFilesSize =
    rocksdb_prefix + live_sst_files_size;
const std::string DB::Properties::kBaseLevel = rocksdb_prefix + base_level_str;
const std::string DB::Properties::kEstimatePendingCompactionBytes =
    rocksdb_prefix + estimate_pending_comp_bytes;
const std::string DB::Properties::kAggregatedTableProperties =
    rocksdb_prefix + aggregated_table_properties;
const std::string DB::Properties::kAggregatedTablePropertiesAtLevel =
    rocksdb_prefix + aggregated_table_properties_at_level;
const std::string DB::Properties::kActualDelayedWriteRate =
    rocksdb_prefix + actual_delayed_write_rate;
const std::string DB::Properties::kIsWriteStopped =
    rocksdb_prefix + is_write_stopped;
const std::string DB::Properties::kEstimateOldestKeyTime =
    rocksdb_prefix + estimate_oldest_key_time;
const std::string DB::Properties::kBlockCacheCapacity =
    rocksdb_prefix + block_cache_capacity;
const std::string DB::Properties::kBlockCacheUsage =
    rocksdb_prefix + block_cache_usage;
const std::string DB::Properties::kBlockCachePinnedUsage =
    rocksdb_prefix + block_cache_pinned_usage;
const std::string DB::Properties::kOptionsStatistics =
    rocksdb_prefix + options_statistics;
const std::string DB::Properties::kLiveSstFilesSizeAtTemperature =
    rocksdb_prefix + live_sst_files_size_at_temperature;
const std::string DB::Properties::kNumBlobFiles =
    rocksdb_prefix + num_blob_files;
const std::string DB::Properties::kBlobStats = rocksdb_prefix + blob_stats;
const std::string DB::Properties::kTotalBlobFileSize =
    rocksdb_prefix + total_blob_file_size;
const std::string DB::Properties::kLiveBlobFileSize =
    rocksdb_prefix + live_blob_file_size;
407
// Dispatch table from full property name to its DBPropertyInfo. Judging from
// the initializers, the aggregate's fields are: a bool, then four handler
// slots (string handler, int handler, map handler, and a DBImpl-level
// handler), with nullptr for the slots a property does not support.
// NOTE(review): the exact field names/semantics live in the DBPropertyInfo
// declaration (internal_stats.h) -- confirm there before relying on this.
const std::unordered_map<std::string, DBPropertyInfo>
    InternalStats::ppt_name_to_info = {
        {DB::Properties::kNumFilesAtLevelPrefix,
         {false, &InternalStats::HandleNumFilesAtLevel, nullptr, nullptr,
          nullptr}},
        {DB::Properties::kCompressionRatioAtLevelPrefix,
         {false, &InternalStats::HandleCompressionRatioAtLevelPrefix, nullptr,
          nullptr, nullptr}},
        {DB::Properties::kLevelStats,
         {false, &InternalStats::HandleLevelStats, nullptr, nullptr, nullptr}},
        {DB::Properties::kStats,
         {false, &InternalStats::HandleStats, nullptr, nullptr, nullptr}},
        {DB::Properties::kCFStats,
         {false, &InternalStats::HandleCFStats, nullptr,
          &InternalStats::HandleCFMapStats, nullptr}},
        {DB::Properties::kCFStatsNoFileHistogram,
         {false, &InternalStats::HandleCFStatsNoFileHistogram, nullptr, nullptr,
          nullptr}},
        {DB::Properties::kCFFileHistogram,
         {false, &InternalStats::HandleCFFileHistogram, nullptr, nullptr,
          nullptr}},
        {DB::Properties::kDBStats,
         {false, &InternalStats::HandleDBStats, nullptr,
          &InternalStats::HandleDBMapStats, nullptr}},
        {DB::Properties::kBlockCacheEntryStats,
         {true, &InternalStats::HandleBlockCacheEntryStats, nullptr,
          &InternalStats::HandleBlockCacheEntryStatsMap, nullptr}},
        {DB::Properties::kSSTables,
         {false, &InternalStats::HandleSsTables, nullptr, nullptr, nullptr}},
        {DB::Properties::kAggregatedTableProperties,
         {false, &InternalStats::HandleAggregatedTableProperties, nullptr,
          &InternalStats::HandleAggregatedTablePropertiesMap, nullptr}},
        {DB::Properties::kAggregatedTablePropertiesAtLevel,
         {false, &InternalStats::HandleAggregatedTablePropertiesAtLevel,
          nullptr, &InternalStats::HandleAggregatedTablePropertiesAtLevelMap,
          nullptr}},
        {DB::Properties::kNumImmutableMemTable,
         {false, nullptr, &InternalStats::HandleNumImmutableMemTable, nullptr,
          nullptr}},
        {DB::Properties::kNumImmutableMemTableFlushed,
         {false, nullptr, &InternalStats::HandleNumImmutableMemTableFlushed,
          nullptr, nullptr}},
        {DB::Properties::kMemTableFlushPending,
         {false, nullptr, &InternalStats::HandleMemTableFlushPending, nullptr,
          nullptr}},
        {DB::Properties::kCompactionPending,
         {false, nullptr, &InternalStats::HandleCompactionPending, nullptr,
          nullptr}},
        {DB::Properties::kBackgroundErrors,
         {false, nullptr, &InternalStats::HandleBackgroundErrors, nullptr,
          nullptr}},
        {DB::Properties::kCurSizeActiveMemTable,
         {false, nullptr, &InternalStats::HandleCurSizeActiveMemTable, nullptr,
          nullptr}},
        {DB::Properties::kCurSizeAllMemTables,
         {false, nullptr, &InternalStats::HandleCurSizeAllMemTables, nullptr,
          nullptr}},
        {DB::Properties::kSizeAllMemTables,
         {false, nullptr, &InternalStats::HandleSizeAllMemTables, nullptr,
          nullptr}},
        {DB::Properties::kNumEntriesActiveMemTable,
         {false, nullptr, &InternalStats::HandleNumEntriesActiveMemTable,
          nullptr, nullptr}},
        {DB::Properties::kNumEntriesImmMemTables,
         {false, nullptr, &InternalStats::HandleNumEntriesImmMemTables, nullptr,
          nullptr}},
        {DB::Properties::kNumDeletesActiveMemTable,
         {false, nullptr, &InternalStats::HandleNumDeletesActiveMemTable,
          nullptr, nullptr}},
        {DB::Properties::kNumDeletesImmMemTables,
         {false, nullptr, &InternalStats::HandleNumDeletesImmMemTables, nullptr,
          nullptr}},
        {DB::Properties::kEstimateNumKeys,
         {false, nullptr, &InternalStats::HandleEstimateNumKeys, nullptr,
          nullptr}},
        {DB::Properties::kEstimateTableReadersMem,
         {true, nullptr, &InternalStats::HandleEstimateTableReadersMem, nullptr,
          nullptr}},
        {DB::Properties::kIsFileDeletionsEnabled,
         {false, nullptr, &InternalStats::HandleIsFileDeletionsEnabled, nullptr,
          nullptr}},
        {DB::Properties::kNumSnapshots,
         {false, nullptr, &InternalStats::HandleNumSnapshots, nullptr,
          nullptr}},
        {DB::Properties::kOldestSnapshotTime,
         {false, nullptr, &InternalStats::HandleOldestSnapshotTime, nullptr,
          nullptr}},
        {DB::Properties::kOldestSnapshotSequence,
         {false, nullptr, &InternalStats::HandleOldestSnapshotSequence, nullptr,
          nullptr}},
        {DB::Properties::kNumLiveVersions,
         {false, nullptr, &InternalStats::HandleNumLiveVersions, nullptr,
          nullptr}},
        {DB::Properties::kCurrentSuperVersionNumber,
         {false, nullptr, &InternalStats::HandleCurrentSuperVersionNumber,
          nullptr, nullptr}},
        {DB::Properties::kEstimateLiveDataSize,
         {true, nullptr, &InternalStats::HandleEstimateLiveDataSize, nullptr,
          nullptr}},
        {DB::Properties::kMinLogNumberToKeep,
         {false, nullptr, &InternalStats::HandleMinLogNumberToKeep, nullptr,
          nullptr}},
        {DB::Properties::kMinObsoleteSstNumberToKeep,
         {false, nullptr, &InternalStats::HandleMinObsoleteSstNumberToKeep,
          nullptr, nullptr}},
        {DB::Properties::kBaseLevel,
         {false, nullptr, &InternalStats::HandleBaseLevel, nullptr, nullptr}},
        {DB::Properties::kTotalSstFilesSize,
         {false, nullptr, &InternalStats::HandleTotalSstFilesSize, nullptr,
          nullptr}},
        {DB::Properties::kLiveSstFilesSize,
         {false, nullptr, &InternalStats::HandleLiveSstFilesSize, nullptr,
          nullptr}},
        {DB::Properties::kLiveSstFilesSizeAtTemperature,
         {true, &InternalStats::HandleLiveSstFilesSizeAtTemperature, nullptr,
          nullptr, nullptr}},
        {DB::Properties::kEstimatePendingCompactionBytes,
         {false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes,
          nullptr, nullptr}},
        {DB::Properties::kNumRunningFlushes,
         {false, nullptr, &InternalStats::HandleNumRunningFlushes, nullptr,
          nullptr}},
        {DB::Properties::kNumRunningCompactions,
         {false, nullptr, &InternalStats::HandleNumRunningCompactions, nullptr,
          nullptr}},
        {DB::Properties::kActualDelayedWriteRate,
         {false, nullptr, &InternalStats::HandleActualDelayedWriteRate, nullptr,
          nullptr}},
        {DB::Properties::kIsWriteStopped,
         {false, nullptr, &InternalStats::HandleIsWriteStopped, nullptr,
          nullptr}},
        {DB::Properties::kEstimateOldestKeyTime,
         {false, nullptr, &InternalStats::HandleEstimateOldestKeyTime, nullptr,
          nullptr}},
        {DB::Properties::kBlockCacheCapacity,
         {false, nullptr, &InternalStats::HandleBlockCacheCapacity, nullptr,
          nullptr}},
        {DB::Properties::kBlockCacheUsage,
         {false, nullptr, &InternalStats::HandleBlockCacheUsage, nullptr,
          nullptr}},
        {DB::Properties::kBlockCachePinnedUsage,
         {false, nullptr, &InternalStats::HandleBlockCachePinnedUsage, nullptr,
          nullptr}},
        {DB::Properties::kOptionsStatistics,
         {true, nullptr, nullptr, nullptr,
          &DBImpl::GetPropertyHandleOptionsStatistics}},
        {DB::Properties::kNumBlobFiles,
         {false, nullptr, &InternalStats::HandleNumBlobFiles, nullptr,
          nullptr}},
        {DB::Properties::kBlobStats,
         {false, &InternalStats::HandleBlobStats, nullptr, nullptr, nullptr}},
        {DB::Properties::kTotalBlobFileSize,
         {false, nullptr, &InternalStats::HandleTotalBlobFileSize, nullptr,
          nullptr}},
        {DB::Properties::kLiveBlobFileSize,
         {false, nullptr, &InternalStats::HandleLiveBlobFileSize, nullptr,
          nullptr}},
};
566
// Constructs per-column-family stats storage sized for `num_levels`, and
// (when a block cache is available) attaches a shared cache-entry stats
// collector for the block-cache-entry-stats properties.
InternalStats::InternalStats(int num_levels, SystemClock* clock,
                             ColumnFamilyData* cfd)
    : db_stats_{},
      cf_stats_value_{},
      cf_stats_count_{},
      comp_stats_(num_levels),
      comp_stats_by_pri_(Env::Priority::TOTAL),
      file_read_latency_(num_levels),
      bg_error_count_(0),
      number_levels_(num_levels),
      clock_(clock),
      cfd_(cfd),
      started_at_(clock->NowMicros()) {
  Cache* block_cache = nullptr;
  bool ok = GetBlockCacheForStats(&block_cache);
  if (ok) {
    assert(block_cache);
    // Extract or create stats collector. Could fail in rare cases.
    Status s = CacheEntryStatsCollector<CacheEntryRoleStats>::GetShared(
        block_cache, clock_, &cache_entry_stats_collector_);
    if (s.ok()) {
      assert(cache_entry_stats_collector_);
    } else {
      // Collector unavailable; code using it checks for null first.
      assert(!cache_entry_stats_collector_);
    }
  } else {
    // No block cache configured -> no collector.
    assert(!block_cache);
  }
}
596
TEST_GetCacheEntryRoleStats(CacheEntryRoleStats * stats,bool foreground)597 void InternalStats::TEST_GetCacheEntryRoleStats(CacheEntryRoleStats* stats,
598 bool foreground) {
599 CollectCacheEntryStats(foreground);
600 if (cache_entry_stats_collector_) {
601 cache_entry_stats_collector_->GetStats(stats);
602 }
603 }
604
CollectCacheEntryStats(bool foreground)605 void InternalStats::CollectCacheEntryStats(bool foreground) {
606 // This function is safe to call from any thread because
607 // cache_entry_stats_collector_ field is const after constructor
608 // and ->GetStats does its own synchronization, which also suffices for
609 // cache_entry_stats_.
610
611 if (!cache_entry_stats_collector_) {
612 return; // nothing to do (e.g. no block cache)
613 }
614
615 // For "background" collections, strictly cap the collection time by
616 // expanding effective cache TTL. For foreground, be more aggressive about
617 // getting latest data.
618 int min_interval_seconds = foreground ? 10 : 180;
619 // 1/500 = max of 0.2% of one CPU thread
620 int min_interval_factor = foreground ? 10 : 500;
621 cache_entry_stats_collector_->CollectStats(min_interval_seconds,
622 min_interval_factor);
623 }
624
625 std::function<void(const Slice&, void*, size_t, Cache::DeleterFn)>
GetEntryCallback()626 InternalStats::CacheEntryRoleStats::GetEntryCallback() {
627 return [&](const Slice& /*key*/, void* /*value*/, size_t charge,
628 Cache::DeleterFn deleter) {
629 auto e = role_map_.find(deleter);
630 size_t role_idx;
631 if (e == role_map_.end()) {
632 role_idx = static_cast<size_t>(CacheEntryRole::kMisc);
633 } else {
634 role_idx = static_cast<size_t>(e->second);
635 }
636 entry_counts[role_idx]++;
637 total_charges[role_idx] += charge;
638 };
639 }
640
BeginCollection(Cache * cache,SystemClock *,uint64_t start_time_micros)641 void InternalStats::CacheEntryRoleStats::BeginCollection(
642 Cache* cache, SystemClock*, uint64_t start_time_micros) {
643 Clear();
644 last_start_time_micros_ = start_time_micros;
645 ++collection_count;
646 role_map_ = CopyCacheDeleterRoleMap();
647 std::ostringstream str;
648 str << cache->Name() << "@" << static_cast<void*>(cache) << "#"
649 << port::GetProcessID();
650 cache_id = str.str();
651 cache_capacity = cache->GetCapacity();
652 }
653
// Marks the end of a collection pass; the start/end pair feeds
// GetLastDurationMicros().
void InternalStats::CacheEntryRoleStats::EndCollection(
    Cache*, SystemClock*, uint64_t end_time_micros) {
  last_end_time_micros_ = end_time_micros;
}
658
// Called when a collection was skipped (rate-limited) and cached results were
// served instead; tracks how many times the last collection was reused.
void InternalStats::CacheEntryRoleStats::SkippedCollection() {
  ++copies_of_last_collection;
}
662
GetLastDurationMicros() const663 uint64_t InternalStats::CacheEntryRoleStats::GetLastDurationMicros() const {
664 if (last_end_time_micros_ > last_start_time_micros_) {
665 return last_end_time_micros_ - last_start_time_micros_;
666 } else {
667 return 0U;
668 }
669 }
670
// Render the collected stats as human-readable text: a header line with
// cache identity, capacity, and collection timing, followed by one
// "(count,size,portion)" entry per cache-entry role that was observed.
std::string InternalStats::CacheEntryRoleStats::ToString(
    SystemClock* clock) const {
  std::ostringstream str;
  str << "Block cache " << cache_id
      << " capacity: " << BytesToHumanString(cache_capacity)
      << " collections: " << collection_count
      << " last_copies: " << copies_of_last_collection
      << " last_secs: " << (GetLastDurationMicros() / 1000000.0)
      << " secs_since: "
      << ((clock->NowMicros() - last_end_time_micros_) / 1000000U) << "\n";
  str << "Block cache entry stats(count,size,portion):";
  for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
    // Roles with no entries are omitted to keep the line compact.
    if (entry_counts[i] > 0) {
      str << " " << kCacheEntryRoleToCamelString[i] << "(" << entry_counts[i]
          << "," << BytesToHumanString(total_charges[i]) << ","
          << (100.0 * total_charges[i] / cache_capacity) << "%)";
    }
  }
  str << "\n";
  return str.str();
}
692
// Export the collected stats as string key/value pairs. Unlike ToString(),
// every role is emitted (even with a zero count), under "count.<role>",
// "bytes.<role>", and "percent.<role>" keys.
void InternalStats::CacheEntryRoleStats::ToMap(
    std::map<std::string, std::string>* values, SystemClock* clock) const {
  values->clear();
  auto& v = *values;
  v["id"] = cache_id;
  v["capacity"] = ROCKSDB_NAMESPACE::ToString(cache_capacity);
  v["secs_for_last_collection"] =
      ROCKSDB_NAMESPACE::ToString(GetLastDurationMicros() / 1000000.0);
  v["secs_since_last_collection"] = ROCKSDB_NAMESPACE::ToString(
      (clock->NowMicros() - last_end_time_micros_) / 1000000U);
  for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
    std::string role = kCacheEntryRoleToHyphenString[i];
    v["count." + role] = ROCKSDB_NAMESPACE::ToString(entry_counts[i]);
    v["bytes." + role] = ROCKSDB_NAMESPACE::ToString(total_charges[i]);
    v["percent." + role] =
        ROCKSDB_NAMESPACE::ToString(100.0 * total_charges[i] / cache_capacity);
  }
}
711
HandleBlockCacheEntryStats(std::string * value,Slice)712 bool InternalStats::HandleBlockCacheEntryStats(std::string* value,
713 Slice /*suffix*/) {
714 if (!cache_entry_stats_collector_) {
715 return false;
716 }
717 CollectCacheEntryStats(/*foreground*/ true);
718 CacheEntryRoleStats stats;
719 cache_entry_stats_collector_->GetStats(&stats);
720 *value = stats.ToString(clock_);
721 return true;
722 }
723
HandleBlockCacheEntryStatsMap(std::map<std::string,std::string> * values,Slice)724 bool InternalStats::HandleBlockCacheEntryStatsMap(
725 std::map<std::string, std::string>* values, Slice /*suffix*/) {
726 if (!cache_entry_stats_collector_) {
727 return false;
728 }
729 CollectCacheEntryStats(/*foreground*/ true);
730 CacheEntryRoleStats stats;
731 cache_entry_stats_collector_->GetStats(&stats);
732 stats.ToMap(values, clock_);
733 return true;
734 }
735
HandleLiveSstFilesSizeAtTemperature(std::string * value,Slice suffix)736 bool InternalStats::HandleLiveSstFilesSizeAtTemperature(std::string* value,
737 Slice suffix) {
738 uint64_t temperature;
739 bool ok = ConsumeDecimalNumber(&suffix, &temperature) && suffix.empty();
740 if (!ok) {
741 return false;
742 }
743
744 uint64_t size = 0;
745 const auto* vstorage = cfd_->current()->storage_info();
746 for (int level = 0; level < vstorage->num_levels(); level++) {
747 for (const auto& file_meta : vstorage->LevelFiles(level)) {
748 if (static_cast<uint8_t>(file_meta->temperature) == temperature) {
749 size += file_meta->fd.GetFileSize();
750 }
751 }
752 }
753
754 *value = ToString(size);
755 return true;
756 }
757
HandleNumBlobFiles(uint64_t * value,DBImpl *,Version *)758 bool InternalStats::HandleNumBlobFiles(uint64_t* value, DBImpl* /*db*/,
759 Version* /*version*/) {
760 const auto* vstorage = cfd_->current()->storage_info();
761 const auto& blob_files = vstorage->GetBlobFiles();
762 *value = blob_files.size();
763 return true;
764 }
765
// Property handler: human-readable summary of the blob files referenced by
// the current version — file count, total size, and total garbage bytes.
bool InternalStats::HandleBlobStats(std::string* value, Slice /*suffix*/) {
  std::ostringstream oss;
  auto* current_version = cfd_->current();
  const auto& blob_files = current_version->storage_info()->GetBlobFiles();
  uint64_t current_num_blob_files = blob_files.size();
  uint64_t current_file_size = 0;
  uint64_t current_garbage_size = 0;
  // Sum sizes and garbage bytes over all blob file metadata entries.
  for (const auto& pair : blob_files) {
    const auto& meta = pair.second;
    current_file_size += meta->GetBlobFileSize();
    current_garbage_size += meta->GetGarbageBlobBytes();
  }
  oss << "Number of blob files: " << current_num_blob_files
      << "\nTotal size of blob files: " << current_file_size
      << "\nTotal size of garbage in blob files: " << current_garbage_size
      << '\n';
  // Append (rather than assign) so callers can concatenate sections.
  value->append(oss.str());
  return true;
}
785
HandleTotalBlobFileSize(uint64_t * value,DBImpl *,Version *)786 bool InternalStats::HandleTotalBlobFileSize(uint64_t* value, DBImpl* /*db*/,
787 Version* /*version*/) {
788 *value = cfd_->GetTotalBlobFileSize();
789 return true;
790 }
791
HandleLiveBlobFileSize(uint64_t * value,DBImpl *,Version *)792 bool InternalStats::HandleLiveBlobFileSize(uint64_t* value, DBImpl* /*db*/,
793 Version* /*version*/) {
794 const auto* vstorage = cfd_->current()->storage_info();
795 *value = vstorage->GetTotalBlobFileSize();
796 return true;
797 }
798
GetPropertyInfo(const Slice & property)799 const DBPropertyInfo* GetPropertyInfo(const Slice& property) {
800 std::string ppt_name = GetPropertyNameAndArg(property).first.ToString();
801 auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name);
802 if (ppt_info_iter == InternalStats::ppt_name_to_info.end()) {
803 return nullptr;
804 }
805 return &ppt_info_iter->second;
806 }
807
GetStringProperty(const DBPropertyInfo & property_info,const Slice & property,std::string * value)808 bool InternalStats::GetStringProperty(const DBPropertyInfo& property_info,
809 const Slice& property,
810 std::string* value) {
811 assert(value != nullptr);
812 assert(property_info.handle_string != nullptr);
813 Slice arg = GetPropertyNameAndArg(property).second;
814 return (this->*(property_info.handle_string))(value, arg);
815 }
816
GetMapProperty(const DBPropertyInfo & property_info,const Slice & property,std::map<std::string,std::string> * value)817 bool InternalStats::GetMapProperty(const DBPropertyInfo& property_info,
818 const Slice& property,
819 std::map<std::string, std::string>* value) {
820 assert(value != nullptr);
821 assert(property_info.handle_map != nullptr);
822 Slice arg = GetPropertyNameAndArg(property).second;
823 return (this->*(property_info.handle_map))(value, arg);
824 }
825
GetIntProperty(const DBPropertyInfo & property_info,uint64_t * value,DBImpl * db)826 bool InternalStats::GetIntProperty(const DBPropertyInfo& property_info,
827 uint64_t* value, DBImpl* db) {
828 assert(value != nullptr);
829 assert(property_info.handle_int != nullptr &&
830 !property_info.need_out_of_mutex);
831 db->mutex_.AssertHeld();
832 return (this->*(property_info.handle_int))(value, db, nullptr /* version */);
833 }
834
GetIntPropertyOutOfMutex(const DBPropertyInfo & property_info,Version * version,uint64_t * value)835 bool InternalStats::GetIntPropertyOutOfMutex(
836 const DBPropertyInfo& property_info, Version* version, uint64_t* value) {
837 assert(value != nullptr);
838 assert(property_info.handle_int != nullptr &&
839 property_info.need_out_of_mutex);
840 return (this->*(property_info.handle_int))(value, nullptr /* db */, version);
841 }
842
HandleNumFilesAtLevel(std::string * value,Slice suffix)843 bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) {
844 uint64_t level;
845 const auto* vstorage = cfd_->current()->storage_info();
846 bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
847 if (!ok || static_cast<int>(level) >= number_levels_) {
848 return false;
849 } else {
850 char buf[100];
851 snprintf(buf, sizeof(buf), "%d",
852 vstorage->NumLevelFiles(static_cast<int>(level)));
853 *value = buf;
854 return true;
855 }
856 }
857
HandleCompressionRatioAtLevelPrefix(std::string * value,Slice suffix)858 bool InternalStats::HandleCompressionRatioAtLevelPrefix(std::string* value,
859 Slice suffix) {
860 uint64_t level;
861 const auto* vstorage = cfd_->current()->storage_info();
862 bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
863 if (!ok || level >= static_cast<uint64_t>(number_levels_)) {
864 return false;
865 }
866 *value = ToString(
867 vstorage->GetEstimatedCompressionRatioAtLevel(static_cast<int>(level)));
868 return true;
869 }
870
// Property handler: fixed-width table of per-level file counts and sizes
// (in MB), appended to *value.
bool InternalStats::HandleLevelStats(std::string* value, Slice /*suffix*/) {
  char buf[1000];
  const auto* vstorage = cfd_->current()->storage_info();
  // Header row.
  snprintf(buf, sizeof(buf),
           "Level Files Size(MB)\n"
           "--------------------\n");
  value->append(buf);

  // One row per level: level number, file count, total bytes in MB.
  for (int level = 0; level < number_levels_; level++) {
    snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level,
             vstorage->NumLevelFiles(level),
             vstorage->NumLevelBytes(level) / kMB);
    value->append(buf);
  }
  return true;
}
887
HandleStats(std::string * value,Slice suffix)888 bool InternalStats::HandleStats(std::string* value, Slice suffix) {
889 if (!HandleCFStats(value, suffix)) {
890 return false;
891 }
892 if (!HandleDBStats(value, suffix)) {
893 return false;
894 }
895 return true;
896 }
897
HandleCFMapStats(std::map<std::string,std::string> * cf_stats,Slice)898 bool InternalStats::HandleCFMapStats(
899 std::map<std::string, std::string>* cf_stats, Slice /*suffix*/) {
900 DumpCFMapStats(cf_stats);
901 return true;
902 }
903
HandleCFStats(std::string * value,Slice)904 bool InternalStats::HandleCFStats(std::string* value, Slice /*suffix*/) {
905 DumpCFStats(value);
906 return true;
907 }
908
HandleCFStatsNoFileHistogram(std::string * value,Slice)909 bool InternalStats::HandleCFStatsNoFileHistogram(std::string* value,
910 Slice /*suffix*/) {
911 DumpCFStatsNoFileHistogram(value);
912 return true;
913 }
914
HandleCFFileHistogram(std::string * value,Slice)915 bool InternalStats::HandleCFFileHistogram(std::string* value,
916 Slice /*suffix*/) {
917 DumpCFFileHistogram(value);
918 return true;
919 }
920
HandleDBMapStats(std::map<std::string,std::string> * db_stats,Slice)921 bool InternalStats::HandleDBMapStats(
922 std::map<std::string, std::string>* db_stats, Slice /*suffix*/) {
923 DumpDBMapStats(db_stats);
924 return true;
925 }
926
HandleDBStats(std::string * value,Slice)927 bool InternalStats::HandleDBStats(std::string* value, Slice /*suffix*/) {
928 DumpDBStats(value);
929 return true;
930 }
931
HandleSsTables(std::string * value,Slice)932 bool InternalStats::HandleSsTables(std::string* value, Slice /*suffix*/) {
933 auto* current = cfd_->current();
934 *value = current->DebugString(true, true);
935 return true;
936 }
937
HandleAggregatedTableProperties(std::string * value,Slice)938 bool InternalStats::HandleAggregatedTableProperties(std::string* value,
939 Slice /*suffix*/) {
940 std::shared_ptr<const TableProperties> tp;
941 auto s = cfd_->current()->GetAggregatedTableProperties(&tp);
942 if (!s.ok()) {
943 return false;
944 }
945 *value = tp->ToString();
946 return true;
947 }
948
MapUint64ValuesToString(const std::map<std::string,uint64_t> & from)949 static std::map<std::string, std::string> MapUint64ValuesToString(
950 const std::map<std::string, uint64_t>& from) {
951 std::map<std::string, std::string> to;
952 for (const auto& e : from) {
953 to[e.first] = ToString(e.second);
954 }
955 return to;
956 }
957
HandleAggregatedTablePropertiesMap(std::map<std::string,std::string> * values,Slice)958 bool InternalStats::HandleAggregatedTablePropertiesMap(
959 std::map<std::string, std::string>* values, Slice /*suffix*/) {
960 std::shared_ptr<const TableProperties> tp;
961 auto s = cfd_->current()->GetAggregatedTableProperties(&tp);
962 if (!s.ok()) {
963 return false;
964 }
965 *values = MapUint64ValuesToString(tp->GetAggregatablePropertiesAsMap());
966 return true;
967 }
968
HandleAggregatedTablePropertiesAtLevel(std::string * values,Slice suffix)969 bool InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* values,
970 Slice suffix) {
971 uint64_t level;
972 bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
973 if (!ok || static_cast<int>(level) >= number_levels_) {
974 return false;
975 }
976 std::shared_ptr<const TableProperties> tp;
977 auto s = cfd_->current()->GetAggregatedTableProperties(
978 &tp, static_cast<int>(level));
979 if (!s.ok()) {
980 return false;
981 }
982 *values = tp->ToString();
983 return true;
984 }
985
HandleAggregatedTablePropertiesAtLevelMap(std::map<std::string,std::string> * values,Slice suffix)986 bool InternalStats::HandleAggregatedTablePropertiesAtLevelMap(
987 std::map<std::string, std::string>* values, Slice suffix) {
988 uint64_t level;
989 bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
990 if (!ok || static_cast<int>(level) >= number_levels_) {
991 return false;
992 }
993 std::shared_ptr<const TableProperties> tp;
994 auto s = cfd_->current()->GetAggregatedTableProperties(
995 &tp, static_cast<int>(level));
996 if (!s.ok()) {
997 return false;
998 }
999 *values = MapUint64ValuesToString(tp->GetAggregatablePropertiesAsMap());
1000 return true;
1001 }
1002
HandleNumImmutableMemTable(uint64_t * value,DBImpl *,Version *)1003 bool InternalStats::HandleNumImmutableMemTable(uint64_t* value, DBImpl* /*db*/,
1004 Version* /*version*/) {
1005 *value = cfd_->imm()->NumNotFlushed();
1006 return true;
1007 }
1008
HandleNumImmutableMemTableFlushed(uint64_t * value,DBImpl *,Version *)1009 bool InternalStats::HandleNumImmutableMemTableFlushed(uint64_t* value,
1010 DBImpl* /*db*/,
1011 Version* /*version*/) {
1012 *value = cfd_->imm()->NumFlushed();
1013 return true;
1014 }
1015
HandleMemTableFlushPending(uint64_t * value,DBImpl *,Version *)1016 bool InternalStats::HandleMemTableFlushPending(uint64_t* value, DBImpl* /*db*/,
1017 Version* /*version*/) {
1018 *value = (cfd_->imm()->IsFlushPending() ? 1 : 0);
1019 return true;
1020 }
1021
HandleNumRunningFlushes(uint64_t * value,DBImpl * db,Version *)1022 bool InternalStats::HandleNumRunningFlushes(uint64_t* value, DBImpl* db,
1023 Version* /*version*/) {
1024 *value = db->num_running_flushes();
1025 return true;
1026 }
1027
HandleCompactionPending(uint64_t * value,DBImpl *,Version *)1028 bool InternalStats::HandleCompactionPending(uint64_t* value, DBImpl* /*db*/,
1029 Version* /*version*/) {
1030 // 1 if the system already determines at least one compaction is needed.
1031 // 0 otherwise,
1032 const auto* vstorage = cfd_->current()->storage_info();
1033 *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0);
1034 return true;
1035 }
1036
HandleNumRunningCompactions(uint64_t * value,DBImpl * db,Version *)1037 bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db,
1038 Version* /*version*/) {
1039 *value = db->num_running_compactions_;
1040 return true;
1041 }
1042
HandleBackgroundErrors(uint64_t * value,DBImpl *,Version *)1043 bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* /*db*/,
1044 Version* /*version*/) {
1045 // Accumulated number of errors in background flushes or compactions.
1046 *value = GetBackgroundErrorCount();
1047 return true;
1048 }
1049
HandleCurSizeActiveMemTable(uint64_t * value,DBImpl *,Version *)1050 bool InternalStats::HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* /*db*/,
1051 Version* /*version*/) {
1052 // Current size of the active memtable
1053 // Using ApproximateMemoryUsageFast to avoid the need for synchronization
1054 *value = cfd_->mem()->ApproximateMemoryUsageFast();
1055 return true;
1056 }
1057
HandleCurSizeAllMemTables(uint64_t * value,DBImpl *,Version *)1058 bool InternalStats::HandleCurSizeAllMemTables(uint64_t* value, DBImpl* /*db*/,
1059 Version* /*version*/) {
1060 // Current size of the active memtable + immutable memtables
1061 // Using ApproximateMemoryUsageFast to avoid the need for synchronization
1062 *value = cfd_->mem()->ApproximateMemoryUsageFast() +
1063 cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage();
1064 return true;
1065 }
1066
HandleSizeAllMemTables(uint64_t * value,DBImpl *,Version *)1067 bool InternalStats::HandleSizeAllMemTables(uint64_t* value, DBImpl* /*db*/,
1068 Version* /*version*/) {
1069 // Using ApproximateMemoryUsageFast to avoid the need for synchronization
1070 *value = cfd_->mem()->ApproximateMemoryUsageFast() +
1071 cfd_->imm()->ApproximateMemoryUsage();
1072 return true;
1073 }
1074
HandleNumEntriesActiveMemTable(uint64_t * value,DBImpl *,Version *)1075 bool InternalStats::HandleNumEntriesActiveMemTable(uint64_t* value,
1076 DBImpl* /*db*/,
1077 Version* /*version*/) {
1078 // Current number of entires in the active memtable
1079 *value = cfd_->mem()->num_entries();
1080 return true;
1081 }
1082
HandleNumEntriesImmMemTables(uint64_t * value,DBImpl *,Version *)1083 bool InternalStats::HandleNumEntriesImmMemTables(uint64_t* value,
1084 DBImpl* /*db*/,
1085 Version* /*version*/) {
1086 // Current number of entries in the immutable memtables
1087 *value = cfd_->imm()->current()->GetTotalNumEntries();
1088 return true;
1089 }
1090
HandleNumDeletesActiveMemTable(uint64_t * value,DBImpl *,Version *)1091 bool InternalStats::HandleNumDeletesActiveMemTable(uint64_t* value,
1092 DBImpl* /*db*/,
1093 Version* /*version*/) {
1094 // Current number of entires in the active memtable
1095 *value = cfd_->mem()->num_deletes();
1096 return true;
1097 }
1098
HandleNumDeletesImmMemTables(uint64_t * value,DBImpl *,Version *)1099 bool InternalStats::HandleNumDeletesImmMemTables(uint64_t* value,
1100 DBImpl* /*db*/,
1101 Version* /*version*/) {
1102 // Current number of entries in the immutable memtables
1103 *value = cfd_->imm()->current()->GetTotalNumDeletes();
1104 return true;
1105 }
1106
HandleEstimateNumKeys(uint64_t * value,DBImpl *,Version *)1107 bool InternalStats::HandleEstimateNumKeys(uint64_t* value, DBImpl* /*db*/,
1108 Version* /*version*/) {
1109 // Estimate number of entries in the column family:
1110 // Use estimated entries in tables + total entries in memtables.
1111 const auto* vstorage = cfd_->current()->storage_info();
1112 uint64_t estimate_keys = cfd_->mem()->num_entries() +
1113 cfd_->imm()->current()->GetTotalNumEntries() +
1114 vstorage->GetEstimatedActiveKeys();
1115 uint64_t estimate_deletes =
1116 cfd_->mem()->num_deletes() + cfd_->imm()->current()->GetTotalNumDeletes();
1117 *value = estimate_keys > estimate_deletes * 2
1118 ? estimate_keys - (estimate_deletes * 2)
1119 : 0;
1120 return true;
1121 }
1122
HandleNumSnapshots(uint64_t * value,DBImpl * db,Version *)1123 bool InternalStats::HandleNumSnapshots(uint64_t* value, DBImpl* db,
1124 Version* /*version*/) {
1125 *value = db->snapshots().count();
1126 return true;
1127 }
1128
HandleOldestSnapshotTime(uint64_t * value,DBImpl * db,Version *)1129 bool InternalStats::HandleOldestSnapshotTime(uint64_t* value, DBImpl* db,
1130 Version* /*version*/) {
1131 *value = static_cast<uint64_t>(db->snapshots().GetOldestSnapshotTime());
1132 return true;
1133 }
1134
HandleOldestSnapshotSequence(uint64_t * value,DBImpl * db,Version *)1135 bool InternalStats::HandleOldestSnapshotSequence(uint64_t* value, DBImpl* db,
1136 Version* /*version*/) {
1137 *value = static_cast<uint64_t>(db->snapshots().GetOldestSnapshotSequence());
1138 return true;
1139 }
1140
HandleNumLiveVersions(uint64_t * value,DBImpl *,Version *)1141 bool InternalStats::HandleNumLiveVersions(uint64_t* value, DBImpl* /*db*/,
1142 Version* /*version*/) {
1143 *value = cfd_->GetNumLiveVersions();
1144 return true;
1145 }
1146
HandleCurrentSuperVersionNumber(uint64_t * value,DBImpl *,Version *)1147 bool InternalStats::HandleCurrentSuperVersionNumber(uint64_t* value,
1148 DBImpl* /*db*/,
1149 Version* /*version*/) {
1150 *value = cfd_->GetSuperVersionNumber();
1151 return true;
1152 }
1153
HandleIsFileDeletionsEnabled(uint64_t * value,DBImpl * db,Version *)1154 bool InternalStats::HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db,
1155 Version* /*version*/) {
1156 *value = db->IsFileDeletionsEnabled() ? 1 : 0;
1157 return true;
1158 }
1159
HandleBaseLevel(uint64_t * value,DBImpl *,Version *)1160 bool InternalStats::HandleBaseLevel(uint64_t* value, DBImpl* /*db*/,
1161 Version* /*version*/) {
1162 const auto* vstorage = cfd_->current()->storage_info();
1163 *value = vstorage->base_level();
1164 return true;
1165 }
1166
HandleTotalSstFilesSize(uint64_t * value,DBImpl *,Version *)1167 bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* /*db*/,
1168 Version* /*version*/) {
1169 *value = cfd_->GetTotalSstFilesSize();
1170 return true;
1171 }
1172
HandleLiveSstFilesSize(uint64_t * value,DBImpl *,Version *)1173 bool InternalStats::HandleLiveSstFilesSize(uint64_t* value, DBImpl* /*db*/,
1174 Version* /*version*/) {
1175 *value = cfd_->GetLiveSstFilesSize();
1176 return true;
1177 }
1178
HandleEstimatePendingCompactionBytes(uint64_t * value,DBImpl *,Version *)1179 bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value,
1180 DBImpl* /*db*/,
1181 Version* /*version*/) {
1182 const auto* vstorage = cfd_->current()->storage_info();
1183 *value = vstorage->estimated_compaction_needed_bytes();
1184 return true;
1185 }
1186
HandleEstimateTableReadersMem(uint64_t * value,DBImpl *,Version * version)1187 bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value,
1188 DBImpl* /*db*/,
1189 Version* version) {
1190 *value = (version == nullptr) ? 0 : version->GetMemoryUsageByTableReaders();
1191 return true;
1192 }
1193
HandleEstimateLiveDataSize(uint64_t * value,DBImpl *,Version * version)1194 bool InternalStats::HandleEstimateLiveDataSize(uint64_t* value, DBImpl* /*db*/,
1195 Version* version) {
1196 const auto* vstorage = version->storage_info();
1197 *value = vstorage->EstimateLiveDataSize();
1198 return true;
1199 }
1200
HandleMinLogNumberToKeep(uint64_t * value,DBImpl * db,Version *)1201 bool InternalStats::HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db,
1202 Version* /*version*/) {
1203 *value = db->MinLogNumberToKeep();
1204 return true;
1205 }
1206
HandleMinObsoleteSstNumberToKeep(uint64_t * value,DBImpl * db,Version *)1207 bool InternalStats::HandleMinObsoleteSstNumberToKeep(uint64_t* value,
1208 DBImpl* db,
1209 Version* /*version*/) {
1210 *value = db->MinObsoleteSstNumberToKeep();
1211 return true;
1212 }
1213
HandleActualDelayedWriteRate(uint64_t * value,DBImpl * db,Version *)1214 bool InternalStats::HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db,
1215 Version* /*version*/) {
1216 const WriteController& wc = db->write_controller();
1217 if (!wc.NeedsDelay()) {
1218 *value = 0;
1219 } else {
1220 *value = wc.delayed_write_rate();
1221 }
1222 return true;
1223 }
1224
HandleIsWriteStopped(uint64_t * value,DBImpl * db,Version *)1225 bool InternalStats::HandleIsWriteStopped(uint64_t* value, DBImpl* db,
1226 Version* /*version*/) {
1227 *value = db->write_controller().IsStopped() ? 1 : 0;
1228 return true;
1229 }
1230
// Property handler: estimate the oldest key time in the column family by
// taking the minimum oldest_key_time over all table files and the
// memtables. Only supported for FIFO compaction with
// allow_compaction=false (see TODO below); fails otherwise, or when any
// source reports an unknown time.
bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/,
                                                Version* /*version*/) {
  // TODO(yiwu): The property is currently available for fifo compaction
  // with allow_compaction = false. This is because we don't propagate
  // oldest_key_time on compaction.
  if (cfd_->ioptions()->compaction_style != kCompactionStyleFIFO ||
      cfd_->GetCurrentMutableCFOptions()
          ->compaction_options_fifo.allow_compaction) {
    return false;
  }

  TablePropertiesCollection collection;
  auto s = cfd_->current()->GetPropertiesOfAllTables(&collection);
  if (!s.ok()) {
    return false;
  }
  // Minimum oldest_key_time across all table files, starting from "none
  // found" (max).
  *value = std::numeric_limits<uint64_t>::max();
  for (auto& p : collection) {
    *value = std::min(*value, p.second->oldest_key_time);
    if (*value == 0) {
      // 0 appears to act as an "unknown" sentinel here (it makes the final
      // check fail); no point scanning further once we hit it.
      break;
    }
  }
  if (*value > 0) {
    // Fold in the (approximate) oldest key times of the memtables.
    *value = std::min({cfd_->mem()->ApproximateOldestKeyTime(),
                       cfd_->imm()->ApproximateOldestKeyTime(), *value});
  }
  // Succeed only with a definite value: neither 0 (unknown) nor max (no
  // source contributed).
  return *value > 0 && *value < std::numeric_limits<uint64_t>::max();
}
1260
GetBlockCacheForStats(Cache ** block_cache)1261 bool InternalStats::GetBlockCacheForStats(Cache** block_cache) {
1262 assert(block_cache != nullptr);
1263 auto* table_factory = cfd_->ioptions()->table_factory.get();
1264 assert(table_factory != nullptr);
1265 *block_cache =
1266 table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
1267 return *block_cache != nullptr;
1268 }
1269
HandleBlockCacheCapacity(uint64_t * value,DBImpl *,Version *)1270 bool InternalStats::HandleBlockCacheCapacity(uint64_t* value, DBImpl* /*db*/,
1271 Version* /*version*/) {
1272 Cache* block_cache;
1273 bool ok = GetBlockCacheForStats(&block_cache);
1274 if (!ok) {
1275 return false;
1276 }
1277 *value = static_cast<uint64_t>(block_cache->GetCapacity());
1278 return true;
1279 }
1280
HandleBlockCacheUsage(uint64_t * value,DBImpl *,Version *)1281 bool InternalStats::HandleBlockCacheUsage(uint64_t* value, DBImpl* /*db*/,
1282 Version* /*version*/) {
1283 Cache* block_cache;
1284 bool ok = GetBlockCacheForStats(&block_cache);
1285 if (!ok) {
1286 return false;
1287 }
1288 *value = static_cast<uint64_t>(block_cache->GetUsage());
1289 return true;
1290 }
1291
HandleBlockCachePinnedUsage(uint64_t * value,DBImpl *,Version *)1292 bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/,
1293 Version* /*version*/) {
1294 Cache* block_cache;
1295 bool ok = GetBlockCacheForStats(&block_cache);
1296 if (!ok) {
1297 return false;
1298 }
1299 *value = static_cast<uint64_t>(block_cache->GetPinnedUsage());
1300 return true;
1301 }
1302
// Export every DB-level integer stat into the map, keyed by each stat's
// registered property name, plus the DB uptime in seconds ("db.uptime").
void InternalStats::DumpDBMapStats(
    std::map<std::string, std::string>* db_stats) {
  for (int i = 0; i < static_cast<int>(kIntStatsNumMax); ++i) {
    InternalDBStatsType type = static_cast<InternalDBStatsType>(i);
    (*db_stats)[db_stats_type_to_info.at(type).property_name] =
        std::to_string(GetDBStats(type));
  }
  double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec;
  (*db_stats)["db.uptime"] = std::to_string(seconds_up);
}
1313
DumpDBStats(std::string * value)1314 void InternalStats::DumpDBStats(std::string* value) {
1315 char buf[1000];
1316 // DB-level stats, only available from default column family
1317 double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec;
1318 double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up;
1319 snprintf(buf, sizeof(buf),
1320 "\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n",
1321 seconds_up, interval_seconds_up);
1322 value->append(buf);
1323 // Cumulative
1324 uint64_t user_bytes_written =
1325 GetDBStats(InternalStats::kIntStatsBytesWritten);
1326 uint64_t num_keys_written =
1327 GetDBStats(InternalStats::kIntStatsNumKeysWritten);
1328 uint64_t write_other = GetDBStats(InternalStats::kIntStatsWriteDoneByOther);
1329 uint64_t write_self = GetDBStats(InternalStats::kIntStatsWriteDoneBySelf);
1330 uint64_t wal_bytes = GetDBStats(InternalStats::kIntStatsWalFileBytes);
1331 uint64_t wal_synced = GetDBStats(InternalStats::kIntStatsWalFileSynced);
1332 uint64_t write_with_wal = GetDBStats(InternalStats::kIntStatsWriteWithWal);
1333 uint64_t write_stall_micros =
1334 GetDBStats(InternalStats::kIntStatsWriteStallMicros);
1335
1336 const int kHumanMicrosLen = 32;
1337 char human_micros[kHumanMicrosLen];
1338
1339 // Data
1340 // writes: total number of write requests.
1341 // keys: total number of key updates issued by all the write requests
1342 // commit groups: number of group commits issued to the DB. Each group can
1343 // contain one or more writes.
1344 // so writes/keys is the average number of put in multi-put or put
1345 // writes/groups is the average group commit size.
1346 //
1347 // The format is the same for interval stats.
1348 snprintf(buf, sizeof(buf),
1349 "Cumulative writes: %s writes, %s keys, %s commit groups, "
1350 "%.1f writes per commit group, ingest: %.2f GB, %.2f MB/s\n",
1351 NumberToHumanString(write_other + write_self).c_str(),
1352 NumberToHumanString(num_keys_written).c_str(),
1353 NumberToHumanString(write_self).c_str(),
1354 (write_other + write_self) /
1355 std::max(1.0, static_cast<double>(write_self)),
1356 user_bytes_written / kGB,
1357 user_bytes_written / kMB / std::max(seconds_up, 0.001));
1358 value->append(buf);
1359 // WAL
1360 snprintf(buf, sizeof(buf),
1361 "Cumulative WAL: %s writes, %s syncs, "
1362 "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
1363 NumberToHumanString(write_with_wal).c_str(),
1364 NumberToHumanString(wal_synced).c_str(),
1365 write_with_wal / std::max(1.0, static_cast<double>(wal_synced)),
1366 wal_bytes / kGB, wal_bytes / kMB / std::max(seconds_up, 0.001));
1367 value->append(buf);
1368 // Stall
1369 AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true);
1370 snprintf(buf, sizeof(buf), "Cumulative stall: %s, %.1f percent\n",
1371 human_micros,
1372 // 10000 = divide by 1M to get secs, then multiply by 100 for pct
1373 write_stall_micros / 10000.0 / std::max(seconds_up, 0.001));
1374 value->append(buf);
1375
1376 // Interval
1377 uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other;
1378 uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
1379 uint64_t interval_num_keys_written =
1380 num_keys_written - db_stats_snapshot_.num_keys_written;
1381 snprintf(
1382 buf, sizeof(buf),
1383 "Interval writes: %s writes, %s keys, %s commit groups, "
1384 "%.1f writes per commit group, ingest: %.2f MB, %.2f MB/s\n",
1385 NumberToHumanString(interval_write_other + interval_write_self).c_str(),
1386 NumberToHumanString(interval_num_keys_written).c_str(),
1387 NumberToHumanString(interval_write_self).c_str(),
1388 static_cast<double>(interval_write_other + interval_write_self) /
1389 std::max(1.0, static_cast<double>(interval_write_self)),
1390 (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
1391 (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB /
1392 std::max(interval_seconds_up, 0.001)),
1393 value->append(buf);
1394
1395 uint64_t interval_write_with_wal =
1396 write_with_wal - db_stats_snapshot_.write_with_wal;
1397 uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced;
1398 uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;
1399
1400 snprintf(buf, sizeof(buf),
1401 "Interval WAL: %s writes, %s syncs, "
1402 "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
1403 NumberToHumanString(interval_write_with_wal).c_str(),
1404 NumberToHumanString(interval_wal_synced).c_str(),
1405 interval_write_with_wal /
1406 std::max(1.0, static_cast<double>(interval_wal_synced)),
1407 interval_wal_bytes / kGB,
1408 interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001));
1409 value->append(buf);
1410
1411 // Stall
1412 AppendHumanMicros(write_stall_micros - db_stats_snapshot_.write_stall_micros,
1413 human_micros, kHumanMicrosLen, true);
1414 snprintf(buf, sizeof(buf), "Interval stall: %s, %.1f percent\n", human_micros,
1415 // 10000 = divide by 1M to get secs, then multiply by 100 for pct
1416 (write_stall_micros - db_stats_snapshot_.write_stall_micros) /
1417 10000.0 / std::max(interval_seconds_up, 0.001));
1418 value->append(buf);
1419
1420 db_stats_snapshot_.seconds_up = seconds_up;
1421 db_stats_snapshot_.ingest_bytes = user_bytes_written;
1422 db_stats_snapshot_.write_other = write_other;
1423 db_stats_snapshot_.write_self = write_self;
1424 db_stats_snapshot_.num_keys_written = num_keys_written;
1425 db_stats_snapshot_.wal_bytes = wal_bytes;
1426 db_stats_snapshot_.wal_synced = wal_synced;
1427 db_stats_snapshot_.write_with_wal = write_with_wal;
1428 db_stats_snapshot_.write_stall_micros = write_stall_micros;
1429 }
1430
1431 /**
1432 * Dump Compaction Level stats to a map of stat name with "compaction." prefix
1433 * to value in double as string. The level in stat name is represented with
1434 * a prefix "Lx" where "x" is the level number. A special level "Sum"
1435 * represents the sum of a stat for all levels.
1436 * The result also contains IO stall counters which keys start with "io_stalls."
1437 * and values represent uint64 encoded as strings.
1438 */
// Export per-level compaction stats (see the block comment above) plus the
// IO stall counters into the given map.
void InternalStats::DumpCFMapStats(
    std::map<std::string, std::string>* cf_stats) {
  const VersionStorageInfo* vstorage = cfd_->current()->storage_info();
  CompactionStats compaction_stats_sum;
  std::map<int, std::map<LevelStatType, double>> levels_stats;
  DumpCFMapStats(vstorage, &levels_stats, &compaction_stats_sum);
  for (auto const& level_ent : levels_stats) {
    // Level -1 is the synthetic "Sum" row aggregating all levels.
    auto level_str =
        level_ent.first == -1 ? "Sum" : "L" + ToString(level_ent.first);
    for (auto const& stat_ent : level_ent.second) {
      auto stat_type = stat_ent.first;
      // Key format: "compaction.<Lx|Sum>.<StatPropertyName>".
      auto key_str =
          "compaction." + level_str + "." +
          InternalStats::compaction_level_stats.at(stat_type).property_name;
      (*cf_stats)[key_str] = std::to_string(stat_ent.second);
    }
  }

  DumpCFMapStatsIOStalls(cf_stats);
}
1459
// Collects per-level compaction stats from `vstorage` into *levels_stats,
// keyed by level number, with entry -1 holding the "Sum" row aggregated over
// all levels. Also accumulates the per-level CompactionStats into
// *compaction_stats_sum. Levels with no files and no recorded compaction
// time are omitted from *levels_stats.
void InternalStats::DumpCFMapStats(
    const VersionStorageInfo* vstorage,
    std::map<int, std::map<LevelStatType, double>>* levels_stats,
    CompactionStats* compaction_stats_sum) {
  assert(vstorage);

  // FIFO compaction only exposes a single compaction score entry; other
  // styles expose one per level except the last.
  int num_levels_to_check =
      (cfd_->ioptions()->compaction_style != kCompactionStyleFIFO)
          ? vstorage->num_levels() - 1
          : 1;

  // Compaction scores are sorted based on its value. Restore them to the
  // level order
  std::vector<double> compaction_score(number_levels_, 0);
  for (int i = 0; i < num_levels_to_check; ++i) {
    compaction_score[vstorage->CompactionScoreLevel(i)] =
        vstorage->CompactionScore(i);
  }
  // Count # of files being compacted for each level
  std::vector<int> files_being_compacted(number_levels_, 0);
  for (int level = 0; level < number_levels_; ++level) {
    for (auto* f : vstorage->LevelFiles(level)) {
      if (f->being_compacted) {
        ++files_being_compacted[level];
      }
    }
  }

  int total_files = 0;
  int total_files_being_compacted = 0;
  double total_file_size = 0;
  uint64_t flush_ingest = cf_stats_value_[BYTES_FLUSHED];
  uint64_t add_file_ingest = cf_stats_value_[BYTES_INGESTED_ADD_FILE];
  uint64_t curr_ingest = flush_ingest + add_file_ingest;
  for (int level = 0; level < number_levels_; level++) {
    int files = vstorage->NumLevelFiles(level);
    total_files += files;
    total_files_being_compacted += files_being_compacted[level];
    // Only emit a row for levels that hold files or have done compaction
    // work; empty, idle levels are skipped entirely.
    if (comp_stats_[level].micros > 0 || files > 0) {
      compaction_stats_sum->Add(comp_stats_[level]);
      total_file_size += vstorage->NumLevelBytes(level);
      uint64_t input_bytes;
      if (level == 0) {
        // L0's input is everything ingested into the CF (flushes plus
        // external file ingestion), not compaction reads.
        input_bytes = curr_ingest;
      } else {
        input_bytes = comp_stats_[level].bytes_read_non_output_levels +
                      comp_stats_[level].bytes_read_blob;
      }
      // Write amplification: bytes written (SST + blob) per input byte;
      // defined as 0 when there was no input at all.
      double w_amp =
          (input_bytes == 0)
              ? 0.0
              : static_cast<double>(comp_stats_[level].bytes_written +
                                    comp_stats_[level].bytes_written_blob) /
                    input_bytes;
      std::map<LevelStatType, double> level_stats;
      PrepareLevelStats(&level_stats, files, files_being_compacted[level],
                        static_cast<double>(vstorage->NumLevelBytes(level)),
                        compaction_score[level], w_amp, comp_stats_[level]);
      (*levels_stats)[level] = level_stats;
    }
  }
  // Cumulative summary
  // The +1 guards against division by zero when nothing has been ingested.
  double w_amp = (compaction_stats_sum->bytes_written +
                  compaction_stats_sum->bytes_written_blob) /
                 static_cast<double>(curr_ingest + 1);
  // Stats summary across levels
  std::map<LevelStatType, double> sum_stats;
  PrepareLevelStats(&sum_stats, total_files, total_files_being_compacted,
                    total_file_size, 0, w_amp, *compaction_stats_sum);
  (*levels_stats)[-1] = sum_stats;  // -1 is for the Sum level
}
1531
DumpCFMapStatsByPriority(std::map<int,std::map<LevelStatType,double>> * priorities_stats)1532 void InternalStats::DumpCFMapStatsByPriority(
1533 std::map<int, std::map<LevelStatType, double>>* priorities_stats) {
1534 for (size_t priority = 0; priority < comp_stats_by_pri_.size(); priority++) {
1535 if (comp_stats_by_pri_[priority].micros > 0) {
1536 std::map<LevelStatType, double> priority_stats;
1537 PrepareLevelStats(&priority_stats, 0 /* num_files */,
1538 0 /* being_compacted */, 0 /* total_file_size */,
1539 0 /* compaction_score */, 0 /* w_amp */,
1540 comp_stats_by_pri_[priority]);
1541 (*priorities_stats)[static_cast<int>(priority)] = priority_stats;
1542 }
1543 }
1544 }
1545
DumpCFMapStatsIOStalls(std::map<std::string,std::string> * cf_stats)1546 void InternalStats::DumpCFMapStatsIOStalls(
1547 std::map<std::string, std::string>* cf_stats) {
1548 (*cf_stats)["io_stalls.level0_slowdown"] =
1549 std::to_string(cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS]);
1550 (*cf_stats)["io_stalls.level0_slowdown_with_compaction"] =
1551 std::to_string(cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS]);
1552 (*cf_stats)["io_stalls.level0_numfiles"] =
1553 std::to_string(cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS]);
1554 (*cf_stats)["io_stalls.level0_numfiles_with_compaction"] =
1555 std::to_string(cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_STOPS]);
1556 (*cf_stats)["io_stalls.stop_for_pending_compaction_bytes"] =
1557 std::to_string(cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS]);
1558 (*cf_stats)["io_stalls.slowdown_for_pending_compaction_bytes"] =
1559 std::to_string(cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS]);
1560 (*cf_stats)["io_stalls.memtable_compaction"] =
1561 std::to_string(cf_stats_count_[MEMTABLE_LIMIT_STOPS]);
1562 (*cf_stats)["io_stalls.memtable_slowdown"] =
1563 std::to_string(cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS]);
1564
1565 uint64_t total_stop = cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS] +
1566 cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS] +
1567 cf_stats_count_[MEMTABLE_LIMIT_STOPS];
1568
1569 uint64_t total_slowdown =
1570 cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS] +
1571 cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS] +
1572 cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS];
1573
1574 (*cf_stats)["io_stalls.total_stop"] = std::to_string(total_stop);
1575 (*cf_stats)["io_stalls.total_slowdown"] = std::to_string(total_slowdown);
1576 }
1577
// Appends the full human-readable stats dump for this column family to
// *value: the level/priority stat tables and summaries first, then the
// per-level file read latency histograms.
void InternalStats::DumpCFStats(std::string* value) {
  DumpCFStatsNoFileHistogram(value);
  DumpCFFileHistogram(value);
}
1582
// Appends the textual column-family stats to *value: the per-level stats
// table, the cumulative/interval summary rows, the per-priority table,
// blob file totals, ingest/flush/AddFile and compaction summaries, stall
// counters, and (if available) cached cache-entry stats. Updates
// cf_stats_snapshot_ at the end so the next call reports interval deltas
// relative to this one. File read latency histograms are produced
// separately by DumpCFFileHistogram().
void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
  char buf[2000];
  // Per-ColumnFamily stats
  PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Level");
  value->append(buf);

  // Print stats for each level
  const VersionStorageInfo* vstorage = cfd_->current()->storage_info();
  std::map<int, std::map<LevelStatType, double>> levels_stats;
  CompactionStats compaction_stats_sum;
  DumpCFMapStats(vstorage, &levels_stats, &compaction_stats_sum);
  for (int l = 0; l < number_levels_; ++l) {
    // Levels absent from levels_stats (no files, no compactions) are skipped.
    if (levels_stats.find(l) != levels_stats.end()) {
      PrintLevelStats(buf, sizeof(buf), "L" + ToString(l), levels_stats[l]);
      value->append(buf);
    }
  }

  // Print sum of level stats
  PrintLevelStats(buf, sizeof(buf), "Sum", levels_stats[-1]);
  value->append(buf);

  uint64_t flush_ingest = cf_stats_value_[BYTES_FLUSHED];
  uint64_t add_file_ingest = cf_stats_value_[BYTES_INGESTED_ADD_FILE];
  uint64_t ingest_files_addfile = cf_stats_value_[INGESTED_NUM_FILES_TOTAL];
  uint64_t ingest_l0_files_addfile =
      cf_stats_value_[INGESTED_LEVEL0_NUM_FILES_TOTAL];
  uint64_t ingest_keys_addfile = cf_stats_value_[INGESTED_NUM_KEYS_TOTAL];
  // Cumulative summary
  uint64_t total_stall_count =
      cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS] +
      cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS] +
      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS] +
      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS] +
      cf_stats_count_[MEMTABLE_LIMIT_STOPS] +
      cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS];
  // Interval summary: deltas against the snapshot taken by the previous call.
  uint64_t interval_flush_ingest =
      flush_ingest - cf_stats_snapshot_.ingest_bytes_flush;
  uint64_t interval_add_file_inget =
      add_file_ingest - cf_stats_snapshot_.ingest_bytes_addfile;
  // The +1 keeps the write-amp division below safe when nothing was ingested.
  uint64_t interval_ingest =
      interval_flush_ingest + interval_add_file_inget + 1;
  CompactionStats interval_stats(compaction_stats_sum);
  interval_stats.Subtract(cf_stats_snapshot_.comp_stats);
  double w_amp =
      (interval_stats.bytes_written + interval_stats.bytes_written_blob) /
      static_cast<double>(interval_ingest);
  PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, w_amp, interval_stats);
  value->append(buf);

  // Per-priority compaction stats table (only priorities that did work).
  PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Priority");
  value->append(buf);
  std::map<int, std::map<LevelStatType, double>> priorities_stats;
  DumpCFMapStatsByPriority(&priorities_stats);
  for (size_t priority = 0; priority < comp_stats_by_pri_.size(); ++priority) {
    if (priorities_stats.find(static_cast<int>(priority)) !=
        priorities_stats.end()) {
      PrintLevelStats(
          buf, sizeof(buf),
          Env::PriorityToString(static_cast<Env::Priority>(priority)),
          priorities_stats[static_cast<int>(priority)]);
      value->append(buf);
    }
  }

  snprintf(buf, sizeof(buf),
           "\nBlob file count: %" ROCKSDB_PRIszt ", total size: %.1f GB\n\n",
           vstorage->GetBlobFiles().size(),
           vstorage->GetTotalBlobFileSize() / kGB);
  value->append(buf);

  uint64_t now_micros = clock_->NowMicros();
  double seconds_up = (now_micros - started_at_) / kMicrosInSec;
  double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up;
  snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n",
           seconds_up, interval_seconds_up);
  value->append(buf);
  snprintf(buf, sizeof(buf), "Flush(GB): cumulative %.3f, interval %.3f\n",
           flush_ingest / kGB, interval_flush_ingest / kGB);
  value->append(buf);
  snprintf(buf, sizeof(buf), "AddFile(GB): cumulative %.3f, interval %.3f\n",
           add_file_ingest / kGB, interval_add_file_inget / kGB);
  value->append(buf);

  uint64_t interval_ingest_files_addfile =
      ingest_files_addfile - cf_stats_snapshot_.ingest_files_addfile;
  snprintf(buf, sizeof(buf),
           "AddFile(Total Files): cumulative %" PRIu64 ", interval %" PRIu64
           "\n",
           ingest_files_addfile, interval_ingest_files_addfile);
  value->append(buf);

  uint64_t interval_ingest_l0_files_addfile =
      ingest_l0_files_addfile - cf_stats_snapshot_.ingest_l0_files_addfile;
  snprintf(buf, sizeof(buf),
           "AddFile(L0 Files): cumulative %" PRIu64 ", interval %" PRIu64 "\n",
           ingest_l0_files_addfile, interval_ingest_l0_files_addfile);
  value->append(buf);

  uint64_t interval_ingest_keys_addfile =
      ingest_keys_addfile - cf_stats_snapshot_.ingest_keys_addfile;
  snprintf(buf, sizeof(buf),
           "AddFile(Keys): cumulative %" PRIu64 ", interval %" PRIu64 "\n",
           ingest_keys_addfile, interval_ingest_keys_addfile);
  value->append(buf);

  // Compact
  // Totals are re-derived from comp_stats_ on every call; the snapshot
  // fields below are only used to compute the interval deltas.
  uint64_t compact_bytes_read = 0;
  uint64_t compact_bytes_write = 0;
  uint64_t compact_micros = 0;
  for (int level = 0; level < number_levels_; level++) {
    compact_bytes_read += comp_stats_[level].bytes_read_output_level +
                          comp_stats_[level].bytes_read_non_output_levels +
                          comp_stats_[level].bytes_read_blob;
    compact_bytes_write += comp_stats_[level].bytes_written +
                           comp_stats_[level].bytes_written_blob;
    compact_micros += comp_stats_[level].micros;
  }

  snprintf(buf, sizeof(buf),
           "Cumulative compaction: %.2f GB write, %.2f MB/s write, "
           "%.2f GB read, %.2f MB/s read, %.1f seconds\n",
           compact_bytes_write / kGB,
           compact_bytes_write / kMB / std::max(seconds_up, 0.001),
           compact_bytes_read / kGB,
           compact_bytes_read / kMB / std::max(seconds_up, 0.001),
           compact_micros / kMicrosInSec);
  value->append(buf);

  // Compaction interval
  uint64_t interval_compact_bytes_write =
      compact_bytes_write - cf_stats_snapshot_.compact_bytes_write;
  uint64_t interval_compact_bytes_read =
      compact_bytes_read - cf_stats_snapshot_.compact_bytes_read;
  uint64_t interval_compact_micros =
      compact_micros - cf_stats_snapshot_.compact_micros;

  snprintf(
      buf, sizeof(buf),
      "Interval compaction: %.2f GB write, %.2f MB/s write, "
      "%.2f GB read, %.2f MB/s read, %.1f seconds\n",
      interval_compact_bytes_write / kGB,
      interval_compact_bytes_write / kMB / std::max(interval_seconds_up, 0.001),
      interval_compact_bytes_read / kGB,
      interval_compact_bytes_read / kMB / std::max(interval_seconds_up, 0.001),
      interval_compact_micros / kMicrosInSec);
  value->append(buf);
  cf_stats_snapshot_.compact_bytes_write = compact_bytes_write;
  cf_stats_snapshot_.compact_bytes_read = compact_bytes_read;
  cf_stats_snapshot_.compact_micros = compact_micros;

  snprintf(buf, sizeof(buf),
           "Stalls(count): %" PRIu64
           " level0_slowdown, "
           "%" PRIu64
           " level0_slowdown_with_compaction, "
           "%" PRIu64
           " level0_numfiles, "
           "%" PRIu64
           " level0_numfiles_with_compaction, "
           "%" PRIu64
           " stop for pending_compaction_bytes, "
           "%" PRIu64
           " slowdown for pending_compaction_bytes, "
           "%" PRIu64
           " memtable_compaction, "
           "%" PRIu64
           " memtable_slowdown, "
           "interval %" PRIu64 " total count\n",
           cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS],
           cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS],
           cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS],
           cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_STOPS],
           cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS],
           cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS],
           cf_stats_count_[MEMTABLE_LIMIT_STOPS],
           cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS],
           total_stall_count - cf_stats_snapshot_.stall_count);
  value->append(buf);

  // Record this call's cumulative values so the next call can report deltas.
  cf_stats_snapshot_.seconds_up = seconds_up;
  cf_stats_snapshot_.ingest_bytes_flush = flush_ingest;
  cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest;
  cf_stats_snapshot_.ingest_files_addfile = ingest_files_addfile;
  cf_stats_snapshot_.ingest_l0_files_addfile = ingest_l0_files_addfile;
  cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile;
  cf_stats_snapshot_.comp_stats = compaction_stats_sum;
  cf_stats_snapshot_.stall_count = total_stall_count;

  // Do not gather cache entry stats during CFStats because DB
  // mutex is held. Only dump last cached collection (rely on DB
  // periodic stats dump to update)
  if (cache_entry_stats_collector_) {
    CacheEntryRoleStats stats;
    // thread safe
    cache_entry_stats_collector_->GetStats(&stats);

    constexpr uint64_t kDayInMicros = uint64_t{86400} * 1000000U;

    // Skip if stats are extremely old (> 1 day, incl not yet populated)
    if (now_micros - stats.last_end_time_micros_ < kDayInMicros) {
      value->append(stats.ToString(clock_));
    }
  }
}
1789
DumpCFFileHistogram(std::string * value)1790 void InternalStats::DumpCFFileHistogram(std::string* value) {
1791 assert(value);
1792 assert(cfd_);
1793
1794 std::ostringstream oss;
1795 oss << "\n** File Read Latency Histogram By Level [" << cfd_->GetName()
1796 << "] **\n";
1797
1798 for (int level = 0; level < number_levels_; level++) {
1799 if (!file_read_latency_[level].Empty()) {
1800 oss << "** Level " << level << " read latency histogram (micros):\n"
1801 << file_read_latency_[level].ToString() << '\n';
1802 }
1803 }
1804
1805 if (!blob_file_read_latency_.Empty()) {
1806 oss << "** Blob file read latency histogram (micros):\n"
1807 << blob_file_read_latency_.ToString() << '\n';
1808 }
1809
1810 value->append(oss.str());
1811 }
1812
1813 #else
1814
// ROCKSDB_LITE stub: DB properties are not supported in lite builds, so
// every property lookup returns nullptr (property not found).
const DBPropertyInfo* GetPropertyInfo(const Slice& /*property*/) {
  return nullptr;
}
1818
1819 #endif // !ROCKSDB_LITE
1820
1821 } // namespace ROCKSDB_NAMESPACE
1822