1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "tracing/tracing/value/histogram.h"
6
#include <cassert>
#include <cmath>
#include <cstdint>
#include <map>
#include <random>
#include <utility>
12
13 #include "tracing/tracing/value/running_statistics.h"
14
15 namespace catapult {
16
17 namespace proto = tracing::tracing::proto;
18
// Lookup table from the unit name used in JSON to the corresponding proto
// Unit enum value. UnitFromJsonUnit() scans this table linearly after
// dropping everything from the first '_' in the JSON unit string, so the keys
// here are bare unit names without any "_..." suffix.
static constexpr std::pair<const char*, proto::Unit> kJsonUnitToProtoUnit[] = {
    {"ms", proto::MS},
    {"msBestFitFormat", proto::MS_BEST_FIT_FORMAT},
    {"tsMs", proto::TS_MS},
    {"n%", proto::N_PERCENT},
    {"sizeInBytes", proto::SIZE_IN_BYTES},
    {"bytesPerSecond", proto::BYTES_PER_SECOND},
    {"J", proto::J},
    {"W", proto::W},
    {"A", proto::A},
    {"Ah", proto::AH},
    {"V", proto::V},
    {"Hz", proto::HERTZ},
    {"unitless", proto::UNITLESS},
    {"count", proto::COUNT},
    {"sigma", proto::SIGMA}};
35
// Initial value of HistogramBuilder::max_num_sample_values_, i.e. the default
// cap on how many sample values are retained. The default is
// (number of bins) * 10, and a single bin is assumed here.
static constexpr int kDefaultNumSampleValues = 10;
38
39 class HistogramBuilder::Resampler {
40 public:
Resampler()41 Resampler() : distribution_(0.0, 1.0) {}
42
43 // When processing a stream of samples, call this method for each new sample
44 // in order to decide whether to keep it in |samples|.
45 // Modifies |samples| in-place such that its length never exceeds
46 // |max_num_samples|. After |stream_length| samples have been processed, each
47 // sample has equal probability of being retained in |samples|. The order of
48 // samples is not preserved after |stream_length| exceeds |num_samples|.
UniformlySampleStream(std::vector<double> * samples,uint32_t stream_length,double new_element,uint32_t max_num_samples)49 void UniformlySampleStream(std::vector<double>* samples,
50 uint32_t stream_length,
51 double new_element,
52 uint32_t max_num_samples) {
53 assert(max_num_samples > 0);
54
55 if (stream_length <= max_num_samples) {
56 if (samples->size() >= stream_length) {
57 (*samples)[stream_length - 1] = new_element;
58 } else {
59 samples->push_back(new_element);
60 }
61 return;
62 }
63 double prob_keep = static_cast<double>(max_num_samples) / stream_length;
64 if (random() > prob_keep) {
65 // Reject new sample.
66 return;
67 }
68
69 // Replace a random element.
70 int victim = static_cast<int>(std::floor(random() * max_num_samples));
71 (*samples)[victim] = new_element;
72 }
73
74 private:
random()75 double random() { return distribution_(generator_); }
76
77 std::default_random_engine generator_;
78 std::uniform_real_distribution<double> distribution_;
79 };
80
HistogramBuilder(const std::string & name,proto::UnitAndDirection unit)81 HistogramBuilder::HistogramBuilder(
82 const std::string& name, proto::UnitAndDirection unit)
83 : resampler_(std::make_unique<Resampler>()),
84 running_statistics_(std::make_unique<RunningStatistics>()),
85 name_(name),
86 unit_(unit),
87 num_nans_(0) {
88 max_num_sample_values_ = kDefaultNumSampleValues;
89 }
90
// Defaulted here, in the file that defines HistogramBuilder::Resampler.
// NOTE(review): |resampler_| looks like a std::unique_ptr<Resampler> (see the
// constructor); if so, the destructor has to be defined where Resampler is a
// complete type -- confirm against the header.
HistogramBuilder::~HistogramBuilder() = default;
92
AddSample(double value)93 void HistogramBuilder::AddSample(double value) {
94 if (std::isnan(value)) {
95 num_nans_++;
96 } else {
97 running_statistics_->Add(value);
98 int num_values = running_statistics_->count();
99 resampler_->UniformlySampleStream(&sample_values_, num_nans_ + num_values,
100 value, max_num_sample_values_);
101 }
102 }
103
AddDiagnostic(const std::string & key,tracing::tracing::proto::Diagnostic diagnostic)104 void HistogramBuilder::AddDiagnostic(
105 const std::string& key,
106 tracing::tracing::proto::Diagnostic diagnostic) {
107 diagnostics_[key] = diagnostic;
108 }
109
SetSummaryOptions(proto::SummaryOptions options)110 void HistogramBuilder::SetSummaryOptions(proto::SummaryOptions options) {
111 options_ = options;
112 }
113
toProto() const114 std::unique_ptr<proto::Histogram> HistogramBuilder::toProto() const {
115 auto histogram = std::make_unique<proto::Histogram>();
116 histogram->set_name(name_);
117 *histogram->mutable_unit() = unit_;
118 histogram->set_description(description_);
119
120 proto::DiagnosticMap* diagnostics = histogram->mutable_diagnostics();
121 for (const auto& pair : diagnostics_) {
122 auto* diagnostic_map = diagnostics->mutable_diagnostic_map();
123 (*diagnostic_map)[pair.first] = pair.second;
124 }
125
126 for (double sample: sample_values_) {
127 histogram->add_sample_values(sample);
128 }
129
130 histogram->set_max_num_sample_values(max_num_sample_values_);
131
132 histogram->set_num_nans(num_nans_);
133
134 proto::RunningStatistics* running = histogram->mutable_running();
135 running->set_count(running_statistics_->count());
136 running->set_max(running_statistics_->max());
137 if (running_statistics_->meanlogs_valid()) {
138 running->set_meanlogs(running_statistics_->meanlogs());
139 }
140 running->set_mean(running_statistics_->mean());
141 running->set_min(running_statistics_->min());
142 running->set_sum(running_statistics_->sum());
143 running->set_variance(running_statistics_->variance());
144
145 proto::SummaryOptions* options = histogram->mutable_summary_options();
146 *options = options_;
147
148 return histogram;
149 }
150
UnitFromJsonUnit(std::string unit)151 tracing::tracing::proto::Unit UnitFromJsonUnit(std::string unit) {
152 unit.erase(std::find(unit.begin(), unit.end(), '_'), unit.end());
153
154 for (const auto& pair : kJsonUnitToProtoUnit) {
155 if (unit == pair.first) {
156 return pair.second;
157 }
158 }
159
160 return proto::UNITLESS;
161 }
162
163 } // namespace catapult
164