1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "tracing/tracing/value/histogram.h"
6 
7 #include <cassert>
8 #include <cmath>
9 #include <cstdint>
10 #include <map>
11 #include <random>
12 
13 #include "tracing/tracing/value/running_statistics.h"
14 
15 namespace catapult {
16 
17 namespace proto = tracing::tracing::proto;
18 
19 static constexpr std::pair<const char*, proto::Unit> kJsonUnitToProtoUnit[] = {
20     {"ms", proto::MS},
21     {"msBestFitFormat", proto::MS_BEST_FIT_FORMAT},
22     {"tsMs", proto::TS_MS},
23     {"n%", proto::N_PERCENT},
24     {"sizeInBytes", proto::SIZE_IN_BYTES},
25     {"bytesPerSecond", proto::BYTES_PER_SECOND},
26     {"J", proto::J},
27     {"W", proto::W},
28     {"A", proto::A},
29     {"Ah", proto::AH},
30     {"V", proto::V},
31     {"Hz", proto::HERTZ},
32     {"unitless", proto::UNITLESS},
33     {"count", proto::COUNT},
34     {"sigma", proto::SIGMA}};
35 
// Default cap on the number of retained sample values. The default is
// (number of bins * 10); a single bin is assumed here, which gives 10.
static constexpr int kDefaultNumSampleValues = 10;
38 
39 class HistogramBuilder::Resampler {
40  public:
Resampler()41   Resampler() : distribution_(0.0, 1.0) {}
42 
43   // When processing a stream of samples, call this method for each new sample
44   // in order to decide whether to keep it in |samples|.
45   // Modifies |samples| in-place such that its length never exceeds
46   // |max_num_samples|. After |stream_length| samples have been processed, each
47   // sample has equal probability of being retained in |samples|. The order of
48   // samples is not preserved after |stream_length| exceeds |num_samples|.
UniformlySampleStream(std::vector<double> * samples,uint32_t stream_length,double new_element,uint32_t max_num_samples)49   void UniformlySampleStream(std::vector<double>* samples,
50                              uint32_t stream_length,
51                              double new_element,
52                              uint32_t max_num_samples) {
53     assert(max_num_samples > 0);
54 
55     if (stream_length <= max_num_samples) {
56       if (samples->size() >= stream_length) {
57         (*samples)[stream_length - 1] = new_element;
58       } else {
59         samples->push_back(new_element);
60       }
61       return;
62     }
63     double prob_keep = static_cast<double>(max_num_samples) / stream_length;
64     if (random() > prob_keep) {
65       // Reject new sample.
66       return;
67     }
68 
69     // Replace a random element.
70     int victim = static_cast<int>(std::floor(random() * max_num_samples));
71     (*samples)[victim] = new_element;
72   }
73 
74  private:
random()75   double random() { return distribution_(generator_); }
76 
77   std::default_random_engine generator_;
78   std::uniform_real_distribution<double> distribution_;
79 };
80 
HistogramBuilder(const std::string & name,proto::UnitAndDirection unit)81 HistogramBuilder::HistogramBuilder(
82     const std::string& name, proto::UnitAndDirection unit)
83     : resampler_(std::make_unique<Resampler>()),
84       running_statistics_(std::make_unique<RunningStatistics>()),
85       name_(name),
86       unit_(unit),
87       num_nans_(0) {
88   max_num_sample_values_ = kDefaultNumSampleValues;
89 }
90 
// Defaulted out of line, in the same file that defines Resampler.
// NOTE(review): presumably required because |resampler_| is a
// std::unique_ptr to a type that is only complete in this file - confirm
// against the header.
HistogramBuilder::~HistogramBuilder() = default;
92 
AddSample(double value)93 void HistogramBuilder::AddSample(double value) {
94   if (std::isnan(value)) {
95     num_nans_++;
96   } else {
97     running_statistics_->Add(value);
98     int num_values = running_statistics_->count();
99     resampler_->UniformlySampleStream(&sample_values_, num_nans_ + num_values,
100                                       value, max_num_sample_values_);
101   }
102 }
103 
AddDiagnostic(const std::string & key,tracing::tracing::proto::Diagnostic diagnostic)104 void HistogramBuilder::AddDiagnostic(
105     const std::string& key,
106     tracing::tracing::proto::Diagnostic diagnostic) {
107   diagnostics_[key] = diagnostic;
108 }
109 
// Replaces the stored summary options with |options|. The stored value is
// copied into the histogram produced by toProto().
void HistogramBuilder::SetSummaryOptions(proto::SummaryOptions options) {
  options_ = options;
}
113 
toProto() const114 std::unique_ptr<proto::Histogram> HistogramBuilder::toProto() const {
115   auto histogram = std::make_unique<proto::Histogram>();
116   histogram->set_name(name_);
117   *histogram->mutable_unit() = unit_;
118   histogram->set_description(description_);
119 
120   proto::DiagnosticMap* diagnostics = histogram->mutable_diagnostics();
121   for (const auto& pair : diagnostics_) {
122     auto* diagnostic_map = diagnostics->mutable_diagnostic_map();
123     (*diagnostic_map)[pair.first] = pair.second;
124   }
125 
126   for (double sample: sample_values_) {
127     histogram->add_sample_values(sample);
128   }
129 
130   histogram->set_max_num_sample_values(max_num_sample_values_);
131 
132   histogram->set_num_nans(num_nans_);
133 
134   proto::RunningStatistics* running = histogram->mutable_running();
135   running->set_count(running_statistics_->count());
136   running->set_max(running_statistics_->max());
137   if (running_statistics_->meanlogs_valid()) {
138     running->set_meanlogs(running_statistics_->meanlogs());
139   }
140   running->set_mean(running_statistics_->mean());
141   running->set_min(running_statistics_->min());
142   running->set_sum(running_statistics_->sum());
143   running->set_variance(running_statistics_->variance());
144 
145   proto::SummaryOptions* options = histogram->mutable_summary_options();
146   *options = options_;
147 
148   return histogram;
149 }
150 
UnitFromJsonUnit(std::string unit)151 tracing::tracing::proto::Unit UnitFromJsonUnit(std::string unit) {
152   unit.erase(std::find(unit.begin(), unit.end(), '_'), unit.end());
153 
154   for (const auto& pair : kJsonUnitToProtoUnit) {
155     if (unit == pair.first) {
156       return pair.second;
157     }
158   }
159 
160   return proto::UNITLESS;
161 }
162 
163 }  // namespace catapult
164