1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <fb303/Timeseries.h>
20 #include <folly/stats/Histogram.h>
21 #include <folly/stats/TimeseriesHistogram.h>
22 
23 namespace facebook {
24 namespace fb303 {
25 
26 /**
27  * TimeseriesHistogram is a class which allows you to track data distributions
28  * as they change over time.
29  *
30  * Specifically, it is a bucketed histogram with different value ranges
 * assigned to each bucket.  Within each bucket is a MultiLevelTimeSeries as
 * from 'fb303/Timeseries.h'. This means that each bucket contains a
33  * different set of data for different historical time periods, and one can
34  * query data distributions over different trailing time windows.
35  *
36  * For example, this can answer questions: "What is the data distribution over
37  * the last minute? Over the last 10 minutes?  Since I last cleared this
38  * histogram?"
39  *
40  * The class can also estimate percentiles and answer questions like:
41  *
42  *   "What was the 99th percentile data value over the last 10 minutes?"
43  *
44  * However, note that depending on the size of your buckets and the smoothness
45  * of your data distribution, the estimate may be way off from the actual
 * value.  In particular, if the given percentile falls outside of the bucket
 * range (e.g. your buckets range in 0 - 100,000 but the 99th percentile is
 * around 115,000) this estimate may be very wrong.
49  *
50  * The memory usage for a typical histogram is roughly 3k * (# of buckets).  All
51  * insertion operations are amortized O(1), and all queries are O(# of buckets).
52  */
53 
54 template <class T>
55 class TimeseriesHistogram : public folly::TimeseriesHistogram<
56                                 T,
57                                 folly::LegacyStatsClock<std::chrono::seconds>,
58                                 MultiLevelTimeSeries<T>> {
59  public:
60   // values to be inserted into container
61   using ValueType = T;
62   // the container type we use internally for each bucket
63   using ContainerType = MultiLevelTimeSeries<T>;
64   // The parent type
65   using BaseType = folly::TimeseriesHistogram<
66       T,
67       folly::LegacyStatsClock<std::chrono::seconds>,
68       MultiLevelTimeSeries<T>>;
69   // The time type.
70   using TimeType = typename BaseType::Duration;
71 
72   /**
73    * Creates a TimeSeries histogram and initializes the bucketing and levels.
74    *
75    * The buckets are created by chopping the range [min, max) into pieces
76    * of size bucketSize, with the last bucket being potentially shorter.  Two
77    * additional buckets are always created -- the "under" bucket for the range
78    * (-inf, min) and the "over" bucket for the range [max, +inf).
79    *
80    * By default, the histogram will use levels of 60/600/3600/alltime (seconds),
81    * but his can be overridden by passing in an already-constructed multilevel
82    * timeseries with the desired level durations.
83    *
84    * @param bucketSize the width of each bucket
85    * @param min the smallest value for the bucket range.
86    * @param max the largest value for the bucket range
87    * @param defaultContainer a pre-initialized timeseries with the desired
88    *                         number of levels and their durations.
89    */
90   TimeseriesHistogram(
91       ValueType bucketSize,
92       ValueType min,
93       ValueType max,
94       const ContainerType& defaultContainer =
95           MinuteTenMinuteHourTimeSeries<T>())
BaseType(bucketSize,min,max,defaultContainer)96       : BaseType(bucketSize, min, max, defaultContainer) {}
97 
98   /**
99    * Updates every underlying timeseries object with the given timestamp. You
100    * must call this directly before querying to ensure that the data in all
101    * buckets is decayed properly.
102    */
update(time_t now)103   void update(time_t now) {
104     BaseType::update(std::chrono::seconds(now));
105   }
106 
107   // Inherit the folly::TimeseriesHistogram versions of addValue() and
108   // addValues() too
109   using BaseType::addValue;
110   using BaseType::addValues;
111 
112   /** Adds a value into the histogram with timestamp 'now' */
addValue(time_t now,const ValueType & value)113   void addValue(time_t now, const ValueType& value) {
114     BaseType::addValue(std::chrono::seconds(now), value);
115   }
116 
117   /** Adds a value the given number of times with timestamp 'now' */
addValue(time_t now,const ValueType & value,int64_t times)118   void addValue(time_t now, const ValueType& value, int64_t times) {
119     BaseType::addValue(std::chrono::seconds(now), value, times);
120   }
121 
122   /*
123    * Adds all of the values from the specified histogram.
124    *
125    * All of the values will be added to the current time-slot.
126    *
127    * One use of this is for thread-local caching of frequently updated
128    * histogram data.  For example, each thread can store a thread-local
129    * Histogram that is updated frequently, and only add it to the global
130    * TimeseriesHistogram once a second.
131    */
addValues(time_t now,const folly::Histogram<ValueType> & values)132   void addValues(time_t now, const folly::Histogram<ValueType>& values) {
133     BaseType::addValues(std::chrono::seconds(now), values);
134   }
135 
136   /** Prints out the whole histogram timeseries in human-readable form */
137   std::string debugString() const;
138 };
139 
140 } // namespace fb303
141 } // namespace facebook
142 
143 #include <fb303/TimeseriesHistogram-inl.h>
144