1 /* 2 * Copyright (c) Facebook, Inc. and its affiliates. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <fb303/Timeseries.h> 20 #include <folly/stats/Histogram.h> 21 #include <folly/stats/TimeseriesHistogram.h> 22 23 namespace facebook { 24 namespace fb303 { 25 26 /** 27 * TimeseriesHistogram is a class which allows you to track data distributions 28 * as they change over time. 29 * 30 * Specifically, it is a bucketed histogram with different value ranges 31 * assigned to each bucket. Within each bucket is a MultiLevelTimeSeries as 32 * from 'common/stats/Timeseries.h'. This means that each bucket contains a 33 * different set of data for different historical time periods, and one can 34 * query data distributions over different trailing time windows. 35 * 36 * For example, this can answer questions: "What is the data distribution over 37 * the last minute? Over the last 10 minutes? Since I last cleared this 38 * histogram?" 39 * 40 * The class can also estimate percentiles and answer questions like: 41 * 42 * "What was the 99th percentile data value over the last 10 minutes?" 43 * 44 * However, note that depending on the size of your buckets and the smoothness 45 * of your data distribution, the estimate may be way off from the actual 46 * value. In particular, if the given percentile falls outside of the bucket 47 * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is 48 * around 115,000) this estimate may be very wrong. 49 * 50 * The memory usage for a typical histogram is roughly 3k * (# of buckets). All 51 * insertion operations are amortized O(1), and all queries are O(# of buckets). 52 */ 53 54 template <class T> 55 class TimeseriesHistogram : public folly::TimeseriesHistogram< 56 T, 57 folly::LegacyStatsClock<std::chrono::seconds>, 58 MultiLevelTimeSeries<T>> { 59 public: 60 // values to be inserted into container 61 using ValueType = T; 62 // the container type we use internally for each bucket 63 using ContainerType = MultiLevelTimeSeries<T>; 64 // The parent type 65 using BaseType = folly::TimeseriesHistogram< 66 T, 67 folly::LegacyStatsClock<std::chrono::seconds>, 68 MultiLevelTimeSeries<T>>; 69 // The time type. 70 using TimeType = typename BaseType::Duration; 71 72 /** 73 * Creates a TimeSeries histogram and initializes the bucketing and levels. 74 * 75 * The buckets are created by chopping the range [min, max) into pieces 76 * of size bucketSize, with the last bucket being potentially shorter. Two 77 * additional buckets are always created -- the "under" bucket for the range 78 * (-inf, min) and the "over" bucket for the range [max, +inf). 79 * 80 * By default, the histogram will use levels of 60/600/3600/alltime (seconds), 81 * but his can be overridden by passing in an already-constructed multilevel 82 * timeseries with the desired level durations. 83 * 84 * @param bucketSize the width of each bucket 85 * @param min the smallest value for the bucket range. 86 * @param max the largest value for the bucket range 87 * @param defaultContainer a pre-initialized timeseries with the desired 88 * number of levels and their durations. 89 */ 90 TimeseriesHistogram( 91 ValueType bucketSize, 92 ValueType min, 93 ValueType max, 94 const ContainerType& defaultContainer = 95 MinuteTenMinuteHourTimeSeries<T>()) BaseType(bucketSize,min,max,defaultContainer)96 : BaseType(bucketSize, min, max, defaultContainer) {} 97 98 /** 99 * Updates every underlying timeseries object with the given timestamp. You 100 * must call this directly before querying to ensure that the data in all 101 * buckets is decayed properly. 102 */ update(time_t now)103 void update(time_t now) { 104 BaseType::update(std::chrono::seconds(now)); 105 } 106 107 // Inherit the folly::TimeseriesHistogram versions of addValue() and 108 // addValues() too 109 using BaseType::addValue; 110 using BaseType::addValues; 111 112 /** Adds a value into the histogram with timestamp 'now' */ addValue(time_t now,const ValueType & value)113 void addValue(time_t now, const ValueType& value) { 114 BaseType::addValue(std::chrono::seconds(now), value); 115 } 116 117 /** Adds a value the given number of times with timestamp 'now' */ addValue(time_t now,const ValueType & value,int64_t times)118 void addValue(time_t now, const ValueType& value, int64_t times) { 119 BaseType::addValue(std::chrono::seconds(now), value, times); 120 } 121 122 /* 123 * Adds all of the values from the specified histogram. 124 * 125 * All of the values will be added to the current time-slot. 126 * 127 * One use of this is for thread-local caching of frequently updated 128 * histogram data. For example, each thread can store a thread-local 129 * Histogram that is updated frequently, and only add it to the global 130 * TimeseriesHistogram once a second. 131 */ addValues(time_t now,const folly::Histogram<ValueType> & values)132 void addValues(time_t now, const folly::Histogram<ValueType>& values) { 133 BaseType::addValues(std::chrono::seconds(now), values); 134 } 135 136 /** Prints out the whole histogram timeseries in human-readable form */ 137 std::string debugString() const; 138 }; 139 140 } // namespace fb303 141 } // namespace facebook 142 143 #include <fb303/TimeseriesHistogram-inl.h> 144