1 // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2 // Copyright (c) 2008, Google Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // ---
32 // All Rights Reserved.
33 //
34 // Author: Daniel Ford
35 
36 #ifndef TCMALLOC_SAMPLER_H_
37 #define TCMALLOC_SAMPLER_H_
38 
39 #include "config.h"
40 #include <stddef.h>                     // for size_t
41 #ifdef HAVE_STDINT_H
42 #include <stdint.h>                     // for uint64_t, uint32_t, int32_t
43 #endif
44 #include <string.h>                     // for memcpy
45 #include "base/basictypes.h"  // for ASSERT
46 #include "internal_logging.h"  // for ASSERT
47 
48 namespace tcmalloc {
49 
50 //-------------------------------------------------------------------
51 // Sampler to decide when to create a sample trace for an allocation
// Not thread safe: Each thread should have its own sampler object.
53 // Caller must use external synchronization if used
54 // from multiple threads.
55 //
56 // With 512K average sample step (the default):
57 //  the probability of sampling a 4K allocation is about 0.00778
58 //  the probability of sampling a 1MB allocation is about 0.865
59 //  the probability of sampling a 1GB allocation is about 1.00000
// In general, the probability of sampling an allocation of size X
// given a flag value of Y (default 512K) is:
62 //  1 - e^(-X/Y)
63 //
64 // With 128K average sample step:
65 //  the probability of sampling a 1MB allocation is about 0.99966
66 //  the probability of sampling a 1GB allocation is about 1.0
67 //  (about 1 - 2**(-26))
68 // With 1M average sample step:
69 //  the probability of sampling a 4K allocation is about 0.00390
70 //  the probability of sampling a 1MB allocation is about 0.632
71 //  the probability of sampling a 1GB allocation is about 1.0
72 //
73 // The sampler works by representing memory as a long stream from
74 // which allocations are taken. Some of the bytes in this stream are
75 // marked and if an allocation includes a marked byte then it is
76 // sampled. Bytes are marked according to a Poisson point process
77 // with each byte being marked independently with probability
78 // p = 1/tcmalloc_sample_parameter.  This makes the probability
79 // of sampling an allocation of X bytes equal to the CDF of
80 // a geometric with mean tcmalloc_sample_parameter. (ie. the
81 // probability that at least one byte in the range is marked). This
82 // is accurately given by the CDF of the corresponding exponential
// distribution : 1 - e^(-X/tcmalloc_sample_parameter)
84 // Independence of the byte marking ensures independence of
85 // the sampling of each allocation.
86 //
87 // This scheme is implemented by noting that, starting from any
88 // fixed place, the number of bytes until the next marked byte
89 // is geometrically distributed. This number is recorded as
// bytes_until_sample_.  Every allocation subtracts its size from this
// number until the number is smaller than the size of the next
// allocation. When this happens that allocation is sampled.
93 //
// When an allocation is sampled, bytes_until_sample_ is reset to
// a new independently sampled geometric number of bytes. The
96 // memoryless property of the point process means that this may
97 // be taken as the number of bytes after the end of the current
98 // allocation until the next marked byte. This ensures that
99 // very large allocations which would intersect many marked bytes
100 // only result in a single call to PickNextSamplingPoint.
101 //-------------------------------------------------------------------
102 
103 class PERFTOOLS_DLL_DECL Sampler {
104  public:
105   // Initialize this sampler.
106   // Passing a seed of 0 gives a non-deterministic
107   // seed value given by casting the object ("this")
108   void Init(uint32_t seed);
109   void Cleanup();
110 
111   // Record allocation of "k" bytes.  Return true iff allocation
112   // should be sampled
113   bool SampleAllocation(size_t k);
114 
115   // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
116   size_t PickNextSamplingPoint();
117 
118   // Initialize the statics for the Sampler class
119   static void InitStatics();
120 
121   // Returns the current sample period
122   int GetSamplePeriod();
123 
124   // The following are public for the purposes of testing
125   static uint64_t NextRandom(uint64_t rnd_);  // Returns the next prng value
126   static double FastLog2(const double & d);  // Computes Log2(x) quickly
127   static void PopulateFastLog2Table();  // Populate the lookup table
128 
129  private:
130   size_t        bytes_until_sample_;    // Bytes until we sample next
131   uint64_t      rnd_;                   // Cheap random number generator
132 
133   // Statics for the fast log
134   // Note that this code may not depend on anything in //util
135   // hence the duplication of functionality here
136   static const int kFastlogNumBits = 10;
137   static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
138   static double log_table_[1<<kFastlogNumBits];  // Constant
139 };
140 
SampleAllocation(size_t k)141 inline bool Sampler::SampleAllocation(size_t k) {
142   if (bytes_until_sample_ < k) {
143     bytes_until_sample_ = PickNextSamplingPoint();
144     return true;
145   } else {
146     bytes_until_sample_ -= k;
147     return false;
148   }
149 }
150 
151 // Inline functions which are public for testing purposes
152 
153 // Returns the next prng value.
154 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
155 // This is the lrand64 generator.
NextRandom(uint64_t rnd)156 inline uint64_t Sampler::NextRandom(uint64_t rnd) {
157   const uint64_t prng_mult = 0x5DEECE66DLL;
158   const uint64_t prng_add = 0xB;
159   const uint64_t prng_mod_power = 48;
160   const uint64_t prng_mod_mask =
161                 ~((~static_cast<uint64_t>(0)) << prng_mod_power);
162   return (prng_mult * rnd + prng_add) & prng_mod_mask;
163 }
164 
165 // Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
166 // This mimics the VeryFastLog2 code in those files
FastLog2(const double & d)167 inline double Sampler::FastLog2(const double & d) {
168   ASSERT(d>0);
169   COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits);
170   uint64_t x;
171   memcpy(&x, &d, sizeof(x));   // we depend on the compiler inlining this
172   const uint32_t x_high = x >> 32;
173   const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
174   const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
175   return exponent + log_table_[y];
176 }
177 
178 }  // namespace tcmalloc
179 
180 #endif  // TCMALLOC_SAMPLER_H_
181