//
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2021
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include "memprof/memprof.h"

#include "td/utils/port/platform.h"

#if (TD_DARWIN || TD_LINUX) && defined(USE_MEMPROF)
#include <algorithm>
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <new>
#include <utility>
#include <vector>

#include <dlfcn.h>
#include <execinfo.h>
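
// Overview: with USE_MEMPROF enabled, this file interposes the global allocation
// functions (malloc/free/calloc/realloc and operator new/delete). Every
// allocation is prefixed with a small malloc_info header, and its size is
// accounted against the allocating backtrace in a fixed-size hash table, so that
// dump_alloc() can report how much memory each call site currently holds.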

bool is_memprof_on() {
  return true;
}

#if USE_MEMPROF_SAFE
double get_fast_backtrace_success_rate() {
  return 0;
}
#else

#define my_assert(f) \
  if (!(f)) {        \
    std::abort();    \
  }

#if TD_LINUX
extern void *__libc_stack_end;
#endif

static void *get_bp() {
  void *bp;
#if defined(__i386__)
  __asm__ volatile("movl %%ebp, %[r]" : [ r ] "=r"(bp));
#elif defined(__x86_64__)
  __asm__ volatile("movq %%rbp, %[r]" : [ r ] "=r"(bp));
#endif
  return bp;
}

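// fast_backtrace walks the saved frame-pointer chain (ebp/rbp) instead of calling
// libc's backtrace(). This assumes the code was built with frame pointers
// preserved (e.g. -fno-omit-frame-pointer); get_backtrace() below detects frames
// that look unreliable and falls back to backtrace() when needed.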
static int fast_backtrace(void **buffer, int size) {
  struct stack_frame {
    stack_frame *bp;
    void *ip;
  };

  auto *bp = reinterpret_cast<stack_frame *>(get_bp());
  int i = 0;
  while (i < size &&
#if TD_LINUX
         static_cast<void *>(bp) <= __libc_stack_end &&
#endif
         !(reinterpret_cast<std::uintptr_t>(static_cast<void *>(bp)) & (sizeof(void *) - 1))) {
    void *ip = bp->ip;
    buffer[i++] = ip;
    stack_frame *p = bp->bp;
    if (p <= bp) {
      break;
    }
    bp = p;
  }
  return i;
}

static std::atomic<std::size_t> fast_backtrace_failed_cnt;
static std::atomic<std::size_t> backtrace_total_cnt;
double get_fast_backtrace_success_rate() {
  return 1 - static_cast<double>(fast_backtrace_failed_cnt.load(std::memory_order_relaxed)) /
                 static_cast<double>(std::max(std::size_t(1), backtrace_total_cnt.load(std::memory_order_relaxed)));
}

#endif

static Backtrace get_backtrace() {
  static __thread bool in_backtrace;  // static zero-initialized
  Backtrace res{{nullptr}};
  if (in_backtrace) {
    return res;
  }
  in_backtrace = true;
  std::array<void *, res.size() + BACKTRACE_SHIFT + 10> tmp{{nullptr}};
  std::size_t n;
#if USE_MEMPROF_SAFE
  n = backtrace(tmp.data(), static_cast<int>(tmp.size()));
#else
  n = fast_backtrace(tmp.data(), static_cast<int>(tmp.size()));
  auto from_shared = [](void *ptr) {
    return reinterpret_cast<std::uintptr_t>(ptr) > static_cast<std::uintptr_t>(0x700000000000ull);
  };

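  // Heuristic: on x86-64 Linux, addresses above 0x700000000000 typically belong to
  // mmap'ed regions such as shared libraries, where the fast frame-pointer walk is
  // unreliable; such frames trigger a retry with backtrace() (unless
  // USE_MEMPROF_FAST is set) and are dropped from the result below.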
#if !USE_MEMPROF_FAST
  auto end = tmp.begin() + std::min(res.size() + BACKTRACE_SHIFT, n);
  if (std::find_if(tmp.begin(), end, from_shared) != end) {
    fast_backtrace_failed_cnt.fetch_add(1, std::memory_order_relaxed);
    n = backtrace(tmp.data(), static_cast<int>(tmp.size()));
  }
  backtrace_total_cnt.fetch_add(1, std::memory_order_relaxed);
#endif
  n = std::remove_if(tmp.begin(), tmp.begin() + n, from_shared) - tmp.begin();
#endif
  n = std::min(res.size() + BACKTRACE_SHIFT, n);

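  // The first BACKTRACE_SHIFT frames are expected to be the profiler's own
  // (the allocation hook and get_backtrace itself) and are skipped.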
  for (std::size_t i = BACKTRACE_SHIFT; i < n; i++) {
    res[i - BACKTRACE_SHIFT] = tmp[i];
  }
  in_backtrace = false;
  return res;
}

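// Every block handed to the user is preceded by RESERVED_SIZE bytes holding a
// malloc_info header: a magic value for sanity checking, the requested size, and
// the index of the owning backtrace in the hash table below.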
static constexpr std::size_t RESERVED_SIZE = 16;
static constexpr std::int32_t MALLOC_INFO_MAGIC = 0x27138373;
struct malloc_info {
  std::int32_t magic;
  std::int32_t size;
  std::int32_t ht_pos;
};

static std::uint64_t get_hash(const Backtrace &bt) {
  std::uint64_t h = 7;
  for (std::size_t i = 0; i < bt.size() && i < BACKTRACE_HASHED_LENGTH; i++) {
    h = h * 0x4372897893428797lu + reinterpret_cast<std::uintptr_t>(bt[i]);
  }
  return h;
}

struct HashtableNode {
  std::atomic<std::uint64_t> hash;
  Backtrace backtrace;
  std::atomic<std::size_t> size;
};

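// A fixed-size open-addressing hash table maps backtrace hashes to allocation
// totals. Slots are claimed lock-free with a compare-and-swap on the hash field
// and are never removed, so an index returned by get_ht_pos() stays valid.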
static constexpr std::size_t HT_MAX_SIZE = 1000000;
static std::atomic<std::size_t> ht_size{0};
static std::array<HashtableNode, HT_MAX_SIZE> ht;

std::size_t get_ht_size() {
  return ht_size.load();
}

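// Finds (or inserts) the hash-table slot for the given backtrace and returns its
// index. Once the table is more than half full, new backtraces are collapsed into
// a single "unknown" bucket to keep probing cheap; the process aborts if the
// table ever becomes more than 70% full.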
std::int32_t get_ht_pos(const Backtrace &bt, bool force = false) {
  auto hash = get_hash(bt);
  auto pos = static_cast<std::int32_t>(hash % ht.size());
  bool was_overflow = false;
  while (true) {
    auto pos_hash = ht[pos].hash.load();
    if (pos_hash == 0) {
      if (ht_size > HT_MAX_SIZE / 2) {
        if (force) {
          my_assert(ht_size * 10 < HT_MAX_SIZE * 7);
        } else {
          Backtrace unknown_bt{{nullptr}};
          unknown_bt[0] = reinterpret_cast<void *>(1);
          return get_ht_pos(unknown_bt, true);
        }
      }

      std::uint64_t expected = 0;
      if (ht[pos].hash.compare_exchange_strong(expected, hash)) {
        ht[pos].backtrace = bt;
        ++ht_size;
        return pos;
      }
    } else if (pos_hash == hash) {
      return pos;
    } else {
      pos++;
      if (pos == static_cast<std::int32_t>(ht.size())) {
        pos = 0;
        if (was_overflow) {
          // unreachable
          std::abort();
        }
        was_overflow = true;
      }
    }
  }
}

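// Reports the bytes currently attributed to every non-empty slot. Sizes are read
// with relaxed atomics while other threads keep allocating, so the result is an
// approximate snapshot.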
void dump_alloc(const std::function<void(const AllocInfo &)> &func) {
  for (auto &node : ht) {
    auto size = node.size.load(std::memory_order_relaxed);
    if (size == 0) {
      continue;
    }
    func(AllocInfo{node.backtrace, size});
  }
}

void register_xalloc(malloc_info *info, std::int32_t diff) {
  my_assert(info->size >= 0);
  if (diff > 0) {
    ht[info->ht_pos].size.fetch_add(info->size, std::memory_order_relaxed);
  } else {
    auto old_value = ht[info->ht_pos].size.fetch_sub(info->size, std::memory_order_relaxed);
    my_assert(old_value >= static_cast<std::size_t>(info->size));
  }
}

extern "C" {

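// The real allocator is obtained from __libc_malloc/__libc_free on glibc and via
// dlsym(RTLD_NEXT, ...) on macOS; the lookup is cached in a function-local static
// on first use.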
static void *malloc_with_frame(std::size_t size, const Backtrace &frame) {
  static_assert(RESERVED_SIZE % alignof(std::max_align_t) == 0, "fail");
  static_assert(RESERVED_SIZE >= sizeof(malloc_info), "fail");
#if TD_DARWIN
  static void *malloc_void = dlsym(RTLD_NEXT, "malloc");
  static auto malloc_old = *reinterpret_cast<decltype(malloc) **>(&malloc_void);
#else
  extern decltype(malloc) __libc_malloc;
  static auto malloc_old = __libc_malloc;
#endif
  auto *info = static_cast<malloc_info *>(malloc_old(size + RESERVED_SIZE));
  auto *buf = reinterpret_cast<char *>(info);

  info->magic = MALLOC_INFO_MAGIC;
  info->size = static_cast<std::int32_t>(size);
  info->ht_pos = get_ht_pos(frame);

  register_xalloc(info, +1);

  void *data = buf + RESERVED_SIZE;

  return data;
}

static malloc_info *get_info(void *data_void) {
  auto *data = static_cast<char *>(data_void);
  auto *buf = data - RESERVED_SIZE;

  auto *info = reinterpret_cast<malloc_info *>(buf);
  my_assert(info->magic == MALLOC_INFO_MAGIC);
  return info;
}

void *malloc(std::size_t size) {
  return malloc_with_frame(size, get_backtrace());
}

void free(void *data_void) {
  if (data_void == nullptr) {
    return;
  }
  auto *info = get_info(data_void);
  register_xalloc(info, -1);

#if TD_DARWIN
  static void *free_void = dlsym(RTLD_NEXT, "free");
  static auto free_old = *reinterpret_cast<decltype(free) **>(&free_void);
#else
  extern decltype(free) __libc_free;
  static auto free_old = __libc_free;
#endif
  return free_old(info);
}
void *calloc(std::size_t size_a, std::size_t size_b) {
  auto size = size_a * size_b;
  void *res = malloc_with_frame(size, get_backtrace());
  std::memset(res, 0, size);
  return res;
}
void *realloc(void *ptr, std::size_t size) {
  if (ptr == nullptr) {
    return malloc_with_frame(size, get_backtrace());
  }
  auto *info = get_info(ptr);
  auto *new_ptr = malloc_with_frame(size, get_backtrace());
  auto to_copy = std::min(static_cast<std::int32_t>(size), info->size);
  std::memcpy(new_ptr, ptr, to_copy);
  free(ptr);
  return new_ptr;
}
void *memalign(std::size_t alignment, std::size_t size) {
  my_assert(false && "Memalign is unsupported");
  return nullptr;
}
}

// C++14 guarantees that overriding these two operators is enough.
void *operator new(std::size_t count) {
  return malloc_with_frame(count, get_backtrace());
}
void operator delete(void *ptr) noexcept(true) {
  free(ptr);
}
// Also defined because of the GCC warning: the program should also define 'void operator delete(void*, std::size_t)'.
void operator delete(void *ptr, std::size_t) noexcept(true) {
  free(ptr);
}

// c++17
// void *operator new(std::size_t count, std::align_val_t al);
// void operator delete(void *ptr, std::align_val_t al);

#else
bool is_memprof_on() {
  return false;
}
void dump_alloc(const std::function<void(const AllocInfo &)> &func) {
}
double get_fast_backtrace_success_rate() {
  return 0;
}
std::size_t get_ht_size() {
  return 0;
}
#endif

std::size_t get_used_memory_size() {
  std::size_t res = 0;
  dump_alloc([&](const auto info) { res += info.size; });
  return res;
}
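
// Example (illustrative sketch, not part of the library): a program built with
// USE_MEMPROF could report its largest allocation sites roughly like this:
//
//   std::vector<AllocInfo> infos;
//   dump_alloc([&](const AllocInfo &info) { infos.push_back(info); });
//   std::sort(infos.begin(), infos.end(),
//             [](const AllocInfo &a, const AllocInfo &b) { return a.size > b.size; });
//   // Each info.backtrace can then be symbolized, e.g. with backtrace_symbols().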