1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Functions to provide smarter use of jemalloc, if jemalloc is being used.
18 // http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html
19 
20 #pragma once
21 
22 #include <folly/CPortability.h>
23 #include <folly/portability/Config.h>
24 #include <folly/portability/Malloc.h>
25 
26 /**
27  * Define various MALLOCX_* macros normally provided by jemalloc.  We define
28  * them so that we don't have to include jemalloc.h, in case the program is
29  * built without jemalloc support.
30  */
31 #if (defined(USE_JEMALLOC) || defined(FOLLY_USE_JEMALLOC)) && \
32     !defined(FOLLY_SANITIZE)
33 // We have JEMalloc, so use it.
34 #else
35 #ifndef MALLOCX_LG_ALIGN
36 #define MALLOCX_LG_ALIGN(la) (la)
37 #endif
38 #ifndef MALLOCX_ZERO
39 #define MALLOCX_ZERO (static_cast<int>(0x40))
40 #endif
41 #endif
42 
43 #include <folly/lang/Exception.h> /* nolint */
44 #include <folly/memory/detail/MallocImpl.h> /* nolint */
45 
46 #include <cassert>
47 #include <cstddef>
48 #include <cstdint>
49 #include <cstdlib>
50 #include <cstring>
51 
52 #include <atomic>
53 #include <new>
54 
55 // clang-format off
56 
57 namespace folly {
58 
#if defined(__GNUC__)
// This is for checked malloc-like functions (returns non-null pointer
// which cannot alias any outstanding pointer).  __returns_nonnull__ lets the
// compiler drop null checks at call sites; __malloc__ enables alias analysis
// on the returned pointer.
#define FOLLY_MALLOC_CHECKED_MALLOC \
  __attribute__((__returns_nonnull__, __malloc__))
#else
// Compilers without GNU attribute support: expands to nothing.
#define FOLLY_MALLOC_CHECKED_MALLOC
#endif
67 
68 /**
69  * Determine if we are using jemalloc or not.
70  */
71 #if defined(FOLLY_ASSUME_NO_JEMALLOC) || defined(FOLLY_SANITIZE)
usingJEMalloc()72   inline bool usingJEMalloc() noexcept {
73     return false;
74   }
75 #elif defined(USE_JEMALLOC) && !defined(FOLLY_SANITIZE)
76   inline bool usingJEMalloc() noexcept {
77     return true;
78   }
79 #else
80 FOLLY_NOINLINE inline bool usingJEMalloc() noexcept {
81   // Checking for rallocx != nullptr is not sufficient; we may be in a
82   // dlopen()ed module that depends on libjemalloc, so rallocx is resolved, but
83   // the main program might be using a different memory allocator.
84   // How do we determine that we're using jemalloc? In the hackiest
85   // way possible. We allocate memory using malloc() and see if the
86   // per-thread counter of allocated memory increases. This makes me
87   // feel dirty inside. Also note that this requires jemalloc to have
88   // been compiled with --enable-stats.
89   static const bool result = []() noexcept {
90     // Some platforms (*cough* OSX *cough*) require weak symbol checks to be
91     // in the form if (mallctl != nullptr). Not if (mallctl) or if (!mallctl)
92     // (!!). http://goo.gl/xpmctm
93     if (mallocx == nullptr || rallocx == nullptr || xallocx == nullptr ||
94         sallocx == nullptr || dallocx == nullptr || sdallocx == nullptr ||
95         nallocx == nullptr || mallctl == nullptr ||
96         mallctlnametomib == nullptr || mallctlbymib == nullptr) {
97       return false;
98     }
99 
100     // "volatile" because gcc optimizes out the reads from *counter, because
101     // it "knows" malloc doesn't modify global state...
102     /* nolint */ volatile uint64_t* counter;
103     size_t counterLen = sizeof(uint64_t*);
104 
105     if (mallctl(
106             "thread.allocatedp",
107             static_cast<void*>(&counter),
108             &counterLen,
109             nullptr,
110             0) != 0) {
111       return false;
112     }
113 
114     if (counterLen != sizeof(uint64_t*)) {
115       return false;
116     }
117 
118     uint64_t origAllocated = *counter;
119 
120     static void* volatile ptr = malloc(1);
121     if (!ptr) {
122       // wtf, failing to allocate 1 byte
123       return false;
124     }
125 
126     free(ptr);
127 
128     return (origAllocated != *counter);
129   }
130   ();
131 
132   return result;
133 }
134 #endif
135 
getTCMallocNumericProperty(const char * name,size_t * out)136 inline bool getTCMallocNumericProperty(const char* name, size_t* out) noexcept {
137   return MallocExtension_Internal_GetNumericProperty(name, strlen(name), out);
138 }
139 
#if defined(FOLLY_ASSUME_NO_TCMALLOC) || defined(FOLLY_SANITIZE)
  // Statically known: tcmalloc explicitly ruled out, or a sanitizer build
  // (sanitizers interpose their own allocator).
  inline bool usingTCMalloc() noexcept {
    return false;
  }
#elif defined(USE_TCMALLOC) && !defined(FOLLY_SANITIZE)
  // Statically known: this build links against tcmalloc.
  inline bool usingTCMalloc() noexcept {
    return true;
  }
#else
// Runtime detection, cached in a function-local static.  Same trick as
// usingJEMalloc(): allocate a byte and watch the allocator's own
// allocated-bytes statistic for movement.
FOLLY_NOINLINE inline bool usingTCMalloc() noexcept {
  static const bool result = []() noexcept {
    // Some platforms (*cough* OSX *cough*) require weak symbol checks to be
    // in the form if (mallctl != nullptr). Not if (mallctl) or if (!mallctl)
    // (!!). http://goo.gl/xpmctm
    if (MallocExtension_Internal_GetNumericProperty == nullptr ||
        sdallocx == nullptr || nallocx == nullptr) {
      return false;
    }
    static const char kAllocBytes[] = "generic.current_allocated_bytes";

    size_t before_bytes = 0;
    getTCMallocNumericProperty(kAllocBytes, &before_bytes);

    // static + volatile so the compiler cannot elide or reorder the
    // allocation that is supposed to move the statistic.
    static void* volatile ptr = malloc(1);
    if (!ptr) {
      // wtf, failing to allocate 1 byte
      return false;
    }

    size_t after_bytes = 0;
    getTCMallocNumericProperty(kAllocBytes, &after_bytes);

    free(ptr);

    // If the global counter moved, our malloc() really was tcmalloc's.
    return (before_bytes != after_bytes);
  }
  ();

  return result;
}
#endif
181 
canSdallocx()182 FOLLY_NOINLINE inline bool canSdallocx() noexcept {
183   static bool rv = usingJEMalloc() || usingTCMalloc();
184   return rv;
185 }
186 
canNallocx()187 FOLLY_NOINLINE inline bool canNallocx() noexcept {
188   static bool rv = usingJEMalloc() || usingTCMalloc();
189   return rv;
190 }
191 
goodMallocSize(size_t minSize)192 inline size_t goodMallocSize(size_t minSize) noexcept {
193   if (minSize == 0) {
194     return 0;
195   }
196 
197   if (!canNallocx()) {
198     // No nallocx - no smarts
199     return minSize;
200   }
201 
202   // nallocx returns 0 if minSize can't succeed, but 0 is not actually
203   // a goodMallocSize if you want minSize
204   auto rv = nallocx(minSize, 0);
205   return rv ? rv : minSize;
206 }
207 
// We always request "good" sizes for allocation, so jemalloc can
// never grow in place small blocks; they're already occupied to the
// brim.  Blocks larger than or equal to 4096 bytes can in fact be
// expanded in place, and this constant reflects that.
//
// constexpr (rather than static const) is the modern idiom for a
// header-defined constant: guaranteed compile-time, same internal
// linkage and value as before.
constexpr size_t jemallocMinInPlaceExpandable = 4096;
213 
214 /**
215  * Trivial wrappers around malloc, calloc, realloc that check for allocation
216  * failure and throw std::bad_alloc in that case.
217  */
checkedMalloc(size_t size)218 inline void* checkedMalloc(size_t size) {
219   void* p = malloc(size);
220   if (!p) {
221     throw_exception<std::bad_alloc>();
222   }
223   return p;
224 }
225 
checkedCalloc(size_t n,size_t size)226 inline void* checkedCalloc(size_t n, size_t size) {
227   void* p = calloc(n, size);
228   if (!p) {
229     throw_exception<std::bad_alloc>();
230   }
231   return p;
232 }
233 
checkedRealloc(void * ptr,size_t size)234 inline void* checkedRealloc(void* ptr, size_t size) {
235   void* p = realloc(ptr, size);
236   if (!p) {
237     throw_exception<std::bad_alloc>();
238   }
239   return p;
240 }
241 
sizedFree(void * ptr,size_t size)242 inline void sizedFree(void* ptr, size_t size) {
243   if (canSdallocx()) {
244     sdallocx(ptr, size, 0);
245   } else {
246     free(ptr);
247   }
248 }
249 
250 /**
251  * This function tries to reallocate a buffer of which only the first
252  * currentSize bytes are used. The problem with using realloc is that
253  * if currentSize is relatively small _and_ if realloc decides it
254  * needs to move the memory chunk to a new buffer, then realloc ends
255  * up copying data that is not used. It's generally not a win to try
256  * to hook in to realloc() behavior to avoid copies - at least in
257  * jemalloc, realloc() almost always ends up doing a copy, because
258  * there is little fragmentation / slack space to take advantage of.
259  */
smartRealloc(void * p,const size_t currentSize,const size_t currentCapacity,const size_t newCapacity)260 FOLLY_MALLOC_CHECKED_MALLOC FOLLY_NOINLINE inline void* smartRealloc(
261     void* p,
262     const size_t currentSize,
263     const size_t currentCapacity,
264     const size_t newCapacity) {
265   assert(p);
266   assert(currentSize <= currentCapacity &&
267          currentCapacity < newCapacity);
268 
269   auto const slack = currentCapacity - currentSize;
270   if (slack * 2 > currentSize) {
271     // Too much slack, malloc-copy-free cycle:
272     auto const result = checkedMalloc(newCapacity);
273     std::memcpy(result, p, currentSize);
274     free(p);
275     return result;
276   }
277   // If there's not too much slack, we realloc in hope of coalescing
278   return checkedRealloc(p, newCapacity);
279 }
280 
281 } // namespace folly
282 
283 // clang-format on
284