1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 // Functions to provide smarter use of jemalloc, if jemalloc is being used.
18 // http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html
19
20 #pragma once
21
22 #include <folly/CPortability.h>
23 #include <folly/portability/Config.h>
24 #include <folly/portability/Malloc.h>
25
/**
 * Define various MALLOCX_* macros normally provided by jemalloc. We define
 * them so that we don't have to include jemalloc.h, in case the program is
 * built without jemalloc support.
 */
#if (defined(USE_JEMALLOC) || defined(FOLLY_USE_JEMALLOC)) && \
    !defined(FOLLY_SANITIZE)
// We have JEMalloc, so use it.
#else
#ifndef MALLOCX_LG_ALIGN
// jemalloc encodes a log2 alignment request in the low bits of the flags
// argument; this fallback mirrors that encoding.
#define MALLOCX_LG_ALIGN(la) (la)
#endif
#ifndef MALLOCX_ZERO
// Flag bit requesting zero-filled memory from mallocx()/rallocx()
// (value matches jemalloc's definition).
#define MALLOCX_ZERO (static_cast<int>(0x40))
#endif
#endif
42
43 #include <folly/lang/Exception.h> /* nolint */
44 #include <folly/memory/detail/MallocImpl.h> /* nolint */
45
46 #include <cassert>
47 #include <cstddef>
48 #include <cstdint>
49 #include <cstdlib>
50 #include <cstring>
51
52 #include <atomic>
53 #include <new>
54
55 // clang-format off
56
57 namespace folly {
58
#if defined(__GNUC__)
// This is for checked malloc-like functions (returns non-null pointer
// which cannot alias any outstanding pointer). The attributes let the
// compiler drop null checks on the result and improve alias analysis.
#define FOLLY_MALLOC_CHECKED_MALLOC \
  __attribute__((__returns_nonnull__, __malloc__))
#else
// Non-GNU compilers: expand to nothing.
#define FOLLY_MALLOC_CHECKED_MALLOC
#endif
67
/**
 * Determine if we are using jemalloc or not.
 */
#if defined(FOLLY_ASSUME_NO_JEMALLOC) || defined(FOLLY_SANITIZE)
// Ruled out at build time (sanitizer builds interpose their own allocator),
// so answer statically with no runtime probing.
inline bool usingJEMalloc() noexcept {
  return false;
}
#elif defined(USE_JEMALLOC) && !defined(FOLLY_SANITIZE)
// jemalloc is known to be linked in by build configuration.
inline bool usingJEMalloc() noexcept {
  return true;
}
#else
FOLLY_NOINLINE inline bool usingJEMalloc() noexcept {
  // Checking for rallocx != nullptr is not sufficient; we may be in a
  // dlopen()ed module that depends on libjemalloc, so rallocx is resolved, but
  // the main program might be using a different memory allocator.
  // How do we determine that we're using jemalloc? In the hackiest
  // way possible. We allocate memory using malloc() and see if the
  // per-thread counter of allocated memory increases. This makes me
  // feel dirty inside. Also note that this requires jemalloc to have
  // been compiled with --enable-stats.
  //
  // The probe runs once; the result is cached in a function-local static.
  static const bool result = []() noexcept {
    // Some platforms (*cough* OSX *cough*) require weak symbol checks to be
    // in the form if (mallctl != nullptr). Not if (mallctl) or if (!mallctl)
    // (!!). http://goo.gl/xpmctm
    if (mallocx == nullptr || rallocx == nullptr || xallocx == nullptr ||
        sallocx == nullptr || dallocx == nullptr || sdallocx == nullptr ||
        nallocx == nullptr || mallctl == nullptr ||
        mallctlnametomib == nullptr || mallctlbymib == nullptr) {
      return false;
    }

    // "volatile" because gcc optimizes out the reads from *counter, because
    // it "knows" malloc doesn't modify global state...
    /* nolint */ volatile uint64_t* counter;
    size_t counterLen = sizeof(uint64_t*);

    // Fetch a pointer to this thread's cumulative-allocated-bytes counter.
    if (mallctl(
            "thread.allocatedp",
            static_cast<void*>(&counter),
            &counterLen,
            nullptr,
            0) != 0) {
      return false;
    }

    if (counterLen != sizeof(uint64_t*)) {
      return false;
    }

    uint64_t origAllocated = *counter;

    // NOTE(review): "static volatile" presumably to keep the compiler from
    // eliding the allocation or reordering it past the counter reads.
    static void* volatile ptr = malloc(1);
    if (!ptr) {
      // wtf, failing to allocate 1 byte
      return false;
    }

    free(ptr);

    // jemalloc serviced the malloc() iff the per-thread counter moved.
    return (origAllocated != *counter);
  }();

  return result;
}
#endif
135
getTCMallocNumericProperty(const char * name,size_t * out)136 inline bool getTCMallocNumericProperty(const char* name, size_t* out) noexcept {
137 return MallocExtension_Internal_GetNumericProperty(name, strlen(name), out);
138 }
139
#if defined(FOLLY_ASSUME_NO_TCMALLOC) || defined(FOLLY_SANITIZE)
// Ruled out at build time (sanitizer builds interpose their own allocator).
inline bool usingTCMalloc() noexcept {
  return false;
}
#elif defined(USE_TCMALLOC) && !defined(FOLLY_SANITIZE)
// tcmalloc is known to be linked in by build configuration.
inline bool usingTCMalloc() noexcept {
  return true;
}
#else
// Runtime detection, same trick as usingJEMalloc(): allocate a byte and
// watch tcmalloc's allocated-bytes statistic. Probed once, then cached.
FOLLY_NOINLINE inline bool usingTCMalloc() noexcept {
  static const bool result = []() noexcept {
    // Some platforms (*cough* OSX *cough*) require weak symbol checks to be
    // in the form if (mallctl != nullptr). Not if (mallctl) or if (!mallctl)
    // (!!). http://goo.gl/xpmctm
    if (MallocExtension_Internal_GetNumericProperty == nullptr ||
        sdallocx == nullptr || nallocx == nullptr) {
      return false;
    }
    static const char kAllocBytes[] = "generic.current_allocated_bytes";

    size_t before_bytes = 0;
    getTCMallocNumericProperty(kAllocBytes, &before_bytes);

    // NOTE(review): "static volatile" presumably to keep the compiler from
    // eliding the allocation or reordering it around the statistic reads.
    static void* volatile ptr = malloc(1);
    if (!ptr) {
      // wtf, failing to allocate 1 byte
      return false;
    }

    size_t after_bytes = 0;
    getTCMallocNumericProperty(kAllocBytes, &after_bytes);

    free(ptr);

    // tcmalloc serviced the malloc() iff its allocated-bytes stat moved.
    return (before_bytes != after_bytes);
  }();

  return result;
}
#endif
181
canSdallocx()182 FOLLY_NOINLINE inline bool canSdallocx() noexcept {
183 static bool rv = usingJEMalloc() || usingTCMalloc();
184 return rv;
185 }
186
canNallocx()187 FOLLY_NOINLINE inline bool canNallocx() noexcept {
188 static bool rv = usingJEMalloc() || usingTCMalloc();
189 return rv;
190 }
191
goodMallocSize(size_t minSize)192 inline size_t goodMallocSize(size_t minSize) noexcept {
193 if (minSize == 0) {
194 return 0;
195 }
196
197 if (!canNallocx()) {
198 // No nallocx - no smarts
199 return minSize;
200 }
201
202 // nallocx returns 0 if minSize can't succeed, but 0 is not actually
203 // a goodMallocSize if you want minSize
204 auto rv = nallocx(minSize, 0);
205 return rv ? rv : minSize;
206 }
207
// We always request "good" sizes for allocation, so jemalloc can
// never grow in place small blocks; they're already occupied to the
// brim. Blocks larger than or equal to 4096 bytes can in fact be
// expanded in place, and this constant reflects that.
//
// constexpr (rather than merely const) guarantees constant initialization
// and allows use in constant expressions; linkage is unchanged.
static constexpr size_t jemallocMinInPlaceExpandable = 4096;
213
214 /**
215 * Trivial wrappers around malloc, calloc, realloc that check for allocation
216 * failure and throw std::bad_alloc in that case.
217 */
checkedMalloc(size_t size)218 inline void* checkedMalloc(size_t size) {
219 void* p = malloc(size);
220 if (!p) {
221 throw_exception<std::bad_alloc>();
222 }
223 return p;
224 }
225
checkedCalloc(size_t n,size_t size)226 inline void* checkedCalloc(size_t n, size_t size) {
227 void* p = calloc(n, size);
228 if (!p) {
229 throw_exception<std::bad_alloc>();
230 }
231 return p;
232 }
233
checkedRealloc(void * ptr,size_t size)234 inline void* checkedRealloc(void* ptr, size_t size) {
235 void* p = realloc(ptr, size);
236 if (!p) {
237 throw_exception<std::bad_alloc>();
238 }
239 return p;
240 }
241
sizedFree(void * ptr,size_t size)242 inline void sizedFree(void* ptr, size_t size) {
243 if (canSdallocx()) {
244 sdallocx(ptr, size, 0);
245 } else {
246 free(ptr);
247 }
248 }
249
250 /**
251 * This function tries to reallocate a buffer of which only the first
252 * currentSize bytes are used. The problem with using realloc is that
253 * if currentSize is relatively small _and_ if realloc decides it
254 * needs to move the memory chunk to a new buffer, then realloc ends
255 * up copying data that is not used. It's generally not a win to try
256 * to hook in to realloc() behavior to avoid copies - at least in
257 * jemalloc, realloc() almost always ends up doing a copy, because
258 * there is little fragmentation / slack space to take advantage of.
259 */
smartRealloc(void * p,const size_t currentSize,const size_t currentCapacity,const size_t newCapacity)260 FOLLY_MALLOC_CHECKED_MALLOC FOLLY_NOINLINE inline void* smartRealloc(
261 void* p,
262 const size_t currentSize,
263 const size_t currentCapacity,
264 const size_t newCapacity) {
265 assert(p);
266 assert(currentSize <= currentCapacity &&
267 currentCapacity < newCapacity);
268
269 auto const slack = currentCapacity - currentSize;
270 if (slack * 2 > currentSize) {
271 // Too much slack, malloc-copy-free cycle:
272 auto const result = checkedMalloc(newCapacity);
273 std::memcpy(result, p, currentSize);
274 free(p);
275 return result;
276 }
277 // If there's not too much slack, we realloc in hope of coalescing
278 return checkedRealloc(p, newCapacity);
279 }
280
281 } // namespace folly
282
283 // clang-format on
284