1 #define JEMALLOC_PAGES_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3
4 #include "jemalloc/internal/pages.h"
5
6 #include "jemalloc/internal/jemalloc_internal_includes.h"
7
8 #include "jemalloc/internal/assert.h"
9 #include "jemalloc/internal/malloc_io.h"
10
11 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
12 #include <sys/sysctl.h>
13 #ifdef __FreeBSD__
14 #include <vm/vm_param.h>
15 #endif
16 #endif
17
18 /******************************************************************************/
19 /* Data. */
20
/*
 * Actual operating system page size, detected during bootstrap, <= PAGE.
 * Assigned by pages_boot() from os_page_detect(); pages_boot() fails if the
 * detected value exceeds PAGE.
 */
static size_t os_page;

#ifndef _WIN32
/* mmap() protection used for committed and for decommitted memory. */
# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
# define PAGES_PROT_DECOMMIT (PROT_NONE)
/* Flags handed to every mmap() call in this file; set up by pages_boot(). */
static int mmap_flags;
#endif
/* Whether the OS was detected to overcommit memory; set by pages_boot(). */
static bool os_overcommits;

/*
 * Printable names of the THP modes.
 * NOTE(review): presumably indexed by thp_mode_t value -- confirm against the
 * enum declaration.
 */
const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
/*
 * Requested THP mode.  init_thp_state() overwrites it with
 * thp_mode_not_supported when THP state cannot be determined.
 */
thp_mode_t opt_thp = THP_MODE_DEFAULT;
/* System THP mode observed by init_thp_state() during boot. */
thp_mode_t init_system_thp_mode;

/*
 * Runtime support for lazy purge.  Irrelevant when !pages_can_purge_lazy.
 * Cleared by pages_boot() if a trial pages_purge_lazy() call fails.
 */
static bool pages_can_purge_lazy_runtime = true;
42
43 /******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 *
 * os_pages_unmap() is called by os_pages_map() and os_pages_trim(), both of
 * which are defined before it.
 */

static void os_pages_unmap(void *addr, size_t size);
50
51 /******************************************************************************/
52
53 static void *
os_pages_map(void * addr,size_t size,size_t alignment,bool * commit)54 os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
55 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
56 assert(ALIGNMENT_CEILING(size, os_page) == size);
57 assert(size != 0);
58
59 if (os_overcommits) {
60 *commit = true;
61 }
62
63 void *ret;
64 #ifdef _WIN32
65 /*
66 * If VirtualAlloc can't allocate at the given address when one is
67 * given, it fails and returns NULL.
68 */
69 ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
70 PAGE_READWRITE);
71 #else
72 /*
73 * We don't use MAP_FIXED here, because it can cause the *replacement*
74 * of existing mappings, and we only want to create new mappings.
75 */
76 {
77 int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
78
79 ret = mmap(addr, size, prot, mmap_flags, -1, 0);
80 }
81 assert(ret != NULL);
82
83 if (ret == MAP_FAILED) {
84 ret = NULL;
85 } else if (addr != NULL && ret != addr) {
86 /*
87 * We succeeded in mapping memory, but not in the right place.
88 */
89 os_pages_unmap(ret, size);
90 ret = NULL;
91 }
92 #endif
93 assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
94 ret == addr));
95 return ret;
96 }
97
/*
 * Cut an over-sized mapping [addr, addr + alloc_size) down to the size bytes
 * that start leadsize bytes into it, and return the start of that range.
 *
 * On Windows the whole mapping is released and the wanted range is mapped
 * again; NULL is returned if the new mapping does not land at the wanted
 * address.  Elsewhere the leading and trailing excess is unmapped in place
 * and the call always returns the trimmed range.
 */
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	uintptr_t base = (uintptr_t)addr;
	void *wanted = (void *)(base + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *remapped = os_pages_map(wanted, size, PAGE, commit);
	if (remapped != wanted) {
		if (remapped != NULL) {
			os_pages_unmap(remapped, size);
		}
		return NULL;
	}
	return wanted;
#else
	size_t tail_bytes = alloc_size - (leadsize + size);

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (tail_bytes != 0) {
		os_pages_unmap((void *)(base + leadsize + size), tail_bytes);
	}
	return wanted;
#endif
}
126
127 static void
os_pages_unmap(void * addr,size_t size)128 os_pages_unmap(void *addr, size_t size) {
129 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
130 assert(ALIGNMENT_CEILING(size, os_page) == size);
131
132 #ifdef _WIN32
133 if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
134 #else
135 if (munmap(addr, size) == -1)
136 #endif
137 {
138 char buf[BUFERROR_BUF];
139
140 buferror(get_errno(), buf, sizeof(buf));
141 malloc_printf("<jemalloc>: Error in "
142 #ifdef _WIN32
143 "VirtualFree"
144 #else
145 "munmap"
146 #endif
147 "(): %s\n", buf);
148 if (opt_abort) {
149 abort();
150 }
151 }
152 }
153
154 static void *
pages_map_slow(size_t size,size_t alignment,bool * commit)155 pages_map_slow(size_t size, size_t alignment, bool *commit) {
156 size_t alloc_size = size + alignment - os_page;
157 /* Beware size_t wrap-around. */
158 if (alloc_size < size) {
159 return NULL;
160 }
161
162 void *ret;
163 do {
164 void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
165 if (pages == NULL) {
166 return NULL;
167 }
168 size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
169 - (uintptr_t)pages;
170 ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
171 } while (ret == NULL);
172
173 assert(ret != NULL);
174 assert(PAGE_ADDR2BASE(ret) == ret);
175 return ret;
176 }
177
178 void *
pages_map(void * addr,size_t size,size_t alignment,bool * commit)179 pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
180 assert(alignment >= PAGE);
181 assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
182
183 /*
184 * Ideally, there would be a way to specify alignment to mmap() (like
185 * NetBSD has), but in the absence of such a feature, we have to work
186 * hard to efficiently create aligned mappings. The reliable, but
187 * slow method is to create a mapping that is over-sized, then trim the
188 * excess. However, that always results in one or two calls to
189 * os_pages_unmap(), and it can leave holes in the process's virtual
190 * memory map if memory grows downward.
191 *
192 * Optimistically try mapping precisely the right amount before falling
193 * back to the slow method, with the expectation that the optimistic
194 * approach works most of the time.
195 */
196
197 void *ret = os_pages_map(addr, size, os_page, commit);
198 if (ret == NULL || ret == addr) {
199 return ret;
200 }
201 assert(addr == NULL);
202 if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
203 os_pages_unmap(ret, size);
204 return pages_map_slow(size, alignment, commit);
205 }
206
207 assert(PAGE_ADDR2BASE(ret) == ret);
208 return ret;
209 }
210
/*
 * Unmap [addr, addr + size).  Both addr and size must be multiples of PAGE;
 * the actual work is done by os_pages_unmap().
 */
void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_CEILING(size) == size);
	assert(PAGE_ADDR2BASE(addr) == addr);

	os_pages_unmap(addr, size);
}
218
219 static bool
pages_commit_impl(void * addr,size_t size,bool commit)220 pages_commit_impl(void *addr, size_t size, bool commit) {
221 assert(PAGE_ADDR2BASE(addr) == addr);
222 assert(PAGE_CEILING(size) == size);
223
224 if (os_overcommits) {
225 return true;
226 }
227
228 #ifdef _WIN32
229 return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
230 PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
231 #else
232 {
233 int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
234 void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
235 -1, 0);
236 if (result == MAP_FAILED) {
237 return true;
238 }
239 if (result != addr) {
240 /*
241 * We succeeded in mapping memory, but not in the right
242 * place.
243 */
244 os_pages_unmap(result, size);
245 return true;
246 }
247 return false;
248 }
249 #endif
250 }
251
/*
 * Commit [addr, addr + size).  Returns false on success, true on failure
 * (see pages_commit_impl()).
 */
bool
pages_commit(void *addr, size_t size) {
	const bool commit = true;

	return pages_commit_impl(addr, size, commit);
}
256
/*
 * Decommit [addr, addr + size).  Returns false on success, true on failure
 * (see pages_commit_impl()).
 */
bool
pages_decommit(void *addr, size_t size) {
	const bool commit = false;

	return pages_commit_impl(addr, size, commit);
}
261
262 bool
pages_purge_lazy(void * addr,size_t size)263 pages_purge_lazy(void *addr, size_t size) {
264 assert(PAGE_ADDR2BASE(addr) == addr);
265 assert(PAGE_CEILING(size) == size);
266
267 if (!pages_can_purge_lazy) {
268 return true;
269 }
270 if (!pages_can_purge_lazy_runtime) {
271 /*
272 * Built with lazy purge enabled, but detected it was not
273 * supported on the current system.
274 */
275 return true;
276 }
277
278 #ifdef _WIN32
279 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
280 return false;
281 #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
282 return (madvise(addr, size,
283 # ifdef MADV_FREE
284 MADV_FREE
285 # else
286 JEMALLOC_MADV_FREE
287 # endif
288 ) != 0);
289 #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
290 !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
291 return (madvise(addr, size, MADV_DONTNEED) != 0);
292 #else
293 not_reached();
294 #endif
295 }
296
297 bool
pages_purge_forced(void * addr,size_t size)298 pages_purge_forced(void *addr, size_t size) {
299 assert(PAGE_ADDR2BASE(addr) == addr);
300 assert(PAGE_CEILING(size) == size);
301
302 if (!pages_can_purge_forced) {
303 return true;
304 }
305
306 #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
307 defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
308 return (madvise(addr, size, MADV_DONTNEED) != 0);
309 #elif defined(JEMALLOC_MAPS_COALESCE)
310 /* Try to overlay a new demand-zeroed mapping. */
311 return pages_commit(addr, size);
312 #else
313 not_reached();
314 #endif
315 }
316
/*
 * Advise the kernel (MADV_HUGEPAGE) to back [addr, addr + size) with huge
 * pages.  When aligned is true, addr and size must be huge-page aligned.
 *
 * Returns false on success; true if madvise() fails or the build has no
 * MADV_HUGEPAGE support.
 */
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	int rc = madvise(addr, size, MADV_HUGEPAGE);

	return (rc != 0);
#else
	return true;
#endif
}
329
/*
 * Request huge pages for a huge-page-aligned range.  Returns false on
 * success, true on failure (see pages_huge_impl()).
 */
bool
pages_huge(void *addr, size_t size) {
	const bool aligned = true;

	return pages_huge_impl(addr, size, aligned);
}
334
/*
 * Request huge pages for a range that need not be huge-page aligned.
 * Returns false on success, true on failure (see pages_huge_impl()).
 */
static bool
pages_huge_unaligned(void *addr, size_t size) {
	const bool aligned = false;

	return pages_huge_impl(addr, size, aligned);
}
339
/*
 * Advise the kernel (MADV_NOHUGEPAGE) not to back [addr, addr + size) with
 * huge pages.  When aligned is true, addr and size must be huge-page aligned.
 *
 * Returns true only if madvise() fails; a build without MADV_HUGEPAGE
 * support returns false.
 */
static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	int rc = madvise(addr, size, MADV_NOHUGEPAGE);

	return (rc != 0);
#else
	return false;
#endif
}
353
/*
 * Disable huge pages for a huge-page-aligned range.  Returns true only on
 * failure (see pages_nohuge_impl()).
 */
bool
pages_nohuge(void *addr, size_t size) {
	const bool aligned = true;

	return pages_nohuge_impl(addr, size, aligned);
}
358
/*
 * Disable huge pages for a range that need not be huge-page aligned.
 * Returns true only on failure (see pages_nohuge_impl()).
 */
static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	const bool aligned = false;

	return pages_nohuge_impl(addr, size, aligned);
}
363
/*
 * Apply MADV_DONTDUMP to the page-aligned range [addr, addr + size).
 *
 * Returns true only if madvise() fails; builds without
 * JEMALLOC_MADVISE_DONTDUMP do nothing and return false.
 */
bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	int rc = madvise(addr, size, MADV_DONTDUMP);

	return rc != 0;
#else
	return false;
#endif
}
374
/*
 * Apply MADV_DODUMP to the page-aligned range [addr, addr + size).
 *
 * Returns true only if madvise() fails; builds without
 * JEMALLOC_MADVISE_DONTDUMP do nothing and return false.
 */
bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	int rc = madvise(addr, size, MADV_DODUMP);

	return rc != 0;
#else
	return false;
#endif
}
385
386
387 static size_t
os_page_detect(void)388 os_page_detect(void) {
389 #ifdef _WIN32
390 SYSTEM_INFO si;
391 GetSystemInfo(&si);
392 return si.dwPageSize;
393 #elif defined(__FreeBSD__)
394 return getpagesize();
395 #else
396 long result = sysconf(_SC_PAGESIZE);
397 if (result == -1) {
398 return LG_PAGE;
399 }
400 return (size_t)result;
401 #endif
402 }
403
#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
/*
 * Read the vm.overcommit sysctl and report whether the OS overcommits.
 *
 * Returns true when the two low bits of the sysctl value are both clear;
 * returns false otherwise, including when the sysctl cannot be read.
 */
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz = sizeof(vm_overcommit);
	int rc;

#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2] = {CTL_VM, VM_OVERCOMMIT};

	rc = sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0);
#else
	rc = sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0);
#endif
	if (rc != 0) {
		return false; /* Error. */
	}

	return ((vm_overcommit & 0x3) == 0);
}
#endif
428
#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Report whether the OS overcommits, based on the first character of
 * /proc/sys/vm/overcommit_memory.  Returns false if the file cannot be
 * opened or read.
 *
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	static const char path[] = "/proc/sys/vm/overcommit_memory";
#if defined(O_CLOEXEC)
	const int oflags = O_RDONLY | O_CLOEXEC;
#else
	const int oflags = O_RDONLY;
#endif
	int fd;

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	fd = (int)syscall(SYS_open, path, oflags);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	fd = (int)syscall(SYS_openat, AT_FDCWD, path, oflags);
#else
	fd = open(path, oflags);
#endif
#if !defined(O_CLOEXEC)
	/* No O_CLOEXEC at open time: set close-on-exec after the fact. */
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	char setting;
	ssize_t nread = malloc_read_fd(fd, &setting, sizeof(setting));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (setting == '0' || setting == '1');
}
#endif
495
496 void
pages_set_thp_state(void * ptr,size_t size)497 pages_set_thp_state (void *ptr, size_t size) {
498 if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
499 return;
500 }
501 assert(opt_thp != thp_mode_not_supported &&
502 init_system_thp_mode != thp_mode_not_supported);
503
504 if (opt_thp == thp_mode_always
505 && init_system_thp_mode != thp_mode_never) {
506 assert(init_system_thp_mode == thp_mode_default);
507 pages_huge_unaligned(ptr, size);
508 } else if (opt_thp == thp_mode_never) {
509 assert(init_system_thp_mode == thp_mode_default ||
510 init_system_thp_mode == thp_mode_always);
511 pages_nohuge_unaligned(ptr, size);
512 }
513 }
514
515 static void
init_thp_state(void)516 init_thp_state(void) {
517 if (!have_madvise_huge) {
518 if (metadata_thp_enabled() && opt_abort) {
519 malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
520 abort();
521 }
522 goto label_error;
523 }
524
525 static const char sys_state_madvise[] = "always [madvise] never\n";
526 static const char sys_state_always[] = "[always] madvise never\n";
527 static const char sys_state_never[] = "always madvise [never]\n";
528 char buf[sizeof(sys_state_madvise)];
529
530 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
531 int fd = (int)syscall(SYS_open,
532 "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
533 #else
534 int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
535 #endif
536 if (fd == -1) {
537 goto label_error;
538 }
539
540 ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
541 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
542 syscall(SYS_close, fd);
543 #else
544 close(fd);
545 #endif
546
547 if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
548 init_system_thp_mode = thp_mode_default;
549 } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
550 init_system_thp_mode = thp_mode_always;
551 } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
552 init_system_thp_mode = thp_mode_never;
553 } else {
554 goto label_error;
555 }
556 return;
557 label_error:
558 opt_thp = init_system_thp_mode = thp_mode_not_supported;
559 }
560
561 bool
pages_boot(void)562 pages_boot(void) {
563 os_page = os_page_detect();
564 if (os_page > PAGE) {
565 malloc_write("<jemalloc>: Unsupported system page size\n");
566 if (opt_abort) {
567 abort();
568 }
569 return true;
570 }
571
572 #ifndef _WIN32
573 mmap_flags = MAP_PRIVATE | MAP_ANON;
574 #endif
575
576 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
577 os_overcommits = os_overcommits_sysctl();
578 #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
579 os_overcommits = os_overcommits_proc();
580 # ifdef MAP_NORESERVE
581 if (os_overcommits) {
582 mmap_flags |= MAP_NORESERVE;
583 }
584 # endif
585 #else
586 os_overcommits = false;
587 #endif
588
589 init_thp_state();
590
591 /* Detect lazy purge runtime support. */
592 if (pages_can_purge_lazy) {
593 bool committed = false;
594 void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
595 if (madv_free_page == NULL) {
596 return true;
597 }
598 assert(pages_can_purge_lazy_runtime);
599 if (pages_purge_lazy(madv_free_page, PAGE)) {
600 pages_can_purge_lazy_runtime = false;
601 }
602 os_pages_unmap(madv_free_page, PAGE);
603 }
604
605 return false;
606 }
607