xref: /freebsd/contrib/jemalloc/src/jemalloc.c (revision ca86bcf2)
1 #define	JEMALLOC_C_
2 #include "jemalloc/internal/jemalloc_internal.h"
3 
4 /******************************************************************************/
5 /* Data. */
6 
/* Work around <http://llvm.org/bugs/show_bug.cgi?id=12623>: */
const char	*__malloc_options_1_0 = NULL;
/*
 * NOTE(review): presumably publishes the placeholder above as the versioned
 * compatibility symbol _malloc_options@FBSD_1.0 -- confirm against the
 * definition of __sym_compat.
 */
__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0);

/*
 * Runtime configuration options.
 *
 * je_malloc_conf is declared here without an initializer and, on every
 * platform except Windows, with the weak attribute.  malloc_conf_init()
 * parses the string it points to when it is non-NULL.
 */
const char	*je_malloc_conf
#ifndef _WIN32
    JEMALLOC_ATTR(weak)
#endif
    ;
/*
 * Compile-time defaults for the abort and junk-filling options.
 *
 * opt_abort defaults to true only in debug builds.  The three junk options
 * default to enabled only when both debugging and fill support are compiled
 * in; otherwise they default to disabled.
 */
#ifdef JEMALLOC_DEBUG
bool	opt_abort = true;
#else
bool	opt_abort = false;
#endif

#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
const char	*opt_junk = "true";
bool	opt_junk_alloc = true;
bool	opt_junk_free = true;
#else
const char	*opt_junk = "false";
bool	opt_junk_alloc = false;
bool	opt_junk_free = false;
#endif
45 
/*
 * Remaining runtime options, all disabled (zero/false) by default.
 * opt_quarantine, opt_utrace, opt_xmalloc and opt_zero are folded into
 * malloc_slow_flags by malloc_slow_flag_init().
 */
size_t	opt_quarantine = ZU(0);
bool	opt_redzone = false;
bool	opt_utrace = false;
bool	opt_xmalloc = false;
bool	opt_zero = false;
unsigned	opt_narenas = 0;

/* Initialized to true if the process is running inside Valgrind. */
bool	in_valgrind;

/* NOTE(review): presumably holds the value computed by malloc_ncpus() -- the assignment is not in this part of the file. */
unsigned	ncpus;
57 
/* Protects arenas initialization. */
static malloc_mutex_t	arenas_lock;
/*
 * Arenas that are used to service external requests.  Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 *
 * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
 * arenas.  arenas[narenas_auto..narenas_total) are only used if the application
 * takes some action to create them and allocate from them.
 *
 * Slots are written through arena_set() and the total count is accessed
 * through the narenas_total_*() helpers, both of which use atomic operations.
 */
arena_t			**arenas;
static unsigned		narenas_total; /* Use narenas_total_*(). */
static arena_t		*a0; /* arenas[0]; read-only after initialization. */
unsigned		narenas_auto; /* Read-only after initialization. */
72 
/*
 * Bootstrap progress of the allocator.  The numeric values count down towards
 * zero so that the fully-initialized state compares equal to 0.
 */
typedef enum {
	malloc_init_initialized		= 0, /* Common case --> jnz. */
	malloc_init_recursible		= 1,
	malloc_init_a0_initialized	= 2,
	malloc_init_uninitialized	= 3
} malloc_init_t;

/* Current bootstrap state; starts out uninitialized. */
static malloc_init_t	malloc_init_state = malloc_init_uninitialized;
80 
/*
 * Fast-path gate: false is the common case once options are processed; the
 * initial value of true forces the first call through initialization.
 */
static bool	malloc_slow = true;

/*
 * One bit per option that forces the slow path.  The matching bits are
 * recorded in malloc_slow_flags as a sanity check whenever malloc_slow is
 * true.
 */
enum {
	flag_opt_junk_alloc	= 0x01U,
	flag_opt_junk_free	= 0x02U,
	flag_opt_quarantine	= 0x04U,
	flag_opt_zero		= 0x08U,
	flag_opt_utrace		= 0x10U,
	flag_in_valgrind	= 0x20U,
	flag_opt_xmalloc	= 0x40U
};
static uint8_t	malloc_slow_flags;
95 
/*
 * Table of size-class sizes, generated by expanding SIZE_CLASSES.  Only
 * entries whose psz field is "yes" contribute an element (PSZ_no expands to
 * nothing); each element is (1 << lg_grp) + (ndelta << lg_delta).
 */
JEMALLOC_ALIGNED(CACHELINE)
const size_t	pind2sz_tab[NPSIZES] = {
#define	PSZ_yes(lg_grp, ndelta, lg_delta)				\
	(((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
#define	PSZ_no(lg_grp, ndelta, lg_delta)
#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
	PSZ_##psz(lg_grp, ndelta, lg_delta)
	SIZE_CLASSES
#undef PSZ_yes
#undef PSZ_no
#undef SC
};
108 
/*
 * Table with one element per SIZE_CLASSES entry, in expansion order; each
 * element is (1 << lg_grp) + (ndelta << lg_delta).
 */
JEMALLOC_ALIGNED(CACHELINE)
const size_t	index2size_tab[NSIZES] = {
#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
	((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
	SIZE_CLASSES
#undef SC
};
116 
117 JEMALLOC_ALIGNED(CACHELINE)
118 const uint8_t	size2index_tab[] = {
119 #if LG_TINY_MIN == 0
120 #warning "Dangerous LG_TINY_MIN"
121 #define	S2B_0(i)	i,
122 #elif LG_TINY_MIN == 1
123 #warning "Dangerous LG_TINY_MIN"
124 #define	S2B_1(i)	i,
125 #elif LG_TINY_MIN == 2
126 #warning "Dangerous LG_TINY_MIN"
127 #define	S2B_2(i)	i,
128 #elif LG_TINY_MIN == 3
129 #define	S2B_3(i)	i,
130 #elif LG_TINY_MIN == 4
131 #define	S2B_4(i)	i,
132 #elif LG_TINY_MIN == 5
133 #define	S2B_5(i)	i,
134 #elif LG_TINY_MIN == 6
135 #define	S2B_6(i)	i,
136 #elif LG_TINY_MIN == 7
137 #define	S2B_7(i)	i,
138 #elif LG_TINY_MIN == 8
139 #define	S2B_8(i)	i,
140 #elif LG_TINY_MIN == 9
141 #define	S2B_9(i)	i,
142 #elif LG_TINY_MIN == 10
143 #define	S2B_10(i)	i,
144 #elif LG_TINY_MIN == 11
145 #define	S2B_11(i)	i,
146 #else
147 #error "Unsupported LG_TINY_MIN"
148 #endif
149 #if LG_TINY_MIN < 1
150 #define	S2B_1(i)	S2B_0(i) S2B_0(i)
151 #endif
152 #if LG_TINY_MIN < 2
153 #define	S2B_2(i)	S2B_1(i) S2B_1(i)
154 #endif
155 #if LG_TINY_MIN < 3
156 #define	S2B_3(i)	S2B_2(i) S2B_2(i)
157 #endif
158 #if LG_TINY_MIN < 4
159 #define	S2B_4(i)	S2B_3(i) S2B_3(i)
160 #endif
161 #if LG_TINY_MIN < 5
162 #define	S2B_5(i)	S2B_4(i) S2B_4(i)
163 #endif
164 #if LG_TINY_MIN < 6
165 #define	S2B_6(i)	S2B_5(i) S2B_5(i)
166 #endif
167 #if LG_TINY_MIN < 7
168 #define	S2B_7(i)	S2B_6(i) S2B_6(i)
169 #endif
170 #if LG_TINY_MIN < 8
171 #define	S2B_8(i)	S2B_7(i) S2B_7(i)
172 #endif
173 #if LG_TINY_MIN < 9
174 #define	S2B_9(i)	S2B_8(i) S2B_8(i)
175 #endif
176 #if LG_TINY_MIN < 10
177 #define	S2B_10(i)	S2B_9(i) S2B_9(i)
178 #endif
179 #if LG_TINY_MIN < 11
180 #define	S2B_11(i)	S2B_10(i) S2B_10(i)
181 #endif
182 #define	S2B_no(i)
183 #define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
184 	S2B_##lg_delta_lookup(index)
185 	SIZE_CLASSES
186 #undef S2B_3
187 #undef S2B_4
188 #undef S2B_5
189 #undef S2B_6
190 #undef S2B_7
191 #undef S2B_8
192 #undef S2B_9
193 #undef S2B_10
194 #undef S2B_11
195 #undef S2B_no
196 #undef SC
197 };
198 
/*
 * Identity of the initializing context.  With JEMALLOC_THREADED_INIT the
 * value is a pthread_t that IS_INITIALIZER compares against pthread_self();
 * otherwise it is a plain boolean.
 */
#ifdef JEMALLOC_THREADED_INIT
/* Used to let the initializing thread recursively allocate. */
#  define NO_INITIALIZER	((unsigned long)0)
#  define INITIALIZER		pthread_self()
#  define IS_INITIALIZER	(malloc_initializer == pthread_self())
static pthread_t		malloc_initializer = NO_INITIALIZER;
#else
#  define NO_INITIALIZER	false
#  define INITIALIZER		true
#  define IS_INITIALIZER	malloc_initializer
static bool			malloc_initializer = NO_INITIALIZER;
#endif
211 
/*
 * Used to avoid initialization races.
 *
 * Three variants: a statically initialized SRW lock on Windows Vista and
 * later, a lock initialized at run time by a constructor on older Windows,
 * and a statically initialized mutex everywhere else.
 */
#ifdef _WIN32
#if _WIN32_WINNT >= 0x0600
static malloc_mutex_t	init_lock = SRWLOCK_INIT;
#else
static malloc_mutex_t	init_lock;
static bool init_lock_initialized = false;

JEMALLOC_ATTR(constructor)
static void WINAPI
_init_init_lock(void)
{

	/* If another constructor in the same binary is using mallctl to
	 * e.g. setup chunk hooks, it may end up running before this one,
	 * and malloc_init_hard will crash trying to lock the uninitialized
	 * lock. So we force an initialization of the lock in
	 * malloc_init_hard as well. We don't try to care about atomicity
	 * of the accesses to the init_lock_initialized boolean, since it
	 * really only matters early in the process creation, before any
	 * separate thread normally starts doing anything. */
	if (!init_lock_initialized)
		malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT);
	init_lock_initialized = true;
}

#ifdef _MSC_VER
/*
 * NOTE(review): placing the function pointer in the .CRT$XCU section is
 * presumably how MSVC is made to run it as a static initializer -- confirm.
 */
#  pragma section(".CRT$XCU", read)
JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used)
static const void (WINAPI *init_init_lock)(void) = _init_init_lock;
#endif
#endif
#else
static malloc_mutex_t	init_lock = MALLOC_MUTEX_INITIALIZER;
#endif
247 
/* Record handed to utrace() by the UTRACE() macro: one allocator event. */
typedef struct {
	void	*p;	/* Input pointer (as in realloc(p, s)). */
	size_t	s;	/* Request size. */
	void	*r;	/* Result pointer. */
} malloc_utrace_t;
253 
/*
 * UTRACE(a, b, c): when opt_utrace is set, pass a malloc_utrace_t holding
 * (input pointer, request size, result pointer) to utrace().  errno is saved
 * before and restored after the call so tracing does not disturb it.
 * Expands to nothing when JEMALLOC_UTRACE is not defined.
 */
#ifdef JEMALLOC_UTRACE
#  define UTRACE(a, b, c) do {						\
	if (unlikely(opt_utrace)) {					\
		int utrace_serrno = errno;				\
		malloc_utrace_t ut;					\
		ut.p = (a);						\
		ut.s = (b);						\
		ut.r = (c);						\
		utrace(&ut, sizeof(ut));				\
		errno = utrace_serrno;					\
	}								\
} while (0)
#else
#  define UTRACE(a, b, c)
#endif
269 
270 /******************************************************************************/
271 /*
272  * Function prototypes for static functions that are referenced prior to
273  * definition.
274  */
275 
276 static bool	malloc_init_hard_a0(void);
277 static bool	malloc_init_hard(void);
278 
279 /******************************************************************************/
280 /*
281  * Begin miscellaneous support functions.
282  */
283 
284 JEMALLOC_ALWAYS_INLINE_C bool
285 malloc_initialized(void)
286 {
287 
288 	return (malloc_init_state == malloc_init_initialized);
289 }
290 
291 JEMALLOC_ALWAYS_INLINE_C void
292 malloc_thread_init(void)
293 {
294 
295 	/*
296 	 * TSD initialization can't be safely done as a side effect of
297 	 * deallocation, because it is possible for a thread to do nothing but
298 	 * deallocate its TLS data via free(), in which case writing to TLS
299 	 * would cause write-after-free memory corruption.  The quarantine
300 	 * facility *only* gets used as a side effect of deallocation, so make
301 	 * a best effort attempt at initializing its TSD by hooking all
302 	 * allocation events.
303 	 */
304 	if (config_fill && unlikely(opt_quarantine))
305 		quarantine_alloc_hook();
306 }
307 
308 JEMALLOC_ALWAYS_INLINE_C bool
309 malloc_init_a0(void)
310 {
311 
312 	if (unlikely(malloc_init_state == malloc_init_uninitialized))
313 		return (malloc_init_hard_a0());
314 	return (false);
315 }
316 
317 JEMALLOC_ALWAYS_INLINE_C bool
318 malloc_init(void)
319 {
320 
321 	if (unlikely(!malloc_initialized()) && malloc_init_hard())
322 		return (true);
323 	malloc_thread_init();
324 
325 	return (false);
326 }
327 
328 /*
329  * The a0*() functions are used instead of i{d,}alloc() in situations that
330  * cannot tolerate TLS variable access.
331  */
332 
333 static void *
334 a0ialloc(size_t size, bool zero, bool is_metadata)
335 {
336 
337 	if (unlikely(malloc_init_a0()))
338 		return (NULL);
339 
340 	return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL,
341 	    is_metadata, arena_get(TSDN_NULL, 0, true), true));
342 }
343 
344 static void
345 a0idalloc(void *ptr, bool is_metadata)
346 {
347 
348 	idalloctm(TSDN_NULL, ptr, false, is_metadata, true);
349 }
350 
351 arena_t *
352 a0get(void)
353 {
354 
355 	return (a0);
356 }
357 
358 void *
359 a0malloc(size_t size)
360 {
361 
362 	return (a0ialloc(size, false, true));
363 }
364 
365 void
366 a0dalloc(void *ptr)
367 {
368 
369 	a0idalloc(ptr, true);
370 }
371 
372 /*
373  * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive
374  * situations that cannot tolerate TLS variable access (TLS allocation and very
375  * early internal data structure initialization).
376  */
377 
378 void *
379 bootstrap_malloc(size_t size)
380 {
381 
382 	if (unlikely(size == 0))
383 		size = 1;
384 
385 	return (a0ialloc(size, false, false));
386 }
387 
/*
 * Bootstrap-time calloc(): a zeroed, non-metadata allocation of num * size
 * bytes via a0ialloc().
 *
 * Returns NULL if num * size does not fit in size_t, or if a0ialloc() fails.
 * A zero-byte request is served as a one-byte request.
 */
void *
bootstrap_calloc(size_t num, size_t size)
{
	size_t num_size;

	num_size = num * size;
	/*
	 * Unsigned multiplication wraps silently.  Detect that by dividing
	 * back, so the caller never receives a buffer smaller than requested.
	 */
	if (unlikely(size != 0 && num_size / size != num))
		return (NULL);
	if (unlikely(num_size == 0)) {
		/* Overflow is excluded above, so one factor must be zero. */
		assert(num == 0 || size == 0);
		num_size = 1;
	}

	return (a0ialloc(num_size, true, false));
}
401 
402 void
403 bootstrap_free(void *ptr)
404 {
405 
406 	if (unlikely(ptr == NULL))
407 		return;
408 
409 	a0idalloc(ptr, false);
410 }
411 
412 static void
413 arena_set(unsigned ind, arena_t *arena)
414 {
415 
416 	atomic_write_p((void **)&arenas[ind], arena);
417 }
418 
419 static void
420 narenas_total_set(unsigned narenas)
421 {
422 
423 	atomic_write_u(&narenas_total, narenas);
424 }
425 
426 static void
427 narenas_total_inc(void)
428 {
429 
430 	atomic_add_u(&narenas_total, 1);
431 }
432 
433 unsigned
434 narenas_total_get(void)
435 {
436 
437 	return (atomic_read_u(&narenas_total));
438 }
439 
440 /* Create a new arena and insert it into the arenas array at index ind. */
441 static arena_t *
442 arena_init_locked(tsdn_t *tsdn, unsigned ind)
443 {
444 	arena_t *arena;
445 
446 	assert(ind <= narenas_total_get());
447 	if (ind > MALLOCX_ARENA_MAX)
448 		return (NULL);
449 	if (ind == narenas_total_get())
450 		narenas_total_inc();
451 
452 	/*
453 	 * Another thread may have already initialized arenas[ind] if it's an
454 	 * auto arena.
455 	 */
456 	arena = arena_get(tsdn, ind, false);
457 	if (arena != NULL) {
458 		assert(ind < narenas_auto);
459 		return (arena);
460 	}
461 
462 	/* Actually initialize the arena. */
463 	arena = arena_new(tsdn, ind);
464 	arena_set(ind, arena);
465 	return (arena);
466 }
467 
468 arena_t *
469 arena_init(tsdn_t *tsdn, unsigned ind)
470 {
471 	arena_t *arena;
472 
473 	malloc_mutex_lock(tsdn, &arenas_lock);
474 	arena = arena_init_locked(tsdn, ind);
475 	malloc_mutex_unlock(tsdn, &arenas_lock);
476 	return (arena);
477 }
478 
479 static void
480 arena_bind(tsd_t *tsd, unsigned ind, bool internal)
481 {
482 	arena_t *arena;
483 
484 	if (!tsd_nominal(tsd))
485 		return;
486 
487 	arena = arena_get(tsd_tsdn(tsd), ind, false);
488 	arena_nthreads_inc(arena, internal);
489 
490 	if (internal)
491 		tsd_iarena_set(tsd, arena);
492 	else
493 		tsd_arena_set(tsd, arena);
494 }
495 
496 void
497 arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind)
498 {
499 	arena_t *oldarena, *newarena;
500 
501 	oldarena = arena_get(tsd_tsdn(tsd), oldind, false);
502 	newarena = arena_get(tsd_tsdn(tsd), newind, false);
503 	arena_nthreads_dec(oldarena, false);
504 	arena_nthreads_inc(newarena, false);
505 	tsd_arena_set(tsd, newarena);
506 }
507 
508 static void
509 arena_unbind(tsd_t *tsd, unsigned ind, bool internal)
510 {
511 	arena_t *arena;
512 
513 	arena = arena_get(tsd_tsdn(tsd), ind, false);
514 	arena_nthreads_dec(arena, internal);
515 	if (internal)
516 		tsd_iarena_set(tsd, NULL);
517 	else
518 		tsd_arena_set(tsd, NULL);
519 }
520 
/*
 * Slow path for fetching the calling thread's per-arena data for arena ind.
 *
 * (Re)allocates the thread's tdata array when it is missing or shorter than
 * the current arena count, carries decay tickers over from the old array,
 * initializes the rest, and returns a pointer to element ind.  Returns NULL
 * if the array cannot be allocated (tsd not nominal, the bypass flag is set,
 * or a0malloc() fails).  Any superseded array is freed before returning.
 */
arena_tdata_t *
arena_tdata_get_hard(tsd_t *tsd, unsigned ind)
{
	arena_tdata_t *tdata, *arenas_tdata_old;
	arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
	unsigned narenas_tdata_old, i;
	unsigned narenas_tdata = tsd_narenas_tdata_get(tsd);
	unsigned narenas_actual = narenas_total_get();

	/*
	 * Dissociate old tdata array (and set up for deallocation upon return)
	 * if it's too small.
	 */
	if (arenas_tdata != NULL && narenas_tdata < narenas_actual) {
		arenas_tdata_old = arenas_tdata;
		narenas_tdata_old = narenas_tdata;
		arenas_tdata = NULL;
		narenas_tdata = 0;
		tsd_arenas_tdata_set(tsd, arenas_tdata);
		tsd_narenas_tdata_set(tsd, narenas_tdata);
	} else {
		arenas_tdata_old = NULL;
		narenas_tdata_old = 0;
	}

	/* Allocate tdata array if it's missing. */
	if (arenas_tdata == NULL) {
		bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd);
		/* Size the array to cover every arena and the requested index. */
		narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1;

		if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) {
			/*
			 * The bypass flag is held only for the duration of the
			 * allocation.  NOTE(review): presumably this stops the
			 * allocation from re-entering this function -- confirm.
			 */
			*arenas_tdata_bypassp = true;
			arenas_tdata = (arena_tdata_t *)a0malloc(
			    sizeof(arena_tdata_t) * narenas_tdata);
			*arenas_tdata_bypassp = false;
		}
		if (arenas_tdata == NULL) {
			tdata = NULL;
			goto label_return;
		}
		assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp);
		tsd_arenas_tdata_set(tsd, arenas_tdata);
		tsd_narenas_tdata_set(tsd, narenas_tdata);
	}

	/*
	 * Copy to tdata array.  It's possible that the actual number of arenas
	 * has increased since narenas_total_get() was called above, but that
	 * causes no correctness issues unless two threads concurrently execute
	 * the arenas.extend mallctl, which we trust mallctl synchronization to
	 * prevent.
	 */

	/* Copy/initialize tickers. */
	for (i = 0; i < narenas_actual; i++) {
		if (i < narenas_tdata_old) {
			ticker_copy(&arenas_tdata[i].decay_ticker,
			    &arenas_tdata_old[i].decay_ticker);
		} else {
			ticker_init(&arenas_tdata[i].decay_ticker,
			    DECAY_NTICKS_PER_UPDATE);
		}
	}
	/* Zero the slots beyond the arenas that existed at the snapshot. */
	if (narenas_tdata > narenas_actual) {
		memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t)
		    * (narenas_tdata - narenas_actual));
	}

	/* Read the refreshed tdata array. */
	tdata = &arenas_tdata[ind];
label_return:
	/* Free the superseded array, if there was one, on every exit path. */
	if (arenas_tdata_old != NULL)
		a0dalloc(arenas_tdata_old);
	return (tdata);
}
596 
/*
 * Slow path, called only by arena_choose().
 *
 * Binds the calling thread to an arena for application allocation and one
 * for internal metadata allocation, and returns whichever of the two matches
 * the internal argument.  With a single auto arena both bindings are arena 0.
 * Otherwise, under arenas_lock, each binding goes to the least-loaded
 * initialized auto arena, unless that arena already has threads and an
 * uninitialized slot exists, in which case a new arena is created there.
 * Returns NULL if creating a new arena fails.
 */
arena_t *
arena_choose_hard(tsd_t *tsd, bool internal)
{
	arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);

	if (narenas_auto > 1) {
		unsigned i, j, choose[2], first_null;

		/*
		 * Determine binding for both non-internal and internal
		 * allocation.
		 *
		 *   choose[0]: For application allocation.
		 *   choose[1]: For internal metadata allocation.
		 */

		for (j = 0; j < 2; j++)
			choose[j] = 0;

		/* narenas_auto acts as the "no empty slot found" sentinel. */
		first_null = narenas_auto;
		malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock);
		assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL);
		for (i = 1; i < narenas_auto; i++) {
			if (arena_get(tsd_tsdn(tsd), i, false) != NULL) {
				/*
				 * Choose the first arena that has the lowest
				 * number of threads assigned to it.
				 */
				for (j = 0; j < 2; j++) {
					if (arena_nthreads_get(arena_get(
					    tsd_tsdn(tsd), i, false), !!j) <
					    arena_nthreads_get(arena_get(
					    tsd_tsdn(tsd), choose[j], false),
					    !!j))
						choose[j] = i;
				}
			} else if (first_null == narenas_auto) {
				/*
				 * Record the index of the first uninitialized
				 * arena, in case all extant arenas are in use.
				 *
				 * NB: It is possible for there to be
				 * discontinuities in terms of initialized
				 * versus uninitialized arenas, due to the
				 * "thread.arena" mallctl.
				 */
				first_null = i;
			}
		}

		for (j = 0; j < 2; j++) {
			if (arena_nthreads_get(arena_get(tsd_tsdn(tsd),
			    choose[j], false), !!j) == 0 || first_null ==
			    narenas_auto) {
				/*
				 * Use an unloaded arena, or the least loaded
				 * arena if all arenas are already initialized.
				 */
				if (!!j == internal) {
					ret = arena_get(tsd_tsdn(tsd),
					    choose[j], false);
				}
			} else {
				arena_t *arena;

				/* Initialize a new arena. */
				choose[j] = first_null;
				arena = arena_init_locked(tsd_tsdn(tsd),
				    choose[j]);
				if (arena == NULL) {
					/* Release the lock before the early exit. */
					malloc_mutex_unlock(tsd_tsdn(tsd),
					    &arenas_lock);
					return (NULL);
				}
				if (!!j == internal)
					ret = arena;
			}
			/* Bind this kind (j == 1 means internal) to its choice. */
			arena_bind(tsd, choose[j], !!j);
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock);
	} else {
		/* Single auto arena: both bindings go to arena 0. */
		ret = arena_get(tsd_tsdn(tsd), 0, false);
		arena_bind(tsd, 0, false);
		arena_bind(tsd, 0, true);
	}

	return (ret);
}
686 
687 void
688 thread_allocated_cleanup(tsd_t *tsd)
689 {
690 
691 	/* Do nothing. */
692 }
693 
694 void
695 thread_deallocated_cleanup(tsd_t *tsd)
696 {
697 
698 	/* Do nothing. */
699 }
700 
701 void
702 iarena_cleanup(tsd_t *tsd)
703 {
704 	arena_t *iarena;
705 
706 	iarena = tsd_iarena_get(tsd);
707 	if (iarena != NULL)
708 		arena_unbind(tsd, iarena->ind, true);
709 }
710 
711 void
712 arena_cleanup(tsd_t *tsd)
713 {
714 	arena_t *arena;
715 
716 	arena = tsd_arena_get(tsd);
717 	if (arena != NULL)
718 		arena_unbind(tsd, arena->ind, false);
719 }
720 
721 void
722 arenas_tdata_cleanup(tsd_t *tsd)
723 {
724 	arena_tdata_t *arenas_tdata;
725 
726 	/* Prevent tsd->arenas_tdata from being (re)created. */
727 	*tsd_arenas_tdata_bypassp_get(tsd) = true;
728 
729 	arenas_tdata = tsd_arenas_tdata_get(tsd);
730 	if (arenas_tdata != NULL) {
731 		tsd_arenas_tdata_set(tsd, NULL);
732 		a0dalloc(arenas_tdata);
733 	}
734 }
735 
736 void
737 narenas_tdata_cleanup(tsd_t *tsd)
738 {
739 
740 	/* Do nothing. */
741 }
742 
743 void
744 arenas_tdata_bypass_cleanup(tsd_t *tsd)
745 {
746 
747 	/* Do nothing. */
748 }
749 
750 static void
751 stats_print_atexit(void)
752 {
753 
754 	if (config_tcache && config_stats) {
755 		tsdn_t *tsdn;
756 		unsigned narenas, i;
757 
758 		tsdn = tsdn_fetch();
759 
760 		/*
761 		 * Merge stats from extant threads.  This is racy, since
762 		 * individual threads do not lock when recording tcache stats
763 		 * events.  As a consequence, the final stats may be slightly
764 		 * out of date by the time they are reported, if other threads
765 		 * continue to allocate.
766 		 */
767 		for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
768 			arena_t *arena = arena_get(tsdn, i, false);
769 			if (arena != NULL) {
770 				tcache_t *tcache;
771 
772 				/*
773 				 * tcache_stats_merge() locks bins, so if any
774 				 * code is introduced that acquires both arena
775 				 * and bin locks in the opposite order,
776 				 * deadlocks may result.
777 				 */
778 				malloc_mutex_lock(tsdn, &arena->lock);
779 				ql_foreach(tcache, &arena->tcache_ql, link) {
780 					tcache_stats_merge(tsdn, tcache, arena);
781 				}
782 				malloc_mutex_unlock(tsdn, &arena->lock);
783 			}
784 		}
785 	}
786 	je_malloc_stats_print(NULL, NULL, NULL);
787 }
788 
789 /*
790  * End miscellaneous support functions.
791  */
792 /******************************************************************************/
793 /*
794  * Begin initialization functions.
795  */
796 
/*
 * Look up environment variable name.  Uses secure_getenv() when available;
 * otherwise, where issetugid() is available, returns NULL when it reports
 * non-zero; otherwise falls back to plain getenv().
 */
static char *
jemalloc_secure_getenv(const char *name)
{
#if defined(JEMALLOC_HAVE_SECURE_GETENV)
	return secure_getenv(name);
#elif defined(JEMALLOC_HAVE_ISSETUGID)
	return ((issetugid() != 0) ? NULL : getenv(name));
#else
	return (getenv(name));
#endif
}
810 
/*
 * Return the number of CPUs available to the process, never less than 1.
 *
 * Windows: GetSystemInfo().  glibc with the malloc hook: size of the calling
 * thread's affinity set.  Elsewhere: sysconf(_SC_NPROCESSORS_ONLN).  Any
 * failure or non-positive answer is reported as a single CPU.
 */
static unsigned
malloc_ncpus(void)
{
	long result;

#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	result = si.dwNumberOfProcessors;
#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT)
	/*
	 * glibc >= 2.6 has the CPU_COUNT macro.
	 *
	 * glibc's sysconf() uses isspace().  glibc allocates for the first time
	 * *before* setting up the isspace tables.  Therefore we need a
	 * different method to get the number of CPUs.
	 */
	{
		cpu_set_t set;

		/*
		 * Only count the set if the call filled it in; on failure
		 * its contents are not meaningful.
		 */
		if (pthread_getaffinity_np(pthread_self(), sizeof(set),
		    &set) != 0)
			result = -1;
		else
			result = CPU_COUNT(&set);
	}
#else
	result = sysconf(_SC_NPROCESSORS_ONLN);
#endif
	/* Treat errors and nonsensical counts alike as a uniprocessor. */
	return ((result < 1) ? 1 : (unsigned)result);
}
839 
/*
 * True for characters allowed in a conf key: letters, digits and '_'.
 * The range comparisons assume a character set with contiguous letters.
 */
static bool
malloc_conf_key_char(char c)
{

	return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
	    (c >= '0' && c <= '9') || c == '_');
}

/*
 * Extract the next "key:value" pair from the conf string at *opts_p.
 *
 * On success returns false, stores the key and value start pointers and
 * lengths through k_p/klen_p/v_p/vlen_p, and advances *opts_p past the pair
 * and its trailing comma, if any.  Returns true, leaving *opts_p untouched,
 * when the input is exhausted or malformed; a diagnostic is written unless
 * the string was simply empty.
 */
static bool
malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
    char const **v_p, size_t *vlen_p)
{
	const char *cur = *opts_p;

	*k_p = cur;

	/* Key: a run of key characters terminated by ':'. */
	while (malloc_conf_key_char(*cur))
		cur++;
	if (*cur == '\0') {
		if (cur != *opts_p) {
			malloc_write("<jemalloc>: Conf string ends "
			    "with key\n");
		}
		return (true);
	}
	if (*cur != ':') {
		malloc_write("<jemalloc>: Malformed conf string\n");
		return (true);
	}
	*klen_p = (uintptr_t)cur - (uintptr_t)*k_p;
	cur++;
	*v_p = cur;

	/* Value: everything up to the next ',' or the end of the string. */
	while (*cur != ',' && *cur != '\0')
		cur++;
	*vlen_p = (uintptr_t)cur - (uintptr_t)*v_p;
	if (*cur == ',') {
		cur++;
		/*
		 * Look ahead one character here, because the next time this
		 * function is called, it will assume that end of input has
		 * been cleanly reached if no input remains, but we have
		 * optimistically already consumed the comma if one exists.
		 */
		if (*cur == '\0') {
			malloc_write("<jemalloc>: Conf string ends "
			    "with comma\n");
		}
	}

	*opts_p = cur;
	return (false);
}
915 
/*
 * Print a diagnostic for a conf pair.  The key and value are not
 * NUL-terminated, so each is printed with an explicit length.
 */
static void
malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
    size_t vlen)
{
	int key_width = (int)klen;
	int val_width = (int)vlen;

	malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, key_width, k,
	    val_width, v);
}
924 
925 static void
926 malloc_slow_flag_init(void)
927 {
928 	/*
929 	 * Combine the runtime options into malloc_slow for fast path.  Called
930 	 * after processing all the options.
931 	 */
932 	malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0)
933 	    | (opt_junk_free ? flag_opt_junk_free : 0)
934 	    | (opt_quarantine ? flag_opt_quarantine : 0)
935 	    | (opt_zero ? flag_opt_zero : 0)
936 	    | (opt_utrace ? flag_opt_utrace : 0)
937 	    | (opt_xmalloc ? flag_opt_xmalloc : 0);
938 
939 	if (config_valgrind)
940 		malloc_slow_flags |= (in_valgrind ? flag_in_valgrind : 0);
941 
942 	malloc_slow = (malloc_slow_flags != 0);
943 }
944 
945 static void
946 malloc_conf_init(void)
947 {
948 	unsigned i;
949 	char buf[PATH_MAX + 1];
950 	const char *opts, *k, *v;
951 	size_t klen, vlen;
952 
953 	/*
954 	 * Automatically configure valgrind before processing options.  The
955 	 * valgrind option remains in jemalloc 3.x for compatibility reasons.
956 	 */
957 	if (config_valgrind) {
958 		in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false;
959 		if (config_fill && unlikely(in_valgrind)) {
960 			opt_junk = "false";
961 			opt_junk_alloc = false;
962 			opt_junk_free = false;
963 			assert(!opt_zero);
964 			opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT;
965 			opt_redzone = true;
966 		}
967 		if (config_tcache && unlikely(in_valgrind))
968 			opt_tcache = false;
969 	}
970 
971 	for (i = 0; i < 4; i++) {
972 		/* Get runtime configuration. */
973 		switch (i) {
974 		case 0:
975 			opts = config_malloc_conf;
976 			break;
977 		case 1:
978 			if (je_malloc_conf != NULL) {
979 				/*
980 				 * Use options that were compiled into the
981 				 * program.
982 				 */
983 				opts = je_malloc_conf;
984 			} else {
985 				/* No configuration specified. */
986 				buf[0] = '\0';
987 				opts = buf;
988 			}
989 			break;
990 		case 2: {
991 			ssize_t linklen = 0;
992 #ifndef _WIN32
993 			int saved_errno = errno;
994 			const char *linkname =
995 #  ifdef JEMALLOC_PREFIX
996 			    "/etc/"JEMALLOC_PREFIX"malloc.conf"
997 #  else
998 			    "/etc/malloc.conf"
999 #  endif
1000 			    ;
1001 
1002 			/*
1003 			 * Try to use the contents of the "/etc/malloc.conf"
1004 			 * symbolic link's name.
1005 			 */
1006 			linklen = readlink(linkname, buf, sizeof(buf) - 1);
1007 			if (linklen == -1) {
1008 				/* No configuration specified. */
1009 				linklen = 0;
1010 				/* Restore errno. */
1011 				set_errno(saved_errno);
1012 			}
1013 #endif
1014 			buf[linklen] = '\0';
1015 			opts = buf;
1016 			break;
1017 		} case 3: {
1018 			const char *envname =
1019 #ifdef JEMALLOC_PREFIX
1020 			    JEMALLOC_CPREFIX"MALLOC_CONF"
1021 #else
1022 			    "MALLOC_CONF"
1023 #endif
1024 			    ;
1025 
1026 			if ((opts = jemalloc_secure_getenv(envname)) != NULL) {
1027 				/*
1028 				 * Do nothing; opts is already initialized to
1029 				 * the value of the MALLOC_CONF environment
1030 				 * variable.
1031 				 */
1032 			} else {
1033 				/* No configuration specified. */
1034 				buf[0] = '\0';
1035 				opts = buf;
1036 			}
1037 			break;
1038 		} default:
1039 			not_reached();
1040 			buf[0] = '\0';
1041 			opts = buf;
1042 		}
1043 
1044 		while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v,
1045 		    &vlen)) {
1046 #define	CONF_MATCH(n)							\
1047 	(sizeof(n)-1 == klen && strncmp(n, k, klen) == 0)
1048 #define	CONF_MATCH_VALUE(n)						\
1049 	(sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0)
1050 #define	CONF_HANDLE_BOOL(o, n, cont)					\
1051 			if (CONF_MATCH(n)) {				\
1052 				if (CONF_MATCH_VALUE("true"))		\
1053 					o = true;			\
1054 				else if (CONF_MATCH_VALUE("false"))	\
1055 					o = false;			\
1056 				else {					\
1057 					malloc_conf_error(		\
1058 					    "Invalid conf value",	\
1059 					    k, klen, v, vlen);		\
1060 				}					\
1061 				if (cont)				\
1062 					continue;			\
1063 			}
1064 #define	CONF_MIN_no(um, min)	false
1065 #define	CONF_MIN_yes(um, min)	((um) < (min))
1066 #define	CONF_MAX_no(um, max)	false
1067 #define	CONF_MAX_yes(um, max)	((um) > (max))
1068 #define	CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip)	\
1069 			if (CONF_MATCH(n)) {				\
1070 				uintmax_t um;				\
1071 				char *end;				\
1072 									\
1073 				set_errno(0);				\
1074 				um = malloc_strtoumax(v, &end, 0);	\
1075 				if (get_errno() != 0 || (uintptr_t)end -\
1076 				    (uintptr_t)v != vlen) {		\
1077 					malloc_conf_error(		\
1078 					    "Invalid conf value",	\
1079 					    k, klen, v, vlen);		\
1080 				} else if (clip) {			\
1081 					if (CONF_MIN_##check_min(um,	\
1082 					    (t)(min)))			\
1083 						o = (t)(min);		\
1084 					else if (CONF_MAX_##check_max(	\
1085 					    um, (t)(max)))		\
1086 						o = (t)(max);		\
1087 					else				\
1088 						o = (t)um;		\
1089 				} else {				\
1090 					if (CONF_MIN_##check_min(um,	\
1091 					    (t)(min)) ||		\
1092 					    CONF_MAX_##check_max(um,	\
1093 					    (t)(max))) {		\
1094 						malloc_conf_error(	\
1095 						    "Out-of-range "	\
1096 						    "conf value",	\
1097 						    k, klen, v, vlen);	\
1098 					} else				\
1099 						o = (t)um;		\
1100 				}					\
1101 				continue;				\
1102 			}
1103 #define	CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max,	\
1104     clip)								\
1105 			CONF_HANDLE_T_U(unsigned, o, n, min, max,	\
1106 			    check_min, check_max, clip)
1107 #define	CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip)	\
1108 			CONF_HANDLE_T_U(size_t, o, n, min, max,		\
1109 			    check_min, check_max, clip)
1110 #define	CONF_HANDLE_SSIZE_T(o, n, min, max)				\
1111 			if (CONF_MATCH(n)) {				\
1112 				long l;					\
1113 				char *end;				\
1114 									\
1115 				set_errno(0);				\
1116 				l = strtol(v, &end, 0);			\
1117 				if (get_errno() != 0 || (uintptr_t)end -\
1118 				    (uintptr_t)v != vlen) {		\
1119 					malloc_conf_error(		\
1120 					    "Invalid conf value",	\
1121 					    k, klen, v, vlen);		\
1122 				} else if (l < (ssize_t)(min) || l >	\
1123 				    (ssize_t)(max)) {			\
1124 					malloc_conf_error(		\
1125 					    "Out-of-range conf value",	\
1126 					    k, klen, v, vlen);		\
1127 				} else					\
1128 					o = l;				\
1129 				continue;				\
1130 			}
1131 #define	CONF_HANDLE_CHAR_P(o, n, d)					\
1132 			if (CONF_MATCH(n)) {				\
1133 				size_t cpylen = (vlen <=		\
1134 				    sizeof(o)-1) ? vlen :		\
1135 				    sizeof(o)-1;			\
1136 				strncpy(o, v, cpylen);			\
1137 				o[cpylen] = '\0';			\
1138 				continue;				\
1139 			}
1140 
1141 			CONF_HANDLE_BOOL(opt_abort, "abort", true)
1142 			/*
1143 			 * Chunks always require at least one header page, as
1144 			 * many as 2^(LG_SIZE_CLASS_GROUP+1) data pages (plus an
1145 			 * additional page in the presence of cache-oblivious
1146 			 * large), and possibly an additional page in the
1147 			 * presence of redzones.  In order to simplify options
1148 			 * processing, use a conservative bound that
1149 			 * accommodates all these constraints.
1150 			 */
1151 			CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
1152 			    LG_SIZE_CLASS_GROUP + 1 + ((config_cache_oblivious
1153 			    || config_fill) ? 1 : 0), (sizeof(size_t) << 3) - 1,
1154 			    yes, yes, true)
1155 			if (strncmp("dss", k, klen) == 0) {
1156 				int i;
1157 				bool match = false;
1158 				for (i = 0; i < dss_prec_limit; i++) {
1159 					if (strncmp(dss_prec_names[i], v, vlen)
1160 					    == 0) {
1161 						if (chunk_dss_prec_set(i)) {
1162 							malloc_conf_error(
1163 							    "Error setting dss",
1164 							    k, klen, v, vlen);
1165 						} else {
1166 							opt_dss =
1167 							    dss_prec_names[i];
1168 							match = true;
1169 							break;
1170 						}
1171 					}
1172 				}
1173 				if (!match) {
1174 					malloc_conf_error("Invalid conf value",
1175 					    k, klen, v, vlen);
1176 				}
1177 				continue;
1178 			}
1179 			CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1,
1180 			    UINT_MAX, yes, no, false)
1181 			if (strncmp("purge", k, klen) == 0) {
1182 				int i;
1183 				bool match = false;
1184 				for (i = 0; i < purge_mode_limit; i++) {
1185 					if (strncmp(purge_mode_names[i], v,
1186 					    vlen) == 0) {
1187 						opt_purge = (purge_mode_t)i;
1188 						match = true;
1189 						break;
1190 					}
1191 				}
1192 				if (!match) {
1193 					malloc_conf_error("Invalid conf value",
1194 					    k, klen, v, vlen);
1195 				}
1196 				continue;
1197 			}
1198 			CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
1199 			    -1, (sizeof(size_t) << 3) - 1)
1200 			CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1,
1201 			    NSTIME_SEC_MAX);
1202 			CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true)
1203 			if (config_fill) {
1204 				if (CONF_MATCH("junk")) {
1205 					if (CONF_MATCH_VALUE("true")) {
1206 						if (config_valgrind &&
1207 						    unlikely(in_valgrind)) {
1208 							malloc_conf_error(
1209 							"Deallocation-time "
1210 							"junk filling cannot "
1211 							"be enabled while "
1212 							"running inside "
1213 							"Valgrind", k, klen, v,
1214 							vlen);
1215 						} else {
1216 							opt_junk = "true";
1217 							opt_junk_alloc = true;
1218 							opt_junk_free = true;
1219 						}
1220 					} else if (CONF_MATCH_VALUE("false")) {
1221 						opt_junk = "false";
1222 						opt_junk_alloc = opt_junk_free =
1223 						    false;
1224 					} else if (CONF_MATCH_VALUE("alloc")) {
1225 						opt_junk = "alloc";
1226 						opt_junk_alloc = true;
1227 						opt_junk_free = false;
1228 					} else if (CONF_MATCH_VALUE("free")) {
1229 						if (config_valgrind &&
1230 						    unlikely(in_valgrind)) {
1231 							malloc_conf_error(
1232 							"Deallocation-time "
1233 							"junk filling cannot "
1234 							"be enabled while "
1235 							"running inside "
1236 							"Valgrind", k, klen, v,
1237 							vlen);
1238 						} else {
1239 							opt_junk = "free";
1240 							opt_junk_alloc = false;
1241 							opt_junk_free = true;
1242 						}
1243 					} else {
1244 						malloc_conf_error(
1245 						    "Invalid conf value", k,
1246 						    klen, v, vlen);
1247 					}
1248 					continue;
1249 				}
1250 				CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine",
1251 				    0, SIZE_T_MAX, no, no, false)
1252 				CONF_HANDLE_BOOL(opt_redzone, "redzone", true)
1253 				CONF_HANDLE_BOOL(opt_zero, "zero", true)
1254 			}
1255 			if (config_utrace) {
1256 				CONF_HANDLE_BOOL(opt_utrace, "utrace", true)
1257 			}
1258 			if (config_xmalloc) {
1259 				CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true)
1260 			}
1261 			if (config_tcache) {
1262 				CONF_HANDLE_BOOL(opt_tcache, "tcache",
1263 				    !config_valgrind || !in_valgrind)
1264 				if (CONF_MATCH("tcache")) {
1265 					assert(config_valgrind && in_valgrind);
1266 					if (opt_tcache) {
1267 						opt_tcache = false;
1268 						malloc_conf_error(
1269 						"tcache cannot be enabled "
1270 						"while running inside Valgrind",
1271 						k, klen, v, vlen);
1272 					}
1273 					continue;
1274 				}
1275 				CONF_HANDLE_SSIZE_T(opt_lg_tcache_max,
1276 				    "lg_tcache_max", -1,
1277 				    (sizeof(size_t) << 3) - 1)
1278 			}
1279 			if (config_thp) {
1280 				CONF_HANDLE_BOOL(opt_thp, "thp", true)
1281 			}
1282 			if (config_prof) {
1283 				CONF_HANDLE_BOOL(opt_prof, "prof", true)
1284 				CONF_HANDLE_CHAR_P(opt_prof_prefix,
1285 				    "prof_prefix", "jeprof")
1286 				CONF_HANDLE_BOOL(opt_prof_active, "prof_active",
1287 				    true)
1288 				CONF_HANDLE_BOOL(opt_prof_thread_active_init,
1289 				    "prof_thread_active_init", true)
1290 				CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
1291 				    "lg_prof_sample", 0, (sizeof(uint64_t) << 3)
1292 				    - 1, no, yes, true)
1293 				CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum",
1294 				    true)
1295 				CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
1296 				    "lg_prof_interval", -1,
1297 				    (sizeof(uint64_t) << 3) - 1)
1298 				CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump",
1299 				    true)
1300 				CONF_HANDLE_BOOL(opt_prof_final, "prof_final",
1301 				    true)
1302 				CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak",
1303 				    true)
1304 			}
1305 			malloc_conf_error("Invalid conf pair", k, klen, v,
1306 			    vlen);
1307 #undef CONF_MATCH
1308 #undef CONF_MATCH_VALUE
1309 #undef CONF_HANDLE_BOOL
1310 #undef CONF_MIN_no
1311 #undef CONF_MIN_yes
1312 #undef CONF_MAX_no
1313 #undef CONF_MAX_yes
1314 #undef CONF_HANDLE_T_U
1315 #undef CONF_HANDLE_UNSIGNED
1316 #undef CONF_HANDLE_SIZE_T
1317 #undef CONF_HANDLE_SSIZE_T
1318 #undef CONF_HANDLE_CHAR_P
1319 		}
1320 	}
1321 }
1322 
1323 static bool
1324 malloc_init_hard_needed(void)
1325 {
1326 
1327 	if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state ==
1328 	    malloc_init_recursible)) {
1329 		/*
1330 		 * Another thread initialized the allocator before this one
1331 		 * acquired init_lock, or this thread is the initializing
1332 		 * thread, and it is recursively allocating.
1333 		 */
1334 		return (false);
1335 	}
1336 #ifdef JEMALLOC_THREADED_INIT
1337 	if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
1338 		spin_t spinner;
1339 
1340 		/* Busy-wait until the initializing thread completes. */
1341 		spin_init(&spinner);
1342 		do {
1343 			malloc_mutex_unlock(TSDN_NULL, &init_lock);
1344 			spin_adaptive(&spinner);
1345 			malloc_mutex_lock(TSDN_NULL, &init_lock);
1346 		} while (!malloc_initialized());
1347 		return (false);
1348 	}
1349 #endif
1350 	return (true);
1351 }
1352 
1353 static bool
1354 malloc_init_hard_a0_locked()
1355 {
1356 
1357 	malloc_initializer = INITIALIZER;
1358 
1359 	if (config_prof)
1360 		prof_boot0();
1361 	malloc_conf_init();
1362 	if (opt_stats_print) {
1363 		/* Print statistics at exit. */
1364 		if (atexit(stats_print_atexit) != 0) {
1365 			malloc_write("<jemalloc>: Error in atexit()\n");
1366 			if (opt_abort)
1367 				abort();
1368 		}
1369 	}
1370 	pages_boot();
1371 	if (base_boot())
1372 		return (true);
1373 	if (chunk_boot())
1374 		return (true);
1375 	if (ctl_boot())
1376 		return (true);
1377 	if (config_prof)
1378 		prof_boot1();
1379 	arena_boot();
1380 	if (config_tcache && tcache_boot(TSDN_NULL))
1381 		return (true);
1382 	if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS))
1383 		return (true);
1384 	/*
1385 	 * Create enough scaffolding to allow recursive allocation in
1386 	 * malloc_ncpus().
1387 	 */
1388 	narenas_auto = 1;
1389 	narenas_total_set(narenas_auto);
1390 	arenas = &a0;
1391 	memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
1392 	/*
1393 	 * Initialize one arena here.  The rest are lazily created in
1394 	 * arena_choose_hard().
1395 	 */
1396 	if (arena_init(TSDN_NULL, 0) == NULL)
1397 		return (true);
1398 
1399 	malloc_init_state = malloc_init_a0_initialized;
1400 
1401 	return (false);
1402 }
1403 
1404 static bool
1405 malloc_init_hard_a0(void)
1406 {
1407 	bool ret;
1408 
1409 	malloc_mutex_lock(TSDN_NULL, &init_lock);
1410 	ret = malloc_init_hard_a0_locked();
1411 	malloc_mutex_unlock(TSDN_NULL, &init_lock);
1412 	return (ret);
1413 }
1414 
1415 /* Initialize data structures which may trigger recursive allocation. */
1416 static bool
1417 malloc_init_hard_recursible(void)
1418 {
1419 
1420 	malloc_init_state = malloc_init_recursible;
1421 
1422 	ncpus = malloc_ncpus();
1423 
1424 #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
1425     && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \
1426     !defined(__native_client__))
1427 	/* LinuxThreads' pthread_atfork() allocates. */
1428 	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
1429 	    jemalloc_postfork_child) != 0) {
1430 		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
1431 		if (opt_abort)
1432 			abort();
1433 		return (true);
1434 	}
1435 #endif
1436 
1437 	return (false);
1438 }
1439 
1440 static bool
1441 malloc_init_hard_finish(tsdn_t *tsdn)
1442 {
1443 
1444 	if (malloc_mutex_boot())
1445 		return (true);
1446 
1447 	if (opt_narenas == 0) {
1448 		/*
1449 		 * For SMP systems, create more than one arena per CPU by
1450 		 * default.
1451 		 */
1452 		if (ncpus > 1)
1453 			opt_narenas = ncpus << 2;
1454 		else
1455 			opt_narenas = 1;
1456 	}
1457 	narenas_auto = opt_narenas;
1458 	/*
1459 	 * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
1460 	 */
1461 	if (narenas_auto > MALLOCX_ARENA_MAX) {
1462 		narenas_auto = MALLOCX_ARENA_MAX;
1463 		malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
1464 		    narenas_auto);
1465 	}
1466 	narenas_total_set(narenas_auto);
1467 
1468 	/* Allocate and initialize arenas. */
1469 	arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) *
1470 	    (MALLOCX_ARENA_MAX+1));
1471 	if (arenas == NULL)
1472 		return (true);
1473 	/* Copy the pointer to the one arena that was already initialized. */
1474 	arena_set(0, a0);
1475 
1476 	malloc_init_state = malloc_init_initialized;
1477 	malloc_slow_flag_init();
1478 
1479 	return (false);
1480 }
1481 
1482 static bool
1483 malloc_init_hard(void)
1484 {
1485 	tsd_t *tsd;
1486 
1487 #if defined(_WIN32) && _WIN32_WINNT < 0x0600
1488 	_init_init_lock();
1489 #endif
1490 	malloc_mutex_lock(TSDN_NULL, &init_lock);
1491 	if (!malloc_init_hard_needed()) {
1492 		malloc_mutex_unlock(TSDN_NULL, &init_lock);
1493 		return (false);
1494 	}
1495 
1496 	if (malloc_init_state != malloc_init_a0_initialized &&
1497 	    malloc_init_hard_a0_locked()) {
1498 		malloc_mutex_unlock(TSDN_NULL, &init_lock);
1499 		return (true);
1500 	}
1501 
1502 	malloc_mutex_unlock(TSDN_NULL, &init_lock);
1503 	/* Recursive allocation relies on functional tsd. */
1504 	tsd = malloc_tsd_boot0();
1505 	if (tsd == NULL)
1506 		return (true);
1507 	if (malloc_init_hard_recursible())
1508 		return (true);
1509 	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
1510 
1511 	if (config_prof && prof_boot2(tsd)) {
1512 		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
1513 		return (true);
1514 	}
1515 
1516 	if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
1517 		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
1518 		return (true);
1519 	}
1520 
1521 	malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
1522 	malloc_tsd_boot1();
1523 	return (false);
1524 }
1525 
1526 /*
1527  * End initialization functions.
1528  */
1529 /******************************************************************************/
1530 /*
1531  * Begin malloc(3)-compatible functions.
1532  */
1533 
1534 static void *
1535 ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero,
1536     prof_tctx_t *tctx, bool slow_path)
1537 {
1538 	void *p;
1539 
1540 	if (tctx == NULL)
1541 		return (NULL);
1542 	if (usize <= SMALL_MAXCLASS) {
1543 		szind_t ind_large = size2index(LARGE_MINCLASS);
1544 		p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path);
1545 		if (p == NULL)
1546 			return (NULL);
1547 		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
1548 	} else
1549 		p = ialloc(tsd, usize, ind, zero, slow_path);
1550 
1551 	return (p);
1552 }
1553 
1554 JEMALLOC_ALWAYS_INLINE_C void *
1555 ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path)
1556 {
1557 	void *p;
1558 	prof_tctx_t *tctx;
1559 
1560 	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
1561 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
1562 		p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path);
1563 	else
1564 		p = ialloc(tsd, usize, ind, zero, slow_path);
1565 	if (unlikely(p == NULL)) {
1566 		prof_alloc_rollback(tsd, tctx, true);
1567 		return (NULL);
1568 	}
1569 	prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
1570 
1571 	return (p);
1572 }
1573 
1574 /*
1575  * ialloc_body() is inlined so that fast and slow paths are generated separately
1576  * with statically known slow_path.
1577  *
1578  * This function guarantees that *tsdn is non-NULL on success.
1579  */
1580 JEMALLOC_ALWAYS_INLINE_C void *
1581 ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize,
1582     bool slow_path)
1583 {
1584 	tsd_t *tsd;
1585 	szind_t ind;
1586 
1587 	if (slow_path && unlikely(malloc_init())) {
1588 		*tsdn = NULL;
1589 		return (NULL);
1590 	}
1591 
1592 	tsd = tsd_fetch();
1593 	*tsdn = tsd_tsdn(tsd);
1594 	witness_assert_lockless(tsd_tsdn(tsd));
1595 
1596 	ind = size2index(size);
1597 	if (unlikely(ind >= NSIZES))
1598 		return (NULL);
1599 
1600 	if (config_stats || (config_prof && opt_prof) || (slow_path &&
1601 	    config_valgrind && unlikely(in_valgrind))) {
1602 		*usize = index2size(ind);
1603 		assert(*usize > 0 && *usize <= HUGE_MAXCLASS);
1604 	}
1605 
1606 	if (config_prof && opt_prof)
1607 		return (ialloc_prof(tsd, *usize, ind, zero, slow_path));
1608 
1609 	return (ialloc(tsd, size, ind, zero, slow_path));
1610 }
1611 
1612 JEMALLOC_ALWAYS_INLINE_C void
1613 ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func,
1614     bool update_errno, bool slow_path)
1615 {
1616 
1617 	assert(!tsdn_null(tsdn) || ret == NULL);
1618 
1619 	if (unlikely(ret == NULL)) {
1620 		if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) {
1621 			malloc_printf("<jemalloc>: Error in %s(): out of "
1622 			    "memory\n", func);
1623 			abort();
1624 		}
1625 		if (update_errno)
1626 			set_errno(ENOMEM);
1627 	}
1628 	if (config_stats && likely(ret != NULL)) {
1629 		assert(usize == isalloc(tsdn, ret, config_prof));
1630 		*tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize;
1631 	}
1632 	witness_assert_lockless(tsdn);
1633 }
1634 
1635 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
1636 void JEMALLOC_NOTHROW *
1637 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
1638 je_malloc(size_t size)
1639 {
1640 	void *ret;
1641 	tsdn_t *tsdn;
1642 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
1643 
1644 	if (size == 0)
1645 		size = 1;
1646 
1647 	if (likely(!malloc_slow)) {
1648 		ret = ialloc_body(size, false, &tsdn, &usize, false);
1649 		ialloc_post_check(ret, tsdn, usize, "malloc", true, false);
1650 	} else {
1651 		ret = ialloc_body(size, false, &tsdn, &usize, true);
1652 		ialloc_post_check(ret, tsdn, usize, "malloc", true, true);
1653 		UTRACE(0, size, ret);
1654 		JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false);
1655 	}
1656 
1657 	return (ret);
1658 }
1659 
1660 static void *
1661 imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize,
1662     prof_tctx_t *tctx)
1663 {
1664 	void *p;
1665 
1666 	if (tctx == NULL)
1667 		return (NULL);
1668 	if (usize <= SMALL_MAXCLASS) {
1669 		assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS);
1670 		p = ipalloc(tsd, LARGE_MINCLASS, alignment, false);
1671 		if (p == NULL)
1672 			return (NULL);
1673 		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
1674 	} else
1675 		p = ipalloc(tsd, usize, alignment, false);
1676 
1677 	return (p);
1678 }
1679 
1680 JEMALLOC_ALWAYS_INLINE_C void *
1681 imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize)
1682 {
1683 	void *p;
1684 	prof_tctx_t *tctx;
1685 
1686 	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
1687 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
1688 		p = imemalign_prof_sample(tsd, alignment, usize, tctx);
1689 	else
1690 		p = ipalloc(tsd, usize, alignment, false);
1691 	if (unlikely(p == NULL)) {
1692 		prof_alloc_rollback(tsd, tctx, true);
1693 		return (NULL);
1694 	}
1695 	prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
1696 
1697 	return (p);
1698 }
1699 
/*
 * Shared implementation of the aligned-allocation entry points.
 *
 * memptr:        receives the allocation; written only on success.
 * alignment:     requested alignment; must be a power of two that is at
 *                least min_alignment.
 * size:          requested size; 0 is treated as 1.
 * min_alignment: smallest alignment the calling API accepts (non-zero).
 *
 * Returns 0 on success, EINVAL for an unacceptable alignment, and ENOMEM if
 * initialization fails, the computed usable size is 0 or exceeds
 * HUGE_MAXCLASS, or the allocation itself fails.  With xmalloc enabled, the
 * EINVAL and ENOMEM conditions abort instead of returning.
 */
JEMALLOC_ATTR(nonnull(1))
static int
imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
{
	int ret;
	tsd_t *tsd;
	size_t usize;
	void *result;

	assert(min_alignment != 0);

	if (unlikely(malloc_init())) {
		/* No tsd is available when initialization fails. */
		tsd = NULL;
		result = NULL;
		goto label_oom;
	}
	tsd = tsd_fetch();
	witness_assert_lockless(tsd_tsdn(tsd));
	if (size == 0)
		size = 1;

	/* Make sure that alignment is a large enough power of 2. */
	if (unlikely(((alignment - 1) & alignment) != 0
	    || (alignment < min_alignment))) {
		if (config_xmalloc && unlikely(opt_xmalloc)) {
			malloc_write("<jemalloc>: Error allocating "
			    "aligned memory: invalid alignment\n");
			abort();
		}
		result = NULL;
		ret = EINVAL;
		goto label_return;
	}

	/* Usable size for this size/alignment pair; out of range means OOM. */
	usize = sa2u(size, alignment);
	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
		result = NULL;
		goto label_oom;
	}

	if (config_prof && opt_prof)
		result = imemalign_prof(tsd, alignment, usize);
	else
		result = ipalloc(tsd, usize, alignment, false);
	if (unlikely(result == NULL))
		goto label_oom;
	assert(((uintptr_t)result & (alignment - 1)) == ZU(0));

	*memptr = result;
	ret = 0;
label_return:
	/* Common exit: stats accounting (success only), tracing, Valgrind. */
	if (config_stats && likely(result != NULL)) {
		assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof));
		*tsd_thread_allocatedp_get(tsd) += usize;
	}
	UTRACE(0, size, result);
	JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize,
	    false);
	witness_assert_lockless(tsd_tsdn(tsd));
	return (ret);
label_oom:
	/* Failure exit: report ENOMEM, then rejoin the common exit path. */
	assert(result == NULL);
	if (config_xmalloc && unlikely(opt_xmalloc)) {
		malloc_write("<jemalloc>: Error allocating aligned memory: "
		    "out of memory\n");
		abort();
	}
	ret = ENOMEM;
	witness_assert_lockless(tsd_tsdn(tsd));
	goto label_return;
}
1771 
1772 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
1773 JEMALLOC_ATTR(nonnull(1))
1774 je_posix_memalign(void **memptr, size_t alignment, size_t size)
1775 {
1776 	int ret;
1777 
1778 	ret = imemalign(memptr, alignment, size, sizeof(void *));
1779 
1780 	return (ret);
1781 }
1782 
1783 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
1784 void JEMALLOC_NOTHROW *
1785 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2)
1786 je_aligned_alloc(size_t alignment, size_t size)
1787 {
1788 	void *ret;
1789 	int err;
1790 
1791 	if (unlikely((err = imemalign(&ret, alignment, size, 1)) != 0)) {
1792 		ret = NULL;
1793 		set_errno(err);
1794 	}
1795 
1796 	return (ret);
1797 }
1798 
1799 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
1800 void JEMALLOC_NOTHROW *
1801 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
1802 je_calloc(size_t num, size_t size)
1803 {
1804 	void *ret;
1805 	tsdn_t *tsdn;
1806 	size_t num_size;
1807 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
1808 
1809 	num_size = num * size;
1810 	if (unlikely(num_size == 0)) {
1811 		if (num == 0 || size == 0)
1812 			num_size = 1;
1813 		else
1814 			num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */
1815 	/*
1816 	 * Try to avoid division here.  We know that it isn't possible to
1817 	 * overflow during multiplication if neither operand uses any of the
1818 	 * most significant half of the bits in a size_t.
1819 	 */
1820 	} else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) <<
1821 	    2))) && (num_size / size != num)))
1822 		num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */
1823 
1824 	if (likely(!malloc_slow)) {
1825 		ret = ialloc_body(num_size, true, &tsdn, &usize, false);
1826 		ialloc_post_check(ret, tsdn, usize, "calloc", true, false);
1827 	} else {
1828 		ret = ialloc_body(num_size, true, &tsdn, &usize, true);
1829 		ialloc_post_check(ret, tsdn, usize, "calloc", true, true);
1830 		UTRACE(0, num_size, ret);
1831 		JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, true);
1832 	}
1833 
1834 	return (ret);
1835 }
1836 
1837 static void *
1838 irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize,
1839     prof_tctx_t *tctx)
1840 {
1841 	void *p;
1842 
1843 	if (tctx == NULL)
1844 		return (NULL);
1845 	if (usize <= SMALL_MAXCLASS) {
1846 		p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false);
1847 		if (p == NULL)
1848 			return (NULL);
1849 		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
1850 	} else
1851 		p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
1852 
1853 	return (p);
1854 }
1855 
1856 JEMALLOC_ALWAYS_INLINE_C void *
1857 irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize)
1858 {
1859 	void *p;
1860 	bool prof_active;
1861 	prof_tctx_t *old_tctx, *tctx;
1862 
1863 	prof_active = prof_active_get_unlocked();
1864 	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr);
1865 	tctx = prof_alloc_prep(tsd, usize, prof_active, true);
1866 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
1867 		p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx);
1868 	else
1869 		p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
1870 	if (unlikely(p == NULL)) {
1871 		prof_alloc_rollback(tsd, tctx, true);
1872 		return (NULL);
1873 	}
1874 	prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize,
1875 	    old_tctx);
1876 
1877 	return (p);
1878 }
1879 
1880 JEMALLOC_INLINE_C void
1881 ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
1882 {
1883 	size_t usize;
1884 	UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
1885 
1886 	witness_assert_lockless(tsd_tsdn(tsd));
1887 
1888 	assert(ptr != NULL);
1889 	assert(malloc_initialized() || IS_INITIALIZER);
1890 
1891 	if (config_prof && opt_prof) {
1892 		usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
1893 		prof_free(tsd, ptr, usize);
1894 	} else if (config_stats || config_valgrind)
1895 		usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
1896 	if (config_stats)
1897 		*tsd_thread_deallocatedp_get(tsd) += usize;
1898 
1899 	if (likely(!slow_path))
1900 		iqalloc(tsd, ptr, tcache, false);
1901 	else {
1902 		if (config_valgrind && unlikely(in_valgrind))
1903 			rzsize = p2rz(tsd_tsdn(tsd), ptr);
1904 		iqalloc(tsd, ptr, tcache, true);
1905 		JEMALLOC_VALGRIND_FREE(ptr, rzsize);
1906 	}
1907 }
1908 
1909 JEMALLOC_INLINE_C void
1910 isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path)
1911 {
1912 	UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
1913 
1914 	witness_assert_lockless(tsd_tsdn(tsd));
1915 
1916 	assert(ptr != NULL);
1917 	assert(malloc_initialized() || IS_INITIALIZER);
1918 
1919 	if (config_prof && opt_prof)
1920 		prof_free(tsd, ptr, usize);
1921 	if (config_stats)
1922 		*tsd_thread_deallocatedp_get(tsd) += usize;
1923 	if (config_valgrind && unlikely(in_valgrind))
1924 		rzsize = p2rz(tsd_tsdn(tsd), ptr);
1925 	isqalloc(tsd, ptr, usize, tcache, slow_path);
1926 	JEMALLOC_VALGRIND_FREE(ptr, rzsize);
1927 }
1928 
/*
 * realloc(3) entry point.
 *
 * - realloc(ptr, 0) with non-NULL ptr frees ptr and returns NULL.
 * - realloc(NULL, size) behaves like malloc(size); size 0 is treated as 1.
 * - Otherwise the object is resized via iralloc() (or irealloc_prof() when
 *   profiling is active).
 *
 * On failure NULL is returned and errno is set to ENOMEM, unless xmalloc is
 * enabled, in which case the process aborts.
 */
JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_NOTHROW *
JEMALLOC_ALLOC_SIZE(2)
je_realloc(void *ptr, size_t size)
{
	void *ret;
	tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL);
	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
	size_t old_usize = 0;
	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);

	if (unlikely(size == 0)) {
		if (ptr != NULL) {
			tsd_t *tsd;

			/* realloc(ptr, 0) is equivalent to free(ptr). */
			UTRACE(ptr, 0, 0);
			tsd = tsd_fetch();
			ifree(tsd, ptr, tcache_get(tsd, false), true);
			return (NULL);
		}
		/* realloc(NULL, 0): fall through as a one-byte allocation. */
		size = 1;
	}

	if (likely(ptr != NULL)) {
		tsd_t *tsd;

		assert(malloc_initialized() || IS_INITIALIZER);
		malloc_thread_init();
		tsd = tsd_fetch();

		witness_assert_lockless(tsd_tsdn(tsd));

		/* Capture the old size (and redzone) before ptr may move. */
		old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
		if (config_valgrind && unlikely(in_valgrind)) {
			old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) :
			    u2rz(old_usize);
		}

		if (config_prof && opt_prof) {
			/* An out-of-range usable size is reported as failure. */
			usize = s2u(size);
			ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ?
			    NULL : irealloc_prof(tsd, ptr, old_usize, usize);
		} else {
			/* usize is only needed for stats and Valgrind here. */
			if (config_stats || (config_valgrind &&
			    unlikely(in_valgrind)))
				usize = s2u(size);
			ret = iralloc(tsd, ptr, old_usize, size, 0, false);
		}
		tsdn = tsd_tsdn(tsd);
	} else {
		/* realloc(NULL, size) is equivalent to malloc(size). */
		if (likely(!malloc_slow))
			ret = ialloc_body(size, false, &tsdn, &usize, false);
		else
			ret = ialloc_body(size, false, &tsdn, &usize, true);
		assert(!tsdn_null(tsdn) || ret == NULL);
	}

	if (unlikely(ret == NULL)) {
		if (config_xmalloc && unlikely(opt_xmalloc)) {
			malloc_write("<jemalloc>: Error in realloc(): "
			    "out of memory\n");
			abort();
		}
		set_errno(ENOMEM);
	}
	if (config_stats && likely(ret != NULL)) {
		tsd_t *tsd;

		/* Account the new object and retire the old one. */
		assert(usize == isalloc(tsdn, ret, config_prof));
		tsd = tsdn_tsd(tsdn);
		*tsd_thread_allocatedp_get(tsd) += usize;
		*tsd_thread_deallocatedp_get(tsd) += old_usize;
	}
	UTRACE(ptr, size, ret);
	JEMALLOC_VALGRIND_REALLOC(maybe, tsdn, ret, usize, maybe, ptr,
	    old_usize, old_rzsize, maybe, false);
	witness_assert_lockless(tsdn);
	return (ret);
}
2010 
2011 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
2012 je_free(void *ptr)
2013 {
2014 
2015 	UTRACE(ptr, 0, 0);
2016 	if (likely(ptr != NULL)) {
2017 		tsd_t *tsd = tsd_fetch();
2018 		witness_assert_lockless(tsd_tsdn(tsd));
2019 		if (likely(!malloc_slow))
2020 			ifree(tsd, ptr, tcache_get(tsd, false), false);
2021 		else
2022 			ifree(tsd, ptr, tcache_get(tsd, false), true);
2023 		witness_assert_lockless(tsd_tsdn(tsd));
2024 	}
2025 }
2026 
2027 /*
2028  * End malloc(3)-compatible functions.
2029  */
2030 /******************************************************************************/
2031 /*
2032  * Begin non-standard override functions.
2033  */
2034 
#ifdef JEMALLOC_OVERRIDE_MEMALIGN
/*
 * memalign() override: forwards to imemalign() with a minimum alignment of 1
 * and returns NULL on any failure.
 */
JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_NOTHROW *
JEMALLOC_ATTR(malloc)
je_memalign(size_t alignment, size_t size)
{
	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);

	if (unlikely(imemalign(&ret, alignment, size, 1) != 0))
		return (NULL);

	return (ret);
}
#endif
2047 
#ifdef JEMALLOC_OVERRIDE_VALLOC
/*
 * valloc() override: a PAGE-aligned allocation through imemalign(), returning
 * NULL on any failure.
 */
JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_NOTHROW *
JEMALLOC_ATTR(malloc)
je_valloc(size_t size)
{
	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);

	if (unlikely(imemalign(&ret, PAGE, size, 1) != 0))
		return (NULL);

	return (ret);
}
#endif
2060 
/*
 * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has
 * #define je_malloc malloc
 *
 * The two-level expansion lets the argument be macro-expanded before token
 * pasting.  If je_malloc expands to malloc, the paste yields
 * malloc_is_malloc, which is defined as 1.  Any other expansion yields an
 * identifier that is not defined as a macro, which evaluates to 0 inside
 * #if.
 */
#define	malloc_is_malloc 1
#define	is_malloc_(a) malloc_is_ ## a
#define	is_malloc(a) is_malloc_(a)

#if ((is_malloc(je_malloc) == 1) && defined(JEMALLOC_GLIBC_MALLOC_HOOK))
/*
 * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
 * to inconsistently reference libc's malloc(3)-compatible functions
 * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541).
 *
 * These definitions interpose hooks in glibc.  The functions are actually
 * passed an extra argument for the caller return address, which will be
 * ignored.
 */
JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free;
JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc;
JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc;
# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK
JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
    je_memalign;
# endif

#ifdef CPU_COUNT
/*
 * To enable static linking with glibc, the libc specific malloc interface must
 * be implemented also, so none of glibc's malloc.o functions are added to the
 * link.
 */
#define	ALIAS(je_fn)	__attribute__((alias (#je_fn), used))
/* To force macro expansion of je_ prefix before stringification. */
#define	PREALIAS(je_fn)  ALIAS(je_fn)
/* Each __libc_* symbol below is declared as an alias of a je_* function. */
void	*__libc_malloc(size_t size) PREALIAS(je_malloc);
void	__libc_free(void* ptr) PREALIAS(je_free);
void	*__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc);
void	*__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc);
void	*__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign);
void	*__libc_valloc(size_t size) PREALIAS(je_valloc);
int	__posix_memalign(void** r, size_t a, size_t s)
    PREALIAS(je_posix_memalign);
#undef PREALIAS
#undef ALIAS

#endif

#endif
2110 
2111 /*
2112  * End non-standard override functions.
2113  */
2114 /******************************************************************************/
2115 /*
2116  * Begin non-standard functions.
2117  */
2118 
2119 JEMALLOC_ALWAYS_INLINE_C bool
2120 imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
2121     size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
2122 {
2123 
2124 	if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) {
2125 		*alignment = 0;
2126 		*usize = s2u(size);
2127 	} else {
2128 		*alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
2129 		*usize = sa2u(size, *alignment);
2130 	}
2131 	if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS))
2132 		return (true);
2133 	*zero = MALLOCX_ZERO_GET(flags);
2134 	if ((flags & MALLOCX_TCACHE_MASK) != 0) {
2135 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
2136 			*tcache = NULL;
2137 		else
2138 			*tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
2139 	} else
2140 		*tcache = tcache_get(tsd, true);
2141 	if ((flags & MALLOCX_ARENA_MASK) != 0) {
2142 		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
2143 		*arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
2144 		if (unlikely(*arena == NULL))
2145 			return (true);
2146 	} else
2147 		*arena = NULL;
2148 	return (false);
2149 }
2150 
2151 JEMALLOC_ALWAYS_INLINE_C void *
2152 imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
2153     tcache_t *tcache, arena_t *arena, bool slow_path)
2154 {
2155 	szind_t ind;
2156 
2157 	if (unlikely(alignment != 0))
2158 		return (ipalloct(tsdn, usize, alignment, zero, tcache, arena));
2159 	ind = size2index(usize);
2160 	assert(ind < NSIZES);
2161 	return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena,
2162 	    slow_path));
2163 }
2164 
2165 static void *
2166 imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
2167     tcache_t *tcache, arena_t *arena, bool slow_path)
2168 {
2169 	void *p;
2170 
2171 	if (usize <= SMALL_MAXCLASS) {
2172 		assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
2173 		    sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
2174 		p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero,
2175 		    tcache, arena, slow_path);
2176 		if (p == NULL)
2177 			return (NULL);
2178 		arena_prof_promoted(tsdn, p, usize);
2179 	} else {
2180 		p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena,
2181 		    slow_path);
2182 	}
2183 
2184 	return (p);
2185 }
2186 
2187 JEMALLOC_ALWAYS_INLINE_C void *
2188 imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path)
2189 {
2190 	void *p;
2191 	size_t alignment;
2192 	bool zero;
2193 	tcache_t *tcache;
2194 	arena_t *arena;
2195 	prof_tctx_t *tctx;
2196 
2197 	if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
2198 	    &zero, &tcache, &arena)))
2199 		return (NULL);
2200 	tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true);
2201 	if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
2202 		p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero,
2203 		    tcache, arena, slow_path);
2204 	} else if ((uintptr_t)tctx > (uintptr_t)1U) {
2205 		p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero,
2206 		    tcache, arena, slow_path);
2207 	} else
2208 		p = NULL;
2209 	if (unlikely(p == NULL)) {
2210 		prof_alloc_rollback(tsd, tctx, true);
2211 		return (NULL);
2212 	}
2213 	prof_malloc(tsd_tsdn(tsd), p, *usize, tctx);
2214 
2215 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
2216 	return (p);
2217 }
2218 
2219 JEMALLOC_ALWAYS_INLINE_C void *
2220 imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize,
2221     bool slow_path)
2222 {
2223 	void *p;
2224 	size_t alignment;
2225 	bool zero;
2226 	tcache_t *tcache;
2227 	arena_t *arena;
2228 
2229 	if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
2230 	    &zero, &tcache, &arena)))
2231 		return (NULL);
2232 	p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache,
2233 	    arena, slow_path);
2234 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
2235 	return (p);
2236 }
2237 
2238 /* This function guarantees that *tsdn is non-NULL on success. */
2239 JEMALLOC_ALWAYS_INLINE_C void *
2240 imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize,
2241     bool slow_path)
2242 {
2243 	tsd_t *tsd;
2244 
2245 	if (slow_path && unlikely(malloc_init())) {
2246 		*tsdn = NULL;
2247 		return (NULL);
2248 	}
2249 
2250 	tsd = tsd_fetch();
2251 	*tsdn = tsd_tsdn(tsd);
2252 	witness_assert_lockless(tsd_tsdn(tsd));
2253 
2254 	if (likely(flags == 0)) {
2255 		szind_t ind = size2index(size);
2256 		if (unlikely(ind >= NSIZES))
2257 			return (NULL);
2258 		if (config_stats || (config_prof && opt_prof) || (slow_path &&
2259 		    config_valgrind && unlikely(in_valgrind))) {
2260 			*usize = index2size(ind);
2261 			assert(*usize > 0 && *usize <= HUGE_MAXCLASS);
2262 		}
2263 
2264 		if (config_prof && opt_prof) {
2265 			return (ialloc_prof(tsd, *usize, ind, false,
2266 			    slow_path));
2267 		}
2268 
2269 		return (ialloc(tsd, size, ind, false, slow_path));
2270 	}
2271 
2272 	if (config_prof && opt_prof)
2273 		return (imallocx_prof(tsd, size, flags, usize, slow_path));
2274 
2275 	return (imallocx_no_prof(tsd, size, flags, usize, slow_path));
2276 }
2277 
2278 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
2279 void JEMALLOC_NOTHROW *
2280 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
2281 je_mallocx(size_t size, int flags)
2282 {
2283 	tsdn_t *tsdn;
2284 	void *p;
2285 	size_t usize;
2286 
2287 	assert(size != 0);
2288 
2289 	if (likely(!malloc_slow)) {
2290 		p = imallocx_body(size, flags, &tsdn, &usize, false);
2291 		ialloc_post_check(p, tsdn, usize, "mallocx", false, false);
2292 	} else {
2293 		p = imallocx_body(size, flags, &tsdn, &usize, true);
2294 		ialloc_post_check(p, tsdn, usize, "mallocx", false, true);
2295 		UTRACE(0, size, p);
2296 		JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize,
2297 		    MALLOCX_ZERO_GET(flags));
2298 	}
2299 
2300 	return (p);
2301 }
2302 
2303 static void *
2304 irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize,
2305     size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
2306     prof_tctx_t *tctx)
2307 {
2308 	void *p;
2309 
2310 	if (tctx == NULL)
2311 		return (NULL);
2312 	if (usize <= SMALL_MAXCLASS) {
2313 		p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment,
2314 		    zero, tcache, arena);
2315 		if (p == NULL)
2316 			return (NULL);
2317 		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
2318 	} else {
2319 		p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero,
2320 		    tcache, arena);
2321 	}
2322 
2323 	return (p);
2324 }
2325 
2326 JEMALLOC_ALWAYS_INLINE_C void *
2327 irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
2328     size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
2329     arena_t *arena)
2330 {
2331 	void *p;
2332 	bool prof_active;
2333 	prof_tctx_t *old_tctx, *tctx;
2334 
2335 	prof_active = prof_active_get_unlocked();
2336 	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr);
2337 	tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
2338 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
2339 		p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize,
2340 		    alignment, zero, tcache, arena, tctx);
2341 	} else {
2342 		p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero,
2343 		    tcache, arena);
2344 	}
2345 	if (unlikely(p == NULL)) {
2346 		prof_alloc_rollback(tsd, tctx, false);
2347 		return (NULL);
2348 	}
2349 
2350 	if (p == old_ptr && alignment != 0) {
2351 		/*
2352 		 * The allocation did not move, so it is possible that the size
2353 		 * class is smaller than would guarantee the requested
2354 		 * alignment, and that the alignment constraint was
2355 		 * serendipitously satisfied.  Additionally, old_usize may not
2356 		 * be the same as the current usize because of in-place large
2357 		 * reallocation.  Therefore, query the actual value of usize.
2358 		 */
2359 		*usize = isalloc(tsd_tsdn(tsd), p, config_prof);
2360 	}
2361 	prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr,
2362 	    old_usize, old_tctx);
2363 
2364 	return (p);
2365 }
2366 
2367 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
2368 void JEMALLOC_NOTHROW *
2369 JEMALLOC_ALLOC_SIZE(2)
2370 je_rallocx(void *ptr, size_t size, int flags)
2371 {
2372 	void *p;
2373 	tsd_t *tsd;
2374 	size_t usize;
2375 	size_t old_usize;
2376 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
2377 	size_t alignment = MALLOCX_ALIGN_GET(flags);
2378 	bool zero = flags & MALLOCX_ZERO;
2379 	arena_t *arena;
2380 	tcache_t *tcache;
2381 
2382 	assert(ptr != NULL);
2383 	assert(size != 0);
2384 	assert(malloc_initialized() || IS_INITIALIZER);
2385 	malloc_thread_init();
2386 	tsd = tsd_fetch();
2387 	witness_assert_lockless(tsd_tsdn(tsd));
2388 
2389 	if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
2390 		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
2391 		arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
2392 		if (unlikely(arena == NULL))
2393 			goto label_oom;
2394 	} else
2395 		arena = NULL;
2396 
2397 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
2398 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
2399 			tcache = NULL;
2400 		else
2401 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
2402 	} else
2403 		tcache = tcache_get(tsd, true);
2404 
2405 	old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
2406 	if (config_valgrind && unlikely(in_valgrind))
2407 		old_rzsize = u2rz(old_usize);
2408 
2409 	if (config_prof && opt_prof) {
2410 		usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
2411 		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
2412 			goto label_oom;
2413 		p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
2414 		    zero, tcache, arena);
2415 		if (unlikely(p == NULL))
2416 			goto label_oom;
2417 	} else {
2418 		p = iralloct(tsd, ptr, old_usize, size, alignment, zero,
2419 		     tcache, arena);
2420 		if (unlikely(p == NULL))
2421 			goto label_oom;
2422 		if (config_stats || (config_valgrind && unlikely(in_valgrind)))
2423 			usize = isalloc(tsd_tsdn(tsd), p, config_prof);
2424 	}
2425 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
2426 
2427 	if (config_stats) {
2428 		*tsd_thread_allocatedp_get(tsd) += usize;
2429 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
2430 	}
2431 	UTRACE(ptr, size, p);
2432 	JEMALLOC_VALGRIND_REALLOC(maybe, tsd_tsdn(tsd), p, usize, no, ptr,
2433 	    old_usize, old_rzsize, no, zero);
2434 	witness_assert_lockless(tsd_tsdn(tsd));
2435 	return (p);
2436 label_oom:
2437 	if (config_xmalloc && unlikely(opt_xmalloc)) {
2438 		malloc_write("<jemalloc>: Error in rallocx(): out of memory\n");
2439 		abort();
2440 	}
2441 	UTRACE(ptr, size, 0);
2442 	witness_assert_lockless(tsd_tsdn(tsd));
2443 	return (NULL);
2444 }
2445 
2446 JEMALLOC_ALWAYS_INLINE_C size_t
2447 ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
2448     size_t extra, size_t alignment, bool zero)
2449 {
2450 	size_t usize;
2451 
2452 	if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero))
2453 		return (old_usize);
2454 	usize = isalloc(tsdn, ptr, config_prof);
2455 
2456 	return (usize);
2457 }
2458 
2459 static size_t
2460 ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
2461     size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx)
2462 {
2463 	size_t usize;
2464 
2465 	if (tctx == NULL)
2466 		return (old_usize);
2467 	usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment,
2468 	    zero);
2469 
2470 	return (usize);
2471 }
2472 
2473 JEMALLOC_ALWAYS_INLINE_C size_t
2474 ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
2475     size_t extra, size_t alignment, bool zero)
2476 {
2477 	size_t usize_max, usize;
2478 	bool prof_active;
2479 	prof_tctx_t *old_tctx, *tctx;
2480 
2481 	prof_active = prof_active_get_unlocked();
2482 	old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);
2483 	/*
2484 	 * usize isn't knowable before ixalloc() returns when extra is non-zero.
2485 	 * Therefore, compute its maximum possible value and use that in
2486 	 * prof_alloc_prep() to decide whether to capture a backtrace.
2487 	 * prof_realloc() will use the actual usize to decide whether to sample.
2488 	 */
2489 	if (alignment == 0) {
2490 		usize_max = s2u(size+extra);
2491 		assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS);
2492 	} else {
2493 		usize_max = sa2u(size+extra, alignment);
2494 		if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) {
2495 			/*
2496 			 * usize_max is out of range, and chances are that
2497 			 * allocation will fail, but use the maximum possible
2498 			 * value and carry on with prof_alloc_prep(), just in
2499 			 * case allocation succeeds.
2500 			 */
2501 			usize_max = HUGE_MAXCLASS;
2502 		}
2503 	}
2504 	tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
2505 
2506 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
2507 		usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
2508 		    size, extra, alignment, zero, tctx);
2509 	} else {
2510 		usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
2511 		    extra, alignment, zero);
2512 	}
2513 	if (usize == old_usize) {
2514 		prof_alloc_rollback(tsd, tctx, false);
2515 		return (usize);
2516 	}
2517 	prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize,
2518 	    old_tctx);
2519 
2520 	return (usize);
2521 }
2522 
2523 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
2524 je_xallocx(void *ptr, size_t size, size_t extra, int flags)
2525 {
2526 	tsd_t *tsd;
2527 	size_t usize, old_usize;
2528 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
2529 	size_t alignment = MALLOCX_ALIGN_GET(flags);
2530 	bool zero = flags & MALLOCX_ZERO;
2531 
2532 	assert(ptr != NULL);
2533 	assert(size != 0);
2534 	assert(SIZE_T_MAX - size >= extra);
2535 	assert(malloc_initialized() || IS_INITIALIZER);
2536 	malloc_thread_init();
2537 	tsd = tsd_fetch();
2538 	witness_assert_lockless(tsd_tsdn(tsd));
2539 
2540 	old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
2541 
2542 	/*
2543 	 * The API explicitly absolves itself of protecting against (size +
2544 	 * extra) numerical overflow, but we may need to clamp extra to avoid
2545 	 * exceeding HUGE_MAXCLASS.
2546 	 *
2547 	 * Ordinarily, size limit checking is handled deeper down, but here we
2548 	 * have to check as part of (size + extra) clamping, since we need the
2549 	 * clamped value in the above helper functions.
2550 	 */
2551 	if (unlikely(size > HUGE_MAXCLASS)) {
2552 		usize = old_usize;
2553 		goto label_not_resized;
2554 	}
2555 	if (unlikely(HUGE_MAXCLASS - size < extra))
2556 		extra = HUGE_MAXCLASS - size;
2557 
2558 	if (config_valgrind && unlikely(in_valgrind))
2559 		old_rzsize = u2rz(old_usize);
2560 
2561 	if (config_prof && opt_prof) {
2562 		usize = ixallocx_prof(tsd, ptr, old_usize, size, extra,
2563 		    alignment, zero);
2564 	} else {
2565 		usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
2566 		    extra, alignment, zero);
2567 	}
2568 	if (unlikely(usize == old_usize))
2569 		goto label_not_resized;
2570 
2571 	if (config_stats) {
2572 		*tsd_thread_allocatedp_get(tsd) += usize;
2573 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
2574 	}
2575 	JEMALLOC_VALGRIND_REALLOC(no, tsd_tsdn(tsd), ptr, usize, no, ptr,
2576 	    old_usize, old_rzsize, no, zero);
2577 label_not_resized:
2578 	UTRACE(ptr, size, ptr);
2579 	witness_assert_lockless(tsd_tsdn(tsd));
2580 	return (usize);
2581 }
2582 
2583 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
2584 JEMALLOC_ATTR(pure)
2585 je_sallocx(const void *ptr, int flags)
2586 {
2587 	size_t usize;
2588 	tsdn_t *tsdn;
2589 
2590 	assert(malloc_initialized() || IS_INITIALIZER);
2591 	malloc_thread_init();
2592 
2593 	tsdn = tsdn_fetch();
2594 	witness_assert_lockless(tsdn);
2595 
2596 	if (config_ivsalloc)
2597 		usize = ivsalloc(tsdn, ptr, config_prof);
2598 	else
2599 		usize = isalloc(tsdn, ptr, config_prof);
2600 
2601 	witness_assert_lockless(tsdn);
2602 	return (usize);
2603 }
2604 
2605 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
2606 je_dallocx(void *ptr, int flags)
2607 {
2608 	tsd_t *tsd;
2609 	tcache_t *tcache;
2610 
2611 	assert(ptr != NULL);
2612 	assert(malloc_initialized() || IS_INITIALIZER);
2613 
2614 	tsd = tsd_fetch();
2615 	witness_assert_lockless(tsd_tsdn(tsd));
2616 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
2617 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
2618 			tcache = NULL;
2619 		else
2620 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
2621 	} else
2622 		tcache = tcache_get(tsd, false);
2623 
2624 	UTRACE(ptr, 0, 0);
2625 	if (likely(!malloc_slow))
2626 		ifree(tsd, ptr, tcache, false);
2627 	else
2628 		ifree(tsd, ptr, tcache, true);
2629 	witness_assert_lockless(tsd_tsdn(tsd));
2630 }
2631 
2632 JEMALLOC_ALWAYS_INLINE_C size_t
2633 inallocx(tsdn_t *tsdn, size_t size, int flags)
2634 {
2635 	size_t usize;
2636 
2637 	witness_assert_lockless(tsdn);
2638 
2639 	if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0))
2640 		usize = s2u(size);
2641 	else
2642 		usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
2643 	witness_assert_lockless(tsdn);
2644 	return (usize);
2645 }
2646 
2647 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
2648 je_sdallocx(void *ptr, size_t size, int flags)
2649 {
2650 	tsd_t *tsd;
2651 	tcache_t *tcache;
2652 	size_t usize;
2653 
2654 	assert(ptr != NULL);
2655 	assert(malloc_initialized() || IS_INITIALIZER);
2656 	tsd = tsd_fetch();
2657 	usize = inallocx(tsd_tsdn(tsd), size, flags);
2658 	assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof));
2659 
2660 	witness_assert_lockless(tsd_tsdn(tsd));
2661 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
2662 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
2663 			tcache = NULL;
2664 		else
2665 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
2666 	} else
2667 		tcache = tcache_get(tsd, false);
2668 
2669 	UTRACE(ptr, 0, 0);
2670 	if (likely(!malloc_slow))
2671 		isfree(tsd, ptr, usize, tcache, false);
2672 	else
2673 		isfree(tsd, ptr, usize, tcache, true);
2674 	witness_assert_lockless(tsd_tsdn(tsd));
2675 }
2676 
2677 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
2678 JEMALLOC_ATTR(pure)
2679 je_nallocx(size_t size, int flags)
2680 {
2681 	size_t usize;
2682 	tsdn_t *tsdn;
2683 
2684 	assert(size != 0);
2685 
2686 	if (unlikely(malloc_init()))
2687 		return (0);
2688 
2689 	tsdn = tsdn_fetch();
2690 	witness_assert_lockless(tsdn);
2691 
2692 	usize = inallocx(tsdn, size, flags);
2693 	if (unlikely(usize > HUGE_MAXCLASS))
2694 		return (0);
2695 
2696 	witness_assert_lockless(tsdn);
2697 	return (usize);
2698 }
2699 
2700 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
2701 je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
2702     size_t newlen)
2703 {
2704 	int ret;
2705 	tsd_t *tsd;
2706 
2707 	if (unlikely(malloc_init()))
2708 		return (EAGAIN);
2709 
2710 	tsd = tsd_fetch();
2711 	witness_assert_lockless(tsd_tsdn(tsd));
2712 	ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen);
2713 	witness_assert_lockless(tsd_tsdn(tsd));
2714 	return (ret);
2715 }
2716 
2717 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
2718 je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp)
2719 {
2720 	int ret;
2721 	tsdn_t *tsdn;
2722 
2723 	if (unlikely(malloc_init()))
2724 		return (EAGAIN);
2725 
2726 	tsdn = tsdn_fetch();
2727 	witness_assert_lockless(tsdn);
2728 	ret = ctl_nametomib(tsdn, name, mibp, miblenp);
2729 	witness_assert_lockless(tsdn);
2730 	return (ret);
2731 }
2732 
2733 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
2734 je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
2735   void *newp, size_t newlen)
2736 {
2737 	int ret;
2738 	tsd_t *tsd;
2739 
2740 	if (unlikely(malloc_init()))
2741 		return (EAGAIN);
2742 
2743 	tsd = tsd_fetch();
2744 	witness_assert_lockless(tsd_tsdn(tsd));
2745 	ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
2746 	witness_assert_lockless(tsd_tsdn(tsd));
2747 	return (ret);
2748 }
2749 
2750 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
2751 je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
2752     const char *opts)
2753 {
2754 	tsdn_t *tsdn;
2755 
2756 	tsdn = tsdn_fetch();
2757 	witness_assert_lockless(tsdn);
2758 	stats_print(write_cb, cbopaque, opts);
2759 	witness_assert_lockless(tsdn);
2760 }
2761 
2762 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
2763 je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
2764 {
2765 	size_t ret;
2766 	tsdn_t *tsdn;
2767 
2768 	assert(malloc_initialized() || IS_INITIALIZER);
2769 	malloc_thread_init();
2770 
2771 	tsdn = tsdn_fetch();
2772 	witness_assert_lockless(tsdn);
2773 
2774 	if (config_ivsalloc)
2775 		ret = ivsalloc(tsdn, ptr, config_prof);
2776 	else
2777 		ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr, config_prof);
2778 
2779 	witness_assert_lockless(tsdn);
2780 	return (ret);
2781 }
2782 
2783 /*
2784  * End non-standard functions.
2785  */
2786 /******************************************************************************/
2787 /*
2788  * Begin compatibility functions.
2789  */
2790 
/* Flag encodings understood by the *allocm() compatibility functions. */
/* Alignment given directly as a base-2 logarithm. */
#define	ALLOCM_LG_ALIGN(la)	(la)
/* Alignment given in bytes; ffsl() is 1-based, hence the -1. */
#define	ALLOCM_ALIGN(a)		(ffsl(a)-1)
#define	ALLOCM_ZERO		((int)0x40)
#define	ALLOCM_NO_MOVE		((int)0x80)

/* Result codes returned by the *allocm() compatibility functions. */
#define	ALLOCM_SUCCESS		0
#define	ALLOCM_ERR_OOM		1
#define	ALLOCM_ERR_NOT_MOVED	2
2799 
2800 int
2801 je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
2802 {
2803 	void *p;
2804 
2805 	assert(ptr != NULL);
2806 
2807 	p = je_mallocx(size, flags);
2808 	if (p == NULL)
2809 		return (ALLOCM_ERR_OOM);
2810 	if (rsize != NULL)
2811 		*rsize = isalloc(tsdn_fetch(), p, config_prof);
2812 	*ptr = p;
2813 	return (ALLOCM_SUCCESS);
2814 }
2815 
2816 int
2817 je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
2818 {
2819 	int ret;
2820 	bool no_move = flags & ALLOCM_NO_MOVE;
2821 
2822 	assert(ptr != NULL);
2823 	assert(*ptr != NULL);
2824 	assert(size != 0);
2825 	assert(SIZE_T_MAX - size >= extra);
2826 
2827 	if (no_move) {
2828 		size_t usize = je_xallocx(*ptr, size, extra, flags);
2829 		ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED;
2830 		if (rsize != NULL)
2831 			*rsize = usize;
2832 	} else {
2833 		void *p = je_rallocx(*ptr, size+extra, flags);
2834 		if (p != NULL) {
2835 			*ptr = p;
2836 			ret = ALLOCM_SUCCESS;
2837 		} else
2838 			ret = ALLOCM_ERR_OOM;
2839 		if (rsize != NULL)
2840 			*rsize = isalloc(tsdn_fetch(), *ptr, config_prof);
2841 	}
2842 	return (ret);
2843 }
2844 
2845 int
2846 je_sallocm(const void *ptr, size_t *rsize, int flags)
2847 {
2848 
2849 	assert(rsize != NULL);
2850 	*rsize = je_sallocx(ptr, flags);
2851 	return (ALLOCM_SUCCESS);
2852 }
2853 
2854 int
2855 je_dallocm(void *ptr, int flags)
2856 {
2857 
2858 	je_dallocx(ptr, flags);
2859 	return (ALLOCM_SUCCESS);
2860 }
2861 
2862 int
2863 je_nallocm(size_t *rsize, size_t size, int flags)
2864 {
2865 	size_t usize;
2866 
2867 	usize = je_nallocx(size, flags);
2868 	if (usize == 0)
2869 		return (ALLOCM_ERR_OOM);
2870 	if (rsize != NULL)
2871 		*rsize = usize;
2872 	return (ALLOCM_SUCCESS);
2873 }
2874 
/* The ALLOCM_* flag macros are private to the compatibility functions. */
#undef ALLOCM_LG_ALIGN
#undef ALLOCM_ALIGN
#undef ALLOCM_ZERO
#undef ALLOCM_NO_MOVE

/* Likewise for the ALLOCM_* result codes. */
#undef ALLOCM_SUCCESS
#undef ALLOCM_ERR_OOM
#undef ALLOCM_ERR_NOT_MOVED
2883 
2884 /*
2885  * End compatibility functions.
2886  */
2887 /******************************************************************************/
2888 /*
2889  * The following functions are used by threading libraries for protection of
2890  * malloc during fork().
2891  */
2892 
2893 /*
2894  * If an application creates a thread before doing any allocation in the main
2895  * thread, then calls fork(2) in the main thread followed by memory allocation
2896  * in the child process, a race can occur that results in deadlock within the
2897  * child: the main thread may have forked while the created thread had
2898  * partially initialized the allocator.  Ordinarily jemalloc prevents
2899  * fork/malloc races via the following functions it registers during
2900  * initialization using pthread_atfork(), but of course that does no good if
2901  * the allocator isn't fully initialized at fork time.  The following library
2902  * constructor is a partial solution to this problem.  It may still be possible
2903  * to trigger the deadlock described above, but doing so would involve forking
2904  * via a library constructor that runs before jemalloc's runs.
2905  */
2906 #ifndef JEMALLOC_JET
2907 JEMALLOC_ATTR(constructor)
2908 static void
2909 jemalloc_constructor(void)
2910 {
2911 
2912 	malloc_init();
2913 }
2914 #endif
2915 
2916 #ifndef JEMALLOC_MUTEX_INIT_CB
2917 void
2918 jemalloc_prefork(void)
2919 #else
2920 JEMALLOC_EXPORT void
2921 _malloc_prefork(void)
2922 #endif
2923 {
2924 	tsd_t *tsd;
2925 	unsigned i, j, narenas;
2926 	arena_t *arena;
2927 
2928 #ifdef JEMALLOC_MUTEX_INIT_CB
2929 	if (!malloc_initialized())
2930 		return;
2931 #endif
2932 	assert(malloc_initialized());
2933 
2934 	tsd = tsd_fetch();
2935 
2936 	narenas = narenas_total_get();
2937 
2938 	witness_prefork(tsd);
2939 	/* Acquire all mutexes in a safe order. */
2940 	ctl_prefork(tsd_tsdn(tsd));
2941 	tcache_prefork(tsd_tsdn(tsd));
2942 	malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock);
2943 	prof_prefork0(tsd_tsdn(tsd));
2944 	for (i = 0; i < 3; i++) {
2945 		for (j = 0; j < narenas; j++) {
2946 			if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
2947 			    NULL) {
2948 				switch (i) {
2949 				case 0:
2950 					arena_prefork0(tsd_tsdn(tsd), arena);
2951 					break;
2952 				case 1:
2953 					arena_prefork1(tsd_tsdn(tsd), arena);
2954 					break;
2955 				case 2:
2956 					arena_prefork2(tsd_tsdn(tsd), arena);
2957 					break;
2958 				default: not_reached();
2959 				}
2960 			}
2961 		}
2962 	}
2963 	base_prefork(tsd_tsdn(tsd));
2964 	for (i = 0; i < narenas; i++) {
2965 		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
2966 			arena_prefork3(tsd_tsdn(tsd), arena);
2967 	}
2968 	prof_prefork1(tsd_tsdn(tsd));
2969 }
2970 
2971 #ifndef JEMALLOC_MUTEX_INIT_CB
2972 void
2973 jemalloc_postfork_parent(void)
2974 #else
2975 JEMALLOC_EXPORT void
2976 _malloc_postfork(void)
2977 #endif
2978 {
2979 	tsd_t *tsd;
2980 	unsigned i, narenas;
2981 
2982 #ifdef JEMALLOC_MUTEX_INIT_CB
2983 	if (!malloc_initialized())
2984 		return;
2985 #endif
2986 	assert(malloc_initialized());
2987 
2988 	tsd = tsd_fetch();
2989 
2990 	witness_postfork_parent(tsd);
2991 	/* Release all mutexes, now that fork() has completed. */
2992 	base_postfork_parent(tsd_tsdn(tsd));
2993 	for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
2994 		arena_t *arena;
2995 
2996 		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
2997 			arena_postfork_parent(tsd_tsdn(tsd), arena);
2998 	}
2999 	prof_postfork_parent(tsd_tsdn(tsd));
3000 	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock);
3001 	tcache_postfork_parent(tsd_tsdn(tsd));
3002 	ctl_postfork_parent(tsd_tsdn(tsd));
3003 }
3004 
3005 void
3006 jemalloc_postfork_child(void)
3007 {
3008 	tsd_t *tsd;
3009 	unsigned i, narenas;
3010 
3011 	assert(malloc_initialized());
3012 
3013 	tsd = tsd_fetch();
3014 
3015 	witness_postfork_child(tsd);
3016 	/* Release all mutexes, now that fork() has completed. */
3017 	base_postfork_child(tsd_tsdn(tsd));
3018 	for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
3019 		arena_t *arena;
3020 
3021 		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
3022 			arena_postfork_child(tsd_tsdn(tsd), arena);
3023 	}
3024 	prof_postfork_child(tsd_tsdn(tsd));
3025 	malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock);
3026 	tcache_postfork_child(tsd_tsdn(tsd));
3027 	ctl_postfork_child(tsd_tsdn(tsd));
3028 }
3029 
/*
 * Forward to malloc_mutex_first_thread(); its return value is deliberately
 * discarded.
 */
void
_malloc_first_thread(void)
{

	(void)malloc_mutex_first_thread();
}
3036 
3037 /******************************************************************************/
3038