1 #define JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/ckh.h"
7 #include "jemalloc/internal/hash.h"
8 #include "jemalloc/internal/malloc_io.h"
9 #include "jemalloc/internal/mutex.h"
10 #include "jemalloc/internal/emitter.h"
11 
12 /******************************************************************************/
13 
14 #ifdef JEMALLOC_PROF_LIBUNWIND
15 #define UNW_LOCAL_ONLY
16 #include <libunwind.h>
17 #endif
18 
19 #ifdef JEMALLOC_PROF_LIBGCC
20 /*
21  * We have a circular dependency -- jemalloc_internal.h tells us if we should
22  * use libgcc's unwinding functionality, but after we've included that, we've
23  * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
24  */
25 #undef _Unwind_Backtrace
26 #include <unwind.h>
27 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
28 #endif
29 
30 /******************************************************************************/
31 /* Data. */
32 
33 bool		opt_prof = false;
34 bool		opt_prof_active = true;
35 bool		opt_prof_thread_active_init = true;
36 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
37 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
38 bool		opt_prof_gdump = false;
39 bool		opt_prof_final = false;
40 bool		opt_prof_leak = false;
41 bool		opt_prof_accum = false;
42 bool		opt_prof_log = false;
43 char		opt_prof_prefix[
44     /* Minimize memory bloat for non-prof builds. */
45 #ifdef JEMALLOC_PROF
46     PATH_MAX +
47 #endif
48     1];
49 
50 /*
51  * Initialized as opt_prof_active, and accessed via
52  * prof_active_[gs]et{_unlocked,}().
53  */
54 bool			prof_active;
55 static malloc_mutex_t	prof_active_mtx;
56 
57 /*
58  * Initialized as opt_prof_thread_active_init, and accessed via
59  * prof_thread_active_init_[gs]et().
60  */
61 static bool		prof_thread_active_init;
62 static malloc_mutex_t	prof_thread_active_init_mtx;
63 
64 /*
65  * Initialized as opt_prof_gdump, and accessed via
66  * prof_gdump_[gs]et{_unlocked,}().
67  */
68 bool			prof_gdump_val;
69 static malloc_mutex_t	prof_gdump_mtx;
70 
71 uint64_t	prof_interval = 0;
72 
73 size_t		lg_prof_sample;
74 
75 typedef enum prof_logging_state_e prof_logging_state_t;
76 enum prof_logging_state_e {
77 	prof_logging_state_stopped,
78 	prof_logging_state_started,
79 	prof_logging_state_dumping
80 };
81 
82 /*
83  * - stopped: log_start never called, or previous log_stop has completed.
84  * - started: log_start called, log_stop not called yet. Allocations are logged.
85  * - dumping: log_stop called but not finished; samples are not logged anymore.
86  */
87 prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
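/*
 * Illustrative usage (an assumption, not defined in this file): the state
 * machine above is normally driven through jemalloc's experimental mallctl
 * interface, roughly:
 *
 *   mallctl("prof.log_start", NULL, NULL, NULL, 0);
 *   ... run workload ...
 *   mallctl("prof.log_stop", NULL, NULL, NULL, 0);
 */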
88 
89 bool prof_logging_final_hook_registered = false;
90 
91 #ifdef JEMALLOC_JET
92 static bool prof_log_dummy = false;
93 #endif
94 
95 /* Incremented for every log file that is output. */
96 static uint64_t log_seq = 0;
97 static char log_filename[
98     /* Minimize memory bloat for non-prof builds. */
99 #ifdef JEMALLOC_PROF
100     PATH_MAX +
101 #endif
102     1];
103 
104 /* Timestamp for most recent call to log_start(). */
105 static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER;
106 
107 /* Increment these when adding to the log_bt and log_thr linked lists. */
108 static size_t log_bt_index = 0;
109 static size_t log_thr_index = 0;
110 
111 /* Linked list node definitions. These are only used in prof.c. */
112 typedef struct prof_bt_node_s prof_bt_node_t;
113 
114 struct prof_bt_node_s {
115 	prof_bt_node_t *next;
116 	size_t index;
117 	prof_bt_t bt;
118 	/* Variable size backtrace vector pointed to by bt. */
119 	void *vec[1];
120 };
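/*
 * The trailing vec[1] above (and name[1] below) are allocated oversized: see
 * prof_log_bt_index()/prof_log_thr_index(), which size the nodes as
 * offsetof(prof_bt_node_t, vec) + bt->len * sizeof(void *) and
 * offsetof(prof_thr_node_t, name) + strlen(name) + 1, respectively.
 */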
121 
122 typedef struct prof_thr_node_s prof_thr_node_t;
123 
124 struct prof_thr_node_s {
125 	prof_thr_node_t *next;
126 	size_t index;
127 	uint64_t thr_uid;
128 	/* Variable size based on thr_name_sz. */
129 	char name[1];
130 };
131 
132 typedef struct prof_alloc_node_s prof_alloc_node_t;
133 
134 /* This is output when logging sampled allocations. */
135 struct prof_alloc_node_s {
136 	prof_alloc_node_t *next;
137 	/* Indices into an array of thread data. */
138 	size_t alloc_thr_ind;
139 	size_t free_thr_ind;
140 
141 	/* Indices into an array of backtraces. */
142 	size_t alloc_bt_ind;
143 	size_t free_bt_ind;
144 
145 	uint64_t alloc_time_ns;
146 	uint64_t free_time_ns;
147 
148 	size_t usize;
149 };
150 
151 /*
152  * Created on the first call to prof_log_start and deleted on prof_log_stop.
153  * These are the backtraces and threads that have already been logged by an
154  * allocation.
155  */
156 static bool log_tables_initialized = false;
157 static ckh_t log_bt_node_set;
158 static ckh_t log_thr_node_set;
159 
160 /* Store linked lists for logged data. */
161 static prof_bt_node_t *log_bt_first = NULL;
162 static prof_bt_node_t *log_bt_last = NULL;
163 static prof_thr_node_t *log_thr_first = NULL;
164 static prof_thr_node_t *log_thr_last = NULL;
165 static prof_alloc_node_t *log_alloc_first = NULL;
166 static prof_alloc_node_t *log_alloc_last = NULL;
167 
168 /* Protects the prof_logging_state and any log_{...} variable. */
169 static malloc_mutex_t log_mtx;
170 
171 /*
172  * Table of mutexes that are shared among gctx's.  These are leaf locks, so
173  * there is no problem with using them for more than one gctx at the same time.
174  * The primary motivation for this sharing though is that gctx's are ephemeral,
175  * and destroying mutexes causes complications for systems that allocate when
176  * creating/destroying mutexes.
177  */
178 static malloc_mutex_t	*gctx_locks;
179 static atomic_u_t	cum_gctxs; /* Atomic counter. */
180 
181 /*
182  * Table of mutexes that are shared among tdata's.  No operations require
183  * holding multiple tdata locks, so there is no problem with using them for more
184  * than one tdata at the same time, even though a gctx lock may be acquired
185  * while holding a tdata lock.
186  */
187 static malloc_mutex_t	*tdata_locks;
188 
189 /*
190  * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
191  * structure that knows about all backtraces currently captured.
192  */
193 static ckh_t		bt2gctx;
194 /* Non-static to enable profiling. */
195 malloc_mutex_t		bt2gctx_mtx;
196 
197 /*
198  * Tree of all extant prof_tdata_t structures, regardless of state,
199  * {attached,detached,expired}.
200  */
201 static prof_tdata_tree_t	tdatas;
202 static malloc_mutex_t	tdatas_mtx;
203 
204 static uint64_t		next_thr_uid;
205 static malloc_mutex_t	next_thr_uid_mtx;
206 
207 static malloc_mutex_t	prof_dump_seq_mtx;
208 static uint64_t		prof_dump_seq;
209 static uint64_t		prof_dump_iseq;
210 static uint64_t		prof_dump_mseq;
211 static uint64_t		prof_dump_useq;
212 
213 /*
214  * This buffer is rather large for stack allocation, so use a single buffer for
215  * all profile dumps.
216  */
217 static malloc_mutex_t	prof_dump_mtx;
218 static char		prof_dump_buf[
219     /* Minimize memory bloat for non-prof builds. */
220 #ifdef JEMALLOC_PROF
221     PROF_DUMP_BUFSIZE
222 #else
223     1
224 #endif
225 ];
226 static size_t		prof_dump_buf_end;
227 static int		prof_dump_fd;
228 
229 /* Do not dump any profiles until bootstrapping is complete. */
230 static bool		prof_booted = false;
231 
232 /******************************************************************************/
233 /*
234  * Function prototypes for static functions that are referenced prior to
235  * definition.
236  */
237 
238 static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
239 static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
240 static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
241     bool even_if_attached);
242 static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
243     bool even_if_attached);
244 static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
245 
246 /* Hashtable functions for log_bt_node_set and log_thr_node_set. */
247 static void prof_thr_node_hash(const void *key, size_t r_hash[2]);
248 static bool prof_thr_node_keycomp(const void *k1, const void *k2);
249 static void prof_bt_node_hash(const void *key, size_t r_hash[2]);
250 static bool prof_bt_node_keycomp(const void *k1, const void *k2);
251 
252 /******************************************************************************/
253 /* Red-black trees. */
254 
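/*
 * The comparators below use the branchless (a > b) - (a < b) idiom so that
 * they return a negative, zero, or positive int without the truncation that
 * directly subtracting uint64_t keys would entail.
 */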
255 static int
256 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
257 	uint64_t a_thr_uid = a->thr_uid;
258 	uint64_t b_thr_uid = b->thr_uid;
259 	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
260 	if (ret == 0) {
261 		uint64_t a_thr_discrim = a->thr_discrim;
262 		uint64_t b_thr_discrim = b->thr_discrim;
263 		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
264 		    b_thr_discrim);
265 		if (ret == 0) {
266 			uint64_t a_tctx_uid = a->tctx_uid;
267 			uint64_t b_tctx_uid = b->tctx_uid;
268 			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
269 			    b_tctx_uid);
270 		}
271 	}
272 	return ret;
273 }
274 
275 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
276     tctx_link, prof_tctx_comp)
277 
278 static int
279 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
280 	unsigned a_len = a->bt.len;
281 	unsigned b_len = b->bt.len;
282 	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
283 	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
284 	if (ret == 0) {
285 		ret = (a_len > b_len) - (a_len < b_len);
286 	}
287 	return ret;
288 }
289 
290 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
291     prof_gctx_comp)
292 
293 static int
294 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
295 	int ret;
296 	uint64_t a_uid = a->thr_uid;
297 	uint64_t b_uid = b->thr_uid;
298 
299 	ret = ((a_uid > b_uid) - (a_uid < b_uid));
300 	if (ret == 0) {
301 		uint64_t a_discrim = a->thr_discrim;
302 		uint64_t b_discrim = b->thr_discrim;
303 
304 		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
305 	}
306 	return ret;
307 }
308 
309 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
310     prof_tdata_comp)
311 
312 /******************************************************************************/
313 
314 void
315 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
316 	prof_tdata_t *tdata;
317 
318 	cassert(config_prof);
319 
320 	if (updated) {
321 		/*
322 		 * Compute a new sample threshold.  This isn't very important in
323 		 * practice, because this function is rarely executed, so the
324 		 * potential for sample bias is minimal except in contrived
325 		 * programs.
326 		 */
327 		tdata = prof_tdata_get(tsd, true);
328 		if (tdata != NULL) {
329 			prof_sample_threshold_update(tdata);
330 		}
331 	}
332 
333 	if ((uintptr_t)tctx > (uintptr_t)1U) {
334 		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
335 		tctx->prepared = false;
336 		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
337 			prof_tctx_destroy(tsd, tctx);
338 		} else {
339 			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
340 		}
341 	}
342 }
343 
344 void
345 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
346     prof_tctx_t *tctx) {
347 	prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
348 
349 	/*
	 * Get the current time and set this in the extent_t.  We'll read this
	 * when free() is called.
	 */
351 	nstime_t t = NSTIME_ZERO_INITIALIZER;
352 	nstime_update(&t);
353 	prof_alloc_time_set(tsdn, ptr, NULL, t);
354 
355 	malloc_mutex_lock(tsdn, tctx->tdata->lock);
356 	tctx->cnts.curobjs++;
357 	tctx->cnts.curbytes += usize;
358 	if (opt_prof_accum) {
359 		tctx->cnts.accumobjs++;
360 		tctx->cnts.accumbytes += usize;
361 	}
362 	tctx->prepared = false;
363 	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
364 }
365 
366 static size_t
367 prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
368 	assert(prof_logging_state == prof_logging_state_started);
369 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
370 
371 	prof_bt_node_t dummy_node;
372 	dummy_node.bt = *bt;
373 	prof_bt_node_t *node;
374 
375 	/*
	 * See if this backtrace is already cached in the table.  Note that
	 * ckh_search() returns true when the key is *not* found.
	 */
376 	if (ckh_search(&log_bt_node_set, (void *)(&dummy_node),
377 	    (void **)(&node), NULL)) {
378 		size_t sz = offsetof(prof_bt_node_t, vec) +
379 			        (bt->len * sizeof(void *));
380 		prof_bt_node_t *new_node = (prof_bt_node_t *)
381 		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
382 		    true, arena_get(TSDN_NULL, 0, true), true);
383 		if (log_bt_first == NULL) {
384 			log_bt_first = new_node;
385 			log_bt_last = new_node;
386 		} else {
387 			log_bt_last->next = new_node;
388 			log_bt_last = new_node;
389 		}
390 
391 		new_node->next = NULL;
392 		new_node->index = log_bt_index;
393 		/*
394 		 * Copy the backtrace: bt is inside a tdata or gctx, which
395 		 * might die before prof_log_stop is called.
396 		 */
397 		new_node->bt.len = bt->len;
398 		memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *));
399 		new_node->bt.vec = new_node->vec;
400 
401 		log_bt_index++;
402 		ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL);
403 		return new_node->index;
404 	} else {
405 		return node->index;
406 	}
407 }
408 static size_t
409 prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
410 	assert(prof_logging_state == prof_logging_state_started);
411 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
412 
413 	prof_thr_node_t dummy_node;
414 	dummy_node.thr_uid = thr_uid;
415 	prof_thr_node_t *node;
416 
417 	/* See if this thread is already cached in the table. */
418 	if (ckh_search(&log_thr_node_set, (void *)(&dummy_node),
419 	    (void **)(&node), NULL)) {
420 		size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1;
421 		prof_thr_node_t *new_node = (prof_thr_node_t *)
422 		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
423 		    true, arena_get(TSDN_NULL, 0, true), true);
424 		if (log_thr_first == NULL) {
425 			log_thr_first = new_node;
426 			log_thr_last = new_node;
427 		} else {
428 			log_thr_last->next = new_node;
429 			log_thr_last = new_node;
430 		}
431 
432 		new_node->next = NULL;
433 		new_node->index = log_thr_index;
434 		new_node->thr_uid = thr_uid;
435 		strcpy(new_node->name, name);
436 
437 		log_thr_index++;
438 		ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL);
439 		return new_node->index;
440 	} else {
441 		return node->index;
442 	}
443 }
444 
445 static void
446 prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
447 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
448 
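	/*
	 * Naming convention: "prod" (producer) refers to the thread that
	 * performed the sampled allocation, "cons" (consumer) to the current
	 * thread performing the free.
	 */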
449 	prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
450 	if (cons_tdata == NULL) {
451 		/*
452 		 * We decide not to log these allocations. cons_tdata will be
453 		 * NULL only when the current thread is in a weird state (e.g.
454 		 * it's being destroyed).
455 		 */
456 		return;
457 	}
458 
459 	malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx);
460 
461 	if (prof_logging_state != prof_logging_state_started) {
462 		goto label_done;
463 	}
464 
465 	if (!log_tables_initialized) {
466 		bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
467 				prof_bt_node_hash, prof_bt_node_keycomp);
468 		bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
469 				prof_thr_node_hash, prof_thr_node_keycomp);
470 		if (err1 || err2) {
471 			goto label_done;
472 		}
473 		log_tables_initialized = true;
474 	}
475 
476 	nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr,
477 			          (alloc_ctx_t *)NULL);
478 	nstime_t free_time = NSTIME_ZERO_INITIALIZER;
479 	nstime_update(&free_time);
480 
481 	size_t sz = sizeof(prof_alloc_node_t);
482 	prof_alloc_node_t *new_node = (prof_alloc_node_t *)
483 	    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
484 	    arena_get(TSDN_NULL, 0, true), true);
485 
486 	const char *prod_thr_name = (tctx->tdata->thread_name == NULL) ?
487 				        "" : tctx->tdata->thread_name;
488 	const char *cons_thr_name = prof_thread_name_get(tsd);
489 
490 	prof_bt_t bt;
491 	/* Initialize the backtrace, using the buffer in tdata to store it. */
492 	bt_init(&bt, cons_tdata->vec);
493 	prof_backtrace(&bt);
494 	prof_bt_t *cons_bt = &bt;
495 
496 	/* We haven't destroyed tctx yet, so gctx should be good to read. */
497 	prof_bt_t *prod_bt = &tctx->gctx->bt;
498 
499 	new_node->next = NULL;
500 	new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid,
501 				      prod_thr_name);
502 	new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid,
503 				     cons_thr_name);
504 	new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt);
505 	new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt);
506 	new_node->alloc_time_ns = nstime_ns(&alloc_time);
507 	new_node->free_time_ns = nstime_ns(&free_time);
508 	new_node->usize = usize;
509 
510 	if (log_alloc_first == NULL) {
511 		log_alloc_first = new_node;
512 		log_alloc_last = new_node;
513 	} else {
514 		log_alloc_last->next = new_node;
515 		log_alloc_last = new_node;
516 	}
517 
518 label_done:
519 	malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);
520 }
521 
522 void
523 prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
524     prof_tctx_t *tctx) {
525 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
526 
527 	assert(tctx->cnts.curobjs > 0);
528 	assert(tctx->cnts.curbytes >= usize);
529 	tctx->cnts.curobjs--;
530 	tctx->cnts.curbytes -= usize;
531 
532 	prof_try_log(tsd, ptr, usize, tctx);
533 
534 	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
535 		prof_tctx_destroy(tsd, tctx);
536 	} else {
537 		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
538 	}
539 }
540 
541 void
542 bt_init(prof_bt_t *bt, void **vec) {
543 	cassert(config_prof);
544 
545 	bt->vec = vec;
546 	bt->len = 0;
547 }
548 
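/*
 * prof_enter()/prof_leave() bracket critical sections that hold bt2gctx_mtx.
 * While a thread is enqueued (tdata->enq), interval and gdump triggers are
 * deferred via enq_idump/enq_gdump and issued from prof_leave() once the lock
 * has been dropped.
 */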
549 static void
550 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
551 	cassert(config_prof);
552 	assert(tdata == prof_tdata_get(tsd, false));
553 
554 	if (tdata != NULL) {
555 		assert(!tdata->enq);
556 		tdata->enq = true;
557 	}
558 
559 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
560 }
561 
562 static void
563 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
564 	cassert(config_prof);
565 	assert(tdata == prof_tdata_get(tsd, false));
566 
567 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
568 
569 	if (tdata != NULL) {
570 		bool idump, gdump;
571 
572 		assert(tdata->enq);
573 		tdata->enq = false;
574 		idump = tdata->enq_idump;
575 		tdata->enq_idump = false;
576 		gdump = tdata->enq_gdump;
577 		tdata->enq_gdump = false;
578 
579 		if (idump) {
580 			prof_idump(tsd_tsdn(tsd));
581 		}
582 		if (gdump) {
583 			prof_gdump(tsd_tsdn(tsd));
584 		}
585 	}
586 }
587 
588 #ifdef JEMALLOC_PROF_LIBUNWIND
589 void
590 prof_backtrace(prof_bt_t *bt) {
591 	int nframes;
592 
593 	cassert(config_prof);
594 	assert(bt->len == 0);
595 	assert(bt->vec != NULL);
596 
597 	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
598 	if (nframes <= 0) {
599 		return;
600 	}
601 	bt->len = nframes;
602 }
603 #elif (defined(JEMALLOC_PROF_LIBGCC))
604 static _Unwind_Reason_Code
605 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
606 	cassert(config_prof);
607 
608 	return _URC_NO_REASON;
609 }
610 
611 static _Unwind_Reason_Code
612 prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
613 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
614 	void *ip;
615 
616 	cassert(config_prof);
617 
618 	ip = (void *)_Unwind_GetIP(context);
619 	if (ip == NULL) {
620 		return _URC_END_OF_STACK;
621 	}
622 	data->bt->vec[data->bt->len] = ip;
623 	data->bt->len++;
624 	if (data->bt->len == data->max) {
625 		return _URC_END_OF_STACK;
626 	}
627 
628 	return _URC_NO_REASON;
629 }
630 
631 void
632 prof_backtrace(prof_bt_t *bt) {
633 	prof_unwind_data_t data = {bt, PROF_BT_MAX};
634 
635 	cassert(config_prof);
636 
637 	_Unwind_Backtrace(prof_unwind_callback, &data);
638 }
639 #elif (defined(JEMALLOC_PROF_GCC))
640 void
641 prof_backtrace(prof_bt_t *bt) {
642 #define BT_FRAME(i)							\
643 	if ((i) < PROF_BT_MAX) {					\
644 		void *p;						\
645 		if (__builtin_frame_address(i) == 0) {			\
646 			return;						\
647 		}							\
648 		p = __builtin_return_address(i);			\
649 		if (p == NULL) {					\
650 			return;						\
651 		}							\
652 		bt->vec[(i)] = p;					\
653 		bt->len = (i) + 1;					\
654 	} else {							\
655 		return;							\
656 	}
657 
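	/*
	 * BT_FRAME is expanded once per frame below rather than looped,
	 * because __builtin_frame_address() and __builtin_return_address()
	 * require compile-time constant arguments.
	 */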
658 	cassert(config_prof);
659 
660 	BT_FRAME(0)
661 	BT_FRAME(1)
662 	BT_FRAME(2)
663 	BT_FRAME(3)
664 	BT_FRAME(4)
665 	BT_FRAME(5)
666 	BT_FRAME(6)
667 	BT_FRAME(7)
668 	BT_FRAME(8)
669 	BT_FRAME(9)
670 
671 	BT_FRAME(10)
672 	BT_FRAME(11)
673 	BT_FRAME(12)
674 	BT_FRAME(13)
675 	BT_FRAME(14)
676 	BT_FRAME(15)
677 	BT_FRAME(16)
678 	BT_FRAME(17)
679 	BT_FRAME(18)
680 	BT_FRAME(19)
681 
682 	BT_FRAME(20)
683 	BT_FRAME(21)
684 	BT_FRAME(22)
685 	BT_FRAME(23)
686 	BT_FRAME(24)
687 	BT_FRAME(25)
688 	BT_FRAME(26)
689 	BT_FRAME(27)
690 	BT_FRAME(28)
691 	BT_FRAME(29)
692 
693 	BT_FRAME(30)
694 	BT_FRAME(31)
695 	BT_FRAME(32)
696 	BT_FRAME(33)
697 	BT_FRAME(34)
698 	BT_FRAME(35)
699 	BT_FRAME(36)
700 	BT_FRAME(37)
701 	BT_FRAME(38)
702 	BT_FRAME(39)
703 
704 	BT_FRAME(40)
705 	BT_FRAME(41)
706 	BT_FRAME(42)
707 	BT_FRAME(43)
708 	BT_FRAME(44)
709 	BT_FRAME(45)
710 	BT_FRAME(46)
711 	BT_FRAME(47)
712 	BT_FRAME(48)
713 	BT_FRAME(49)
714 
715 	BT_FRAME(50)
716 	BT_FRAME(51)
717 	BT_FRAME(52)
718 	BT_FRAME(53)
719 	BT_FRAME(54)
720 	BT_FRAME(55)
721 	BT_FRAME(56)
722 	BT_FRAME(57)
723 	BT_FRAME(58)
724 	BT_FRAME(59)
725 
726 	BT_FRAME(60)
727 	BT_FRAME(61)
728 	BT_FRAME(62)
729 	BT_FRAME(63)
730 	BT_FRAME(64)
731 	BT_FRAME(65)
732 	BT_FRAME(66)
733 	BT_FRAME(67)
734 	BT_FRAME(68)
735 	BT_FRAME(69)
736 
737 	BT_FRAME(70)
738 	BT_FRAME(71)
739 	BT_FRAME(72)
740 	BT_FRAME(73)
741 	BT_FRAME(74)
742 	BT_FRAME(75)
743 	BT_FRAME(76)
744 	BT_FRAME(77)
745 	BT_FRAME(78)
746 	BT_FRAME(79)
747 
748 	BT_FRAME(80)
749 	BT_FRAME(81)
750 	BT_FRAME(82)
751 	BT_FRAME(83)
752 	BT_FRAME(84)
753 	BT_FRAME(85)
754 	BT_FRAME(86)
755 	BT_FRAME(87)
756 	BT_FRAME(88)
757 	BT_FRAME(89)
758 
759 	BT_FRAME(90)
760 	BT_FRAME(91)
761 	BT_FRAME(92)
762 	BT_FRAME(93)
763 	BT_FRAME(94)
764 	BT_FRAME(95)
765 	BT_FRAME(96)
766 	BT_FRAME(97)
767 	BT_FRAME(98)
768 	BT_FRAME(99)
769 
770 	BT_FRAME(100)
771 	BT_FRAME(101)
772 	BT_FRAME(102)
773 	BT_FRAME(103)
774 	BT_FRAME(104)
775 	BT_FRAME(105)
776 	BT_FRAME(106)
777 	BT_FRAME(107)
778 	BT_FRAME(108)
779 	BT_FRAME(109)
780 
781 	BT_FRAME(110)
782 	BT_FRAME(111)
783 	BT_FRAME(112)
784 	BT_FRAME(113)
785 	BT_FRAME(114)
786 	BT_FRAME(115)
787 	BT_FRAME(116)
788 	BT_FRAME(117)
789 	BT_FRAME(118)
790 	BT_FRAME(119)
791 
792 	BT_FRAME(120)
793 	BT_FRAME(121)
794 	BT_FRAME(122)
795 	BT_FRAME(123)
796 	BT_FRAME(124)
797 	BT_FRAME(125)
798 	BT_FRAME(126)
799 	BT_FRAME(127)
800 #undef BT_FRAME
801 }
802 #else
803 void
804 prof_backtrace(prof_bt_t *bt) {
805 	cassert(config_prof);
806 	not_reached();
807 }
808 #endif
809 
810 static malloc_mutex_t *
811 prof_gctx_mutex_choose(void) {
812 	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
813 
814 	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
815 }
816 
817 static malloc_mutex_t *
818 prof_tdata_mutex_choose(uint64_t thr_uid) {
819 	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
820 }
821 
822 static prof_gctx_t *
823 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
824 	/*
825 	 * Create a single allocation that has space for vec of length bt->len.
826 	 */
827 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
828 	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
829 	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
830 	    true);
831 	if (gctx == NULL) {
832 		return NULL;
833 	}
834 	gctx->lock = prof_gctx_mutex_choose();
835 	/*
836 	 * Set nlimbo to 1, in order to avoid a race condition with
837 	 * prof_tctx_destroy()/prof_gctx_try_destroy().
838 	 */
839 	gctx->nlimbo = 1;
840 	tctx_tree_new(&gctx->tctxs);
841 	/* Duplicate bt. */
842 	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
843 	gctx->bt.vec = gctx->vec;
844 	gctx->bt.len = bt->len;
845 	return gctx;
846 }
847 
848 static void
849 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
850     prof_tdata_t *tdata) {
851 	cassert(config_prof);
852 
853 	/*
854 	 * Check that gctx is still unused by any thread cache before destroying
855 	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
856 	 * condition with this function, as does prof_tctx_destroy() in order to
857 	 * avoid a race between the main body of prof_tctx_destroy() and entry
858 	 * into this function.
859 	 */
860 	prof_enter(tsd, tdata_self);
861 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
862 	assert(gctx->nlimbo != 0);
863 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
864 		/* Remove gctx from bt2gctx. */
865 		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
866 			not_reached();
867 		}
868 		prof_leave(tsd, tdata_self);
869 		/* Destroy gctx. */
870 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
871 		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
872 	} else {
873 		/*
874 		 * Compensate for increment in prof_tctx_destroy() or
875 		 * prof_lookup().
876 		 */
877 		gctx->nlimbo--;
878 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
879 		prof_leave(tsd, tdata_self);
880 	}
881 }
882 
883 static bool
884 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
885 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
886 
887 	if (opt_prof_accum) {
888 		return false;
889 	}
890 	if (tctx->cnts.curobjs != 0) {
891 		return false;
892 	}
893 	if (tctx->prepared) {
894 		return false;
895 	}
896 	return true;
897 }
898 
899 static bool
900 prof_gctx_should_destroy(prof_gctx_t *gctx) {
901 	if (opt_prof_accum) {
902 		return false;
903 	}
904 	if (!tctx_tree_empty(&gctx->tctxs)) {
905 		return false;
906 	}
907 	if (gctx->nlimbo != 0) {
908 		return false;
909 	}
910 	return true;
911 }
912 
913 static void
914 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
915 	prof_tdata_t *tdata = tctx->tdata;
916 	prof_gctx_t *gctx = tctx->gctx;
917 	bool destroy_tdata, destroy_tctx, destroy_gctx;
918 
919 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
920 
921 	assert(tctx->cnts.curobjs == 0);
922 	assert(tctx->cnts.curbytes == 0);
923 	assert(!opt_prof_accum);
924 	assert(tctx->cnts.accumobjs == 0);
925 	assert(tctx->cnts.accumbytes == 0);
926 
927 	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
928 	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
929 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
930 
931 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
932 	switch (tctx->state) {
933 	case prof_tctx_state_nominal:
934 		tctx_tree_remove(&gctx->tctxs, tctx);
935 		destroy_tctx = true;
936 		if (prof_gctx_should_destroy(gctx)) {
937 			/*
938 			 * Increment gctx->nlimbo in order to keep another
939 			 * thread from winning the race to destroy gctx while
940 			 * this one has gctx->lock dropped.  Without this, it
941 			 * would be possible for another thread to:
942 			 *
943 			 * 1) Sample an allocation associated with gctx.
944 			 * 2) Deallocate the sampled object.
945 			 * 3) Successfully prof_gctx_try_destroy(gctx).
946 			 *
947 			 * The result would be that gctx no longer exists by the
948 			 * time this thread accesses it in
949 			 * prof_gctx_try_destroy().
950 			 */
951 			gctx->nlimbo++;
952 			destroy_gctx = true;
953 		} else {
954 			destroy_gctx = false;
955 		}
956 		break;
957 	case prof_tctx_state_dumping:
958 		/*
959 		 * A dumping thread needs tctx to remain valid until dumping
960 		 * has finished.  Change state such that the dumping thread will
961 		 * complete destruction during a late dump iteration phase.
962 		 */
963 		tctx->state = prof_tctx_state_purgatory;
964 		destroy_tctx = false;
965 		destroy_gctx = false;
966 		break;
967 	default:
968 		not_reached();
969 		destroy_tctx = false;
970 		destroy_gctx = false;
971 	}
972 	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
973 	if (destroy_gctx) {
974 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
975 		    tdata);
976 	}
977 
978 	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
979 
980 	if (destroy_tdata) {
981 		prof_tdata_destroy(tsd, tdata, false);
982 	}
983 
984 	if (destroy_tctx) {
985 		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
986 	}
987 }
988 
989 static bool
990 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
991     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
992 	union {
993 		prof_gctx_t	*p;
994 		void		*v;
995 	} gctx, tgctx;
996 	union {
997 		prof_bt_t	*p;
998 		void		*v;
999 	} btkey;
1000 	bool new_gctx;
1001 
1002 	prof_enter(tsd, tdata);
1003 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1004 		/* bt has never been seen before.  Insert it. */
1005 		prof_leave(tsd, tdata);
1006 		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
1007 		if (tgctx.v == NULL) {
1008 			return true;
1009 		}
1010 		prof_enter(tsd, tdata);
1011 		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1012 			gctx.p = tgctx.p;
1013 			btkey.p = &gctx.p->bt;
1014 			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
1015 				/* OOM. */
1016 				prof_leave(tsd, tdata);
1017 				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
1018 				    true, true);
1019 				return true;
1020 			}
1021 			new_gctx = true;
1022 		} else {
1023 			new_gctx = false;
1024 		}
1025 	} else {
1026 		tgctx.v = NULL;
1027 		new_gctx = false;
1028 	}
1029 
1030 	if (!new_gctx) {
1031 		/*
1032 		 * Increment nlimbo, in order to avoid a race condition with
1033 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
1034 		 */
1035 		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
1036 		gctx.p->nlimbo++;
1037 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
1038 		new_gctx = false;
1039 
1040 		if (tgctx.v != NULL) {
1041 			/* Lost race to insert. */
1042 			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
1043 			    true);
1044 		}
1045 	}
1046 	prof_leave(tsd, tdata);
1047 
1048 	*p_btkey = btkey.v;
1049 	*p_gctx = gctx.p;
1050 	*p_new_gctx = new_gctx;
1051 	return false;
1052 }
1053 
1054 prof_tctx_t *
1055 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
1056 	union {
1057 		prof_tctx_t	*p;
1058 		void		*v;
1059 	} ret;
1060 	prof_tdata_t *tdata;
1061 	bool not_found;
1062 
1063 	cassert(config_prof);
1064 
1065 	tdata = prof_tdata_get(tsd, false);
1066 	if (tdata == NULL) {
1067 		return NULL;
1068 	}
1069 
1070 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1071 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
1072 	if (!not_found) { /* Note double negative! */
1073 		ret.p->prepared = true;
1074 	}
1075 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1076 	if (not_found) {
1077 		void *btkey;
1078 		prof_gctx_t *gctx;
1079 		bool new_gctx, error;
1080 
1081 		/*
1082 		 * This thread's cache lacks bt.  Look for it in the global
1083 		 * cache.
1084 		 */
1085 		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
1086 		    &new_gctx)) {
1087 			return NULL;
1088 		}
1089 
1090 		/* Link a prof_tctx_t into gctx for this thread. */
1091 		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
1092 		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
1093 		    arena_ichoose(tsd, NULL), true);
1094 		if (ret.p == NULL) {
1095 			if (new_gctx) {
1096 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1097 			}
1098 			return NULL;
1099 		}
1100 		ret.p->tdata = tdata;
1101 		ret.p->thr_uid = tdata->thr_uid;
1102 		ret.p->thr_discrim = tdata->thr_discrim;
1103 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
1104 		ret.p->gctx = gctx;
1105 		ret.p->tctx_uid = tdata->tctx_uid_next++;
1106 		ret.p->prepared = true;
1107 		ret.p->state = prof_tctx_state_initializing;
1108 		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1109 		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
1110 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1111 		if (error) {
1112 			if (new_gctx) {
1113 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1114 			}
1115 			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
1116 			return NULL;
1117 		}
1118 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1119 		ret.p->state = prof_tctx_state_nominal;
1120 		tctx_tree_insert(&gctx->tctxs, ret.p);
1121 		gctx->nlimbo--;
1122 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1123 	}
1124 
1125 	return ret.p;
1126 }
1127 
1128 /*
1129  * The bodies of this function and prof_leakcheck() are compiled out unless heap
1130  * profiling is enabled, so that it is possible to compile jemalloc with
1131  * floating point support completely disabled.  Avoiding floating point code is
1132  * important on memory-constrained systems, but it also enables a workaround for
1133  * versions of glibc that don't properly save/restore floating point registers
1134  * during dynamic lazy symbol loading (which internally calls into whatever
1135  * malloc implementation happens to be integrated into the application).  Note
1136  * that some compilers (e.g.  gcc 4.8) may use floating point registers for fast
1137  * memory moves, so jemalloc must be compiled with such optimizations disabled
1138  * (e.g. -mno-sse) in order for the workaround to be complete.
1140  */
1141 void
1142 prof_sample_threshold_update(prof_tdata_t *tdata) {
1143 #ifdef JEMALLOC_PROF
1144 	if (!config_prof) {
1145 		return;
1146 	}
1147 
1148 	if (lg_prof_sample == 0) {
1149 		tsd_bytes_until_sample_set(tsd_fetch(), 0);
1150 		return;
1151 	}
1152 
1153 	/*
1154 	 * Compute sample interval as a geometrically distributed random
1155 	 * variable with mean (2^lg_prof_sample).
1156 	 *
1157 	 *                             __        __
1158 	 *                             |  log(u)  |                     1
1159 	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
1160 	 *                             | log(1-p) |             lg_prof_sample
1161 	 *                                                     2
1162 	 *
1163 	 * For more information on the math, see:
1164 	 *
1165 	 *   Non-Uniform Random Variate Generation
1166 	 *   Luc Devroye
1167 	 *   Springer-Verlag, New York, 1986
1168 	 *   pp 500
1169 	 *   (http://luc.devroye.org/rnbookindex.html)
1170 	 */
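	/*
	 * For example (not from the original source): with the default
	 * lg_prof_sample of 19, p = 2^-19, so the expected interval between
	 * samples is 2^19 bytes (512 KiB) of allocation activity.  The 53
	 * random bits drawn below make u a uniform double in [0, 1).
	 */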
1171 	uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
1172 	double u = (double)r * (1.0/9007199254740992.0L);
1173 	uint64_t bytes_until_sample = (uint64_t)(log(u) /
1174 	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
1175 	    + (uint64_t)1U;
1176 	if (bytes_until_sample > SSIZE_MAX) {
1177 		bytes_until_sample = SSIZE_MAX;
1178 	}
1179 	tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample);
1180 
1181 #endif
1182 }
1183 
1184 #ifdef JEMALLOC_JET
1185 static prof_tdata_t *
1186 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1187     void *arg) {
1188 	size_t *tdata_count = (size_t *)arg;
1189 
1190 	(*tdata_count)++;
1191 
1192 	return NULL;
1193 }
1194 
1195 size_t
1196 prof_tdata_count(void) {
1197 	size_t tdata_count = 0;
1198 	tsdn_t *tsdn;
1199 
1200 	tsdn = tsdn_fetch();
1201 	malloc_mutex_lock(tsdn, &tdatas_mtx);
1202 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
1203 	    (void *)&tdata_count);
1204 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1205 
1206 	return tdata_count;
1207 }
1208 
1209 size_t
1210 prof_bt_count(void) {
1211 	size_t bt_count;
1212 	tsd_t *tsd;
1213 	prof_tdata_t *tdata;
1214 
1215 	tsd = tsd_fetch();
1216 	tdata = prof_tdata_get(tsd, false);
1217 	if (tdata == NULL) {
1218 		return 0;
1219 	}
1220 
1221 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
1222 	bt_count = ckh_count(&bt2gctx);
1223 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
1224 
1225 	return bt_count;
1226 }
1227 #endif
1228 
1229 static int
1230 prof_dump_open_impl(bool propagate_err, const char *filename) {
1231 	int fd;
1232 
1233 	fd = creat(filename, 0644);
1234 	if (fd == -1 && !propagate_err) {
1235 		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
1236 		    filename);
1237 		if (opt_abort) {
1238 			abort();
1239 		}
1240 	}
1241 
1242 	return fd;
1243 }
1244 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
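/*
 * JET_MUTABLE leaves prof_dump_open overridable in JEMALLOC_JET (testing)
 * builds, so tests can substitute their own dump-open implementation; in
 * regular builds it resolves to a const pointer.
 */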
1245 
1246 static bool
1247 prof_dump_flush(bool propagate_err) {
1248 	bool ret = false;
1249 	ssize_t err;
1250 
1251 	cassert(config_prof);
1252 
1253 	err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
1254 	if (err == -1) {
1255 		if (!propagate_err) {
1256 			malloc_write("<jemalloc>: write() failed during heap "
1257 			    "profile flush\n");
1258 			if (opt_abort) {
1259 				abort();
1260 			}
1261 		}
1262 		ret = true;
1263 	}
1264 	prof_dump_buf_end = 0;
1265 
1266 	return ret;
1267 }
1268 
1269 static bool
1270 prof_dump_close(bool propagate_err) {
1271 	bool ret;
1272 
1273 	assert(prof_dump_fd != -1);
1274 	ret = prof_dump_flush(propagate_err);
1275 	close(prof_dump_fd);
1276 	prof_dump_fd = -1;
1277 
1278 	return ret;
1279 }
1280 
1281 static bool
1282 prof_dump_write(bool propagate_err, const char *s) {
1283 	size_t i, slen, n;
1284 
1285 	cassert(config_prof);
1286 
1287 	i = 0;
1288 	slen = strlen(s);
1289 	while (i < slen) {
1290 		/* Flush the buffer if it is full. */
1291 		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1292 			if (prof_dump_flush(propagate_err) && propagate_err) {
1293 				return true;
1294 			}
1295 		}
1296 
1297 		if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
1298 			/* Finish writing. */
1299 			n = slen - i;
1300 		} else {
1301 			/* Write as much of s as will fit. */
1302 			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
1303 		}
1304 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
1305 		prof_dump_buf_end += n;
1306 		i += n;
1307 	}
1308 	assert(i == slen);
1309 
1310 	return false;
1311 }
1312 
1313 JEMALLOC_FORMAT_PRINTF(2, 3)
1314 static bool
1315 prof_dump_printf(bool propagate_err, const char *format, ...) {
1316 	bool ret;
1317 	va_list ap;
1318 	char buf[PROF_PRINTF_BUFSIZE];
1319 
1320 	va_start(ap, format);
1321 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
1322 	va_end(ap);
1323 	ret = prof_dump_write(propagate_err, buf);
1324 
1325 	return ret;
1326 }
1327 
1328 static void
1329 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
1330 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
1331 
1332 	malloc_mutex_lock(tsdn, tctx->gctx->lock);
1333 
1334 	switch (tctx->state) {
1335 	case prof_tctx_state_initializing:
1336 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1337 		return;
1338 	case prof_tctx_state_nominal:
1339 		tctx->state = prof_tctx_state_dumping;
1340 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1341 
1342 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
1343 
1344 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1345 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1346 		if (opt_prof_accum) {
1347 			tdata->cnt_summed.accumobjs +=
1348 			    tctx->dump_cnts.accumobjs;
1349 			tdata->cnt_summed.accumbytes +=
1350 			    tctx->dump_cnts.accumbytes;
1351 		}
1352 		break;
1353 	case prof_tctx_state_dumping:
1354 	case prof_tctx_state_purgatory:
1355 		not_reached();
1356 	}
1357 }
1358 
1359 static void
1360 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
1361 	malloc_mutex_assert_owner(tsdn, gctx->lock);
1362 
1363 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1364 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1365 	if (opt_prof_accum) {
1366 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
1367 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
1368 	}
1369 }
1370 
1371 static prof_tctx_t *
1372 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1373 	tsdn_t *tsdn = (tsdn_t *)arg;
1374 
1375 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1376 
1377 	switch (tctx->state) {
1378 	case prof_tctx_state_nominal:
1379 		/* New since dumping started; ignore. */
1380 		break;
1381 	case prof_tctx_state_dumping:
1382 	case prof_tctx_state_purgatory:
1383 		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
1384 		break;
1385 	default:
1386 		not_reached();
1387 	}
1388 
1389 	return NULL;
1390 }
1391 
1392 struct prof_tctx_dump_iter_arg_s {
1393 	tsdn_t	*tsdn;
1394 	bool	propagate_err;
1395 };
1396 
1397 static prof_tctx_t *
1398 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
1399 	struct prof_tctx_dump_iter_arg_s *arg =
1400 	    (struct prof_tctx_dump_iter_arg_s *)opaque;
1401 
1402 	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
1403 
1404 	switch (tctx->state) {
1405 	case prof_tctx_state_initializing:
1406 	case prof_tctx_state_nominal:
1407 		/* Not captured by this dump. */
1408 		break;
1409 	case prof_tctx_state_dumping:
1410 	case prof_tctx_state_purgatory:
1411 		if (prof_dump_printf(arg->propagate_err,
1412 		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
1413 		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
1414 		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
1415 		    tctx->dump_cnts.accumbytes)) {
1416 			return tctx;
1417 		}
1418 		break;
1419 	default:
1420 		not_reached();
1421 	}
1422 	return NULL;
1423 }
1424 
1425 static prof_tctx_t *
1426 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1427 	tsdn_t *tsdn = (tsdn_t *)arg;
1428 	prof_tctx_t *ret;
1429 
1430 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1431 
1432 	switch (tctx->state) {
1433 	case prof_tctx_state_nominal:
1434 		/* New since dumping started; ignore. */
1435 		break;
1436 	case prof_tctx_state_dumping:
1437 		tctx->state = prof_tctx_state_nominal;
1438 		break;
1439 	case prof_tctx_state_purgatory:
1440 		ret = tctx;
1441 		goto label_return;
1442 	default:
1443 		not_reached();
1444 	}
1445 
1446 	ret = NULL;
1447 label_return:
1448 	return ret;
1449 }
1450 
1451 static void
1452 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
1453 	cassert(config_prof);
1454 
1455 	malloc_mutex_lock(tsdn, gctx->lock);
1456 
1457 	/*
1458 	 * Increment nlimbo so that gctx won't go away before dump.
1459 	 * Additionally, link gctx into the dump list so that it is included in
1460 	 * prof_dump()'s second pass.
1461 	 */
1462 	gctx->nlimbo++;
1463 	gctx_tree_insert(gctxs, gctx);
1464 
1465 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
1466 
1467 	malloc_mutex_unlock(tsdn, gctx->lock);
1468 }
1469 
1470 struct prof_gctx_merge_iter_arg_s {
1471 	tsdn_t	*tsdn;
1472 	size_t	leak_ngctx;
1473 };
1474 
1475 static prof_gctx_t *
1476 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1477 	struct prof_gctx_merge_iter_arg_s *arg =
1478 	    (struct prof_gctx_merge_iter_arg_s *)opaque;
1479 
1480 	malloc_mutex_lock(arg->tsdn, gctx->lock);
1481 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
1482 	    (void *)arg->tsdn);
1483 	if (gctx->cnt_summed.curobjs != 0) {
1484 		arg->leak_ngctx++;
1485 	}
1486 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1487 
1488 	return NULL;
1489 }
1490 
1491 static void
1492 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
1493 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
1494 	prof_gctx_t *gctx;
1495 
1496 	/*
1497 	 * Standard tree iteration won't work here, because as soon as we
1498 	 * decrement gctx->nlimbo and unlock gctx, another thread can
1499 	 * concurrently destroy it, which will corrupt the tree.  Therefore,
1500 	 * tear down the tree one node at a time during iteration.
1501 	 */
1502 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
1503 		gctx_tree_remove(gctxs, gctx);
1504 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1505 		{
1506 			prof_tctx_t *next;
1507 
1508 			next = NULL;
1509 			do {
1510 				prof_tctx_t *to_destroy =
1511 				    tctx_tree_iter(&gctx->tctxs, next,
1512 				    prof_tctx_finish_iter,
1513 				    (void *)tsd_tsdn(tsd));
1514 				if (to_destroy != NULL) {
1515 					next = tctx_tree_next(&gctx->tctxs,
1516 					    to_destroy);
1517 					tctx_tree_remove(&gctx->tctxs,
1518 					    to_destroy);
1519 					idalloctm(tsd_tsdn(tsd), to_destroy,
1520 					    NULL, NULL, true, true);
1521 				} else {
1522 					next = NULL;
1523 				}
1524 			} while (next != NULL);
1525 		}
1526 		gctx->nlimbo--;
1527 		if (prof_gctx_should_destroy(gctx)) {
1528 			gctx->nlimbo++;
1529 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1530 			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1531 		} else {
1532 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1533 		}
1534 	}
1535 }
1536 
1537 struct prof_tdata_merge_iter_arg_s {
1538 	tsdn_t		*tsdn;
1539 	prof_cnt_t	cnt_all;
1540 };
1541 
1542 static prof_tdata_t *
1543 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1544     void *opaque) {
1545 	struct prof_tdata_merge_iter_arg_s *arg =
1546 	    (struct prof_tdata_merge_iter_arg_s *)opaque;
1547 
1548 	malloc_mutex_lock(arg->tsdn, tdata->lock);
1549 	if (!tdata->expired) {
1550 		size_t tabind;
1551 		union {
1552 			prof_tctx_t	*p;
1553 			void		*v;
1554 		} tctx;
1555 
1556 		tdata->dumping = true;
1557 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
1558 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
1559 		    &tctx.v);) {
1560 			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
1561 		}
1562 
1563 		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
1564 		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
1565 		if (opt_prof_accum) {
1566 			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
1567 			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
1568 		}
1569 	} else {
1570 		tdata->dumping = false;
1571 	}
1572 	malloc_mutex_unlock(arg->tsdn, tdata->lock);
1573 
1574 	return NULL;
1575 }
1576 
1577 static prof_tdata_t *
1578 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1579     void *arg) {
1580 	bool propagate_err = *(bool *)arg;
1581 
1582 	if (!tdata->dumping) {
1583 		return NULL;
1584 	}
1585 
1586 	if (prof_dump_printf(propagate_err,
1587 	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
1588 	    tdata->thr_uid, tdata->cnt_summed.curobjs,
1589 	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
1590 	    tdata->cnt_summed.accumbytes,
1591 	    (tdata->thread_name != NULL) ? " " : "",
1592 	    (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
1593 		return tdata;
1594 	}
1595 	return NULL;
1596 }
1597 
1598 static bool
1599 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
1600     const prof_cnt_t *cnt_all) {
1601 	bool ret;
1602 
1603 	if (prof_dump_printf(propagate_err,
1604 	    "heap_v2/%"FMTu64"\n"
1605 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1606 	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
1607 	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
1608 		return true;
1609 	}
1610 
1611 	malloc_mutex_lock(tsdn, &tdatas_mtx);
1612 	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
1613 	    (void *)&propagate_err) != NULL);
1614 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1615 	return ret;
1616 }
1617 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
1618 
1619 static bool
1620 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
1621     const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
1622 	bool ret;
1623 	unsigned i;
1624 	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
1625 
1626 	cassert(config_prof);
1627 	malloc_mutex_assert_owner(tsdn, gctx->lock);
1628 
1629 	/* Avoid dumping gctx's that have no useful data. */
1630 	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
1631 	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
1632 		assert(gctx->cnt_summed.curobjs == 0);
1633 		assert(gctx->cnt_summed.curbytes == 0);
1634 		assert(gctx->cnt_summed.accumobjs == 0);
1635 		assert(gctx->cnt_summed.accumbytes == 0);
1636 		ret = false;
1637 		goto label_return;
1638 	}
1639 
1640 	if (prof_dump_printf(propagate_err, "@")) {
1641 		ret = true;
1642 		goto label_return;
1643 	}
1644 	for (i = 0; i < bt->len; i++) {
1645 		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
1646 		    (uintptr_t)bt->vec[i])) {
1647 			ret = true;
1648 			goto label_return;
1649 		}
1650 	}
1651 
1652 	if (prof_dump_printf(propagate_err,
1653 	    "\n"
1654 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1655 	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
1656 	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
1657 		ret = true;
1658 		goto label_return;
1659 	}
1660 
1661 	prof_tctx_dump_iter_arg.tsdn = tsdn;
1662 	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
1663 	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
1664 	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
1665 		ret = true;
1666 		goto label_return;
1667 	}
1668 
1669 	ret = false;
1670 label_return:
1671 	return ret;
1672 }
1673 
1674 #ifndef _WIN32
1675 JEMALLOC_FORMAT_PRINTF(1, 2)
1676 static int
1677 prof_open_maps(const char *format, ...) {
1678 	int mfd;
1679 	va_list ap;
1680 	char filename[PATH_MAX + 1];
1681 
1682 	va_start(ap, format);
1683 	malloc_vsnprintf(filename, sizeof(filename), format, ap);
1684 	va_end(ap);
1685 
1686 #if defined(O_CLOEXEC)
1687 	mfd = open(filename, O_RDONLY | O_CLOEXEC);
1688 #else
1689 	mfd = open(filename, O_RDONLY);
1690 	if (mfd != -1) {
1691 		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
1692 	}
1693 #endif
1694 
1695 	return mfd;
1696 }
1697 #endif
1698 
1699 static int
1700 prof_getpid(void) {
1701 #ifdef _WIN32
1702 	return GetCurrentProcessId();
1703 #else
1704 	return getpid();
1705 #endif
1706 }
1707 
1708 static bool
1709 prof_dump_maps(bool propagate_err) {
1710 	bool ret;
1711 	int mfd;
1712 
1713 	cassert(config_prof);
1714 #ifdef __FreeBSD__
1715 	mfd = prof_open_maps("/proc/curproc/map");
1716 #elif defined(_WIN32)
1717 	mfd = -1; // Not implemented
1718 #else
1719 	{
1720 		int pid = prof_getpid();
1721 
1722 		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
1723 		if (mfd == -1) {
1724 			mfd = prof_open_maps("/proc/%d/maps", pid);
1725 		}
1726 	}
1727 #endif
1728 	if (mfd != -1) {
1729 		ssize_t nread;
1730 
1731 		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
1732 		    propagate_err) {
1733 			ret = true;
1734 			goto label_return;
1735 		}
1736 		nread = 0;
1737 		do {
1738 			prof_dump_buf_end += nread;
1739 			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1740 				/* Make space in prof_dump_buf before read(). */
1741 				if (prof_dump_flush(propagate_err) &&
1742 				    propagate_err) {
1743 					ret = true;
1744 					goto label_return;
1745 				}
1746 			}
1747 			nread = malloc_read_fd(mfd,
1748 			    &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
1749 			    - prof_dump_buf_end);
1750 		} while (nread > 0);
1751 	} else {
1752 		ret = true;
1753 		goto label_return;
1754 	}
1755 
1756 	ret = false;
1757 label_return:
1758 	if (mfd != -1) {
1759 		close(mfd);
1760 	}
1761 	return ret;
1762 }
1763 
1764 /*
1765  * See prof_sample_threshold_update() comment for why the body of this function
1766  * is conditionally compiled.
1767  */
1768 static void
1769 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
1770     const char *filename) {
1771 #ifdef JEMALLOC_PROF
1772 	/*
1773 	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
1774 	 * differ slightly from what jeprof reports, because here we scale the
1775 	 * summary values, whereas jeprof scales each context individually and
1776 	 * reports the sums of the scaled values.
1777 	 */
1778 	if (cnt_all->curbytes != 0) {
1779 		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
1780 		double ratio = (((double)cnt_all->curbytes) /
1781 		    (double)cnt_all->curobjs) / sample_period;
1782 		double scale_factor = 1.0 / (1.0 - exp(-ratio));
1783 		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
1784 		    * scale_factor);
1785 		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
1786 		    scale_factor);
1787 
1788 		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
1789 		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
1790 		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
1791 		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
1792 		malloc_printf(
1793 		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
1794 		    filename);
1795 	}
1796 #endif
1797 }
1798 
1799 struct prof_gctx_dump_iter_arg_s {
1800 	tsdn_t	*tsdn;
1801 	bool	propagate_err;
1802 };
1803 
1804 static prof_gctx_t *
1805 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1806 	prof_gctx_t *ret;
1807 	struct prof_gctx_dump_iter_arg_s *arg =
1808 	    (struct prof_gctx_dump_iter_arg_s *)opaque;
1809 
1810 	malloc_mutex_lock(arg->tsdn, gctx->lock);
1811 
1812 	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
1813 	    gctxs)) {
1814 		ret = gctx;
1815 		goto label_return;
1816 	}
1817 
1818 	ret = NULL;
1819 label_return:
1820 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1821 	return ret;
1822 }
1823 
1824 static void
1825 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
1826     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1827     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1828     prof_gctx_tree_t *gctxs) {
1829 	size_t tabind;
1830 	union {
1831 		prof_gctx_t	*p;
1832 		void		*v;
1833 	} gctx;
1834 
1835 	prof_enter(tsd, tdata);
1836 
1837 	/*
1838 	 * Put gctx's in limbo and clear their counters in preparation for
1839 	 * summing.
1840 	 */
1841 	gctx_tree_new(gctxs);
1842 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
1843 		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
1844 	}
1845 
1846 	/*
1847 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
1848 	 * stats and merge them into the associated gctx's.
1849 	 */
1850 	prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1851 	memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
1852 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1853 	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
1854 	    (void *)prof_tdata_merge_iter_arg);
1855 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1856 
1857 	/* Merge tctx stats into gctx's. */
1858 	prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1859 	prof_gctx_merge_iter_arg->leak_ngctx = 0;
1860 	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
1861 	    (void *)prof_gctx_merge_iter_arg);
1862 
1863 	prof_leave(tsd, tdata);
1864 }
1865 
1866 static bool
1867 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
1868     bool leakcheck, prof_tdata_t *tdata,
1869     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1870     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1871     struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
1872     prof_gctx_tree_t *gctxs) {
1873 	/* Create dump file. */
1874 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
1875 		return true;
1876 	}
1877 
1878 	/* Dump profile header. */
1879 	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
1880 	    &prof_tdata_merge_iter_arg->cnt_all)) {
1881 		goto label_write_error;
1882 	}
1883 
1884 	/* Dump per gctx profile stats. */
1885 	prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
1886 	prof_gctx_dump_iter_arg->propagate_err = propagate_err;
1887 	if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
1888 	    (void *)prof_gctx_dump_iter_arg) != NULL) {
1889 		goto label_write_error;
1890 	}
1891 
1892 	/* Dump /proc/<pid>/maps if possible. */
1893 	if (prof_dump_maps(propagate_err)) {
1894 		goto label_write_error;
1895 	}
1896 
1897 	if (prof_dump_close(propagate_err)) {
1898 		return true;
1899 	}
1900 
1901 	return false;
1902 label_write_error:
1903 	prof_dump_close(propagate_err);
1904 	return true;
1905 }
1906 
1907 static bool
1908 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
1909     bool leakcheck) {
1910 	cassert(config_prof);
1911 	assert(tsd_reentrancy_level_get(tsd) == 0);
1912 
1913 	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
1914 	if (tdata == NULL) {
1915 		return true;
1916 	}
1917 
1918 	pre_reentrancy(tsd, NULL);
1919 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
1920 
1921 	prof_gctx_tree_t gctxs;
1922 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1923 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1924 	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
1925 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1926 	    &prof_gctx_merge_iter_arg, &gctxs);
1927 	bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
1928 	    &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
1929 	    &prof_gctx_dump_iter_arg, &gctxs);
1930 	prof_gctx_finish(tsd, &gctxs);
1931 
1932 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1933 	post_reentrancy(tsd);
1934 
1935 	if (err) {
1936 		return true;
1937 	}
1938 
1939 	if (leakcheck) {
1940 		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
1941 		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
1942 	}
1943 	return false;
1944 }
1945 
1946 #ifdef JEMALLOC_JET
1947 void
1948 prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
1949     uint64_t *accumbytes) {
1950 	tsd_t *tsd;
1951 	prof_tdata_t *tdata;
1952 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1953 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1954 	prof_gctx_tree_t gctxs;
1955 
1956 	tsd = tsd_fetch();
1957 	tdata = prof_tdata_get(tsd, false);
1958 	if (tdata == NULL) {
1959 		if (curobjs != NULL) {
1960 			*curobjs = 0;
1961 		}
1962 		if (curbytes != NULL) {
1963 			*curbytes = 0;
1964 		}
1965 		if (accumobjs != NULL) {
1966 			*accumobjs = 0;
1967 		}
1968 		if (accumbytes != NULL) {
1969 			*accumbytes = 0;
1970 		}
1971 		return;
1972 	}
1973 
1974 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1975 	    &prof_gctx_merge_iter_arg, &gctxs);
1976 	prof_gctx_finish(tsd, &gctxs);
1977 
1978 	if (curobjs != NULL) {
1979 		*curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
1980 	}
1981 	if (curbytes != NULL) {
1982 		*curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
1983 	}
1984 	if (accumobjs != NULL) {
1985 		*accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
1986 	}
1987 	if (accumbytes != NULL) {
1988 		*accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
1989 	}
1990 }
1991 #endif
1992 
1993 #define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
1994 #define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
1995 static void
1996 prof_dump_filename(char *filename, char v, uint64_t vseq) {
1997 	cassert(config_prof);
1998 
1999 	if (vseq != VSEQ_INVALID) {
2000 	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
2001 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2002 		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
2003 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
2004 	} else {
2005 	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
2006 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2007 		    "%s.%d.%"FMTu64".%c.heap",
2008 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
2009 	}
2010 	prof_dump_seq++;
2011 }
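
/*
 * Example outputs of prof_dump_filename() above, assuming the default
 * "jeprof" prefix, pid 1234, and prof_dump_seq == 0 at call time: an
 * interval dump ('i', vseq 0) yields "jeprof.1234.0.i0.heap", and a final
 * dump ('f', VSEQ_INVALID) yields "jeprof.1234.0.f.heap".
 */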
2012 
2013 static void
2014 prof_fdump(void) {
2015 	tsd_t *tsd;
2016 	char filename[DUMP_FILENAME_BUFSIZE];
2017 
2018 	cassert(config_prof);
2019 	assert(opt_prof_final);
2020 	assert(opt_prof_prefix[0] != '\0');
2021 
2022 	if (!prof_booted) {
2023 		return;
2024 	}
2025 	tsd = tsd_fetch();
2026 	assert(tsd_reentrancy_level_get(tsd) == 0);
2027 
2028 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2029 	prof_dump_filename(filename, 'f', VSEQ_INVALID);
2030 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2031 	prof_dump(tsd, false, filename, opt_prof_leak);
2032 }
2033 
2034 bool
2035 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
2036 	cassert(config_prof);
2037 
2038 #ifndef JEMALLOC_ATOMIC_U64
2039 	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
2040 	    WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
2041 		return true;
2042 	}
2043 	prof_accum->accumbytes = 0;
2044 #else
2045 	atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
2046 #endif
2047 	return false;
2048 }
2049 
2050 void
2051 prof_idump(tsdn_t *tsdn) {
2052 	tsd_t *tsd;
2053 	prof_tdata_t *tdata;
2054 
2055 	cassert(config_prof);
2056 
2057 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2058 		return;
2059 	}
2060 	tsd = tsdn_tsd(tsdn);
2061 	if (tsd_reentrancy_level_get(tsd) > 0) {
2062 		return;
2063 	}
2064 
2065 	tdata = prof_tdata_get(tsd, false);
2066 	if (tdata == NULL) {
2067 		return;
2068 	}
2069 	if (tdata->enq) {
2070 		tdata->enq_idump = true;
2071 		return;
2072 	}
2073 
2074 	if (opt_prof_prefix[0] != '\0') {
2075 		char filename[PATH_MAX + 1];
2076 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2077 		prof_dump_filename(filename, 'i', prof_dump_iseq);
2078 		prof_dump_iseq++;
2079 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2080 		prof_dump(tsd, false, filename, false);
2081 	}
2082 }
2083 
2084 bool
2085 prof_mdump(tsd_t *tsd, const char *filename) {
2086 	cassert(config_prof);
2087 	assert(tsd_reentrancy_level_get(tsd) == 0);
2088 
2089 	if (!opt_prof || !prof_booted) {
2090 		return true;
2091 	}
2092 	char filename_buf[DUMP_FILENAME_BUFSIZE];
2093 	if (filename == NULL) {
2094 		/* No filename specified, so automatically generate one. */
2095 		if (opt_prof_prefix[0] == '\0') {
2096 			return true;
2097 		}
2098 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2099 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
2100 		prof_dump_mseq++;
2101 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2102 		filename = filename_buf;
2103 	}
2104 	return prof_dump(tsd, true, filename, false);
2105 }
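
/*
 * Illustrative trigger for prof_mdump() above (a sketch, not part of this
 * file): it backs the "prof.dump" mallctl, where a NULL filename requests
 * the auto-generated "<prefix>.<pid>.<seq>.m<mseq>.heap" name:
 *
 *	const char *fname = NULL;
 *	mallctl("prof.dump", NULL, NULL, &fname, sizeof(fname));
 */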
2106 
2107 void
2108 prof_gdump(tsdn_t *tsdn) {
2109 	tsd_t *tsd;
2110 	prof_tdata_t *tdata;
2111 
2112 	cassert(config_prof);
2113 
2114 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2115 		return;
2116 	}
2117 	tsd = tsdn_tsd(tsdn);
2118 	if (tsd_reentrancy_level_get(tsd) > 0) {
2119 		return;
2120 	}
2121 
2122 	tdata = prof_tdata_get(tsd, false);
2123 	if (tdata == NULL) {
2124 		return;
2125 	}
2126 	if (tdata->enq) {
2127 		tdata->enq_gdump = true;
2128 		return;
2129 	}
2130 
2131 	if (opt_prof_prefix[0] != '\0') {
2132 		char filename[DUMP_FILENAME_BUFSIZE];
2133 		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
2134 		prof_dump_filename(filename, 'u', prof_dump_useq);
2135 		prof_dump_useq++;
2136 		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
2137 		prof_dump(tsd, false, filename, false);
2138 	}
2139 }
2140 
2141 static void
2142 prof_bt_hash(const void *key, size_t r_hash[2]) {
2143 	prof_bt_t *bt = (prof_bt_t *)key;
2144 
2145 	cassert(config_prof);
2146 
2147 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
2148 }
2149 
2150 static bool
2151 prof_bt_keycomp(const void *k1, const void *k2) {
2152 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
2153 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
2154 
2155 	cassert(config_prof);
2156 
2157 	if (bt1->len != bt2->len) {
2158 		return false;
2159 	}
2160 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
2161 }
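
/*
 * prof_bt_hash() and prof_bt_keycomp() serve as the hash and comparison
 * callbacks for the backtrace-keyed cuckoo hash tables: the global bt2gctx
 * table (created in prof_boot2()) and each tdata's bt2tctx table (created in
 * prof_tdata_init_impl()).
 */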
2162 
2163 static void
2164 prof_bt_node_hash(const void *key, size_t r_hash[2]) {
2165 	const prof_bt_node_t *bt_node = (prof_bt_node_t *)key;
2166 	prof_bt_hash((void *)(&bt_node->bt), r_hash);
2167 }
2168 
2169 static bool
2170 prof_bt_node_keycomp(const void *k1, const void *k2) {
2171 	const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1;
2172 	const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2;
2173 	return prof_bt_keycomp((void *)(&bt_node1->bt),
2174 	    (void *)(&bt_node2->bt));
2175 }
2176 
2177 static void
2178 prof_thr_node_hash(const void *key, size_t r_hash[2]) {
2179 	const prof_thr_node_t *thr_node = (prof_thr_node_t *)key;
2180 	hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash);
2181 }
2182 
2183 static bool
2184 prof_thr_node_keycomp(const void *k1, const void *k2) {
2185 	const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1;
2186 	const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2;
2187 	return thr_node1->thr_uid == thr_node2->thr_uid;
2188 }
2189 
2190 static uint64_t
2191 prof_thr_uid_alloc(tsdn_t *tsdn) {
2192 	uint64_t thr_uid;
2193 
2194 	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
2195 	thr_uid = next_thr_uid;
2196 	next_thr_uid++;
2197 	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
2198 
2199 	return thr_uid;
2200 }
2201 
2202 static prof_tdata_t *
2203 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
2204     char *thread_name, bool active) {
2205 	prof_tdata_t *tdata;
2206 
2207 	cassert(config_prof);
2208 
2209 	/* Initialize an empty cache for this thread. */
2210 	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
2211 	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
2212 	    arena_get(TSDN_NULL, 0, true), true);
2213 	if (tdata == NULL) {
2214 		return NULL;
2215 	}
2216 
2217 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
2218 	tdata->thr_uid = thr_uid;
2219 	tdata->thr_discrim = thr_discrim;
2220 	tdata->thread_name = thread_name;
2221 	tdata->attached = true;
2222 	tdata->expired = false;
2223 	tdata->tctx_uid_next = 0;
2224 
2225 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
2226 	    prof_bt_keycomp)) {
2227 		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2228 		return NULL;
2229 	}
2230 
2231 	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
2232 	prof_sample_threshold_update(tdata);
2233 
2234 	tdata->enq = false;
2235 	tdata->enq_idump = false;
2236 	tdata->enq_gdump = false;
2237 
2238 	tdata->dumping = false;
2239 	tdata->active = active;
2240 
2241 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2242 	tdata_tree_insert(&tdatas, tdata);
2243 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2244 
2245 	return tdata;
2246 }
2247 
2248 prof_tdata_t *
2249 prof_tdata_init(tsd_t *tsd) {
2250 	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
2251 	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
2252 }
2253 
2254 static bool
2255 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
2256 	if (tdata->attached && !even_if_attached) {
2257 		return false;
2258 	}
2259 	if (ckh_count(&tdata->bt2tctx) != 0) {
2260 		return false;
2261 	}
2262 	return true;
2263 }
2264 
2265 static bool
2266 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
2267     bool even_if_attached) {
2268 	malloc_mutex_assert_owner(tsdn, tdata->lock);
2269 
2270 	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
2271 }
2272 
2273 static void
2274 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
2275     bool even_if_attached) {
2276 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
2277 
2278 	tdata_tree_remove(&tdatas, tdata);
2279 
2280 	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
2281 
2282 	if (tdata->thread_name != NULL) {
2283 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2284 		    true);
2285 	}
2286 	ckh_delete(tsd, &tdata->bt2tctx);
2287 	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2288 }
2289 
2290 static void
2291 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
2292 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2293 	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
2294 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2295 }
2296 
2297 static void
2298 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
2299 	bool destroy_tdata;
2300 
2301 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
2302 	if (tdata->attached) {
2303 		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
2304 		    true);
2305 		/*
2306 		 * Only detach if !destroy_tdata, because detaching would allow
2307 		 * another thread to win the race to destroy tdata.
2308 		 */
2309 		if (!destroy_tdata) {
2310 			tdata->attached = false;
2311 		}
2312 		tsd_prof_tdata_set(tsd, NULL);
2313 	} else {
2314 		destroy_tdata = false;
2315 	}
2316 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
2317 	if (destroy_tdata) {
2318 		prof_tdata_destroy(tsd, tdata, true);
2319 	}
2320 }
2321 
2322 prof_tdata_t *
2323 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
2324 	uint64_t thr_uid = tdata->thr_uid;
2325 	uint64_t thr_discrim = tdata->thr_discrim + 1;
2326 	char *thread_name = (tdata->thread_name != NULL) ?
2327 	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
2328 	bool active = tdata->active;
2329 
2330 	prof_tdata_detach(tsd, tdata);
2331 	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
2332 	    active);
2333 }
2334 
2335 static bool
2336 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
2337 	bool destroy_tdata;
2338 
2339 	malloc_mutex_lock(tsdn, tdata->lock);
2340 	if (!tdata->expired) {
2341 		tdata->expired = true;
2342 		destroy_tdata = tdata->attached ? false :
2343 		    prof_tdata_should_destroy(tsdn, tdata, false);
2344 	} else {
2345 		destroy_tdata = false;
2346 	}
2347 	malloc_mutex_unlock(tsdn, tdata->lock);
2348 
2349 	return destroy_tdata;
2350 }
2351 
2352 static prof_tdata_t *
2353 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
2354     void *arg) {
2355 	tsdn_t *tsdn = (tsdn_t *)arg;
2356 
2357 	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
2358 }
2359 
2360 void
2361 prof_reset(tsd_t *tsd, size_t lg_sample) {
2362 	prof_tdata_t *next;
2363 
2364 	assert(lg_sample < (sizeof(uint64_t) << 3));
2365 
2366 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
2367 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2368 
2369 	lg_prof_sample = lg_sample;
2370 
2371 	next = NULL;
2372 	do {
2373 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
2374 		    prof_tdata_reset_iter, (void *)tsd);
2375 		if (to_destroy != NULL) {
2376 			next = tdata_tree_next(&tdatas, to_destroy);
2377 			prof_tdata_destroy_locked(tsd, to_destroy, false);
2378 		} else {
2379 			next = NULL;
2380 		}
2381 	} while (next != NULL);
2382 
2383 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2384 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
2385 }
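
/*
 * Illustrative trigger for prof_reset() above (a sketch, not part of this
 * file): it backs the "prof.reset" mallctl, which discards accumulated
 * profile data and optionally updates the sample rate:
 *
 *	size_t lg_sample = 19;
 *	mallctl("prof.reset", NULL, NULL, &lg_sample, sizeof(lg_sample));
 */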
2386 
2387 void
2388 prof_tdata_cleanup(tsd_t *tsd) {
2389 	prof_tdata_t *tdata;
2390 
2391 	if (!config_prof) {
2392 		return;
2393 	}
2394 
2395 	tdata = tsd_prof_tdata_get(tsd);
2396 	if (tdata != NULL) {
2397 		prof_tdata_detach(tsd, tdata);
2398 	}
2399 }
2400 
2401 bool
2402 prof_active_get(tsdn_t *tsdn) {
2403 	bool prof_active_current;
2404 
2405 	malloc_mutex_lock(tsdn, &prof_active_mtx);
2406 	prof_active_current = prof_active;
2407 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2408 	return prof_active_current;
2409 }
2410 
2411 bool
2412 prof_active_set(tsdn_t *tsdn, bool active) {
2413 	bool prof_active_old;
2414 
2415 	malloc_mutex_lock(tsdn, &prof_active_mtx);
2416 	prof_active_old = prof_active;
2417 	prof_active = active;
2418 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2419 	return prof_active_old;
2420 }
2421 
2422 #ifdef JEMALLOC_JET
2423 size_t
2424 prof_log_bt_count(void) {
2425 	size_t cnt = 0;
2426 	prof_bt_node_t *node = log_bt_first;
2427 	while (node != NULL) {
2428 		cnt++;
2429 		node = node->next;
2430 	}
2431 	return cnt;
2432 }
2433 
2434 size_t
2435 prof_log_alloc_count(void) {
2436 	size_t cnt = 0;
2437 	prof_alloc_node_t *node = log_alloc_first;
2438 	while (node != NULL) {
2439 		cnt++;
2440 		node = node->next;
2441 	}
2442 	return cnt;
2443 }
2444 
2445 size_t
2446 prof_log_thr_count(void) {
2447 	size_t cnt = 0;
2448 	prof_thr_node_t *node = log_thr_first;
2449 	while (node != NULL) {
2450 		cnt++;
2451 		node = node->next;
2452 	}
2453 	return cnt;
2454 }
2455 
2456 bool
2457 prof_log_is_logging(void) {
2458 	return prof_logging_state == prof_logging_state_started;
2459 }
2460 
2461 bool
2462 prof_log_rep_check(void) {
2463 	if (prof_logging_state == prof_logging_state_stopped
2464 	    && log_tables_initialized) {
2465 		return true;
2466 	}
2467 
2468 	if (log_bt_last != NULL && log_bt_last->next != NULL) {
2469 		return true;
2470 	}
2471 	if (log_thr_last != NULL && log_thr_last->next != NULL) {
2472 		return true;
2473 	}
2474 	if (log_alloc_last != NULL && log_alloc_last->next != NULL) {
2475 		return true;
2476 	}
2477 
2478 	size_t bt_count = prof_log_bt_count();
2479 	size_t thr_count = prof_log_thr_count();
2480 	size_t alloc_count = prof_log_alloc_count();
2481 
2482 
2483 	if (prof_logging_state == prof_logging_state_stopped) {
2484 		if (bt_count != 0 || thr_count != 0 || alloc_count != 0) {
2485 			return true;
2486 		}
2487 	}
2488 
2489 	prof_alloc_node_t *node = log_alloc_first;
2490 	while (node != NULL) {
2491 		if (node->alloc_bt_ind >= bt_count) {
2492 			return true;
2493 		}
2494 		if (node->free_bt_ind >= bt_count) {
2495 			return true;
2496 		}
2497 		if (node->alloc_thr_ind >= thr_count) {
2498 			return true;
2499 		}
2500 		if (node->free_thr_ind >= thr_count) {
2501 			return true;
2502 		}
2503 		if (node->alloc_time_ns > node->free_time_ns) {
2504 			return true;
2505 		}
2506 		node = node->next;
2507 	}
2508 
2509 	return false;
2510 }
2511 
2512 void
2513 prof_log_dummy_set(bool new_value) {
2514 	prof_log_dummy = new_value;
2515 }
2516 #endif
2517 
2518 /* Used as an atexit function to stop logging on exit. */
2519 static void
2520 prof_log_stop_final(void) {
2521 	tsd_t *tsd = tsd_fetch();
2522 	prof_log_stop(tsd_tsdn(tsd));
2523 }
2524 
2525 bool
2526 prof_log_start(tsdn_t *tsdn, const char *filename) {
2527 	if (!opt_prof || !prof_booted) {
2528 		return true;
2529 	}
2530 
2531 	bool ret = false;
2532 	size_t buf_size = PATH_MAX + 1;
2533 
2534 	malloc_mutex_lock(tsdn, &log_mtx);
2535 
2536 	if (prof_logging_state != prof_logging_state_stopped) {
2537 		ret = true;
2538 	} else if (filename == NULL) {
2539 		/* Make default name. */
2540 		malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json",
2541 		    opt_prof_prefix, prof_getpid(), log_seq);
2542 		log_seq++;
2543 		prof_logging_state = prof_logging_state_started;
2544 	} else if (strlen(filename) >= buf_size) {
2545 		ret = true;
2546 	} else {
2547 		strcpy(log_filename, filename);
2548 		prof_logging_state = prof_logging_state_started;
2549 	}
2550 
2551 	if (!ret) {
2552 		nstime_update(&log_start_timestamp);
2553 	}
2554 
2555 	if (!prof_logging_final_hook_registered) {
2556 		if (atexit(prof_log_stop_final) != 0) {
2557 			malloc_write("<jemalloc>: Error in atexit() "
2558 				     "for logging\n");
2559 			if (opt_abort) {
2560 				abort();
2561 			}
2562 		} else {
2563 			prof_logging_final_hook_registered = true;
2564 		}
2565 	}
2566 
2567 	malloc_mutex_unlock(tsdn, &log_mtx);
2568 
2569 	return ret;
2570 }
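
/*
 * prof_log_start() above is reached at boot when opt_prof_log is set (see
 * prof_boot2()), in which case filename is NULL and the auto-generated
 * "<prefix>.<pid>.<seq>.json" name is used.  It can also be invoked at run
 * time through jemalloc's experimental logging control (assumed here to be
 * the "prof.log_start" mallctl; not defined in this file).
 */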
2571 
2572 struct prof_emitter_cb_arg_s {
2573 	int fd;
2574 	ssize_t ret;
2575 };
2576 
2577 static void
2578 prof_emitter_write_cb(void *opaque, const char *to_write) {
2579 	struct prof_emitter_cb_arg_s *arg =
2580 	    (struct prof_emitter_cb_arg_s *)opaque;
2581 	size_t bytes = strlen(to_write);
2582 #ifdef JEMALLOC_JET
2583 	if (prof_log_dummy) {
2584 		return;
2585 	}
2586 #endif
2587 	arg->ret = write(arg->fd, (void *)to_write, bytes);
2588 }
2589 
2590 /*
2591  * Each prof_log_emit_{...} function walks its linked list, emitting each
2592  * node to the JSON output and deallocating it.
2593  */
2594 static void
2595 prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) {
2596 	emitter_json_array_kv_begin(emitter, "threads");
2597 	prof_thr_node_t *thr_node = log_thr_first;
2598 	prof_thr_node_t *thr_old_node;
2599 	while (thr_node != NULL) {
2600 		emitter_json_object_begin(emitter);
2601 
2602 		emitter_json_kv(emitter, "thr_uid", emitter_type_uint64,
2603 		    &thr_node->thr_uid);
2604 
2605 		char *thr_name = thr_node->name;
2606 
2607 		emitter_json_kv(emitter, "thr_name", emitter_type_string,
2608 		    &thr_name);
2609 
2610 		emitter_json_object_end(emitter);
2611 		thr_old_node = thr_node;
2612 		thr_node = thr_node->next;
2613 		idalloc(tsd, thr_old_node);
2614 	}
2615 	emitter_json_array_end(emitter);
2616 }
2617 
2618 static void
2619 prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
2620 	emitter_json_array_kv_begin(emitter, "stack_traces");
2621 	prof_bt_node_t *bt_node = log_bt_first;
2622 	prof_bt_node_t *bt_old_node;
2623 	/*
2624 	 * Calculate the buffer size we need: two hex digits per pointer byte,
2625 	 * two for the "0x" prefix, and one for the terminating '\0'.
2626 	 */
2627 	char buf[2 * sizeof(intptr_t) + 3];
2628 	size_t buf_sz = sizeof(buf);
2629 	while (bt_node != NULL) {
2630 		emitter_json_array_begin(emitter);
2631 		size_t i;
2632 		for (i = 0; i < bt_node->bt.len; i++) {
2633 			malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]);
2634 			char *trace_str = buf;
2635 			emitter_json_value(emitter, emitter_type_string,
2636 			    &trace_str);
2637 		}
2638 		emitter_json_array_end(emitter);
2639 
2640 		bt_old_node = bt_node;
2641 		bt_node = bt_node->next;
2642 		idalloc(tsd, bt_old_node);
2643 	}
2644 	emitter_json_array_end(emitter);
2645 }
2646 
2647 static void
2648 prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) {
2649 	emitter_json_array_kv_begin(emitter, "allocations");
2650 	prof_alloc_node_t *alloc_node = log_alloc_first;
2651 	prof_alloc_node_t *alloc_old_node;
2652 	while (alloc_node != NULL) {
2653 		emitter_json_object_begin(emitter);
2654 
2655 		emitter_json_kv(emitter, "alloc_thread", emitter_type_size,
2656 		    &alloc_node->alloc_thr_ind);
2657 
2658 		emitter_json_kv(emitter, "free_thread", emitter_type_size,
2659 		    &alloc_node->free_thr_ind);
2660 
2661 		emitter_json_kv(emitter, "alloc_trace", emitter_type_size,
2662 		    &alloc_node->alloc_bt_ind);
2663 
2664 		emitter_json_kv(emitter, "free_trace", emitter_type_size,
2665 		    &alloc_node->free_bt_ind);
2666 
2667 		emitter_json_kv(emitter, "alloc_timestamp",
2668 		    emitter_type_uint64, &alloc_node->alloc_time_ns);
2669 
2670 		emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64,
2671 		    &alloc_node->free_time_ns);
2672 
2673 		emitter_json_kv(emitter, "usize", emitter_type_uint64,
2674 		    &alloc_node->usize);
2675 
2676 		emitter_json_object_end(emitter);
2677 
2678 		alloc_old_node = alloc_node;
2679 		alloc_node = alloc_node->next;
2680 		idalloc(tsd, alloc_old_node);
2681 	}
2682 	emitter_json_array_end(emitter);
2683 }
2684 
2685 static void
2686 prof_log_emit_metadata(emitter_t *emitter) {
2687 	emitter_json_object_kv_begin(emitter, "info");
2688 
2689 	nstime_t now = NSTIME_ZERO_INITIALIZER;
2690 
2691 	nstime_update(&now);
2692 	uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp);
2693 	emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns);
2694 
2695 	char *vers = JEMALLOC_VERSION;
2696 	emitter_json_kv(emitter, "version",
2697 	    emitter_type_string, &vers);
2698 
2699 	emitter_json_kv(emitter, "lg_sample_rate",
2700 	    emitter_type_int, &lg_prof_sample);
2701 
2702 	int pid = prof_getpid();
2703 	emitter_json_kv(emitter, "pid", emitter_type_int, &pid);
2704 
2705 	emitter_json_object_end(emitter);
2706 }
2707 
2708 
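/*
 * Sketch of the JSON document produced below (field order follows the
 * emitter calls in prof_log_stop(); values are illustrative):
 *
 *	{
 *	  "info": {"duration": ..., "version": ..., "lg_sample_rate": ..., "pid": ...},
 *	  "threads": [{"thr_uid": ..., "thr_name": ...}, ...],
 *	  "stack_traces": [["0x...", "0x..."], ...],
 *	  "allocations": [{"alloc_thread": ..., "free_thread": ...,
 *	                   "alloc_trace": ..., "free_trace": ...,
 *	                   "alloc_timestamp": ..., "free_timestamp": ...,
 *	                   "usize": ...}, ...]
 *	}
 */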
2709 bool
2710 prof_log_stop(tsdn_t *tsdn) {
2711 	if (!opt_prof || !prof_booted) {
2712 		return true;
2713 	}
2714 
2715 	tsd_t *tsd = tsdn_tsd(tsdn);
2716 	malloc_mutex_lock(tsdn, &log_mtx);
2717 
2718 	if (prof_logging_state != prof_logging_state_started) {
2719 		malloc_mutex_unlock(tsdn, &log_mtx);
2720 		return true;
2721 	}
2722 
2723 	/*
2724 	 * Set the state to dumping. We'll set it to stopped when we're done.
2725 	 * Since other threads won't be able to start/stop/log when the state is
2726 	 * dumping, we don't have to hold the lock during the whole method.
2727 	 */
2728 	prof_logging_state = prof_logging_state_dumping;
2729 	malloc_mutex_unlock(tsdn, &log_mtx);
2730 
2731 
2732 	emitter_t emitter;
2733 
2734 	/* Create a file. */
2735 
2736 	int fd;
2737 #ifdef JEMALLOC_JET
2738 	if (prof_log_dummy) {
2739 		fd = 0;
2740 	} else {
2741 		fd = creat(log_filename, 0644);
2742 	}
2743 #else
2744 	fd = creat(log_filename, 0644);
2745 #endif
2746 
2747 	if (fd == -1) {
2748 		malloc_printf("<jemalloc>: creat() for log file \"%s\" "
2749 			      "failed with %d\n", log_filename, errno);
2750 		if (opt_abort) {
2751 			abort();
2752 		}
2753 		return true;
2754 	}
2755 
2756 	/* Emit to json. */
2757 	struct prof_emitter_cb_arg_s arg;
2758 	arg.fd = fd;
2759 	emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
2760 	    (void *)(&arg));
2761 
2762 	emitter_begin(&emitter);
2763 	prof_log_emit_metadata(&emitter);
2764 	prof_log_emit_threads(tsd, &emitter);
2765 	prof_log_emit_traces(tsd, &emitter);
2766 	prof_log_emit_allocs(tsd, &emitter);
2767 	emitter_end(&emitter);
2768 
2769 	/* Reset global state. */
2770 	if (log_tables_initialized) {
2771 		ckh_delete(tsd, &log_bt_node_set);
2772 		ckh_delete(tsd, &log_thr_node_set);
2773 	}
2774 	log_tables_initialized = false;
2775 	log_bt_index = 0;
2776 	log_thr_index = 0;
2777 	log_bt_first = NULL;
2778 	log_bt_last = NULL;
2779 	log_thr_first = NULL;
2780 	log_thr_last = NULL;
2781 	log_alloc_first = NULL;
2782 	log_alloc_last = NULL;
2783 
2784 	malloc_mutex_lock(tsdn, &log_mtx);
2785 	prof_logging_state = prof_logging_state_stopped;
2786 	malloc_mutex_unlock(tsdn, &log_mtx);
2787 
2788 #ifdef JEMALLOC_JET
2789 	if (prof_log_dummy) {
2790 		return false;
2791 	}
2792 #endif
2793 	return close(fd);
2794 }
2795 
2796 const char *
2797 prof_thread_name_get(tsd_t *tsd) {
2798 	prof_tdata_t *tdata;
2799 
2800 	tdata = prof_tdata_get(tsd, true);
2801 	if (tdata == NULL) {
2802 		return "";
2803 	}
2804 	return (tdata->thread_name != NULL ? tdata->thread_name : "");
2805 }
2806 
2807 static char *
2808 prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
2809 	char *ret;
2810 	size_t size;
2811 
2812 	if (thread_name == NULL) {
2813 		return NULL;
2814 	}
2815 
2816 	size = strlen(thread_name) + 1;
2817 	if (size == 1) {
2818 		return "";
2819 	}
2820 
2821 	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
2822 	    arena_get(TSDN_NULL, 0, true), true);
2823 	if (ret == NULL) {
2824 		return NULL;
2825 	}
2826 	memcpy(ret, thread_name, size);
2827 	return ret;
2828 }
2829 
2830 int
2831 prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
2832 	prof_tdata_t *tdata;
2833 	unsigned i;
2834 	char *s;
2835 
2836 	tdata = prof_tdata_get(tsd, true);
2837 	if (tdata == NULL) {
2838 		return EAGAIN;
2839 	}
2840 
2841 	/* Validate input. */
2842 	if (thread_name == NULL) {
2843 		return EFAULT;
2844 	}
2845 	for (i = 0; thread_name[i] != '\0'; i++) {
2846 		char c = thread_name[i];
2847 		if (!isgraph(c) && !isblank(c)) {
2848 			return EFAULT;
2849 		}
2850 	}
2851 
2852 	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2853 	if (s == NULL) {
2854 		return EAGAIN;
2855 	}
2856 
2857 	if (tdata->thread_name != NULL) {
2858 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2859 		    true);
2860 		tdata->thread_name = NULL;
2861 	}
2862 	if (strlen(s) > 0) {
2863 		tdata->thread_name = s;
2864 	}
2865 	return 0;
2866 }
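
/*
 * Illustrative trigger for prof_thread_name_set() above (a sketch, not part
 * of this file): it backs the "thread.prof.name" mallctl:
 *
 *	const char *name = "worker-0";
 *	mallctl("thread.prof.name", NULL, NULL, &name, sizeof(name));
 */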
2867 
2868 bool
2869 prof_thread_active_get(tsd_t *tsd) {
2870 	prof_tdata_t *tdata;
2871 
2872 	tdata = prof_tdata_get(tsd, true);
2873 	if (tdata == NULL) {
2874 		return false;
2875 	}
2876 	return tdata->active;
2877 }
2878 
2879 bool
2880 prof_thread_active_set(tsd_t *tsd, bool active) {
2881 	prof_tdata_t *tdata;
2882 
2883 	tdata = prof_tdata_get(tsd, true);
2884 	if (tdata == NULL) {
2885 		return true;
2886 	}
2887 	tdata->active = active;
2888 	return false;
2889 }
2890 
2891 bool
2892 prof_thread_active_init_get(tsdn_t *tsdn) {
2893 	bool active_init;
2894 
2895 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2896 	active_init = prof_thread_active_init;
2897 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2898 	return active_init;
2899 }
2900 
2901 bool
2902 prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
2903 	bool active_init_old;
2904 
2905 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2906 	active_init_old = prof_thread_active_init;
2907 	prof_thread_active_init = active_init;
2908 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2909 	return active_init_old;
2910 }
2911 
2912 bool
2913 prof_gdump_get(tsdn_t *tsdn) {
2914 	bool prof_gdump_current;
2915 
2916 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2917 	prof_gdump_current = prof_gdump_val;
2918 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2919 	return prof_gdump_current;
2920 }
2921 
2922 bool
2923 prof_gdump_set(tsdn_t *tsdn, bool gdump) {
2924 	bool prof_gdump_old;
2925 
2926 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2927 	prof_gdump_old = prof_gdump_val;
2928 	prof_gdump_val = gdump;
2929 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2930 	return prof_gdump_old;
2931 }
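
/*
 * prof_gdump_get()/prof_gdump_set() above back the "prof.gdump" mallctl,
 * which toggles dumping a profile whenever total virtual memory reaches a
 * new high-water mark.  Write-side sketch (not part of this file):
 *
 *	bool enable = true;
 *	mallctl("prof.gdump", NULL, NULL, &enable, sizeof(enable));
 */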
2932 
2933 void
2934 prof_boot0(void) {
2935 	cassert(config_prof);
2936 
2937 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
2938 	    sizeof(PROF_PREFIX_DEFAULT));
2939 }
2940 
2941 void
2942 prof_boot1(void) {
2943 	cassert(config_prof);
2944 
2945 	/*
2946 	 * opt_prof must be in its final state before any arenas are
2947 	 * initialized, so this function must be executed early.
2948 	 */
2949 
2950 	if (opt_prof_leak && !opt_prof) {
2951 		/*
2952 		 * Enable opt_prof, but in such a way that profiles are never
2953 		 * automatically dumped.
2954 		 */
2955 		opt_prof = true;
2956 		opt_prof_gdump = false;
2957 	} else if (opt_prof) {
2958 		if (opt_lg_prof_interval >= 0) {
2959 			prof_interval = (((uint64_t)1U) <<
2960 			    opt_lg_prof_interval);
2961 		}
2962 	}
2963 }
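
/*
 * Example for the interval computation above (illustrative): with
 * opt_lg_prof_interval == 30, prof_interval becomes 2^30, so an interval
 * dump is triggered roughly once per GiB of allocation activity.
 */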
2964 
2965 bool
2966 prof_boot2(tsd_t *tsd) {
2967 	cassert(config_prof);
2968 
2969 	if (opt_prof) {
2970 		unsigned i;
2971 
2972 		lg_prof_sample = opt_lg_prof_sample;
2973 
2974 		prof_active = opt_prof_active;
2975 		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
2976 		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
2977 			return true;
2978 		}
2979 
2980 		prof_gdump_val = opt_prof_gdump;
2981 		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
2982 		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
2983 			return true;
2984 		}
2985 
2986 		prof_thread_active_init = opt_prof_thread_active_init;
2987 		if (malloc_mutex_init(&prof_thread_active_init_mtx,
2988 		    "prof_thread_active_init",
2989 		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
2990 		    malloc_mutex_rank_exclusive)) {
2991 			return true;
2992 		}
2993 
2994 		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
2995 		    prof_bt_keycomp)) {
2996 			return true;
2997 		}
2998 		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
2999 		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
3000 			return true;
3001 		}
3002 
3003 		tdata_tree_new(&tdatas);
3004 		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
3005 		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
3006 			return true;
3007 		}
3008 
3009 		next_thr_uid = 0;
3010 		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
3011 		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
3012 			return true;
3013 		}
3014 
3015 		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
3016 		    WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
3017 			return true;
3018 		}
3019 		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
3020 		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
3021 			return true;
3022 		}
3023 
3024 		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
3025 		    atexit(prof_fdump) != 0) {
3026 			malloc_write("<jemalloc>: Error in atexit()\n");
3027 			if (opt_abort) {
3028 				abort();
3029 			}
3030 		}
3031 
3032 		if (opt_prof_log) {
3033 			prof_log_start(tsd_tsdn(tsd), NULL);
3034 		}
3035 
3036 		if (malloc_mutex_init(&log_mtx, "prof_log",
3037 		    WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
3038 			return true;
3039 		}
3040 
3041 		if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
3042 		    prof_bt_node_hash, prof_bt_node_keycomp)) {
3043 			return true;
3044 		}
3045 
3046 		if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
3047 		    prof_thr_node_hash, prof_thr_node_keycomp)) {
3048 			return true;
3049 		}
3050 
3051 		log_tables_initialized = true;
3052 
3053 		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3054 		    b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
3055 		    CACHELINE);
3056 		if (gctx_locks == NULL) {
3057 			return true;
3058 		}
3059 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3060 			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
3061 			    WITNESS_RANK_PROF_GCTX,
3062 			    malloc_mutex_rank_exclusive)) {
3063 				return true;
3064 			}
3065 		}
3066 
3067 		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3068 		    b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
3069 		    CACHELINE);
3070 		if (tdata_locks == NULL) {
3071 			return true;
3072 		}
3073 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3074 			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
3075 			    WITNESS_RANK_PROF_TDATA,
3076 			    malloc_mutex_rank_exclusive)) {
3077 				return true;
3078 			}
3079 		}
3080 #ifdef JEMALLOC_PROF_LIBGCC
3081 		/*
3082 		 * Cause the backtracing machinery to allocate its internal
3083 		 * state before enabling profiling.
3084 		 */
3085 		_Unwind_Backtrace(prof_unwind_init_callback, NULL);
3086 #endif
3087 	}
3088 	prof_booted = true;
3089 
3090 	return false;
3091 }
3092 
3093 void
3094 prof_prefork0(tsdn_t *tsdn) {
3095 	if (config_prof && opt_prof) {
3096 		unsigned i;
3097 
3098 		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
3099 		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
3100 		malloc_mutex_prefork(tsdn, &tdatas_mtx);
3101 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3102 			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
3103 		}
3104 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3105 			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
3106 		}
3107 	}
3108 }
3109 
3110 void
3111 prof_prefork1(tsdn_t *tsdn) {
3112 	if (config_prof && opt_prof) {
3113 		malloc_mutex_prefork(tsdn, &prof_active_mtx);
3114 		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
3115 		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
3116 		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
3117 		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
3118 	}
3119 }
3120 
3121 void
3122 prof_postfork_parent(tsdn_t *tsdn) {
3123 	if (config_prof && opt_prof) {
3124 		unsigned i;
3125 
3126 		malloc_mutex_postfork_parent(tsdn,
3127 		    &prof_thread_active_init_mtx);
3128 		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
3129 		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
3130 		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
3131 		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
3132 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3133 			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
3134 		}
3135 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3136 			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
3137 		}
3138 		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
3139 		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
3140 		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
3141 	}
3142 }
3143 
3144 void
3145 prof_postfork_child(tsdn_t *tsdn) {
3146 	if (config_prof && opt_prof) {
3147 		unsigned i;
3148 
3149 		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
3150 		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
3151 		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
3152 		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
3153 		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
3154 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3155 			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
3156 		}
3157 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3158 			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
3159 		}
3160 		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
3161 		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
3162 		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
3163 	}
3164 }
3165 
3166 /******************************************************************************/
3167