#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/* False if the necessary runtime support is missing. */
bool can_enable_background_thread;

/******************************************************************************/

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
	if (!isthreaded) {
		isthreaded = true;
	}
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
	pthread_create_wrapper_init();

	return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
	background_thread_wakeup_time_set(tsdn, info, 0);
	info->npages_to_purge_new = 0;
	if (config_stats) {
		info->tot_n_runs = 0;
		nstime_init(&info->tot_sleep_time, 0);
	}
}

static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
	cpu_set_t cpuset;
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);
	int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

	return (ret != 0);
#else
	return false;
#endif
}

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimum sleep interval: 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)

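/*
 * Estimate how many pages would be purged if the thread slept for `interval`
 * decay epochs.  Backlog entries that age out of the smoothstep window within
 * the interval contribute their full remaining allowance (h_steps[i]); the
 * rest contribute the difference between their current and future allowance.
 * The weights are fixed point with SMOOTHSTEP_BFP fractional bits, hence the
 * final shift.
 */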
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
	size_t i;
	uint64_t sum = 0;
	for (i = 0; i < interval; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	for (; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
	}

	return (size_t)(sum >> SMOOTHSTEP_BFP);
}

static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		/* Use the minimal interval if decay is contended. */
		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
	}

	uint64_t interval;
	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is currently eager or disabled. */
		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
		goto label_done;
	}

	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);
	size_t npages = extents_npages_get(extents);
	if (npages == 0) {
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded.  Sleep indefinitely. */
			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
			goto label_done;
		}
	}
	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		/* Use max interval. */
		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
		goto label_done;
	}

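	/*
	 * Bound the candidate sleep interval to [lb, ub] decay epochs, then
	 * binary-search for the shortest interval whose estimated purge
	 * volume reaches BACKGROUND_THREAD_NPAGES_THRESHOLD.
	 */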
	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
	size_t ub = SMOOTHSTEP_NSTEPS;
	/* At least 2 intervals to ensure reaching the next epoch deadline. */
	lb = (lb < 2) ? 2 : lb;
	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
	    (lb + 2 > ub)) {
		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
		goto label_done;
	}

	assert(lb + 2 <= ub);
	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		interval = decay_interval_ns * lb;
		goto label_done;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		interval = decay_interval_ns * ub;
		goto label_done;
	}

	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
	    && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
	}
	interval = decay_interval_ns * (ub + lb) / 2;
label_done:
	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
	malloc_mutex_unlock(tsdn, &decay->mtx);

	return interval;
}

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
	uint64_t i1, i2;
	i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
	    &arena->extents_dirty);
	if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		return i1;
	}
	i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
	    &arena->extents_muzzy);

	return i1 < i2 ? i1 : i2;
}

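/*
 * Sleep on info->cond until roughly `interval` nanoseconds have elapsed, or
 * until the thread is signaled.  The wakeup time is published via
 * background_thread_wakeup_time_set() so that
 * background_thread_interval_check() can decide whether an early signal is
 * worthwhile.
 */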
static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
	if (config_stats) {
		info->tot_n_runs++;
	}
	info->npages_to_purge_new = 0;

	struct timeval tv;
	/* timedwait uses the cond's clock (realtime by default). */
	gettimeofday(&tv, NULL);
	nstime_t before_sleep;
	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

	int ret;
	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
		assert(background_thread_indefinite_sleep(info));
		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
		assert(ret == 0);
	} else {
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
		/* We need the malloc clock (which can differ from tv). */
		nstime_t next_wakeup;
		nstime_init(&next_wakeup, 0);
		nstime_update(&next_wakeup);
		nstime_iadd(&next_wakeup, interval);
		assert(nstime_ns(&next_wakeup) <
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
		background_thread_wakeup_time_set(tsdn, info,
		    nstime_ns(&next_wakeup));

		nstime_t ts_wakeup;
		nstime_copy(&ts_wakeup, &before_sleep);
		nstime_iadd(&ts_wakeup, interval);
		struct timespec ts;
		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

		assert(!background_thread_indefinite_sleep(info));
		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
		assert(ret == ETIMEDOUT || ret == 0);
		background_thread_wakeup_time_set(tsdn, info,
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	}
	if (config_stats) {
		gettimeofday(&tv, NULL);
		nstime_t after_sleep;
		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
			nstime_subtract(&after_sleep, &before_sleep);
			nstime_add(&info->tot_sleep_time, &after_sleep);
		}
	}
}

static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
	if (unlikely(info->state == background_thread_paused)) {
		malloc_mutex_unlock(tsdn, &info->mtx);
		/* Wait on the global lock to update status. */
		malloc_mutex_lock(tsdn, &background_thread_lock);
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		malloc_mutex_lock(tsdn, &info->mtx);
		return true;
	}

	return false;
}

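/*
 * One iteration of a background thread's work loop: decay every arena whose
 * index is congruent to `ind` modulo max_background_threads, compute the
 * shortest purge interval among them, and sleep for that long.
 */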
static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
	unsigned narenas = narenas_total_get();

	for (unsigned i = ind; i < narenas; i += max_background_threads) {
		arena_t *arena = arena_get(tsdn, i, false);
		if (!arena) {
			continue;
		}
		arena_decay(tsdn, arena, true, false);
		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
			/* Min interval will be used. */
			continue;
		}
		uint64_t interval = arena_decay_compute_purge_interval(tsdn,
		    arena);
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
		if (min_interval > interval) {
			min_interval = interval;
		}
	}
	background_thread_sleep(tsdn, info, min_interval);
}

static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
	if (info == &background_thread_info[0]) {
		malloc_mutex_assert_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	} else {
		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	}

	pre_reentrancy(tsd, NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	bool has_thread;
	assert(info->state != background_thread_paused);
	if (info->state == background_thread_started) {
		has_thread = true;
		info->state = background_thread_stopped;
		pthread_cond_signal(&info->cond);
	} else {
		has_thread = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

	if (!has_thread) {
		post_reentrancy(tsd);
		return false;
	}
	void *ret;
	if (pthread_join(info->thread, &ret)) {
		post_reentrancy(tsd);
		return true;
	}
	assert(ret == NULL);
	n_background_threads--;
	post_reentrancy(tsd);

	return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
	/*
	 * Mask signals during thread creation so that the thread inherits
	 * an empty signal set.
	 */
	sigset_t set;
	sigfillset(&set);
	sigset_t oldset;
	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
	if (mask_err != 0) {
		return mask_err;
	}
	int create_err = pthread_create_wrapper(thread, attr, start_routine,
	    arg);
	/*
	 * Restore the signal mask.  Failure to restore the signal mask here
	 * changes program behavior.
	 */
	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
	if (restore_err != 0) {
		malloc_printf("<jemalloc>: background thread creation "
		    "failed (%d), and signal mask restoration failed "
		    "(%d)\n", create_err, restore_err);
		if (opt_abort) {
			abort();
		}
	}
	return create_err;
}

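/*
 * Called by thread 0 with background_thread_info[0].mtx held.  Launch any
 * threads whose state was flipped to started by background_thread_create()
 * but which have not been spawned yet.  At most one thread is created per
 * call; returning true tells the caller to restart its loop because
 * info[0].mtx was dropped along the way.
 */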
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
	bool ret = false;
	if (likely(*n_created == n_background_threads)) {
		return ret;
	}

	tsdn_t *tsdn = tsd_tsdn(tsd);
	malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
	for (unsigned i = 1; i < max_background_threads; i++) {
		if (created_threads[i]) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		/*
		 * If the thread is in the background_thread_paused state
		 * (e.g. because of arena reset), delay its creation.
		 */
		bool create = (info->state == background_thread_started);
		malloc_mutex_unlock(tsdn, &info->mtx);
		if (!create) {
			continue;
		}

		pre_reentrancy(tsd, NULL);
		int err = background_thread_create_signals_masked(&info->thread,
		    NULL, background_thread_entry, (void *)(uintptr_t)i);
		post_reentrancy(tsd);

		if (err == 0) {
			(*n_created)++;
			created_threads[i] = true;
		} else {
			malloc_printf("<jemalloc>: background thread "
			    "creation failed (%d)\n", err);
			if (opt_abort) {
				abort();
			}
		}
		/* Return to restart the loop since we unlocked. */
		ret = true;
		break;
	}
	malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

	return ret;
}

static void
background_thread0_work(tsd_t *tsd) {
	/* Thread0 is also responsible for launching / terminating threads. */
	VARIABLE_ARRAY(bool, created_threads, max_background_threads);
	unsigned i;
	for (i = 1; i < max_background_threads; i++) {
		created_threads[i] = false;
	}
	/* Start working, and create more threads when asked. */
	unsigned n_created = 1;
	while (background_thread_info[0].state != background_thread_stopped) {
		if (background_thread_pause_check(tsd_tsdn(tsd),
		    &background_thread_info[0])) {
			continue;
		}
		if (check_background_thread_creation(tsd, &n_created,
		    (bool *)&created_threads)) {
			continue;
		}
		background_work_sleep_once(tsd_tsdn(tsd),
		    &background_thread_info[0], 0);
	}

	/*
	 * Shut down other threads at exit.  Note that the ctl thread holds
	 * the global background_thread mutex and is waiting for us.
	 */
	assert(!background_thread_enabled());
	for (i = 1; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		assert(info->state != background_thread_paused);
		if (created_threads[i]) {
			background_threads_disable_single(tsd, info);
		} else {
			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
			if (info->state != background_thread_stopped) {
				/* The thread was not created. */
				assert(info->state ==
				    background_thread_started);
				n_background_threads--;
				info->state = background_thread_stopped;
			}
			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		}
	}
	background_thread_info[0].state = background_thread_stopped;
	assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
	background_thread_info_t *info = &background_thread_info[ind];

	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	if (ind == 0) {
		background_thread0_work(tsd);
	} else {
		while (info->state != background_thread_stopped) {
			if (background_thread_pause_check(tsd_tsdn(tsd),
			    info)) {
				continue;
			}
			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
		}
	}
	assert(info->state == background_thread_stopped);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

static void *
background_thread_entry(void *ind_arg) {
	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
	assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#endif
	if (opt_percpu_arena != percpu_arena_disabled) {
		set_current_thread_affinity((int)thread_ind);
	}
	/*
	 * Start periodic background work.  We use internal tsd which avoids
	 * side effects, for example triggering new arena creation (which in
	 * turn triggers another background thread creation).
	 */
	background_work(tsd_internal_fetch(), thread_ind);
	assert(pthread_equal(pthread_self(),
	    background_thread_info[thread_ind].thread));

	return NULL;
}

static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
	info->state = background_thread_started;
	background_thread_info_init(tsd_tsdn(tsd), info);
	n_background_threads++;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
	assert(have_background_thread);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* We create at most max_background_threads threads. */
	size_t thread_ind = arena_ind % max_background_threads;
	background_thread_info_t *info = &background_thread_info[thread_ind];

	bool need_new_thread;
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	need_new_thread = background_thread_enabled() &&
	    (info->state == background_thread_stopped);
	if (need_new_thread) {
		background_thread_init(tsd, info);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
	if (!need_new_thread) {
		return false;
	}
	if (arena_ind != 0) {
		/* Threads are created asynchronously by Thread 0. */
		background_thread_info_t *t0 = &background_thread_info[0];
		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
		assert(t0->state == background_thread_started);
		pthread_cond_signal(&t0->cond);
		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

		return false;
	}

	pre_reentrancy(tsd, NULL);
	/*
	 * To avoid complications (besides reentrancy), create internal
	 * background threads with the underlying pthread_create.
	 */
	int err = background_thread_create_signals_masked(&info->thread, NULL,
	    background_thread_entry, (void *)thread_ind);
	post_reentrancy(tsd);

	if (err != 0) {
		malloc_printf("<jemalloc>: arena 0 background thread creation "
		    "failed (%d)\n", err);
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		info->state = background_thread_stopped;
		n_background_threads--;
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

		return true;
	}

	return false;
}

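/*
 * Usage sketch: background threads are typically toggled at runtime via the
 * "background_thread" mallctl, e.g.
 *
 *	bool enable = true;
 *	mallctl("background_thread", NULL, NULL, &enable, sizeof(enable));
 *
 * (or the prefixed je_mallctl, depending on how jemalloc was configured).
 * That ctl path ends up in background_threads_enable() / _disable() below
 * with background_thread_lock held.
 */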
bool
background_threads_enable(tsd_t *tsd) {
	assert(n_background_threads == 0);
	assert(background_thread_enabled());
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	VARIABLE_ARRAY(bool, marked, max_background_threads);
	unsigned i, nmarked;
	for (i = 0; i < max_background_threads; i++) {
		marked[i] = false;
	}
	nmarked = 0;
	/* Thread 0 is required and created at the end. */
	marked[0] = true;
	/* Mark the threads we need to create for thread 0. */
	unsigned n = narenas_total_get();
	for (i = 1; i < n; i++) {
		if (marked[i % max_background_threads] ||
		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[
		    i % max_background_threads];
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		assert(info->state == background_thread_stopped);
		background_thread_init(tsd, info);
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		marked[i % max_background_threads] = true;
		if (++nmarked == max_background_threads) {
			break;
		}
	}

	return background_thread_create(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
	assert(!background_thread_enabled());
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* Thread 0 will be responsible for terminating other threads. */
	if (background_threads_disable_single(tsd,
	    &background_thread_info[0])) {
		return true;
	}
	assert(n_background_threads == 0);

	return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
	background_thread_info_t *info = arena_background_thread_info_get(
	    arena);
	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
		/*
		 * The background thread may hold this mutex for a long time.
		 * To avoid adding latency variance to application threads,
		 * keep this path non-blocking and leave the work to a future
		 * epoch.
		 */
		return;
	}

	if (info->state != background_thread_started) {
		goto label_done;
	}
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		goto label_done;
	}

	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is currently eager or disabled. */
		goto label_done_unlock2;
	}
	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);

	nstime_t diff;
	nstime_init(&diff, background_thread_wakeup_time_get(info));
	if (nstime_compare(&diff, &decay->epoch) <= 0) {
		goto label_done_unlock2;
	}
	nstime_subtract(&diff, &decay->epoch);
	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		goto label_done_unlock2;
	}

	if (npages_new > 0) {
		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
		/*
		 * Compute how many new pages we would need to purge by the next
		 * wakeup, which is used to determine if we should signal the
		 * background thread.
		 */
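		/*
		 * The new pages will have aged about n_epoch decay epochs by
		 * the next wakeup, so the fraction expected to decay by then
		 * is (h_steps[SMOOTHSTEP_NSTEPS-1] -
		 * h_steps[SMOOTHSTEP_NSTEPS-1-n_epoch]), in fixed point with
		 * SMOOTHSTEP_BFP fractional bits (all of them once n_epoch
		 * reaches SMOOTHSTEP_NSTEPS).
		 */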
		uint64_t npurge_new;
		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
			npurge_new = npages_new;
		} else {
			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
			assert(h_steps_max >=
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new = npages_new * (h_steps_max -
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new >>= SMOOTHSTEP_BFP;
		}
		info->npages_to_purge_new += npurge_new;
	}

	bool should_signal;
	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		should_signal = true;
	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
	    (extents_npages_get(&arena->extents_dirty) > 0 ||
	    extents_npages_get(&arena->extents_muzzy) > 0 ||
	    info->npages_to_purge_new > 0)) {
		should_signal = true;
	} else {
		should_signal = false;
	}

	if (should_signal) {
		info->npages_to_purge_new = 0;
		pthread_cond_signal(&info->cond);
	}
label_done_unlock2:
	malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
	malloc_mutex_unlock(tsdn, &info->mtx);
}

void
background_thread_prefork0(tsdn_t *tsdn) {
	malloc_mutex_prefork(tsdn, &background_thread_lock);
	background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
	}
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_postfork_parent(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

void
background_thread_postfork_child(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_postfork_child(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
	if (!background_thread_enabled_at_fork) {
		return;
	}

	/* Clear background_thread state (reset to disabled for child). */
	malloc_mutex_lock(tsdn, &background_thread_lock);
	n_background_threads = 0;
	background_thread_enabled_set(tsdn, false);
	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		int ret = pthread_cond_init(&info->cond, NULL);
		assert(ret == 0);
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);
}

bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
	assert(config_stats);
	malloc_mutex_lock(tsdn, &background_thread_lock);
	if (!background_thread_enabled()) {
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		return true;
	}

	stats->num_threads = n_background_threads;
	uint64_t num_runs = 0;
	nstime_init(&stats->run_interval, 0);
	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		if (info->state != background_thread_stopped) {
			num_runs += info->tot_n_runs;
			nstime_add(&stats->run_interval, &info->tot_sleep_time);
		}
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	stats->num_runs = num_runs;
	if (num_runs > 0) {
		nstime_idivide(&stats->run_interval, num_runs);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);

	return false;
}

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

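/*
 * Resolve the real pthread_create via dlsym(RTLD_NEXT, ...) the first time it
 * is needed.  If resolution fails, background threads cannot be enabled at
 * runtime; we abort if they were requested or if lazy lock depends on the
 * wrapper.
 */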
static bool
pthread_create_fptr_init(void) {
	if (pthread_create_fptr != NULL) {
		return false;
	}
	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
	if (pthread_create_fptr == NULL) {
		can_enable_background_thread = false;
		if (config_lazy_lock || opt_background_thread) {
			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
			    "\"pthread_create\")\n");
			abort();
		}
	} else {
		can_enable_background_thread = true;
	}

	return false;
}

/*
 * When lazy lock is enabled, isthreaded must be set before any
 * background_thread locks are taken.  This is called early from ctl (rather
 * than waiting for a pthread_create call to trigger it) because the mutex is
 * required before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	pthread_create_fptr_init();
	pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

bool
background_thread_boot0(void) {
	if (!have_background_thread && opt_background_thread) {
		malloc_printf("<jemalloc>: option background_thread currently "
		    "supports pthread only\n");
		return true;
	}
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	if ((config_lazy_lock || opt_background_thread) &&
	    pthread_create_fptr_init()) {
		return true;
	}
#endif
	return false;
}

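/*
 * boot1 runs once arenas exist: it sizes the thread pool (capping the default
 * opt_max_background_threads at ncpus), allocates the per-thread info array
 * from the base allocator, and initializes each thread's mutex, condition
 * variable, and stopped state.
 */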
bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
	assert(have_background_thread);
	assert(narenas_total_get() > 0);

	if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
	    ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
		opt_max_background_threads = ncpus;
	}
	max_background_threads = opt_max_background_threads;

	background_thread_enabled_set(tsdn, opt_background_thread);
	if (malloc_mutex_init(&background_thread_lock,
	    "background_thread_global",
	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}

	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
	    b0get(), opt_max_background_threads *
	    sizeof(background_thread_info_t), CACHELINE);
	if (background_thread_info == NULL) {
		return true;
	}

	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		/* Thread mutex is rank_inclusive because of thread0. */
		if (malloc_mutex_init(&info->mtx, "background_thread",
		    WITNESS_RANK_BACKGROUND_THREAD,
		    malloc_mutex_address_ordered)) {
			return true;
		}
		if (pthread_cond_init(&info->cond, NULL)) {
			return true;
		}
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
#endif

	return false;
}