1 #define JEMALLOC_BACKGROUND_THREAD_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 
7 /******************************************************************************/
8 /* Data. */
9 
10 /* This option should be opt-in only. */
11 #define BACKGROUND_THREAD_DEFAULT false
12 /* Read-only after initialization. */
13 bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
14 
15 /* Used for thread creation, termination and stats. */
16 malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads this without locking. */
18 atomic_b_t background_thread_enabled_state;
19 size_t n_background_threads;
/* Per-thread info, one entry per background thread index. */
21 background_thread_info_t *background_thread_info;
22 
/* False if the necessary runtime support is missing. */
24 bool can_enable_background_thread;
25 
26 /******************************************************************************/
27 
28 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
29 #include <dlfcn.h>
30 
31 static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
32     void *(*)(void *), void *__restrict);
33 static pthread_once_t once_control = PTHREAD_ONCE_INIT;
34 
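/*
 * Wrapper around the real pthread_create(), which is resolved via
 * dlsym(RTLD_NEXT) in background_thread_boot0().  With lazy locking, the
 * first call through the wrapper also flips isthreaded.
 */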
35 static void
36 pthread_create_wrapper_once(void) {
37 #ifdef JEMALLOC_LAZY_LOCK
38 	isthreaded = true;
39 #endif
40 }
41 
42 int
43 pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
44     void *(*start_routine)(void *), void *__restrict arg) {
45 	pthread_once(&once_control, pthread_create_wrapper_once);
46 
47 	return pthread_create_fptr(thread, attr, start_routine, arg);
48 }
49 #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
50 
51 #ifndef JEMALLOC_BACKGROUND_THREAD
52 #define NOT_REACHED { not_reached(); }
53 bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
54 bool background_threads_enable(tsd_t *tsd) NOT_REACHED
55 bool background_threads_disable(tsd_t *tsd) NOT_REACHED
56 void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
57     arena_decay_t *decay, size_t npages_new) NOT_REACHED
58 void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
59 void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
60 void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
61 void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
62 bool background_thread_stats_read(tsdn_t *tsdn,
63     background_thread_stats_t *stats) NOT_REACHED
64 void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
65 #undef NOT_REACHED
66 #else
67 
68 static bool background_thread_enabled_at_fork;
69 
70 static void
71 background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
72 	background_thread_wakeup_time_set(tsdn, info, 0);
73 	info->npages_to_purge_new = 0;
74 	if (config_stats) {
75 		info->tot_n_runs = 0;
76 		nstime_init(&info->tot_sleep_time, 0);
77 	}
78 }
79 
80 static inline bool
81 set_current_thread_affinity(UNUSED int cpu) {
82 #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
83 	cpu_set_t cpuset;
84 	CPU_ZERO(&cpuset);
85 	CPU_SET(cpu, &cpuset);
86 	int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
87 
88 	return (ret != 0);
89 #else
90 	return false;
91 #endif
92 }
93 
94 /* Threshold for determining when to wake up the background thread. */
95 #define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
96 #define BILLION UINT64_C(1000000000)
97 /* Minimal sleep interval 100 ms. */
98 #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
99 
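/*
 * Estimate how many pages would become purgeable if the decay clock advanced
 * by interval epochs and no new dirty pages arrived: entries that age out
 * completely contribute their full remaining weight h_steps[i]; the rest
 * contribute the weight they would lose, h_steps[i] - h_steps[i - interval].
 * The fixed-point sum is scaled back by SMOOTHSTEP_BFP.
 */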
100 static inline size_t
101 decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
102 	size_t i;
103 	uint64_t sum = 0;
104 	for (i = 0; i < interval; i++) {
105 		sum += decay->backlog[i] * h_steps[i];
106 	}
107 	for (; i < SMOOTHSTEP_NSTEPS; i++) {
108 		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
109 	}
110 
111 	return (size_t)(sum >> SMOOTHSTEP_BFP);
112 }
113 
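/*
 * Compute how long the background thread can sleep before this decay state
 * needs attention again.  The result is always at least
 * BACKGROUND_THREAD_MIN_INTERVAL_NS, and is
 * BACKGROUND_THREAD_INDEFINITE_SLEEP when purging is eager / disabled or no
 * dirty pages are pending.
 */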
114 static uint64_t
115 arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
116     extents_t *extents) {
117 	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		/* Use the minimum interval if the decay mutex is contended. */
119 		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
120 	}
121 
122 	uint64_t interval;
123 	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
124 	if (decay_time <= 0) {
		/* Purging is either eager or currently disabled. */
126 		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
127 		goto label_done;
128 	}
129 
130 	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
131 	assert(decay_interval_ns > 0);
132 	size_t npages = extents_npages_get(extents);
133 	if (npages == 0) {
134 		unsigned i;
135 		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
136 			if (decay->backlog[i] > 0) {
137 				break;
138 			}
139 		}
140 		if (i == SMOOTHSTEP_NSTEPS) {
141 			/* No dirty pages recorded.  Sleep indefinitely. */
142 			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
143 			goto label_done;
144 		}
145 	}
146 	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
147 		/* Use max interval. */
148 		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
149 		goto label_done;
150 	}
151 
152 	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
153 	size_t ub = SMOOTHSTEP_NSTEPS;
	/* Require at least 2 intervals to reach the next epoch deadline. */
155 	lb = (lb < 2) ? 2 : lb;
156 	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
157 	    (lb + 2 > ub)) {
158 		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
159 		goto label_done;
160 	}
161 
162 	assert(lb + 2 <= ub);
163 	size_t npurge_lb, npurge_ub;
164 	npurge_lb = decay_npurge_after_interval(decay, lb);
165 	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
166 		interval = decay_interval_ns * lb;
167 		goto label_done;
168 	}
169 	npurge_ub = decay_npurge_after_interval(decay, ub);
170 	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
171 		interval = decay_interval_ns * ub;
172 		goto label_done;
173 	}
174 
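	/*
	 * Binary search for the smallest interval in [lb, ub] whose estimated
	 * purge count reaches BACKGROUND_THREAD_NPAGES_THRESHOLD; stop once
	 * the estimates at the two bounds are within the threshold of each
	 * other, or the bounds are within 2 epochs, then sleep for the
	 * midpoint.
	 */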
175 	unsigned n_search = 0;
176 	size_t target, npurge;
177 	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
178 	    && (lb + 2 < ub)) {
179 		target = (lb + ub) / 2;
180 		npurge = decay_npurge_after_interval(decay, target);
181 		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
182 			ub = target;
183 			npurge_ub = npurge;
184 		} else {
185 			lb = target;
186 			npurge_lb = npurge;
187 		}
188 		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
189 	}
190 	interval = decay_interval_ns * (ub + lb) / 2;
191 label_done:
192 	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
193 	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
194 	malloc_mutex_unlock(tsdn, &decay->mtx);
195 
196 	return interval;
197 }
198 
199 /* Compute purge interval for background threads. */
200 static uint64_t
201 arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
202 	uint64_t i1, i2;
203 	i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
204 	    &arena->extents_dirty);
205 	if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
206 		return i1;
207 	}
208 	i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
209 	    &arena->extents_muzzy);
210 
211 	return i1 < i2 ? i1 : i2;
212 }
213 
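/*
 * Sleep with info->mtx held.  The condvar wait releases the underlying pthread
 * mutex while sleeping and reacquires it before returning; the scheduled
 * wakeup time is published so that background_thread_interval_check() can
 * decide whether to signal us early.
 */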
214 static void
215 background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
216     uint64_t interval) {
217 	if (config_stats) {
218 		info->tot_n_runs++;
219 	}
220 	info->npages_to_purge_new = 0;
221 
222 	struct timeval tv;
	/*
	 * pthread_cond_timedwait() needs an absolute time on the condvar's
	 * clock (CLOCK_REALTIME by default), hence gettimeofday() here.
	 */
224 	gettimeofday(&tv, NULL);
225 	nstime_t before_sleep;
226 	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);
227 
228 	int ret;
229 	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
230 		assert(background_thread_indefinite_sleep(info));
231 		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
232 		assert(ret == 0);
233 	} else {
234 		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
235 		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
		/*
		 * The recorded wakeup time uses the malloc clock, which can
		 * differ from the gettimeofday() clock used above.
		 */
237 		nstime_t next_wakeup;
238 		nstime_init(&next_wakeup, 0);
239 		nstime_update(&next_wakeup);
240 		nstime_iadd(&next_wakeup, interval);
241 		assert(nstime_ns(&next_wakeup) <
242 		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
243 		background_thread_wakeup_time_set(tsdn, info,
244 		    nstime_ns(&next_wakeup));
245 
246 		nstime_t ts_wakeup;
247 		nstime_copy(&ts_wakeup, &before_sleep);
248 		nstime_iadd(&ts_wakeup, interval);
249 		struct timespec ts;
250 		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
251 		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
252 
253 		assert(!background_thread_indefinite_sleep(info));
254 		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
255 		assert(ret == ETIMEDOUT || ret == 0);
256 		background_thread_wakeup_time_set(tsdn, info,
257 		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
258 	}
259 	if (config_stats) {
260 		gettimeofday(&tv, NULL);
261 		nstime_t after_sleep;
262 		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
263 		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
264 			nstime_subtract(&after_sleep, &before_sleep);
265 			nstime_add(&info->tot_sleep_time, &after_sleep);
266 		}
267 	}
268 }
269 
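/*
 * Returns true if the thread is paused; the caller is expected to recheck its
 * state and retry.
 */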
270 static bool
271 background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
272 	if (unlikely(info->state == background_thread_paused)) {
273 		malloc_mutex_unlock(tsdn, &info->mtx);
		/*
		 * Block on the global lock until the thread that initiated
		 * the pause updates our status.
		 */
275 		malloc_mutex_lock(tsdn, &background_thread_lock);
276 		malloc_mutex_unlock(tsdn, &background_thread_lock);
277 		malloc_mutex_lock(tsdn, &info->mtx);
278 		return true;
279 	}
280 
281 	return false;
282 }
283 
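/*
 * One iteration of work for background thread ind: run decay for every arena
 * assigned to it (arena indices congruent to ind modulo ncpus), compute the
 * shortest purge interval any of them needs, and sleep for that long.
 */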
284 static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
286 	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
287 	unsigned narenas = narenas_total_get();
288 
289 	for (unsigned i = ind; i < narenas; i += ncpus) {
290 		arena_t *arena = arena_get(tsdn, i, false);
291 		if (!arena) {
292 			continue;
293 		}
294 		arena_decay(tsdn, arena, true, false);
295 		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
296 			/* Min interval will be used. */
297 			continue;
298 		}
299 		uint64_t interval = arena_decay_compute_purge_interval(tsdn,
300 		    arena);
301 		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
302 		if (min_interval > interval) {
303 			min_interval = interval;
304 		}
305 	}
306 	background_thread_sleep(tsdn, info, min_interval);
307 }
308 
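/*
 * Stop a single background thread: mark it stopped, signal it out of any
 * sleep, and join it.  Returns true only if pthread_join() fails.
 */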
309 static bool
310 background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
311 	if (info == &background_thread_info[0]) {
312 		malloc_mutex_assert_owner(tsd_tsdn(tsd),
313 		    &background_thread_lock);
314 	} else {
315 		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
316 		    &background_thread_lock);
317 	}
318 
319 	pre_reentrancy(tsd, NULL);
320 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
321 	bool has_thread;
322 	assert(info->state != background_thread_paused);
323 	if (info->state == background_thread_started) {
324 		has_thread = true;
325 		info->state = background_thread_stopped;
326 		pthread_cond_signal(&info->cond);
327 	} else {
328 		has_thread = false;
329 	}
330 	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
331 
332 	if (!has_thread) {
333 		post_reentrancy(tsd);
334 		return false;
335 	}
336 	void *ret;
337 	if (pthread_join(info->thread, &ret)) {
338 		post_reentrancy(tsd);
339 		return true;
340 	}
341 	assert(ret == NULL);
342 	n_background_threads--;
343 	post_reentrancy(tsd);
344 
345 	return false;
346 }
347 
348 static void *background_thread_entry(void *ind_arg);
349 
350 static int
351 background_thread_create_signals_masked(pthread_t *thread,
352     const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
353 	/*
354 	 * Mask signals during thread creation so that the thread inherits
355 	 * an empty signal set.
356 	 */
357 	sigset_t set;
358 	sigfillset(&set);
359 	sigset_t oldset;
360 	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
361 	if (mask_err != 0) {
362 		return mask_err;
363 	}
364 	int create_err = pthread_create_wrapper(thread, attr, start_routine,
365 	    arg);
366 	/*
367 	 * Restore the signal mask.  Failure to restore the signal mask here
368 	 * changes program behavior.
369 	 */
370 	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
371 	if (restore_err != 0) {
372 		malloc_printf("<jemalloc>: background thread creation "
373 		    "failed (%d), and signal mask restoration failed "
374 		    "(%d)\n", create_err, restore_err);
375 		if (opt_abort) {
376 			abort();
377 		}
378 	}
379 	return create_err;
380 }
381 
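/*
 * Called from thread 0's work loop: if background_thread_create() has marked
 * new per-thread info slots as started, create the corresponding threads.
 * The global lock is dropped around thread creation to avoid deadlocking
 * against the prefork handlers.
 */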
382 static void
383 check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
384     bool *created_threads) {
385 	if (likely(*n_created == n_background_threads)) {
386 		return;
387 	}
388 
389 	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
390 label_restart:
391 	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
392 	for (unsigned i = 1; i < ncpus; i++) {
393 		if (created_threads[i]) {
394 			continue;
395 		}
396 		background_thread_info_t *info = &background_thread_info[i];
397 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
398 		assert(info->state != background_thread_paused);
399 		bool create = (info->state == background_thread_started);
400 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
401 		if (!create) {
402 			continue;
403 		}
404 
405 		/*
406 		 * To avoid deadlock with prefork handlers (which waits for the
407 		 * mutex held here), unlock before calling pthread_create().
408 		 */
409 		malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
410 
411 		pre_reentrancy(tsd, NULL);
412 		int err = background_thread_create_signals_masked(&info->thread,
413 		    NULL, background_thread_entry, (void *)(uintptr_t)i);
414 		post_reentrancy(tsd);
415 
416 		if (err == 0) {
417 			(*n_created)++;
418 			created_threads[i] = true;
419 		} else {
420 			malloc_printf("<jemalloc>: background thread "
421 			    "creation failed (%d)\n", err);
422 			if (opt_abort) {
423 				abort();
424 			}
425 		}
426 		/* Restart since we unlocked. */
427 		goto label_restart;
428 	}
429 	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
430 	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
431 }
432 
433 static void
434 background_thread0_work(tsd_t *tsd) {
	/* Thread 0 also launches and terminates other background threads. */
436 	VARIABLE_ARRAY(bool, created_threads, ncpus);
437 	unsigned i;
438 	for (i = 1; i < ncpus; i++) {
439 		created_threads[i] = false;
440 	}
441 	/* Start working, and create more threads when asked. */
442 	unsigned n_created = 1;
443 	while (background_thread_info[0].state != background_thread_stopped) {
444 		if (background_thread_pause_check(tsd_tsdn(tsd),
445 		    &background_thread_info[0])) {
446 			continue;
447 		}
448 		check_background_thread_creation(tsd, &n_created,
449 		    (bool *)&created_threads);
450 		background_work_sleep_once(tsd_tsdn(tsd),
451 		    &background_thread_info[0], 0);
452 	}
453 
454 	/*
455 	 * Shut down other threads at exit.  Note that the ctl thread is holding
456 	 * the global background_thread mutex (and is waiting) for us.
457 	 */
458 	assert(!background_thread_enabled());
459 	for (i = 1; i < ncpus; i++) {
460 		background_thread_info_t *info = &background_thread_info[i];
461 		assert(info->state != background_thread_paused);
462 		if (created_threads[i]) {
463 			background_threads_disable_single(tsd, info);
464 		} else {
465 			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
466 			/* Clear in case the thread wasn't created. */
467 			info->state = background_thread_stopped;
468 			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
469 		}
470 	}
471 	background_thread_info[0].state = background_thread_stopped;
472 	assert(n_background_threads == 1);
473 }
474 
475 static void
476 background_work(tsd_t *tsd, unsigned ind) {
477 	background_thread_info_t *info = &background_thread_info[ind];
478 
479 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
480 	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
481 	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
482 	if (ind == 0) {
483 		background_thread0_work(tsd);
484 	} else {
485 		while (info->state != background_thread_stopped) {
486 			if (background_thread_pause_check(tsd_tsdn(tsd),
487 			    info)) {
488 				continue;
489 			}
490 			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
491 		}
492 	}
493 	assert(info->state == background_thread_stopped);
494 	background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
495 	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
496 }
497 
498 static void *
499 background_thread_entry(void *ind_arg) {
500 	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
501 	assert(thread_ind < ncpus);
502 #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
503 	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
504 #endif
505 	if (opt_percpu_arena != percpu_arena_disabled) {
506 		set_current_thread_affinity((int)thread_ind);
507 	}
508 	/*
509 	 * Start periodic background work.  We use internal tsd which avoids
510 	 * side effects, for example triggering new arena creation (which in
511 	 * turn triggers another background thread creation).
512 	 */
513 	background_work(tsd_internal_fetch(), thread_ind);
514 	assert(pthread_equal(pthread_self(),
515 	    background_thread_info[thread_ind].thread));
516 
517 	return NULL;
518 }
519 
520 static void
521 background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
522 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
523 	info->state = background_thread_started;
524 	background_thread_info_init(tsd_tsdn(tsd), info);
525 	n_background_threads++;
526 }
527 
528 /* Create a new background thread if needed. */
529 bool
530 background_thread_create(tsd_t *tsd, unsigned arena_ind) {
531 	assert(have_background_thread);
532 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
533 
534 	/* We create at most NCPUs threads. */
535 	size_t thread_ind = arena_ind % ncpus;
536 	background_thread_info_t *info = &background_thread_info[thread_ind];
537 
538 	bool need_new_thread;
539 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
540 	need_new_thread = background_thread_enabled() &&
541 	    (info->state == background_thread_stopped);
542 	if (need_new_thread) {
543 		background_thread_init(tsd, info);
544 	}
545 	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
546 	if (!need_new_thread) {
547 		return false;
548 	}
549 	if (arena_ind != 0) {
550 		/* Threads are created asynchronously by Thread 0. */
551 		background_thread_info_t *t0 = &background_thread_info[0];
552 		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
553 		assert(t0->state == background_thread_started);
554 		pthread_cond_signal(&t0->cond);
555 		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
556 
557 		return false;
558 	}
559 
560 	pre_reentrancy(tsd, NULL);
561 	/*
562 	 * To avoid complications (besides reentrancy), create internal
563 	 * background threads with the underlying pthread_create.
564 	 */
565 	int err = background_thread_create_signals_masked(&info->thread, NULL,
566 	    background_thread_entry, (void *)thread_ind);
567 	post_reentrancy(tsd);
568 
569 	if (err != 0) {
570 		malloc_printf("<jemalloc>: arena 0 background thread creation "
571 		    "failed (%d)\n", err);
572 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
573 		info->state = background_thread_stopped;
574 		n_background_threads--;
575 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
576 
577 		return true;
578 	}
579 
580 	return false;
581 }
582 
583 bool
584 background_threads_enable(tsd_t *tsd) {
585 	assert(n_background_threads == 0);
586 	assert(background_thread_enabled());
587 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
588 
589 	VARIABLE_ARRAY(bool, marked, ncpus);
590 	unsigned i, nmarked;
591 	for (i = 0; i < ncpus; i++) {
592 		marked[i] = false;
593 	}
594 	nmarked = 0;
	/*
	 * Mark the per-thread info slots (one per arena index mod ncpus) that
	 * thread 0 will need to create threads for.
	 */
596 	unsigned n = narenas_total_get();
597 	for (i = 1; i < n; i++) {
598 		if (marked[i % ncpus] ||
599 		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
600 			continue;
601 		}
		background_thread_info_t *info =
		    &background_thread_info[i % ncpus];
603 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
604 		assert(info->state == background_thread_stopped);
605 		background_thread_init(tsd, info);
606 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
607 		marked[i % ncpus] = true;
608 		if (++nmarked == ncpus) {
609 			break;
610 		}
611 	}
612 
613 	return background_thread_create(tsd, 0);
614 }
615 
616 bool
617 background_threads_disable(tsd_t *tsd) {
618 	assert(!background_thread_enabled());
619 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
620 
621 	/* Thread 0 will be responsible for terminating other threads. */
622 	if (background_threads_disable_single(tsd,
623 	    &background_thread_info[0])) {
624 		return true;
625 	}
626 	assert(n_background_threads == 0);
627 
628 	return false;
629 }
630 
631 /* Check if we need to signal the background thread early. */
632 void
633 background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
634     arena_decay_t *decay, size_t npages_new) {
635 	background_thread_info_t *info = arena_background_thread_info_get(
636 	    arena);
637 	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
638 		/*
639 		 * Background thread may hold the mutex for a long period of
640 		 * time.  We'd like to avoid the variance on application
641 		 * threads.  So keep this non-blocking, and leave the work to a
642 		 * future epoch.
643 		 */
644 		return;
645 	}
646 
647 	if (info->state != background_thread_started) {
648 		goto label_done;
649 	}
650 	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
651 		goto label_done;
652 	}
653 
654 	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
655 	if (decay_time <= 0) {
		/* Purging is either eager or currently disabled. */
657 		goto label_done_unlock2;
658 	}
659 	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
660 	assert(decay_interval_ns > 0);
661 
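	/*
	 * Compare the scheduled wakeup time against the current decay epoch;
	 * if the thread would wake up within one minimum interval anyway,
	 * there is no point in signaling it early.
	 */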
662 	nstime_t diff;
663 	nstime_init(&diff, background_thread_wakeup_time_get(info));
664 	if (nstime_compare(&diff, &decay->epoch) <= 0) {
665 		goto label_done_unlock2;
666 	}
667 	nstime_subtract(&diff, &decay->epoch);
668 	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
669 		goto label_done_unlock2;
670 	}
671 
672 	if (npages_new > 0) {
673 		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
674 		/*
675 		 * Compute how many new pages we would need to purge by the next
676 		 * wakeup, which is used to determine if we should signal the
677 		 * background thread.
678 		 */
679 		uint64_t npurge_new;
680 		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
681 			npurge_new = npages_new;
682 		} else {
683 			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
684 			assert(h_steps_max >=
685 			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
686 			npurge_new = npages_new * (h_steps_max -
687 			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
688 			npurge_new >>= SMOOTHSTEP_BFP;
689 		}
690 		info->npages_to_purge_new += npurge_new;
691 	}
692 
693 	bool should_signal;
694 	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
695 		should_signal = true;
696 	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
697 	    (extents_npages_get(&arena->extents_dirty) > 0 ||
698 	    extents_npages_get(&arena->extents_muzzy) > 0 ||
699 	    info->npages_to_purge_new > 0)) {
700 		should_signal = true;
701 	} else {
702 		should_signal = false;
703 	}
704 
705 	if (should_signal) {
706 		info->npages_to_purge_new = 0;
707 		pthread_cond_signal(&info->cond);
708 	}
709 label_done_unlock2:
710 	malloc_mutex_unlock(tsdn, &decay->mtx);
711 label_done:
712 	malloc_mutex_unlock(tsdn, &info->mtx);
713 }
714 
715 void
716 background_thread_prefork0(tsdn_t *tsdn) {
717 	malloc_mutex_prefork(tsdn, &background_thread_lock);
718 	background_thread_enabled_at_fork = background_thread_enabled();
719 }
720 
721 void
722 background_thread_prefork1(tsdn_t *tsdn) {
723 	for (unsigned i = 0; i < ncpus; i++) {
724 		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
725 	}
726 }
727 
728 void
729 background_thread_postfork_parent(tsdn_t *tsdn) {
730 	for (unsigned i = 0; i < ncpus; i++) {
731 		malloc_mutex_postfork_parent(tsdn,
732 		    &background_thread_info[i].mtx);
733 	}
734 	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
735 }
736 
737 void
738 background_thread_postfork_child(tsdn_t *tsdn) {
739 	for (unsigned i = 0; i < ncpus; i++) {
740 		malloc_mutex_postfork_child(tsdn,
741 		    &background_thread_info[i].mtx);
742 	}
743 	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
744 	if (!background_thread_enabled_at_fork) {
745 		return;
746 	}
747 
748 	/* Clear background_thread state (reset to disabled for child). */
749 	malloc_mutex_lock(tsdn, &background_thread_lock);
750 	n_background_threads = 0;
751 	background_thread_enabled_set(tsdn, false);
752 	for (unsigned i = 0; i < ncpus; i++) {
753 		background_thread_info_t *info = &background_thread_info[i];
754 		malloc_mutex_lock(tsdn, &info->mtx);
755 		info->state = background_thread_stopped;
756 		int ret = pthread_cond_init(&info->cond, NULL);
757 		assert(ret == 0);
758 		background_thread_info_init(tsdn, info);
759 		malloc_mutex_unlock(tsdn, &info->mtx);
760 	}
761 	malloc_mutex_unlock(tsdn, &background_thread_lock);
762 }
763 
764 bool
765 background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
766 	assert(config_stats);
767 	malloc_mutex_lock(tsdn, &background_thread_lock);
768 	if (!background_thread_enabled()) {
769 		malloc_mutex_unlock(tsdn, &background_thread_lock);
770 		return true;
771 	}
772 
773 	stats->num_threads = n_background_threads;
774 	uint64_t num_runs = 0;
775 	nstime_init(&stats->run_interval, 0);
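	/* run_interval ends up as the average sleep time per run. */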
776 	for (unsigned i = 0; i < ncpus; i++) {
777 		background_thread_info_t *info = &background_thread_info[i];
778 		malloc_mutex_lock(tsdn, &info->mtx);
779 		if (info->state != background_thread_stopped) {
780 			num_runs += info->tot_n_runs;
781 			nstime_add(&stats->run_interval, &info->tot_sleep_time);
782 		}
783 		malloc_mutex_unlock(tsdn, &info->mtx);
784 	}
785 	stats->num_runs = num_runs;
786 	if (num_runs > 0) {
787 		nstime_idivide(&stats->run_interval, num_runs);
788 	}
789 	malloc_mutex_unlock(tsdn, &background_thread_lock);
790 
791 	return false;
792 }
793 
794 #undef BACKGROUND_THREAD_NPAGES_THRESHOLD
795 #undef BILLION
796 #undef BACKGROUND_THREAD_MIN_INTERVAL_NS
797 
798 /*
799  * When lazy lock is enabled, we need to make sure setting isthreaded before
800  * taking any background_thread locks.  This is called early in ctl (instead of
801  * wait for the pthread_create calls to trigger) because the mutex is required
802  * before creating background threads.
803  */
804 void
805 background_thread_ctl_init(tsdn_t *tsdn) {
806 	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
807 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
808 	pthread_once(&once_control, pthread_create_wrapper_once);
809 #endif
810 }
811 
812 #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
813 
814 bool
815 background_thread_boot0(void) {
816 	if (!have_background_thread && opt_background_thread) {
817 		malloc_printf("<jemalloc>: option background_thread currently "
818 		    "supports pthread only\n");
819 		return true;
820 	}
821 
822 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
823 	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
824 	if (pthread_create_fptr == NULL) {
825 		can_enable_background_thread = false;
826 		if (config_lazy_lock || opt_background_thread) {
827 			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
828 			    "\"pthread_create\")\n");
829 			abort();
830 		}
831 	} else {
832 		can_enable_background_thread = true;
833 	}
834 #endif
835 	return false;
836 }
837 
838 bool
839 background_thread_boot1(tsdn_t *tsdn) {
840 #ifdef JEMALLOC_BACKGROUND_THREAD
841 	assert(have_background_thread);
842 	assert(narenas_total_get() > 0);
843 
844 	background_thread_enabled_set(tsdn, opt_background_thread);
845 	if (malloc_mutex_init(&background_thread_lock,
846 	    "background_thread_global",
847 	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
848 	    malloc_mutex_rank_exclusive)) {
849 		return true;
850 	}
851 	if (opt_background_thread) {
852 		background_thread_ctl_init(tsdn);
853 	}
854 
855 	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
856 	    b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE);
857 	if (background_thread_info == NULL) {
858 		return true;
859 	}
860 
861 	for (unsigned i = 0; i < ncpus; i++) {
862 		background_thread_info_t *info = &background_thread_info[i];
		/*
		 * Per-thread mutexes use address ordering (rather than an
		 * exclusive rank) because thread 0 locks other threads'
		 * mutexes while holding its own.
		 */
864 		if (malloc_mutex_init(&info->mtx, "background_thread",
865 		    WITNESS_RANK_BACKGROUND_THREAD,
866 		    malloc_mutex_address_ordered)) {
867 			return true;
868 		}
869 		if (pthread_cond_init(&info->cond, NULL)) {
870 			return true;
871 		}
872 		malloc_mutex_lock(tsdn, &info->mtx);
873 		info->state = background_thread_stopped;
874 		background_thread_info_init(tsdn, info);
875 		malloc_mutex_unlock(tsdn, &info->mtx);
876 	}
877 #endif
878 
879 	return false;
880 }
881