/* xref: /freebsd/contrib/jemalloc/src/tcache.c (revision b0b1dbdd) */
#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

unsigned		nhbins;
size_t			tcache_maxclass;

tcaches_t		*tcaches;

/* Index of first element within tcaches that has never been used. */
static unsigned		tcaches_past;

/* Head of singly linked list tracking available tcaches elements. */
static tcaches_t	*tcaches_avail;

/* Protects tcaches{,_past,_avail}. */
static malloc_mutex_t	tcaches_mtx;

/******************************************************************************/
size_t
tcache_salloc(tsdn_t *tsdn, const void *ptr)
{

	return (arena_salloc(tsdn, ptr, false));
}

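/*
 * Incremental tcache GC, invoked when this tcache's GC ticker fires.  Each
 * event visits one bin (next_gc_bin), flushes roughly 3/4 of the objects that
 * sat unused below the low water mark since the previous visit, and halves or
 * doubles the bin's fill count depending on whether the bin kept a surplus or
 * ran empty.
 */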
void
tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
{
	szind_t binind = tcache->next_gc_bin;
	tcache_bin_t *tbin = &tcache->tbins[binind];
	tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];

	if (tbin->low_water > 0) {
		/*
		 * Flush (ceiling) 3/4 of the objects below the low water mark.
		 */
		if (binind < NBINS) {
			tcache_bin_flush_small(tsd, tcache, tbin, binind,
			    tbin->ncached - tbin->low_water + (tbin->low_water
			    >> 2));
		} else {
			tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
			    - tbin->low_water + (tbin->low_water >> 2), tcache);
		}
		/*
		 * Reduce fill count by 2X.  Limit lg_fill_div such that the
		 * fill count is always at least 1.
		 */
		if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
			tbin->lg_fill_div++;
	} else if (tbin->low_water < 0) {
		/*
		 * Increase fill count by 2X.  Make sure lg_fill_div stays
		 * greater than 0.
		 */
		if (tbin->lg_fill_div > 1)
			tbin->lg_fill_div--;
	}
	tbin->low_water = tbin->ncached;

	tcache->next_gc_bin++;
	if (tcache->next_gc_bin == nhbins)
		tcache->next_gc_bin = 0;
}

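/*
 * Slow path for small allocation: the bin's stack was empty, so ask the arena
 * to refill it and then retry the easy (stack pop) path.
 */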
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
    tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
{
	void *ret;

	arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ?
	    tcache->prof_accumbytes : 0);
	if (config_prof)
		tcache->prof_accumbytes = 0;
	ret = tcache_alloc_easy(tbin, tcache_success);

	return (ret);
}

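/*
 * Flush cached small objects back to their owning arena bins until only "rem"
 * objects remain cached.  Objects may belong to different arenas, so each
 * outer-loop pass locks the arena bin that owns the first remaining object,
 * returns every object owned by that arena, and defers the rest to a later
 * pass.
 */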
void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
    szind_t binind, unsigned rem)
{
	arena_t *arena;
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < NBINS);
	assert(rem <= tbin->ncached);

	arena = arena_choose(tsd, NULL);
	assert(arena != NULL);
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    *(tbin->avail - 1));
		arena_t *bin_arena = extent_node_arena_get(&chunk->node);
		arena_bin_t *bin = &bin_arena->bins[binind];

		if (config_prof && bin_arena == arena) {
			if (arena_prof_accum(tsd_tsdn(tsd), arena,
			    tcache->prof_accumbytes))
				prof_idump(tsd_tsdn(tsd));
			tcache->prof_accumbytes = 0;
		}

		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
		if (config_stats && bin_arena == arena) {
			assert(!merged_stats);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = *(tbin->avail - 1 - i);
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (extent_node_arena_get(&chunk->node) == bin_arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> LG_PAGE;
				arena_chunk_map_bits_t *bitselm =
				    arena_bitselm_get_mutable(chunk, pageind);
				arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
				    bin_arena, chunk, ptr, bitselm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				*(tbin->avail - 1 - ndeferred) = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
		arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
	}
	if (config_stats && !merged_stats) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &arena->bins[binind];
		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
	}

	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
	    sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

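/*
 * Large-object analogue of tcache_bin_flush_small(): flush until "rem" objects
 * remain cached, locking one owning arena per pass and deferring objects that
 * belong to other arenas.
 */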
void
tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
    unsigned rem, tcache_t *tcache)
{
	arena_t *arena;
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	arena = arena_choose(tsd, NULL);
	assert(arena != NULL);
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    *(tbin->avail - 1));
		arena_t *locked_arena = extent_node_arena_get(&chunk->node);
		UNUSED bool idump;

		if (config_prof)
			idump = false;
		malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock);
		if ((config_prof || config_stats) && locked_arena == arena) {
			if (config_prof) {
				idump = arena_prof_accum_locked(arena,
				    tcache->prof_accumbytes);
				tcache->prof_accumbytes = 0;
			}
			if (config_stats) {
				merged_stats = true;
				arena->stats.nrequests_large +=
				    tbin->tstats.nrequests;
				arena->stats.lstats[binind - NBINS].nrequests +=
				    tbin->tstats.nrequests;
				tbin->tstats.nrequests = 0;
			}
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = *(tbin->avail - 1 - i);
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (extent_node_arena_get(&chunk->node) ==
			    locked_arena) {
				arena_dalloc_large_junked_locked(tsd_tsdn(tsd),
				    locked_arena, chunk, ptr);
			} else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				*(tbin->avail - 1 - ndeferred) = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock);
		if (config_prof && idump)
			prof_idump(tsd_tsdn(tsd));
		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
		    ndeferred);
	}
	if (config_stats && !merged_stats) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - NBINS].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
	}

	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
	    sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

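/*
 * Associate/dissociate a tcache with an arena.  Only needed when statistics
 * are enabled: the arena keeps a list of its extant tcaches so that their
 * per-thread counters can be merged into the arena's stats.
 */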
static void
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{

	if (config_stats) {
		/* Link into list of extant tcaches. */
		malloc_mutex_lock(tsdn, &arena->lock);
		ql_elm_new(tcache, link);
		ql_tail_insert(&arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(tsdn, &arena->lock);
	}
}

static void
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{

	if (config_stats) {
		/* Unlink from list of extant tcaches. */
		malloc_mutex_lock(tsdn, &arena->lock);
		if (config_debug) {
			bool in_ql = false;
			tcache_t *iter;
			ql_foreach(iter, &arena->tcache_ql, link) {
				if (iter == tcache) {
					in_ql = true;
					break;
				}
			}
			assert(in_ql);
		}
		ql_remove(&arena->tcache_ql, tcache, link);
		tcache_stats_merge(tsdn, tcache, arena);
		malloc_mutex_unlock(tsdn, &arena->lock);
	}
}

void
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena,
    arena_t *newarena)
{

	tcache_arena_dissociate(tsdn, tcache, oldarena);
	tcache_arena_associate(tsdn, tcache, newarena);
}

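/*
 * Lazily create the calling thread's tcache on first use, unless tcaching has
 * been disabled for the thread.
 */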
tcache_t *
tcache_get_hard(tsd_t *tsd)
{
	arena_t *arena;

	if (!tcache_enabled_get()) {
		if (tsd_nominal(tsd))
			tcache_enabled_set(false); /* Memoize. */
		return (NULL);
	}
	arena = arena_choose(tsd, NULL);
	if (unlikely(arena == NULL))
		return (NULL);
	return (tcache_create(tsd_tsdn(tsd), arena));
}

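/*
 * Allocate and initialize a tcache.  The tcache_t header and all per-bin
 * pointer stacks are carved out of a single cacheline-aligned allocation;
 * each bin's avail pointer is set to point just past its slice of the stack
 * space (see the comment in the loop below).
 */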
tcache_t *
tcache_create(tsdn_t *tsdn, arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/* Avoid false cacheline sharing. */
	size = sa2u(size, CACHELINE);

	tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true,
	    arena_get(TSDN_NULL, 0, true));
	if (tcache == NULL)
		return (NULL);

	tcache_arena_associate(tsdn, tcache, arena);

	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);

	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
		/*
		 * avail points past the available space.  Allocations will
		 * access the slots toward higher addresses (for the benefit of
		 * prefetch).
		 */
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
	}

	return (tcache);
}

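/*
 * Tear down a tcache: flush every bin back to the arena, merge any remaining
 * per-thread request stats, and free the tcache itself.
 */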
static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache)
{
	arena_t *arena;
	unsigned i;

	arena = arena_choose(tsd, NULL);
	tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena);

	for (i = 0; i < NBINS; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tsd, tcache, tbin, i, 0);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
		}
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - NBINS].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
		}
	}

	if (config_prof && tcache->prof_accumbytes > 0 &&
	    arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes))
		prof_idump(tsd_tsdn(tsd));

	idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true);
}

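/* Thread-exit hook: destroy the calling thread's implicit tcache, if any. */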
void
tcache_cleanup(tsd_t *tsd)
{
	tcache_t *tcache;

	if (!config_tcache)
		return;

	if ((tcache = tsd_tcache_get(tsd)) != NULL) {
		tcache_destroy(tsd, tcache);
		tsd_tcache_set(tsd, NULL);
	}
}

void
tcache_enabled_cleanup(tsd_t *tsd)
{

	/* Do nothing. */
}

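/*
 * Merge this tcache's request counters into its arena's stats.  The caller
 * must hold the arena lock; the per-bin locks are taken here as needed.
 */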
void
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	cassert(config_stats);

	malloc_mutex_assert_owner(tsdn, &arena->lock);

	/* Merge and reset tcache stats. */
	for (i = 0; i < NBINS; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(tsdn, &bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(tsdn, &bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}

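/*
 * Explicitly managed tcaches (the MALLOCX_TCACHE(ind) interface).  Entries in
 * the global tcaches[] array are handed out in creation order and recycled
 * through the tcaches_avail free list; all of this is serialized by
 * tcaches_mtx.
 */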
static bool
tcaches_create_prep(tsd_t *tsd) {
	bool err;

	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);

	if (tcaches == NULL) {
		tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) *
		    (MALLOCX_TCACHE_MAX+1));
		if (tcaches == NULL) {
			err = true;
			goto label_return;
		}
	}

	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
		err = true;
		goto label_return;
	}

	err = false;
label_return:
	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
	return err;
}

bool
tcaches_create(tsd_t *tsd, unsigned *r_ind) {
	bool err;
	arena_t *arena;
	tcache_t *tcache;
	tcaches_t *elm;

	if (tcaches_create_prep(tsd)) {
		err = true;
		goto label_return;
	}

	arena = arena_ichoose(tsd, NULL);
	if (unlikely(arena == NULL)) {
		err = true;
		goto label_return;
	}
	tcache = tcache_create(tsd_tsdn(tsd), arena);
	if (tcache == NULL) {
		err = true;
		goto label_return;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
	if (tcaches_avail != NULL) {
		elm = tcaches_avail;
		tcaches_avail = tcaches_avail->next;
		elm->tcache = tcache;
		*r_ind = (unsigned)(elm - tcaches);
	} else {
		elm = &tcaches[tcaches_past];
		elm->tcache = tcache;
		*r_ind = tcaches_past;
		tcaches_past++;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);

	err = false;
label_return:
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &tcaches_mtx);
	return err;
}

static void
tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);

	if (elm->tcache == NULL) {
		return;
	}
	tcache_destroy(tsd, elm->tcache);
	elm->tcache = NULL;
}

void
tcaches_flush(tsd_t *tsd, unsigned ind) {
	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
	tcaches_elm_flush(tsd, &tcaches[ind]);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
}

void
tcaches_destroy(tsd_t *tsd, unsigned ind) {
	tcaches_t *elm;

	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
	elm = &tcaches[ind];
	tcaches_elm_flush(tsd, elm);
	elm->next = tcaches_avail;
	tcaches_avail = elm;
	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
}

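/*
 * One-time initialization: clamp tcache_maxclass to [SMALL_MAXCLASS,
 * large_maxclass], derive nhbins from it, and size each bin's ncached_max
 * (small bins cache up to twice the regions per run, clamped to
 * [TCACHE_NSLOTS_SMALL_MIN, TCACHE_NSLOTS_SMALL_MAX]; large bins cache
 * TCACHE_NSLOTS_LARGE).
 */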
bool
tcache_boot(tsdn_t *tsdn) {
	unsigned i;

	cassert(config_tcache);

	/*
	 * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is
	 * known.
	 */
	if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS)
		tcache_maxclass = SMALL_MAXCLASS;
	else if ((ZU(1) << opt_lg_tcache_max) > large_maxclass)
		tcache_maxclass = large_maxclass;
	else
		tcache_maxclass = (ZU(1) << opt_lg_tcache_max);

	if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES)) {
		return true;
	}

	nhbins = size2index(tcache_maxclass) + 1;

	/* Initialize tcache_bin_info. */
	tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins *
	    sizeof(tcache_bin_info_t));
	if (tcache_bin_info == NULL)
		return (true);
	stack_nelms = 0;
	for (i = 0; i < NBINS; i++) {
		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MIN;
		} else if ((arena_bin_info[i].nregs << 1) <=
		    TCACHE_NSLOTS_SMALL_MAX) {
			tcache_bin_info[i].ncached_max =
			    (arena_bin_info[i].nregs << 1);
		} else {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MAX;
		}
		stack_nelms += tcache_bin_info[i].ncached_max;
	}
	for (; i < nhbins; i++) {
		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
		stack_nelms += tcache_bin_info[i].ncached_max;
	}

	return (false);
}

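/*
 * Fork handlers: acquire/release tcaches_mtx around fork so that the child
 * does not inherit a locked mutex.  (Skipped when profiling is enabled; see
 * the condition below.)
 */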
void
tcache_prefork(tsdn_t *tsdn) {
	if (!config_prof && opt_tcache) {
		malloc_mutex_prefork(tsdn, &tcaches_mtx);
	}
}

void
tcache_postfork_parent(tsdn_t *tsdn) {
	if (!config_prof && opt_tcache) {
		malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
	}
}

void
tcache_postfork_child(tsdn_t *tsdn) {
	if (!config_prof && opt_tcache) {
		malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
	}
}