1 #define JEMALLOC_BASE_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/extent_mmap.h"
7 #include "jemalloc/internal/mutex.h"
8 #include "jemalloc/internal/sz.h"
9 
10 /******************************************************************************/
11 /* Data. */
12 
/* Base allocator created with index 0 by base_boot(); returned by b0get(). */
static base_t *b0;

/* Selected metadata THP mode; starts out as METADATA_THP_DEFAULT. */
metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

/*
 * Printable names for the metadata THP modes.
 * NOTE(review): presumably indexed by metadata_thp_mode_t -- confirm that the
 * order here matches the enum declaration.
 */
const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};
22 
23 /******************************************************************************/
24 
25 static inline bool
metadata_thp_madvise(void)26 metadata_thp_madvise(void) {
27 	return (metadata_thp_enabled() &&
28 	    (init_system_thp_mode == thp_mode_default));
29 }
30 
31 static void *
base_map(tsdn_t * tsdn,extent_hooks_t * extent_hooks,unsigned ind,size_t size)32 base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
33 	void *addr;
34 	bool zero = true;
35 	bool commit = true;
36 
37 	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
38 	assert(size == HUGEPAGE_CEILING(size));
39 	size_t alignment = HUGEPAGE;
40 	if (extent_hooks == &extent_hooks_default) {
41 		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
42 	} else {
43 		/* No arena context as we are creating new arenas. */
44 		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
45 		pre_reentrancy(tsd, NULL);
46 		addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
47 		    &zero, &commit, ind);
48 		post_reentrancy(tsd);
49 	}
50 
51 	return addr;
52 }
53 
54 static void
base_unmap(tsdn_t * tsdn,extent_hooks_t * extent_hooks,unsigned ind,void * addr,size_t size)55 base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
56     size_t size) {
57 	/*
58 	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
59 	 * stopping at first success.  This cascade is performed for consistency
60 	 * with the cascade in extent_dalloc_wrapper() because an application's
61 	 * custom hooks may not support e.g. dalloc.  This function is only ever
62 	 * called as a side effect of arena destruction, so although it might
63 	 * seem pointless to do anything besides dalloc here, the application
64 	 * may in fact want the end state of all associated virtual memory to be
65 	 * in some consistent-but-allocated state.
66 	 */
67 	if (extent_hooks == &extent_hooks_default) {
68 		if (!extent_dalloc_mmap(addr, size)) {
69 			goto label_done;
70 		}
71 		if (!pages_decommit(addr, size)) {
72 			goto label_done;
73 		}
74 		if (!pages_purge_forced(addr, size)) {
75 			goto label_done;
76 		}
77 		if (!pages_purge_lazy(addr, size)) {
78 			goto label_done;
79 		}
80 		/* Nothing worked.  This should never happen. */
81 		not_reached();
82 	} else {
83 		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
84 		pre_reentrancy(tsd, NULL);
85 		if (extent_hooks->dalloc != NULL &&
86 		    !extent_hooks->dalloc(extent_hooks, addr, size, true,
87 		    ind)) {
88 			goto label_post_reentrancy;
89 		}
90 		if (extent_hooks->decommit != NULL &&
91 		    !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
92 		    ind)) {
93 			goto label_post_reentrancy;
94 		}
95 		if (extent_hooks->purge_forced != NULL &&
96 		    !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
97 		    size, ind)) {
98 			goto label_post_reentrancy;
99 		}
100 		if (extent_hooks->purge_lazy != NULL &&
101 		    !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
102 		    ind)) {
103 			goto label_post_reentrancy;
104 		}
105 		/* Nothing worked.  That's the application's problem. */
106 	label_post_reentrancy:
107 		post_reentrancy(tsd);
108 	}
109 label_done:
110 	if (metadata_thp_madvise()) {
111 		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
112 		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
113 		    (size & HUGEPAGE_MASK) == 0);
114 		pages_nohuge(addr, size);
115 	}
116 }
117 
118 static void
base_extent_init(size_t * extent_sn_next,extent_t * extent,void * addr,size_t size)119 base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
120     size_t size) {
121 	size_t sn;
122 
123 	sn = *extent_sn_next;
124 	(*extent_sn_next)++;
125 
126 	extent_binit(extent, addr, size, sn);
127 }
128 
129 static size_t
base_get_num_blocks(base_t * base,bool with_new_block)130 base_get_num_blocks(base_t *base, bool with_new_block) {
131 	base_block_t *b = base->blocks;
132 	assert(b != NULL);
133 
134 	size_t n_blocks = with_new_block ? 2 : 1;
135 	while (b->next != NULL) {
136 		n_blocks++;
137 		b = b->next;
138 	}
139 
140 	return n_blocks;
141 }
142 
143 static void
base_auto_thp_switch(tsdn_t * tsdn,base_t * base)144 base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
145 	assert(opt_metadata_thp == metadata_thp_auto);
146 	malloc_mutex_assert_owner(tsdn, &base->mtx);
147 	if (base->auto_thp_switched) {
148 		return;
149 	}
150 	/* Called when adding a new block. */
151 	bool should_switch;
152 	if (base_ind_get(base) != 0) {
153 		should_switch = (base_get_num_blocks(base, true) ==
154 		    BASE_AUTO_THP_THRESHOLD);
155 	} else {
156 		should_switch = (base_get_num_blocks(base, true) ==
157 		    BASE_AUTO_THP_THRESHOLD_A0);
158 	}
159 	if (!should_switch) {
160 		return;
161 	}
162 
163 	base->auto_thp_switched = true;
164 	assert(!config_stats || base->n_thp == 0);
165 	/* Make the initial blocks THP lazily. */
166 	base_block_t *block = base->blocks;
167 	while (block != NULL) {
168 		assert((block->size & HUGEPAGE_MASK) == 0);
169 		pages_huge(block, block->size);
170 		if (config_stats) {
171 			base->n_thp += HUGEPAGE_CEILING(block->size -
172 			    extent_bsize_get(&block->extent)) >> LG_HUGEPAGE;
173 		}
174 		block = block->next;
175 		assert(block == NULL || (base_ind_get(base) == 0));
176 	}
177 }
178 
179 static void *
base_extent_bump_alloc_helper(extent_t * extent,size_t * gap_size,size_t size,size_t alignment)180 base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
181     size_t alignment) {
182 	void *ret;
183 
184 	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
185 	assert(size == ALIGNMENT_CEILING(size, alignment));
186 
187 	*gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent),
188 	    alignment) - (uintptr_t)extent_addr_get(extent);
189 	ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size);
190 	assert(extent_bsize_get(extent) >= *gap_size + size);
191 	extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) +
192 	    *gap_size + size), extent_bsize_get(extent) - *gap_size - size,
193 	    extent_sn_get(extent));
194 	return ret;
195 }
196 
197 static void
base_extent_bump_alloc_post(base_t * base,extent_t * extent,size_t gap_size,void * addr,size_t size)198 base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size,
199     void *addr, size_t size) {
200 	if (extent_bsize_get(extent) > 0) {
201 		/*
202 		 * Compute the index for the largest size class that does not
203 		 * exceed extent's size.
204 		 */
205 		szind_t index_floor =
206 		    sz_size2index(extent_bsize_get(extent) + 1) - 1;
207 		extent_heap_insert(&base->avail[index_floor], extent);
208 	}
209 
210 	if (config_stats) {
211 		base->allocated += size;
212 		/*
213 		 * Add one PAGE to base_resident for every page boundary that is
214 		 * crossed by the new allocation. Adjust n_thp similarly when
215 		 * metadata_thp is enabled.
216 		 */
217 		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
218 		    PAGE_CEILING((uintptr_t)addr - gap_size);
219 		assert(base->allocated <= base->resident);
220 		assert(base->resident <= base->mapped);
221 		if (metadata_thp_madvise() && (opt_metadata_thp ==
222 		    metadata_thp_always || base->auto_thp_switched)) {
223 			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
224 			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
225 			    LG_HUGEPAGE;
226 			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
227 		}
228 	}
229 }
230 
231 static void *
base_extent_bump_alloc(base_t * base,extent_t * extent,size_t size,size_t alignment)232 base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size,
233     size_t alignment) {
234 	void *ret;
235 	size_t gap_size;
236 
237 	ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
238 	base_extent_bump_alloc_post(base, extent, gap_size, ret, size);
239 	return ret;
240 }
241 
242 /*
243  * Allocate a block of virtual memory that is large enough to start with a
244  * base_block_t header, followed by an object of specified size and alignment.
245  * On success a pointer to the initialized base_block_t header is returned.
246  */
247 static base_block_t *
base_block_alloc(tsdn_t * tsdn,base_t * base,extent_hooks_t * extent_hooks,unsigned ind,pszind_t * pind_last,size_t * extent_sn_next,size_t size,size_t alignment)248 base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
249     unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
250     size_t alignment) {
251 	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
252 	size_t usize = ALIGNMENT_CEILING(size, alignment);
253 	size_t header_size = sizeof(base_block_t);
254 	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
255 	    header_size;
256 	/*
257 	 * Create increasingly larger blocks in order to limit the total number
258 	 * of disjoint virtual memory ranges.  Choose the next size in the page
259 	 * size class series (skipping size classes that are not a multiple of
260 	 * HUGEPAGE), or a size large enough to satisfy the requested size and
261 	 * alignment, whichever is larger.
262 	 */
263 	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
264 	    + usize));
265 	pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 :
266 	    *pind_last;
267 	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
268 	size_t block_size = (min_block_size > next_block_size) ? min_block_size
269 	    : next_block_size;
270 	base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
271 	    block_size);
272 	if (block == NULL) {
273 		return NULL;
274 	}
275 
276 	if (metadata_thp_madvise()) {
277 		void *addr = (void *)block;
278 		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
279 		    (block_size & HUGEPAGE_MASK) == 0);
280 		if (opt_metadata_thp == metadata_thp_always) {
281 			pages_huge(addr, block_size);
282 		} else if (opt_metadata_thp == metadata_thp_auto &&
283 		    base != NULL) {
284 			/* base != NULL indicates this is not a new base. */
285 			malloc_mutex_lock(tsdn, &base->mtx);
286 			base_auto_thp_switch(tsdn, base);
287 			if (base->auto_thp_switched) {
288 				pages_huge(addr, block_size);
289 			}
290 			malloc_mutex_unlock(tsdn, &base->mtx);
291 		}
292 	}
293 
294 	*pind_last = sz_psz2ind(block_size);
295 	block->size = block_size;
296 	block->next = NULL;
297 	assert(block_size >= header_size);
298 	base_extent_init(extent_sn_next, &block->extent,
299 	    (void *)((uintptr_t)block + header_size), block_size - header_size);
300 	return block;
301 }
302 
303 /*
304  * Allocate an extent that is at least as large as specified size, with
305  * specified alignment.
306  */
307 static extent_t *
base_extent_alloc(tsdn_t * tsdn,base_t * base,size_t size,size_t alignment)308 base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
309 	malloc_mutex_assert_owner(tsdn, &base->mtx);
310 
311 	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
312 	/*
313 	 * Drop mutex during base_block_alloc(), because an extent hook will be
314 	 * called.
315 	 */
316 	malloc_mutex_unlock(tsdn, &base->mtx);
317 	base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
318 	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
319 	    alignment);
320 	malloc_mutex_lock(tsdn, &base->mtx);
321 	if (block == NULL) {
322 		return NULL;
323 	}
324 	block->next = base->blocks;
325 	base->blocks = block;
326 	if (config_stats) {
327 		base->allocated += sizeof(base_block_t);
328 		base->resident += PAGE_CEILING(sizeof(base_block_t));
329 		base->mapped += block->size;
330 		if (metadata_thp_madvise() &&
331 		    !(opt_metadata_thp == metadata_thp_auto
332 		      && !base->auto_thp_switched)) {
333 			assert(base->n_thp > 0);
334 			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
335 			    LG_HUGEPAGE;
336 		}
337 		assert(base->allocated <= base->resident);
338 		assert(base->resident <= base->mapped);
339 		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
340 	}
341 	return &block->extent;
342 }
343 
/* Return the file-scope b0 pointer, which base_boot() assigns. */
base_t *
b0get(void) {
	return b0;
}
348 
349 base_t *
base_new(tsdn_t * tsdn,unsigned ind,extent_hooks_t * extent_hooks)350 base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
351 	pszind_t pind_last = 0;
352 	size_t extent_sn_next = 0;
353 	base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
354 	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
355 	if (block == NULL) {
356 		return NULL;
357 	}
358 
359 	size_t gap_size;
360 	size_t base_alignment = CACHELINE;
361 	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
362 	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent,
363 	    &gap_size, base_size, base_alignment);
364 	base->ind = ind;
365 	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
366 	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
367 	    malloc_mutex_rank_exclusive)) {
368 		base_unmap(tsdn, extent_hooks, ind, block, block->size);
369 		return NULL;
370 	}
371 	base->pind_last = pind_last;
372 	base->extent_sn_next = extent_sn_next;
373 	base->blocks = block;
374 	base->auto_thp_switched = false;
375 	for (szind_t i = 0; i < NSIZES; i++) {
376 		extent_heap_new(&base->avail[i]);
377 	}
378 	if (config_stats) {
379 		base->allocated = sizeof(base_block_t);
380 		base->resident = PAGE_CEILING(sizeof(base_block_t));
381 		base->mapped = block->size;
382 		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
383 		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
384 		    >> LG_HUGEPAGE : 0;
385 		assert(base->allocated <= base->resident);
386 		assert(base->resident <= base->mapped);
387 		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
388 	}
389 	base_extent_bump_alloc_post(base, &block->extent, gap_size, base,
390 	    base_size);
391 
392 	return base;
393 }
394 
395 void
base_delete(tsdn_t * tsdn,base_t * base)396 base_delete(tsdn_t *tsdn, base_t *base) {
397 	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
398 	base_block_t *next = base->blocks;
399 	do {
400 		base_block_t *block = next;
401 		next = block->next;
402 		base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
403 		    block->size);
404 	} while (next != NULL);
405 }
406 
/* Atomically load base's extent hooks pointer using acquire ordering. */
extent_hooks_t *
base_extent_hooks_get(base_t *base) {
	return (extent_hooks_t *)atomic_load_p(&base->extent_hooks,
	    ATOMIC_ACQUIRE);
}
412 
413 extent_hooks_t *
base_extent_hooks_set(base_t * base,extent_hooks_t * extent_hooks)414 base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
415 	extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base);
416 	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE);
417 	return old_extent_hooks;
418 }
419 
420 static void *
base_alloc_impl(tsdn_t * tsdn,base_t * base,size_t size,size_t alignment,size_t * esn)421 base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
422     size_t *esn) {
423 	alignment = QUANTUM_CEILING(alignment);
424 	size_t usize = ALIGNMENT_CEILING(size, alignment);
425 	size_t asize = usize + alignment - QUANTUM;
426 
427 	extent_t *extent = NULL;
428 	malloc_mutex_lock(tsdn, &base->mtx);
429 	for (szind_t i = sz_size2index(asize); i < NSIZES; i++) {
430 		extent = extent_heap_remove_first(&base->avail[i]);
431 		if (extent != NULL) {
432 			/* Use existing space. */
433 			break;
434 		}
435 	}
436 	if (extent == NULL) {
437 		/* Try to allocate more space. */
438 		extent = base_extent_alloc(tsdn, base, usize, alignment);
439 	}
440 	void *ret;
441 	if (extent == NULL) {
442 		ret = NULL;
443 		goto label_return;
444 	}
445 
446 	ret = base_extent_bump_alloc(base, extent, usize, alignment);
447 	if (esn != NULL) {
448 		*esn = extent_sn_get(extent);
449 	}
450 label_return:
451 	malloc_mutex_unlock(tsdn, &base->mtx);
452 	return ret;
453 }
454 
/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage.  Upon success a
 * pointer to at least size bytes with specified alignment is returned.  Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 *
 * This is base_alloc_impl() with no extent serial number requested (esn is
 * NULL); that function rounds alignment up to a multiple of QUANTUM and
 * returns NULL when no space can be obtained.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}
467 
468 extent_t *
base_alloc_extent(tsdn_t * tsdn,base_t * base)469 base_alloc_extent(tsdn_t *tsdn, base_t *base) {
470 	size_t esn;
471 	extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t),
472 	    CACHELINE, &esn);
473 	if (extent == NULL) {
474 		return NULL;
475 	}
476 	extent_esn_set(extent, esn);
477 	return extent;
478 }
479 
480 void
base_stats_get(tsdn_t * tsdn,base_t * base,size_t * allocated,size_t * resident,size_t * mapped,size_t * n_thp)481 base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
482     size_t *mapped, size_t *n_thp) {
483 	cassert(config_stats);
484 
485 	malloc_mutex_lock(tsdn, &base->mtx);
486 	assert(base->allocated <= base->resident);
487 	assert(base->resident <= base->mapped);
488 	*allocated = base->allocated;
489 	*resident = base->resident;
490 	*mapped = base->mapped;
491 	*n_thp = base->n_thp;
492 	malloc_mutex_unlock(tsdn, &base->mtx);
493 }
494 
/* Fork preparation: forwards base->mtx to malloc_mutex_prefork(). */
void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}
499 
/* Parent side after fork: forwards base->mtx to malloc_mutex_postfork_parent(). */
void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}
504 
/* Child side after fork: forwards base->mtx to malloc_mutex_postfork_child(). */
void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}
509 
/*
 * Create b0, the base with index 0, using the default extent hooks.
 * Returns true on failure (base_new() returned NULL) and false on success.
 */
bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
	return (b0 == NULL);
}
515