#ifndef JEMALLOC_INTERNAL_TSD_H
#define JEMALLOC_INTERNAL_TSD_H

#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/prof_types.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/tcache_structs.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/witness.h"

/*
 * Thread-Specific-Data layout
 * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
 * s: state
 * e: tcache_enabled
 * m: thread_allocated (config_stats)
 * f: thread_deallocated (config_stats)
 * p: prof_tdata (config_prof)
 * c: rtree_ctx (rtree cache accessed on deallocation)
 * t: tcache
 * --- data not accessed on tcache fast path: arena-related fields ---
 * d: arenas_tdata_bypass
 * r: reentrancy_level
 * x: narenas_tdata
 * i: iarena
 * a: arena
 * o: arenas_tdata
 * Loading TSD data is on the critical path of basically all malloc operations.
 * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective.
 * Use a compact layout to reduce cache footprint.
 * +--- 64-bit and 64B cacheline; 1B each letter; first byte on the left. ---+
 * |----------------------------  1st cacheline  ----------------------------|
 * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32  ........ ........ .......] |
 * |----------------------------  2nd cacheline  ----------------------------|
 * | [c * 64  ........ ........ ........ ........ ........ ........ .......] |
 * |----------------------------  3rd cacheline  ----------------------------|
 * | [c * 32  ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
 * +-------------------------------------------------------------------------+
 * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
 *
 * The last 3 members (i, a and o) before tcache aren't really needed on the
 * tcache fast path.  However, we have a number of unused tcache bins and
 * witnesses (never touched unless config_debug) at the end of tcache, so we
 * place them there to avoid breaking the cachelines and possibly paging in an
 * extra page.
 */
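/*
 * Illustrative sketch only (not part of this header): on a 64-bit build with
 * config_stats and config_prof enabled, the diagram above could be
 * sanity-checked at compile time against the fields that the O() macro below
 * generates, e.g.:
 *
 *   static_assert(offsetof(tsd_t, use_a_getter_or_setter_instead_rtree_ctx)
 *       == 32, "rtree_ctx expected to start mid first cacheline");
 */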
#ifdef JEMALLOC_JET
typedef void (*test_callback_t)(int *);
#  define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
#  define MALLOC_TEST_TSD \
    O(test_data,		int,			int)		\
    O(test_callback,		test_callback_t,	int)
#  define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
#else
#  define MALLOC_TEST_TSD
#  define MALLOC_TEST_TSD_INITIALIZER
#endif

/*  O(name,			type,			nullable type) */
#define MALLOC_TSD							\
    O(tcache_enabled,		bool,			bool)		\
    O(arenas_tdata_bypass,	bool,			bool)		\
    O(reentrancy_level,		int8_t,			int8_t)		\
    O(narenas_tdata,		uint32_t,		uint32_t)	\
    O(thread_allocated,		uint64_t,		uint64_t)	\
    O(thread_deallocated,	uint64_t,		uint64_t)	\
    O(prof_tdata,		prof_tdata_t *,		prof_tdata_t *)	\
    O(rtree_ctx,		rtree_ctx_t,		rtree_ctx_t)	\
    O(iarena,			arena_t *,		arena_t *)	\
    O(arena,			arena_t *,		arena_t *)	\
    O(arenas_tdata,		arena_tdata_t *,	arena_tdata_t *)\
    O(tcache,			tcache_t,		tcache_t)	\
    O(witness_tsd,		witness_tsd_t,		witness_tsdn_t)	\
    MALLOC_TEST_TSD
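
/*
 * MALLOC_TSD is an X-macro: each consumer defines O(name, type, nullable
 * type) and then expands MALLOC_TSD.  As a sketch, with
 *
 *   #define O(n, t, nt) t use_a_getter_or_setter_instead_##n;
 *
 * the O(tcache_enabled, bool, bool) row expands to the struct field
 *
 *   bool use_a_getter_or_setter_instead_tcache_enabled;
 *
 * and the accessor macros further below generate, e.g.,
 * tsd_tcache_enabled_get() and tsd_tcache_enabled_set() from the same row.
 */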

#define TSD_INITIALIZER {						\
    tsd_state_uninitialized,						\
    TCACHE_ENABLED_ZERO_INITIALIZER,					\
    false,								\
    0,									\
    0,									\
    0,									\
    0,									\
    NULL,								\
    RTREE_CTX_ZERO_INITIALIZER,						\
    NULL,								\
    NULL,								\
    NULL,								\
    TCACHE_ZERO_INITIALIZER,						\
    WITNESS_TSD_INITIALIZER						\
    MALLOC_TEST_TSD_INITIALIZER						\
}

enum {
	tsd_state_nominal = 0, /* Common case --> jnz. */
	tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
	/* The above 2 nominal states should have lower values. */
	tsd_state_nominal_max = 1, /* Used for comparison only. */
	tsd_state_minimal_initialized = 2,
	tsd_state_purgatory = 3,
	tsd_state_reincarnated = 4,
	tsd_state_uninitialized = 5
};
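
/*
 * Making tsd_state_nominal zero means the common-case state check (see
 * tsd_fetch_impl() below) can compile down to a test against zero followed
 * by a jnz, hence the note above.  Illustrative sketch:
 *
 *   if (unlikely(tsd->state != tsd_state_nominal)) {
 *       return tsd_fetch_slow(tsd, minimal);
 *   }
 */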

/* Manually limit tsd_state_t to a single byte. */
typedef uint8_t tsd_state_t;

/* The actual tsd. */
struct tsd_s {
	/*
	 * The contents should be treated as totally opaque outside the tsd
	 * module.  Access any thread-local state through the getters and
	 * setters below.
	 */
	tsd_state_t	state;
#define O(n, t, nt)							\
	t use_a_getter_or_setter_instead_##n;
MALLOC_TSD
#undef O
};

/*
 * Wrapper around tsd_t that makes it possible to avoid implicit conversion
 * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
 * explicitly converted to tsd_t, which is non-nullable.
 */
struct tsdn_s {
	tsd_t tsd;
};
#define TSDN_NULL ((tsdn_t *)0)
JEMALLOC_ALWAYS_INLINE tsdn_t *
tsd_tsdn(tsd_t *tsd) {
	return (tsdn_t *)tsd;
}

JEMALLOC_ALWAYS_INLINE bool
tsdn_null(const tsdn_t *tsdn) {
	return tsdn == NULL;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsdn_tsd(tsdn_t *tsdn) {
	assert(!tsdn_null(tsdn));

	return &tsdn->tsd;
}
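
/*
 * Usage sketch (hypothetical caller): code that may run before thread-local
 * state is available takes a tsdn_t * and checks for NULL before converting
 * to the non-nullable tsd_t *:
 *
 *   void foo(tsdn_t *tsdn) {
 *       if (tsdn_null(tsdn)) {
 *           return;                      (no thread-local state available)
 *       }
 *       tsd_t *tsd = tsdn_tsd(tsdn);
 *       (... use tsd ...)
 *   }
 */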

void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_cleanup_register(bool (*f)(void));
tsd_t *malloc_tsd_boot0(void);
void malloc_tsd_boot1(void);
void tsd_cleanup(void *arg);
tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
void tsd_slow_update(tsd_t *tsd);

/*
 * We put the platform-specific data declarations and inlines into their own
 * header files to avoid cluttering this file.  They define tsd_boot0,
 * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and
 * tsd_set.
 */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
#elif (defined(JEMALLOC_TLS))
#include "jemalloc/internal/tsd_tls.h"
#elif (defined(_WIN32))
#include "jemalloc/internal/tsd_win.h"
#else
#include "jemalloc/internal/tsd_generic.h"
#endif

/*
 * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of
 * foo.  This omits some safety checks, and so can be used during tsd
 * initialization and cleanup.
 */
#define O(n, t, nt)							\
JEMALLOC_ALWAYS_INLINE t *						\
tsd_##n##p_get_unsafe(tsd_t *tsd) {					\
	return &tsd->use_a_getter_or_setter_instead_##n;		\
}
MALLOC_TSD
#undef O

/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */
#define O(n, t, nt)							\
JEMALLOC_ALWAYS_INLINE t *						\
tsd_##n##p_get(tsd_t *tsd) {						\
	assert(tsd->state == tsd_state_nominal ||			\
	    tsd->state == tsd_state_nominal_slow ||			\
	    tsd->state == tsd_state_reincarnated ||			\
	    tsd->state == tsd_state_minimal_initialized);		\
	return tsd_##n##p_get_unsafe(tsd);				\
}
MALLOC_TSD
#undef O

/*
 * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
 * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
 */
#define O(n, t, nt)							\
JEMALLOC_ALWAYS_INLINE nt *						\
tsdn_##n##p_get(tsdn_t *tsdn) {						\
	if (tsdn_null(tsdn)) {						\
		return NULL;						\
	}								\
	tsd_t *tsd = tsdn_tsd(tsdn);					\
	return (nt *)tsd_##n##p_get(tsd);				\
}
MALLOC_TSD
#undef O

/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
#define O(n, t, nt)							\
JEMALLOC_ALWAYS_INLINE t						\
tsd_##n##_get(tsd_t *tsd) {						\
	return *tsd_##n##p_get(tsd);					\
}
MALLOC_TSD
#undef O

/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */
#define O(n, t, nt)							\
JEMALLOC_ALWAYS_INLINE void						\
tsd_##n##_set(tsd_t *tsd, t val) {					\
	assert(tsd->state != tsd_state_reincarnated &&			\
	    tsd->state != tsd_state_minimal_initialized);		\
	*tsd_##n##p_get(tsd) = val;					\
}
MALLOC_TSD
#undef O
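
/*
 * Usage sketch for the generated accessors (illustrative; usize below is a
 * hypothetical allocation size):
 *
 *   tsd_t *tsd = tsd_fetch();
 *   uint64_t allocated = tsd_thread_allocated_get(tsd);
 *   tsd_thread_allocated_set(tsd, allocated + usize);
 */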

JEMALLOC_ALWAYS_INLINE void
tsd_assert_fast(tsd_t *tsd) {
	assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
	    tsd_reentrancy_level_get(tsd) == 0);
}

JEMALLOC_ALWAYS_INLINE bool
tsd_fast(tsd_t *tsd) {
	bool fast = (tsd->state == tsd_state_nominal);
	if (fast) {
		tsd_assert_fast(tsd);
	}

	return fast;
}
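
/*
 * Callers typically branch on tsd_fast() to pick a specialized path, along
 * these (illustrative) lines:
 *
 *   if (likely(tsd_fast(tsd))) {
 *       (... fast path: tcache enabled, no reentrancy ...)
 *   } else {
 *       (... slow path ...)
 *   }
 */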

JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch_impl(bool init, bool minimal) {
	tsd_t *tsd = tsd_get(init);

	if (!init && tsd_get_allocates() && tsd == NULL) {
		return NULL;
	}
	assert(tsd != NULL);

	if (unlikely(tsd->state != tsd_state_nominal)) {
		return tsd_fetch_slow(tsd, minimal);
	}
	assert(tsd_fast(tsd));
	tsd_assert_fast(tsd);

	return tsd;
}

/* Get a minimal TSD that requires no cleanup.  See comments in free(). */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch_min(void) {
	return tsd_fetch_impl(true, true);
}

/* For use by internal background threads only. */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_internal_fetch(void) {
	tsd_t *tsd = tsd_fetch_min();
	/* Use reincarnated state to prevent full initialization. */
	tsd->state = tsd_state_reincarnated;

	return tsd;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch(void) {
	return tsd_fetch_impl(true, false);
}
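
/*
 * tsd_fetch() is the common entry point: it initializes TSD on first use and
 * returns a non-NULL tsd_t *.  Sketch:
 *
 *   tsd_t *tsd = tsd_fetch();
 *   arena_t *arena = tsd_arena_get(tsd);   (may be NULL before arena binding)
 */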

static inline bool
tsd_nominal(tsd_t *tsd) {
	return (tsd->state <= tsd_state_nominal_max);
}

JEMALLOC_ALWAYS_INLINE tsdn_t *
tsdn_fetch(void) {
	if (!tsd_booted_get()) {
		return NULL;
	}

	return tsd_tsdn(tsd_fetch_impl(false, false));
}

JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsd_rtree_ctx(tsd_t *tsd) {
	return tsd_rtree_ctxp_get(tsd);
}

JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
	/*
	 * If tsd cannot be accessed, initialize the fallback rtree_ctx and
	 * return a pointer to it.
	 */
	if (unlikely(tsdn_null(tsdn))) {
		rtree_ctx_data_init(fallback);
		return fallback;
	}
	return tsd_rtree_ctx(tsdn_tsd(tsdn));
}
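
/*
 * Usage sketch: callers that only have a tsdn_t * reserve a fallback on the
 * stack so an rtree_ctx is available even when tsdn is NULL:
 *
 *   rtree_ctx_t fallback;
 *   rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &fallback);
 *   (... use rtree_ctx for rtree lookups ...)
 */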

#endif /* JEMALLOC_INTERNAL_TSD_H */