1 //===---------- emutls.c - Implements __emutls_get_address ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include <stdint.h>
10 #include <stdlib.h>
11 #include <string.h>
12
13 #include "int_lib.h"
14
// Number of pthread key-destructor rounds that emutls lets pass before it
// releases a thread's storage.
//
// Bionic runs 4 pthread key cleanup rounds, and emutls deallocation is
// postponed to round 2 there because:
//   - Android versions older than M lack __cxa_thread_atexit_impl, so apps
//     use a pthread key destructor to call C++ destructors.
//   - Apps might use __thread/thread_local variables in pthread destructors.
// Waiting until the final two rounds is not possible, because jemalloc needs
// two rounds after the final malloc/free call to free its thread-specific
// data (see https://reviews.llvm.org/D46978#1107507).
#if !defined(__BIONIC__)
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
#else
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
#endif
28
29 #if defined(_MSC_VER) && !defined(__clang__)
30 // MSVC raises a warning about a nonstandard extension being used for the 0
31 // sized element in this array. Disable this for warn-as-error builds.
32 #pragma warning(push)
33 #pragma warning(disable : 4206)
34 #endif
35
// Per-thread table of emulated TLS object addresses: two header words
// followed by a flexible array of object pointers.
typedef struct emutls_address_array {
  // Key-destructor rounds still to be skipped before the table is released.
  uintptr_t skip_destructor_rounds;
  // Number of elements in the 'data' array.
  uintptr_t size;
  // Object addresses; a slot stays NULL until its object is allocated.
  void *data[];
} emutls_address_array;
41
42 #if defined(_MSC_VER) && !defined(__clang__)
43 #pragma warning(pop)
44 #endif
45
46 static void emutls_shutdown(emutls_address_array *array);
47
48 #ifndef _WIN32
49
50 #include <pthread.h>
51
// Mutex taken by emutls_lock while an object index is being assigned.
static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
// Key whose per-thread value is the calling thread's emutls_address_array.
static pthread_key_t emutls_pthread_key;
// True from the moment emutls_pthread_key is created until it is deleted.
static bool emutls_key_created = false;

// Unsigned integers with GCC's "word" and "pointer" machine modes, used so
// that the emutls data layout matches the one GCC uses.
typedef unsigned int gcc_word __attribute__((mode(word)));
typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
58
// posix_memalign is opt-in. By default emutls relies on plain malloc, so
// systems like Android can use thread local data without heavier POSIX
// memory allocators.
#if !defined(EMUTLS_USE_POSIX_MEMALIGN)
#define EMUTLS_USE_POSIX_MEMALIGN 0
#endif
64
// Allocates 'size' bytes whose address is a multiple of 'align' and returns
// the block. 'align' is expected to be a power of 2 no smaller than
// sizeof(void *). Aborts when the memory cannot be obtained. The block must be
// released with emutls_memalign_free.
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base;
#if EMUTLS_USE_POSIX_MEMALIGN
  if (posix_memalign(&base, align, size) != 0)
    abort();
#else
  // Over-allocate: up to align - 1 bytes are lost to rounding, and one
  // pointer-sized slot right below the aligned block records the address
  // malloc returned so that the block can be freed later.
  const size_t extra_bytes = align - 1 + sizeof(void *);
  char *object;
  // A request whose padded size would wrap around can never be satisfied;
  // treat it like any other allocation failure.
  if (size > SIZE_MAX - extra_bytes)
    abort();
  if ((object = (char *)malloc(extra_bytes + size)) == NULL)
    abort();
  // Skip the padding, then round down to the requested alignment. The result
  // is at least sizeof(void *) bytes past 'object'.
  base = (void *)(((uintptr_t)(object + extra_bytes)) &
                  ~(uintptr_t)(align - 1));

  ((void **)base)[-1] = object;
#endif
  return base;
}
82
// Releases a block obtained from emutls_memalign_alloc.
static __inline void emutls_memalign_free(void *base) {
#if !EMUTLS_USE_POSIX_MEMALIGN
  // The address returned by malloc is kept in the pointer-sized slot just
  // below the aligned block; that is the address to hand back to free.
  void **malloc_slot = (void **)base - 1;
  free(*malloc_slot);
#else
  free(base);
#endif
}
91
// Stores 'value' as the calling thread's emutls_address_array pointer.
// Aborts if the pointer cannot be stored: carrying on would leave the thread
// with no record of its array. This matches the _WIN32 variant, which aborts
// when TlsSetValue fails.
static __inline void emutls_setspecific(emutls_address_array *value) {
  if (pthread_setspecific(emutls_pthread_key, (void *)value) != 0)
    abort();
}
95
emutls_getspecific()96 static __inline emutls_address_array *emutls_getspecific() {
97 return (emutls_address_array *)pthread_getspecific(emutls_pthread_key);
98 }
99
emutls_key_destructor(void * ptr)100 static void emutls_key_destructor(void *ptr) {
101 emutls_address_array *array = (emutls_address_array *)ptr;
102 if (array->skip_destructor_rounds > 0) {
103 // emutls is deallocated using a pthread key destructor. These
104 // destructors are called in several rounds to accommodate destructor
105 // functions that (re)initialize key values with pthread_setspecific.
106 // Delay the emutls deallocation to accommodate other end-of-thread
107 // cleanup tasks like calling thread_local destructors (e.g. the
108 // __cxa_thread_atexit fallback in libc++abi).
109 array->skip_destructor_rounds--;
110 emutls_setspecific(array);
111 } else {
112 emutls_shutdown(array);
113 free(ptr);
114 }
115 }
116
emutls_init(void)117 static __inline void emutls_init(void) {
118 if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
119 abort();
120 emutls_key_created = true;
121 }
122
emutls_init_once(void)123 static __inline void emutls_init_once(void) {
124 static pthread_once_t once = PTHREAD_ONCE_INIT;
125 pthread_once(&once, emutls_init);
126 }
127
emutls_lock()128 static __inline void emutls_lock() { pthread_mutex_lock(&emutls_mutex); }
129
emutls_unlock()130 static __inline void emutls_unlock() { pthread_mutex_unlock(&emutls_mutex); }
131
132 #else // _WIN32
133
134 #include <assert.h>
135 #include <malloc.h>
136 #include <stdio.h>
137 #include <windows.h>
138
139 static LPCRITICAL_SECTION emutls_mutex;
140 static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
141
142 typedef uintptr_t gcc_word;
143 typedef void *gcc_pointer;
144
// Writes a description of the Windows error code 'last_err' to stderr. When
// no system message can be obtained for the code, 'hint' (callers pass the
// name of the failing call) is printed instead.
static void win_error(DWORD last_err, const char *hint) {
  char *buffer = NULL;
  // No insert arguments are supplied (Arguments is NULL), so insert sequences
  // such as %1 in the system message must be left untouched; hence
  // FORMAT_MESSAGE_IGNORE_INSERTS.
  if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
                         FORMAT_MESSAGE_FROM_SYSTEM |
                         FORMAT_MESSAGE_IGNORE_INSERTS |
                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
                     NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
    fprintf(stderr, "Windows error: %s\n", buffer);
  } else {
    fprintf(stderr, "Unknown Windows error: %s\n", hint);
  }
  // With FORMAT_MESSAGE_ALLOCATE_BUFFER the message buffer is allocated by
  // FormatMessageA; 'buffer' is still NULL if the call failed.
  LocalFree(buffer);
}
157
// Reports the Windows error 'last_err' through win_error and then terminates
// the process with abort().
static __inline void win_abort(DWORD last_err, const char *hint) {
  win_error(last_err, hint);
  abort();
}
162
// Allocates 'size' bytes aligned to 'align' bytes with _aligned_malloc.
// Reports the failure and aborts if the allocation does not succeed. The
// block must be released with emutls_memalign_free.
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *const block = _aligned_malloc(size, align);
  if (block == NULL)
    win_abort(GetLastError(), "_aligned_malloc");
  return block;
}
169
// Releases a block obtained from emutls_memalign_alloc.
static __inline void emutls_memalign_free(void *base) {
  _aligned_free(base);
}
171
emutls_exit(void)172 static void emutls_exit(void) {
173 if (emutls_mutex) {
174 DeleteCriticalSection(emutls_mutex);
175 _aligned_free(emutls_mutex);
176 emutls_mutex = NULL;
177 }
178 if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
179 emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index));
180 TlsFree(emutls_tls_index);
181 emutls_tls_index = TLS_OUT_OF_INDEXES;
182 }
183 }
184
185 #pragma warning(push)
186 #pragma warning(disable : 4100)
emutls_init(PINIT_ONCE p0,PVOID p1,PVOID * p2)187 static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
188 emutls_mutex =
189 (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
190 if (!emutls_mutex) {
191 win_error(GetLastError(), "_aligned_malloc");
192 return FALSE;
193 }
194 InitializeCriticalSection(emutls_mutex);
195
196 emutls_tls_index = TlsAlloc();
197 if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
198 emutls_exit();
199 win_error(GetLastError(), "TlsAlloc");
200 return FALSE;
201 }
202 atexit(&emutls_exit);
203 return TRUE;
204 }
205
emutls_init_once(void)206 static __inline void emutls_init_once(void) {
207 static INIT_ONCE once;
208 InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
209 }
210
emutls_lock()211 static __inline void emutls_lock() { EnterCriticalSection(emutls_mutex); }
212
emutls_unlock()213 static __inline void emutls_unlock() { LeaveCriticalSection(emutls_mutex); }
214
// Stores 'value' in the calling thread's emutls TLS slot. Reports the error
// and aborts if TlsSetValue fails.
static __inline void emutls_setspecific(emutls_address_array *value) {
  const BOOL stored = TlsSetValue(emutls_tls_index, (LPVOID)value);
  if (!stored)
    win_abort(GetLastError(), "TlsSetValue");
}
219
emutls_getspecific()220 static __inline emutls_address_array *emutls_getspecific() {
221 LPVOID value = TlsGetValue(emutls_tls_index);
222 if (value == NULL) {
223 const DWORD err = GetLastError();
224 if (err != ERROR_SUCCESS)
225 win_abort(err, "TlsGetValue");
226 }
227 return (emutls_address_array *)value;
228 }
229
230 // Provide atomic load/store functions for emutls_get_index if built with MSVC.
231 #if !defined(__ATOMIC_RELEASE)
232 #include <intrin.h>
233
234 enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
235
// Stand-in for the __atomic_load_n builtin, provided when the compiler does
// not define __ATOMIC_RELEASE. Only __ATOMIC_ACQUIRE is accepted for 'type'
// (checked with assert). Returns the pointer-sized value stored at 'ptr'.
static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
  assert(type == __ATOMIC_ACQUIRE);
  // An interlocked OR with 0 leaves the stored value unchanged and returns
  // the previous value, so it is equivalent to a plain load.
#ifndef _WIN64
  return InterlockedOr(ptr, 0);
#else
  return InterlockedOr64(ptr, 0);
#endif
}
246
// Stand-in for the __atomic_store_n builtin, provided when the compiler does
// not define __ATOMIC_RELEASE. Only __ATOMIC_RELEASE is accepted for 'type'
// (checked with assert). Writes the pointer-sized 'val' to 'ptr' with
// InterlockedExchangePointer; the previous value is discarded.
static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
  void *volatile *target = (void *volatile *)ptr;
  assert(type == __ATOMIC_RELEASE);
  InterlockedExchangePointer(target, (void *)val);
}
251
252 #endif // __ATOMIC_RELEASE
253
254 #pragma warning(pop)
255
256 #endif // _WIN32
257
// Number of TLS objects that have been given an index so far; the most
// recently assigned index equals this count.
static size_t emutls_num_object = 0;
259
260 // Free the allocated TLS data
emutls_shutdown(emutls_address_array * array)261 static void emutls_shutdown(emutls_address_array *array) {
262 if (array) {
263 uintptr_t i;
264 for (i = 0; i < array->size; ++i) {
265 if (array->data[i])
266 emutls_memalign_free(array->data[i]);
267 }
268 }
269 }
270
271 // For every TLS variable xyz,
272 // there is one __emutls_control variable named __emutls_v.xyz.
273 // If xyz has non-zero initial value, __emutls_v.xyz's "value"
274 // will point to __emutls_t.xyz, which has the initial value.
275 typedef struct __emutls_control {
276 // Must use gcc_word here, instead of size_t, to match GCC. When
277 // gcc_word is larger than size_t, the upper extra bits are all
278 // zeros. We can use variables of size_t to operate on size and
279 // align.
280 gcc_word size; // size of the object in bytes
281 gcc_word align; // alignment of the object in bytes
282 union {
283 uintptr_t index; // data[index-1] is the object address
284 void *address; // object address, when in single thread env
285 } object;
286 void *value; // null or non-zero initial value for the object
287 } __emutls_control;
288
289 // Emulated TLS objects are always allocated at run-time.
emutls_allocate_object(__emutls_control * control)290 static __inline void *emutls_allocate_object(__emutls_control *control) {
291 // Use standard C types, check with gcc's emutls.o.
292 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
293 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *));
294
295 size_t size = control->size;
296 size_t align = control->align;
297 void *base;
298 if (align < sizeof(void *))
299 align = sizeof(void *);
300 // Make sure that align is power of 2.
301 if ((align & (align - 1)) != 0)
302 abort();
303
304 base = emutls_memalign_alloc(align, size);
305 if (control->value)
306 memcpy(base, control->value, size);
307 else
308 memset(base, 0, size);
309 return base;
310 }
311
312 // Returns control->object.index; set index if not allocated yet.
emutls_get_index(__emutls_control * control)313 static __inline uintptr_t emutls_get_index(__emutls_control *control) {
314 uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
315 if (!index) {
316 emutls_init_once();
317 emutls_lock();
318 index = control->object.index;
319 if (!index) {
320 index = ++emutls_num_object;
321 __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
322 }
323 emutls_unlock();
324 }
325 return index;
326 }
327
328 // Updates newly allocated thread local emutls_address_array.
emutls_check_array_set_size(emutls_address_array * array,uintptr_t size)329 static __inline void emutls_check_array_set_size(emutls_address_array *array,
330 uintptr_t size) {
331 if (array == NULL)
332 abort();
333 array->size = size;
334 emutls_setspecific(array);
335 }
336
337 // Returns the new 'data' array size, number of elements,
338 // which must be no smaller than the given index.
emutls_new_data_array_size(uintptr_t index)339 static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
340 // Need to allocate emutls_address_array with extra slots
341 // to store the header.
342 // Round up the emutls_address_array size to multiple of 16.
343 uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
344 return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
345 }
346
347 // Returns the size in bytes required for an emutls_address_array with
348 // N number of elements for data field.
emutls_asize(uintptr_t N)349 static __inline uintptr_t emutls_asize(uintptr_t N) {
350 return N * sizeof(void *) + sizeof(emutls_address_array);
351 }
352
353 // Returns the thread local emutls_address_array.
354 // Extends its size if necessary to hold address at index.
355 static __inline emutls_address_array *
emutls_get_address_array(uintptr_t index)356 emutls_get_address_array(uintptr_t index) {
357 emutls_address_array *array = emutls_getspecific();
358 if (array == NULL) {
359 uintptr_t new_size = emutls_new_data_array_size(index);
360 array = (emutls_address_array *)malloc(emutls_asize(new_size));
361 if (array) {
362 memset(array->data, 0, new_size * sizeof(void *));
363 array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
364 }
365 emutls_check_array_set_size(array, new_size);
366 } else if (index > array->size) {
367 uintptr_t orig_size = array->size;
368 uintptr_t new_size = emutls_new_data_array_size(index);
369 array = (emutls_address_array *)realloc(array, emutls_asize(new_size));
370 if (array)
371 memset(array->data + orig_size, 0,
372 (new_size - orig_size) * sizeof(void *));
373 emutls_check_array_set_size(array, new_size);
374 }
375 return array;
376 }
377
// Returns the address of the calling thread's copy of the TLS object
// described by 'control', allocating and initializing that copy the first
// time the thread asks for it.
void *__emutls_get_address(__emutls_control *control) {
  const uintptr_t index = emutls_get_index(control);
  emutls_address_array *array = emutls_get_address_array(index);
  // Indices start at 1; the object's address is kept in data[index - 1].
  void **slot = &array->data[index - 1];
  if (*slot == NULL)
    *slot = emutls_allocate_object(control);
  return *slot;
}
385
386 #ifdef __BIONIC__
387 // Called by Bionic on dlclose to delete the emutls pthread key.
__emutls_unregister_key(void)388 __attribute__((visibility("hidden"))) void __emutls_unregister_key(void) {
389 if (emutls_key_created) {
390 pthread_key_delete(emutls_pthread_key);
391 emutls_key_created = false;
392 }
393 }
394 #endif
395