1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 /*
8 * Native Client support for thread local storage
9 */
10
11 #include <stdbool.h>
12 #include <stdint.h>
13 #include <string.h>
14
15 #include "native_client/src/include/elf32.h"
16 #include "native_client/src/include/elf64.h"
17 #include "native_client/src/untrusted/nacl/nacl_thread.h"
18 #include "native_client/src/untrusted/nacl/tls.h"
19 #include "native_client/src/untrusted/nacl/tls_params.h"
20
21 /*
22 * We support two mechanisms for finding templates for TLS variables:
23 *
24 * 1) The PT_TLS header (in the ELF program headers), which is
25 * located via the __ehdr_start symbol, which current binutils
26 * linkers define when the ELF file headers and program headers
27 * are mapped into the address space.
28 *
29 * 2) The __tls_template_* symbols, which are defined by PNaCl's
30 * ExpandTls LLVM pass, which is used when linking ABI-stable
31 * pexes.
32 *
33 * We use weak references to refer to these symbols so that the code
34 * can work with both mechanisms.
35 *
36 * The __tls_template_* symbols used to be defined by the binutils
37 * linker (using linker scripts), but this has been superseded by
38 * having the linker define __ehdr_start.
39 */
40
41 extern char __tls_template_start __attribute__((weak));
42 extern char __tls_template_tdata_end __attribute__((weak));
43 extern char __tls_template_end __attribute__((weak));
44 extern uint32_t __tls_template_alignment __attribute__((weak));
45
46 extern union {
47 Elf32_Ehdr ehdr32;
48 Elf64_Ehdr ehdr64;
49 } __ehdr_start __attribute__((weak, visibility("hidden")));
50
/*
 * Round size up to the next multiple of alignment.
 * alignment must be a power of two.
 */
static size_t aligned_size(size_t size, size_t alignment) {
  size_t mask = alignment - 1;
  return (size + mask) & ~mask;
}
54
/*
 * Round the address start up to the next multiple of alignment
 * (a power of two), returning it as a char pointer.
 */
static char *aligned_addr(void *start, size_t alignment) {
  uintptr_t addr = (uintptr_t) start;
  uintptr_t mask = (uintptr_t) alignment - 1;
  return (char *) ((addr + mask) & ~mask);
}
58
59 /*
60 * Collect information about the TLS initializer data here.
61 * The first call to get_tls_info() fills in all the data,
62 * based either on PT_TLS or on __tls_template_*.
63 */
64
struct tls_info {
  const char *tdata_start;   /* Address of .tdata (initializer data) */
  size_t tdata_size;         /* Size of .tdata (initializer data) */
  size_t tbss_size;          /* Size of .tbss (zero-fill space after .tdata) */
  size_t tls_alignment;      /* Alignment required for TLS segment */
};

/*
 * Filled in lazily by the first call to get_tls_info();
 * tls_alignment == 0 means "not yet cached".
 */
static struct tls_info cached_tls_info;
73
74 #if defined(__arm__)
75
76 /*
77 * On ARM, the linker might not optimize GD-model TLS accesses into the
78 * simpler forms that don't use a helper function. __tls_get_addr is
79 * called with the address of a two-word GOT entry: the first word is the
80 * module ID, which is immaterial for static linking; the second word is
81 * the offset within the (only) module's TLS data. The TLS data always
82 * starts at 8 bytes past the thread pointer, plus alignment.
83 */
84
/*
 * Offset from the thread pointer ($tp) to the start of the TLS data:
 * the 8-byte header past $tp, padded up to the TLS alignment.
 * Computed once by finish_info_cache() and reused by __tls_get_addr().
 */
static size_t tp_tls_offset;

static void finish_info_cache(const struct tls_info *info) {
  /*
   * Cache this calculation at startup so it doesn't need to be repeated.
   */
  tp_tls_offset = aligned_size(8, info->tls_alignment);
}
93
/*
 * GD-model TLS accessor called by compiler-generated code on ARM.
 * entry points at a two-word GOT entry: entry[0] is the module ID
 * (immaterial under static linking) and entry[1] is the offset of the
 * variable within the (only) module's TLS data.
 */
void *__tls_get_addr(uintptr_t *entry) {
  /*
   * This is never called before finish_info_cache has been called.
   */
  return (char *) __builtin_thread_pointer() + tp_tls_offset + entry[1];
}
100
101 #else /* !defined(__arm__) */
102
/* Non-ARM targets have no per-startup TLS calculation to cache. */
static void finish_info_cache(const struct tls_info *info) {
  (void) info;  /* Suppress unused-parameter warnings. */
}
106
107 #endif /* defined(__arm__) */
108
/*
 * Define a function that scans the ELF program headers (reached via the
 * weak __ehdr_start symbol) for a PT_TLS segment and, if one is found,
 * fills in cached_tls_info from it.  Returns 1 on success, 0 when there
 * is no usable ELF header of the requested class or no PT_TLS segment.
 */
#define DEFINE_READ_PHDR(func_name, ehdr, Elf_Phdr, elf_class)          \
  __attribute__((unused))                                               \
  static int func_name(void) {                                          \
    if ((ehdr) == NULL ||                                               \
        (ehdr)->e_ident[EI_CLASS] != (elf_class) ||                     \
        (ehdr)->e_phentsize != sizeof(Elf_Phdr))                        \
      return 0;                                                         \
    const Elf_Phdr *phdr =                                              \
        (const Elf_Phdr *) ((const char *) (ehdr) + (ehdr)->e_phoff);   \
    for (int i = 0; i < (ehdr)->e_phnum; ++i) {                         \
      if (phdr[i].p_type == PT_TLS) {                                   \
        cached_tls_info.tls_alignment = phdr[i].p_align;                \
        cached_tls_info.tdata_start =                                   \
            (const char *) (uintptr_t) phdr[i].p_vaddr;                 \
        /* For a PIE, we should offset by the load address. */          \
        if ((ehdr)->e_type == ET_DYN)                                   \
          cached_tls_info.tdata_start += (uintptr_t) (ehdr);            \
        cached_tls_info.tdata_size = phdr[i].p_filesz;                  \
        cached_tls_info.tbss_size = phdr[i].p_memsz - phdr[i].p_filesz; \
        return 1;                                                       \
      }                                                                 \
    }                                                                   \
    return 0;                                                           \
  }

/* Instantiate the scanner for both 32-bit and 64-bit ELF headers. */
DEFINE_READ_PHDR(read_phdr32, &__ehdr_start.ehdr32, Elf32_Phdr, ELFCLASS32)
DEFINE_READ_PHDR(read_phdr64, &__ehdr_start.ehdr64, Elf64_Phdr, ELFCLASS64)
136
/*
 * Return the cached TLS template info, computing it on the first call.
 * First tries the PT_TLS program header (via __ehdr_start), then falls
 * back to the __tls_template_* symbols defined by PNaCl's ExpandTls
 * pass.  tls_alignment == 0 marks the cache as not yet filled.
 */
static const struct tls_info *get_tls_info(void) {
  if (cached_tls_info.tls_alignment == 0) {
    int did_read_phdr;
#if defined(__pnacl__) || defined(__x86_64__)
    /*
     * This is only needed in non-ABI-stable pexes for which the
     * ExpandTls pass has not been run.  On ABI-stable pexes,
     * link-time optimization will optimize away these calls because
     * &__ehdr_start == NULL.
     */
    did_read_phdr = read_phdr32() || read_phdr64();
#else
    did_read_phdr = read_phdr32();
#endif

    if (!did_read_phdr) {
      /*
       * We didn't find anything that way, so assume that we were
       * built with PNaCl's ExpandTls LLVM pass.
       */
      cached_tls_info.tls_alignment = __tls_template_alignment;
      cached_tls_info.tdata_start = &__tls_template_start;
      cached_tls_info.tdata_size = (&__tls_template_tdata_end -
                                    &__tls_template_start);
      cached_tls_info.tbss_size = (&__tls_template_end -
                                   &__tls_template_tdata_end);
    }

    finish_info_cache(&cached_tls_info);
  }
  return &cached_tls_info;
}
169
170 /*
171 * We support x86 and ARM TLS layouts.
172 *
173 * x86 layout:
174 * * TLS data + BSS
175 * * padding to round TLS data+BSS size upto tls_alignment
176 * --- thread pointer ($tp) points here
177 * * TDB (thread library's data block)
178 *
179 * ARM layout:
180 * * TDB (thread library's data block)
181 * * note that no padding follows the TDB
182 * --- thread pointer ($tp) points here
183 * * 8-byte header for use by the thread library
184 * * padding to round 8-byte header upto tls_alignment
185 * * TLS data + BSS
186 *
187 * The offset from the thread pointer to the TLS data is fixed by the
188 * linker.
189 *
190 * The addresses of the thread pointer and TLS data must both be
191 * aligned to tls_alignment. Since combined_area is not necessarily
192 * aligned to tls_alignment, padding may be required at the start of
193 * both x86 and ARM TLS layouts (not shown above).
194 */
195
tp_from_combined_area(const struct tls_info * info,void * combined_area,size_t tdb_size)196 static char *tp_from_combined_area(const struct tls_info *info,
197 void *combined_area, size_t tdb_size) {
198 size_t tls_size = info->tdata_size + info->tbss_size;
199 ptrdiff_t tdboff = __nacl_tp_tdb_offset(tdb_size);
200 if (tdboff < 0) {
201 /*
202 * The combined area is big enough to hold the TDB and then be aligned
203 * up to the $tp alignment requirement. If the whole area is aligned
204 * to the $tp requirement, then aligning the beginning of the area
205 * would give us the beginning unchanged, which is not what we need.
206 * Instead, align from the putative end of the TDB, to decide where
207 * $tp--the true end of the TDB--should actually lie.
208 */
209 return aligned_addr((char *) combined_area + tdb_size, info->tls_alignment);
210 } else {
211 /*
212 * The linker increases the size of the TLS block up to its alignment
213 * requirement, and that total is subtracted from the $tp address to
214 * access the TLS area. To keep that final address properly aligned,
215 * we need to align up from the allocated space and then add the
216 * aligned size.
217 */
218 tls_size = aligned_size(tls_size, info->tls_alignment);
219 return aligned_addr((char *) combined_area, info->tls_alignment) + tls_size;
220 }
221 }
222
/*
 * Lay out and initialize the TLS area and TDB for a new thread inside
 * combined_area, which must hold at least
 * __nacl_tls_combined_size(tdb_size) bytes.  Copies the .tdata
 * template, zero-fills the .tbss portion, and (on x86) makes the first
 * word of the TDB point to itself.  Returns the thread pointer ($tp).
 */
void *__nacl_tls_initialize_memory(void *combined_area, size_t tdb_size) {
  const struct tls_info *info = get_tls_info();
  size_t tls_size = info->tdata_size + info->tbss_size;
  char *combined_area_end =
      (char *) combined_area + __nacl_tls_combined_size(tdb_size);
  void *tp = tp_from_combined_area(info, combined_area, tdb_size);
  char *start = tp;

  if (__nacl_tp_tls_offset(0) > 0) {
    /*
     * From $tp, we skip the header size and then must round up from
     * there to the required alignment (which is what the linker will
     * do when calculating TPOFF relocations at link time).  The
     * end result is that the offset from $tp matches the one chosen
     * by the linker exactly and that the final address is aligned to
     * info->tls_alignment (since $tp was already aligned to at least
     * that much).
     */
    start += aligned_size(__nacl_tp_tls_offset(tls_size), info->tls_alignment);
  } else {
    /*
     * We'll subtract the aligned size of the TLS block from $tp, which
     * must itself already be adequately aligned.
     */
    start += __nacl_tp_tls_offset(aligned_size(tls_size, info->tls_alignment));
  }

  /* Sanity check.  (But avoid pulling in assert() here.) */
  if (start + info->tdata_size + info->tbss_size > combined_area_end)
    __builtin_trap();
  /* Copy the .tdata initializers, then zero-fill the .tbss region. */
  memcpy(start, info->tdata_start, info->tdata_size);
  memset(start + info->tdata_size, 0, info->tbss_size);

  if (__nacl_tp_tdb_offset(tdb_size) == 0) {
    /*
     * On x86 (but not on ARM), the TDB sits directly at $tp and the
     * first word there must hold the $tp pointer itself.
     */
    void *tdb = (char *) tp + __nacl_tp_tdb_offset(tdb_size);
    *(void **) tdb = tdb;
  }

  return tp;
}
267
__nacl_tls_combined_size(size_t tdb_size)268 size_t __nacl_tls_combined_size(size_t tdb_size) {
269 const struct tls_info *info = get_tls_info();
270 size_t tls_size = info->tdata_size + info->tbss_size;
271 ptrdiff_t tlsoff = __nacl_tp_tls_offset(tls_size);
272 size_t combined_size = tls_size + tdb_size;
273 /*
274 * __nacl_tls_initialize_memory() accepts a non-aligned pointer; it
275 * aligns the thread pointer itself. We have to reserve some extra
276 * space to allow this alignment padding to occur.
277 */
278 combined_size += info->tls_alignment - 1;
279 if (tlsoff > 0) {
280 /*
281 * ARM case: We have to add ARM's 8 byte header, because that is
282 * not incorporated into tls_size. Furthermore, the header is
283 * padded out to tls_alignment.
284 */
285 combined_size += aligned_size(tlsoff, info->tls_alignment);
286 }
287 return combined_size;
288 }
289