1 /*
2  * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 /*
8  * Native Client support for thread local storage
9  */
10 
11 #include <stdbool.h>
12 #include <stdint.h>
13 #include <string.h>
14 
15 #include "native_client/src/include/elf32.h"
16 #include "native_client/src/include/elf64.h"
17 #include "native_client/src/untrusted/nacl/nacl_thread.h"
18 #include "native_client/src/untrusted/nacl/tls.h"
19 #include "native_client/src/untrusted/nacl/tls_params.h"
20 
21 /*
22  * We support two mechanisms for finding templates for TLS variables:
23  *
24  *  1) The PT_TLS header (in the ELF program headers), which is
25  *     located via the __ehdr_start symbol, which current binutils
26  *     linkers define when the ELF file headers and program headers
27  *     are mapped into the address space.
28  *
29  *  2) The __tls_template_* symbols, which are defined by PNaCl's
30  *     ExpandTls LLVM pass, which is used when linking ABI-stable
31  *     pexes.
32  *
33  * We use weak references to refer to these symbols so that the code
34  * can work with both mechanisms.
35  *
36  * The __tls_template_* symbols used to be defined by the binutils
37  * linker (using linker scripts), but this has been superseded by
38  * having the linker define __ehdr_start.
39  */
40 
/*
 * Markers for the TLS template: get_tls_info() uses the addresses of
 * the first three as the start of the initializer data, the end of the
 * initializer data, and the end of the whole template respectively,
 * and reads the required alignment from the fourth.  All are weak
 * references.
 */
extern char __tls_template_start __attribute__((weak));
extern char __tls_template_tdata_end __attribute__((weak));
extern char __tls_template_end __attribute__((weak));
extern uint32_t __tls_template_alignment __attribute__((weak));

/*
 * Start of the ELF file header, viewed as either ELF class.  This is a
 * weak reference too; the read_phdr*() functions compare its address
 * against NULL and check e_ident[EI_CLASS] before trusting either view.
 */
extern union {
  Elf32_Ehdr ehdr32;
  Elf64_Ehdr ehdr64;
} __ehdr_start __attribute__((weak, visibility("hidden")));
50 
/*
 * Round |size| up to the next multiple of |alignment|.
 *
 * The mask trick only yields a correct round-up when |alignment| is a
 * power of two.
 */
static size_t aligned_size(size_t size, size_t alignment) {
  const size_t low_bits = alignment - 1;
  return (size + low_bits) & ~low_bits;
}
54 
/*
 * Round the address |start| up to the next multiple of |alignment| and
 * return it as a byte pointer.  The rounding is delegated to
 * aligned_size(), so the same restrictions on |alignment| apply.
 */
static char *aligned_addr(void *start, size_t alignment) {
  size_t rounded = aligned_size((size_t) start, alignment);
  return (void *) rounded;
}
58 
/*
 * Collect information about the TLS initializer data here.
 * The first call to get_tls_info() fills in all the data,
 * based either on PT_TLS or on __tls_template_*.
 */

struct tls_info {
  const char *tdata_start;  /* Address of .tdata (initializer data) */
  size_t tdata_size;        /* Size of .tdata (initializer data) */
  size_t tbss_size;         /* Size of .tbss (zero-fill space after .tdata) */
  size_t tls_alignment;     /* Alignment required for TLS segment */
};

/*
 * Process-wide cache of the TLS template description.  A tls_alignment
 * of zero is what get_tls_info() tests to decide that the cache has not
 * been filled in yet.
 */
static struct tls_info cached_tls_info;
73 
74 #if defined(__arm__)
75 
76 /*
77  * On ARM, the linker might not optimize GD-model TLS accesses into the
78  * simpler forms that don't use a helper function.  __tls_get_addr is
79  * called with the address of a two-word GOT entry: the first word is the
80  * module ID, which is immaterial for static linking; the second word is
81  * the offset within the (only) module's TLS data.  The TLS data always
82  * starts at 8 bytes past the thread pointer, plus alignment.
83  */
84 
85 static size_t tp_tls_offset;
86 
finish_info_cache(const struct tls_info * info)87 static void finish_info_cache(const struct tls_info *info) {
88   /*
89    * Cache this calculation at startup so it doesn't need to be repeated.
90    */
91   tp_tls_offset = aligned_size(8, info->tls_alignment);
92 }
93 
__tls_get_addr(uintptr_t * entry)94 void *__tls_get_addr(uintptr_t *entry) {
95   /*
96    * This is never called before finish_info_cache has been called.
97    */
98   return (char *) __builtin_thread_pointer() + tp_tls_offset + entry[1];
99 }
100 
101 #else  /* !defined(__arm__) */
102 
/*
 * Nothing extra needs to be cached outside of ARM builds; this stub
 * only exists so that get_tls_info() can call it unconditionally.
 */
static void finish_info_cache(const struct tls_info *info) {
  (void) info;
}
106 
107 #endif  /* defined(__arm__) */
108 
109 #define DEFINE_READ_PHDR(func_name, ehdr, Elf_Phdr, elf_class)          \
110   __attribute__((unused))                                               \
111   static int func_name(void) {                                          \
112     if ((ehdr) == NULL ||                                               \
113         (ehdr)->e_ident[EI_CLASS] != (elf_class) ||                     \
114         (ehdr)->e_phentsize != sizeof(Elf_Phdr))                        \
115       return 0;                                                         \
116     const Elf_Phdr *phdr =                                              \
117       (const Elf_Phdr *) ((const char *) (ehdr) + (ehdr)->e_phoff);     \
118     for (int i = 0; i < (ehdr)->e_phnum; ++i) {                         \
119       if (phdr[i].p_type == PT_TLS) {                                   \
120         cached_tls_info.tls_alignment = phdr[i].p_align;                \
121         cached_tls_info.tdata_start =                                   \
122           (const char *) (uintptr_t) phdr[i].p_vaddr;                   \
123         /* For a PIE, we should offset the the load address. */         \
124         if ((ehdr)->e_type == ET_DYN)                                   \
125           cached_tls_info.tdata_start += (uintptr_t) (ehdr);            \
126         cached_tls_info.tdata_size = phdr[i].p_filesz;                  \
127         cached_tls_info.tbss_size = phdr[i].p_memsz - phdr[i].p_filesz; \
128         return 1;                                                       \
129       }                                                                 \
130     }                                                                   \
131     return 0;                                                           \
132   }
133 
134 DEFINE_READ_PHDR(read_phdr32, &__ehdr_start.ehdr32, Elf32_Phdr, ELFCLASS32)
135 DEFINE_READ_PHDR(read_phdr64, &__ehdr_start.ehdr64, Elf64_Phdr, ELFCLASS64)
136 
get_tls_info(void)137 static const struct tls_info *get_tls_info(void) {
138   if (cached_tls_info.tls_alignment == 0) {
139     int did_read_phdr;
140 #if defined(__pnacl__) || defined(__x86_64__)
141     /*
142      * This is only needed in non-ABI-stable pexes for which the
143      * ExpandTls pass has not been run.  On ABI-stable pexes,
144      * link-time optimization will optimize away these calls because
145      * &__ehdr_start == NULL.
146      */
147     did_read_phdr = read_phdr32() || read_phdr64();
148 #else
149     did_read_phdr = read_phdr32();
150 #endif
151 
152     if (!did_read_phdr) {
153       /*
154        * We didn't find anything that way, so assume that we were
155        * built with PNaCl's ExpandTls LLVM pass.
156        */
157       cached_tls_info.tls_alignment = __tls_template_alignment;
158       cached_tls_info.tdata_start = &__tls_template_start;
159       cached_tls_info.tdata_size = (&__tls_template_tdata_end -
160                                     &__tls_template_start);
161       cached_tls_info.tbss_size = (&__tls_template_end -
162                                    &__tls_template_tdata_end);
163     }
164 
165     finish_info_cache(&cached_tls_info);
166   }
167   return &cached_tls_info;
168 }
169 
170 /*
171  * We support x86 and ARM TLS layouts.
172  *
173  * x86 layout:
174  *  * TLS data + BSS
175  *  * padding to round TLS data+BSS size upto tls_alignment
176  *  --- thread pointer ($tp) points here
177  *  * TDB (thread library's data block)
178  *
179  * ARM layout:
180  *  * TDB (thread library's data block)
181  *     * note that no padding follows the TDB
182  *  --- thread pointer ($tp) points here
183  *  * 8-byte header for use by the thread library
184  *  * padding to round 8-byte header upto tls_alignment
185  *  * TLS data + BSS
186  *
187  * The offset from the thread pointer to the TLS data is fixed by the
188  * linker.
189  *
190  * The addresses of the thread pointer and TLS data must both be
191  * aligned to tls_alignment.  Since combined_area is not necessarily
192  * aligned to tls_alignment, padding may be required at the start of
193  * both x86 and ARM TLS layouts (not shown above).
194  */
195 
tp_from_combined_area(const struct tls_info * info,void * combined_area,size_t tdb_size)196 static char *tp_from_combined_area(const struct tls_info *info,
197                                    void *combined_area, size_t tdb_size) {
198   size_t tls_size = info->tdata_size + info->tbss_size;
199   ptrdiff_t tdboff = __nacl_tp_tdb_offset(tdb_size);
200   if (tdboff < 0) {
201     /*
202      * The combined area is big enough to hold the TDB and then be aligned
203      * up to the $tp alignment requirement.  If the whole area is aligned
204      * to the $tp requirement, then aligning the beginning of the area
205      * would give us the beginning unchanged, which is not what we need.
206      * Instead, align from the putative end of the TDB, to decide where
207      * $tp--the true end of the TDB--should actually lie.
208      */
209     return aligned_addr((char *) combined_area + tdb_size, info->tls_alignment);
210   } else {
211     /*
212      * The linker increases the size of the TLS block up to its alignment
213      * requirement, and that total is subtracted from the $tp address to
214      * access the TLS area.  To keep that final address properly aligned,
215      * we need to align up from the allocated space and then add the
216      * aligned size.
217      */
218     tls_size = aligned_size(tls_size, info->tls_alignment);
219     return aligned_addr((char *) combined_area, info->tls_alignment) + tls_size;
220   }
221 }
222 
__nacl_tls_initialize_memory(void * combined_area,size_t tdb_size)223 void *__nacl_tls_initialize_memory(void *combined_area, size_t tdb_size) {
224   const struct tls_info *info = get_tls_info();
225   size_t tls_size = info->tdata_size + info->tbss_size;
226   char *combined_area_end =
227       (char *) combined_area + __nacl_tls_combined_size(tdb_size);
228   void *tp = tp_from_combined_area(info, combined_area, tdb_size);
229   char *start = tp;
230 
231   if (__nacl_tp_tls_offset(0) > 0) {
232     /*
233      * From $tp, we skip the header size and then must round up from
234      * there to the required alignment (which is what the linker will
235      * will do when calculating TPOFF relocations at link time).  The
236      * end result is that the offset from $tp matches the one chosen
237      * by the linker exactly and that the final address is aligned to
238      * info->tls_alignment (since $tp was already aligned to at least
239      * that much).
240      */
241     start += aligned_size(__nacl_tp_tls_offset(tls_size), info->tls_alignment);
242   } else {
243     /*
244      * We'll subtract the aligned size of the TLS block from $tp, which
245      * must itself already be adequately aligned.
246      */
247     start += __nacl_tp_tls_offset(aligned_size(tls_size, info->tls_alignment));
248   }
249 
250   /* Sanity check.  (But avoid pulling in assert() here.) */
251   if (start + info->tdata_size + info->tbss_size > combined_area_end)
252     __builtin_trap();
253   memcpy(start, info->tdata_start, info->tdata_size);
254   memset(start + info->tdata_size, 0, info->tbss_size);
255 
256   if (__nacl_tp_tdb_offset(tdb_size) == 0) {
257     /*
258      * On x86 (but not on ARM), the TDB sits directly at $tp and the
259      * first word there must hold the $tp pointer itself.
260      */
261     void *tdb = (char *) tp + __nacl_tp_tdb_offset(tdb_size);
262     *(void **) tdb = tdb;
263   }
264 
265   return tp;
266 }
267 
__nacl_tls_combined_size(size_t tdb_size)268 size_t __nacl_tls_combined_size(size_t tdb_size) {
269   const struct tls_info *info = get_tls_info();
270   size_t tls_size = info->tdata_size + info->tbss_size;
271   ptrdiff_t tlsoff = __nacl_tp_tls_offset(tls_size);
272   size_t combined_size = tls_size + tdb_size;
273   /*
274    * __nacl_tls_initialize_memory() accepts a non-aligned pointer; it
275    * aligns the thread pointer itself.  We have to reserve some extra
276    * space to allow this alignment padding to occur.
277    */
278   combined_size += info->tls_alignment - 1;
279   if (tlsoff > 0) {
280     /*
281      * ARM case: We have to add ARM's 8 byte header, because that is
282      * not incorporated into tls_size.  Furthermore, the header is
283      * padded out to tls_alignment.
284      */
285     combined_size += aligned_size(tlsoff, info->tls_alignment);
286   }
287   return combined_size;
288 }
289