1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (C) Mellanox Technologies Ltd. 2001-2015.  ALL RIGHTS RESERVED.
4  * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
5  *                         reserved.
6  * $COPYRIGHT$
7  *
8  * Additional copyrights may follow
9  *
10  * $HEADER$
11  */
12 /*
13  * Copied from OpenUCX
14  */
15 
16 #include "patcher_linux.h"
17 
18 #include "opal/mca/patcher/base/base.h"
19 
20 #include "opal/constants.h"
21 #include "opal/util/sys_limits.h"
22 #include "opal/util/output.h"
23 #include "opal/prefetch.h"
24 
25 #if defined(HAVE_SYS_AUXV_H)
26 #include <sys/auxv.h>
27 #endif
28 
29 #include <elf.h>
30 
31 #include <sys/mman.h>
32 #include <pthread.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <dlfcn.h>
37 #include <fcntl.h>
38 #include <link.h>
39 
40 static void *mca_patcher_linux_dlopen(const char *filename, int flag);
41 
42 typedef struct mca_patcher_linux_dl_iter_context {
43     mca_patcher_linux_patch_t *patch;
44     bool remove;
45     int status;
46 } mca_patcher_linux_dl_iter_context_t;
47 
48 OBJ_CLASS_INSTANCE(mca_patcher_linux_patch_got_t, opal_list_item_t, NULL, NULL);
49 
/* Constructor for a linux patch object: sets up the (initially empty) list
 * that records every GOT entry modified on behalf of this patch. */
static void mca_patcher_linux_patch_construct (mca_patcher_linux_patch_t *patch)
{
    opal_list_t *got_list = &patch->patch_got_list;

    OBJ_CONSTRUCT(got_list, opal_list_t);
}
54 
/* Destructor for a linux patch object: tears down the list of recorded GOT
 * entries. Note that this does not write anything back into the GOT. */
static void mca_patcher_linux_patch_destruct (mca_patcher_linux_patch_t *patch)
{
    opal_list_t *got_list = &patch->patch_got_list;

    OPAL_LIST_DESTRUCT(got_list);
}
59 
/* Class instance for the linux patch object: extends the base patch type and
 * uses the construct/destruct pair defined above. */
OBJ_CLASS_INSTANCE(mca_patcher_linux_patch_t,
                   mca_patcher_base_patch_t,
                   mca_patcher_linux_patch_construct,
                   mca_patcher_linux_patch_destruct);
62 
/* Address of the original dlopen(); filled in when the dlopen hook is installed */
64 static void *(*orig_dlopen) (const char *, int);
65 
/*
 * Scan a program header table for its PT_DYNAMIC entry.
 *
 * phdr  - first program header of the table
 * phnum - number of headers in the table
 * phent - distance in bytes from one header to the next
 *
 * Returns the first header whose type is PT_DYNAMIC, or NULL if the table
 * contains none.
 */
static const ElfW(Phdr) *
mca_patcher_linux_get_phdr_dynamic(const ElfW(Phdr) *phdr, uint16_t phnum, int phent)
{
    const ElfW(Phdr) *cur = phdr;

    for (uint16_t left = phnum ; left != 0 ; --left) {
        if (PT_DYNAMIC == cur->p_type) {
            return cur;
        }

        /* step by the caller-supplied entry size, not sizeof(*cur) */
        cur = (const ElfW(Phdr) *) ((intptr_t) cur + phent);
    }

    return NULL;
}
77 
/*
 * Look up one tag in an object's dynamic section.
 *
 * base - load address of the object
 * pdyn - the object's PT_DYNAMIC program header (must not be NULL)
 * type - dynamic tag to search for (DT_*)
 *
 * The section is read from base + pdyn->p_vaddr up to the terminating entry
 * whose tag is zero. Returns the d_val of the first matching entry converted
 * to a pointer, or NULL when the tag is absent.
 */
static void *mca_patcher_linux_get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn, ElfW(Sxword) type)
{
    ElfW(Dyn) *entry = (ElfW(Dyn) *) (base + pdyn->p_vaddr);

    while (DT_NULL != entry->d_tag) {
        if (type == entry->d_tag) {
            return (void *) (uintptr_t) entry->d_un.d_val;
        }
        ++entry;
    }

    return NULL;
}
88 
/*
 * Locate the GOT slot that an object's PLT relocations associate with a
 * given symbol name.
 *
 * base   - load address of the object
 * phdr   - the object's program header table
 * phnum  - number of program headers
 * phent  - size in bytes of one program header
 * symbol - name of the symbol to look for
 *
 * Returns the address base + r_offset of the first DT_JMPREL relocation whose
 * symbol name equals `symbol`, or NULL when the object has no such relocation.
 * NULL is also returned when the object has no PT_DYNAMIC header or lacks any
 * of DT_JMPREL / DT_SYMTAB / DT_STRTAB, so such objects are skipped instead of
 * dereferencing a NULL table pointer.
 *
 * NOTE(review): the relocation table is always walked as ElfW(Rela); objects
 * whose DT_PLTREL is DT_REL are not handled here.
 */
static void * mca_patcher_linux_get_got_entry (ElfW(Addr) base, const ElfW(Phdr) *phdr, int16_t phnum,
                                               int phent, const char *symbol)
{
    const ElfW(Phdr) *dphdr;
    void *jmprel, *strtab;
    ElfW(Sym)  *symtab;
    size_t pltrelsz;

    dphdr = mca_patcher_linux_get_phdr_dynamic (phdr, phnum, phent);
    if (NULL == dphdr) {
        /* no dynamic section: nothing in this object can be looked up */
        return NULL;
    }

    jmprel = mca_patcher_linux_get_dynentry (base, dphdr, DT_JMPREL);
    symtab = (ElfW(Sym) *) mca_patcher_linux_get_dynentry (base, dphdr, DT_SYMTAB);
    strtab = mca_patcher_linux_get_dynentry (base, dphdr, DT_STRTAB);
    pltrelsz = (size_t) (uintptr_t) mca_patcher_linux_get_dynentry (base, dphdr, DT_PLTRELSZ);

    if (NULL == jmprel || NULL == symtab || NULL == strtab) {
        /* without all three tables the PLT relocations cannot be resolved to names */
        return NULL;
    }

    for (ElfW(Rela) *reloc = jmprel; (uintptr_t) reloc < (uintptr_t) jmprel + pltrelsz; ++reloc) {
#if SIZEOF_VOID_P == 8
        uint32_t relsymidx = ELF64_R_SYM(reloc->r_info);
#else
        uint32_t relsymidx = ELF32_R_SYM(reloc->r_info);
#endif
        /* st_name is an offset into the dynamic string table */
        char *elf_sym = (char *) strtab + symtab[relsymidx].st_name;

        if (0 == strcmp (symbol, elf_sym)) {
            return (void *)(base + reloc->r_offset);
        }
    }

    return NULL;
}
119 
mca_patcher_linux_get_aux_phent(void)120 static int mca_patcher_linux_get_aux_phent (void)
121 {
122 #if !defined(HAVE_SYS_AUXV_H)
123 #define MCA_PATCHER_LINUX_AUXV_BUF_LEN 16
124     static const char *proc_auxv_filename = "/proc/self/auxv";
125     static int phent = 0;
126     ElfW(auxv_t) buffer[MCA_PATCHER_LINUX_AUXV_BUF_LEN];
127     unsigned count;
128     ssize_t nread;
129     int fd;
130 
131     /* Can avoid lock here - worst case we'll read the file more than once */
132     if (phent == 0) {
133         fd = open(proc_auxv_filename, O_RDONLY);
134         if (fd < 0) {
135             opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
136                                  "failed to open '%s' for reading: %s", proc_auxv_filename,
137                                  strerror (errno));
138             return OPAL_ERROR;
139         }
140 
141         /* Use small buffer on the stack, avoid using malloc() */
142         do {
143             nread = read(fd, buffer, sizeof(buffer));
144             if (nread < 0) {
145                 opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
146                                      "failed to read %" PRIsize_t " bytes from %s (ret=%ld): %s", sizeof (buffer),
147                                       proc_auxv_filename, nread, strerror (errno));
148                 break;
149             }
150 
151             count = nread / sizeof(buffer[0]);
152             for (unsigned i = 0 ; i < count && AT_NULL != buffer[i].a_type ; ++i) {
153                 if (AT_PHENT == buffer[i].a_type) {
154                     phent = buffer[i].a_un.a_val;
155                     opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
156                                          "read phent from %s: %d", proc_auxv_filename, phent);
157                     break;
158                 }
159             }
160         } while ((count > 0) && (phent == 0));
161 
162         close(fd);
163     }
164 
165     return phent;
166 #else
167     return getauxval (AT_PHENT);
168 #endif
169 }
170 
171 static int
mca_patcher_linux_modify_got(ElfW (Addr)base,const ElfW (Phdr)* phdr,const char * phname,int16_t phnum,int phent,mca_patcher_linux_dl_iter_context_t * ctx)172 mca_patcher_linux_modify_got (ElfW(Addr) base, const ElfW(Phdr) *phdr, const char *phname,
173                               int16_t phnum, int phent, mca_patcher_linux_dl_iter_context_t *ctx)
174 {
175     long page_size = opal_getpagesize ();
176     void **entry, *page;
177     int ret;
178 
179     entry = mca_patcher_linux_get_got_entry (base, phdr, phnum, phent, ctx->patch->super.patch_symbol);
180     if (entry == NULL) {
181         return OPAL_SUCCESS;
182     }
183 
184     page = (void *)((intptr_t)entry & ~(page_size - 1));
185     ret = mprotect(page, page_size, PROT_READ|PROT_WRITE);
186     if (ret < 0) {
187         opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
188                              "failed to modify GOT page %p to rw: %s", page, strerror (errno));
189         return OPAL_ERR_NOT_SUPPORTED;
190     }
191 
192     if (!ctx->remove) {
193         if (*entry != (void *) ctx->patch->super.patch_value) {
194             mca_patcher_linux_patch_got_t *patch_got = OBJ_NEW(mca_patcher_linux_patch_got_t);
195             if (NULL == patch_got) {
196                 return OPAL_ERR_OUT_OF_RESOURCE;
197             }
198 
199             opal_output_verbose (MCA_BASE_VERBOSE_TRACE, opal_patcher_base_framework.framework_output,
200                                  "patch %p (%s): modifying got entry %p. original value %p. new value %p\n", (void *)ctx->patch,
201                                  ctx->patch->super.patch_symbol, (void *) entry, *entry, (void *) ctx->patch->super.patch_value);
202 
203             patch_got->got_entry = entry;
204             patch_got->got_orig = *entry;
205 
206             opal_list_append (&ctx->patch->patch_got_list, &patch_got->super);
207 
208             *entry = (void *) ctx->patch->super.patch_value;
209         }
210     } else {
211         /* find the appropriate entry and restore the original value */
212         mca_patcher_linux_patch_got_t *patch_got;
213         OPAL_LIST_FOREACH_REV(patch_got, &ctx->patch->patch_got_list, mca_patcher_linux_patch_got_t) {
214             if (patch_got->got_entry == entry) {
215                 opal_output_verbose (MCA_BASE_VERBOSE_TRACE, opal_patcher_base_framework.framework_output,
216                                      "restoring got entry %p with original value %p\n", (void *) entry, patch_got->got_orig);
217                 if (*entry == (void *) ctx->patch->super.patch_value) {
218                     *entry = patch_got->got_orig;
219                 }
220                 opal_list_remove_item (&ctx->patch->patch_got_list, &patch_got->super);
221                 OBJ_RELEASE(patch_got);
222                 break;
223             }
224         }
225     }
226 
227     return OPAL_SUCCESS;
228 }
229 
mca_patcher_linux_phdr_iterator(struct dl_phdr_info * info,size_t size,void * data)230 static int mca_patcher_linux_phdr_iterator(struct dl_phdr_info *info, size_t size, void *data)
231 {
232     mca_patcher_linux_dl_iter_context_t *ctx = data;
233     int phent;
234 
235     phent = mca_patcher_linux_get_aux_phent();
236     if (phent <= 0) {
237         opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
238                              "failed to read phent size");
239         ctx->status = OPAL_ERR_NOT_SUPPORTED;
240         return -1;
241     }
242 
243     ctx->status = mca_patcher_linux_modify_got (info->dlpi_addr, info->dlpi_phdr,
244                                                 info->dlpi_name, info->dlpi_phnum,
245                                                 phent, ctx);
246     if (ctx->status == OPAL_SUCCESS) {
247         return 0; /* continue iteration and patch all objects */
248     } else {
249         return -1; /* stop iteration if got a real error */
250     }
251 }
252 
253 /* called with lock held */
mca_patcher_linux_apply_patch(mca_patcher_linux_patch_t * patch)254 static int mca_patcher_linux_apply_patch (mca_patcher_linux_patch_t *patch)
255 {
256     mca_patcher_linux_dl_iter_context_t ctx = {
257         .patch    = patch,
258         .remove   = false,
259         .status   = OPAL_SUCCESS,
260     };
261 
262     /* Avoid locks here because we don't modify ELF data structures.
263      * Worst case the same symbol will be written more than once.
264      */
265     (void) dl_iterate_phdr(mca_patcher_linux_phdr_iterator, &ctx);
266 
267     return ctx.status;
268 }
269 
/*
 * Undo a patch in every currently loaded object by walking them in restore
 * mode.
 *
 * Returns the status left behind by the object walk: OPAL_SUCCESS, or the
 * error that stopped it.
 */
static int mca_patcher_linux_remove_patch (mca_patcher_linux_patch_t *patch)
{
    mca_patcher_linux_dl_iter_context_t ctx;

    ctx.patch  = patch;
    ctx.remove = true;
    ctx.status = OPAL_SUCCESS;

    /* No extra locking for the walk: ELF data structures are not modified,
     * and at worst the same symbol gets written more than once. */
    (void) dl_iterate_phdr(mca_patcher_linux_phdr_iterator, &ctx);

    return ctx.status;
}
285 
mca_patcher_linux_dlopen(const char * filename,int flag)286 static void *mca_patcher_linux_dlopen(const char *filename, int flag)
287 {
288     OPAL_PATCHER_BEGIN;
289     mca_patcher_linux_patch_t *patch;
290     void *handle;
291 
292     handle = orig_dlopen (filename, flag);
293     if (handle != NULL) {
294         /*
295          * Every time a new object is loaded, we must update its relocations
296          * with our list of patches (including dlopen itself). This code is less
297          * efficient and will modify all existing objects every time, but good
298          * enough.
299          */
300         opal_mutex_lock (&mca_patcher_linux_module.patch_list_mutex);
301         OPAL_LIST_FOREACH(patch, &mca_patcher_linux_module.patch_list, mca_patcher_linux_patch_t) {
302             if (!patch->super.patch_data_size) {
303                 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_patcher_base_framework.framework_output,
304                                      "in dlopen(), re-applying '%s' to %p", patch->super.patch_symbol, (void *) patch->super.patch_value);
305                 /* ignore hook binary patches */
306                 mca_patcher_linux_apply_patch (patch);
307             }
308         }
309         opal_mutex_unlock (&mca_patcher_linux_module.patch_list_mutex);
310     }
311 
312     OPAL_PATCHER_END;
313     return handle;
314 }
315 
mca_patcher_linux_get_orig(const char * symbol,void * replacement)316 static intptr_t mca_patcher_linux_get_orig (const char *symbol, void *replacement)
317 {
318     const char *error;
319     void *func_ptr;
320 
321     func_ptr = dlsym(RTLD_DEFAULT, symbol);
322     if (func_ptr == replacement) {
323         (void)dlerror();
324         func_ptr = dlsym(RTLD_NEXT, symbol);
325         if (func_ptr == NULL) {
326             error = dlerror();
327             opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
328                                  "could not find address of original %s(): %s", symbol, error ? error : "Unknown error");
329         }
330     }
331 
332     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_patcher_base_framework.framework_output,
333                          "original %s() is at %p", symbol, func_ptr);
334 
335     return (intptr_t) func_ptr;
336 }
337 
mca_patcher_linux_patch_symbol(const char * symbol_name,uintptr_t replacement,uintptr_t * orig)338 static int mca_patcher_linux_patch_symbol (const char *symbol_name, uintptr_t replacement, uintptr_t *orig)
339 {
340     mca_patcher_linux_patch_t *patch = OBJ_NEW(mca_patcher_linux_patch_t);
341     int rc;
342 
343     if (OPAL_UNLIKELY(NULL == patch)) {
344         return OPAL_ERR_OUT_OF_RESOURCE;
345     }
346 
347     patch->super.patch_symbol = strdup (symbol_name);
348     if (NULL == patch->super.patch_symbol) {
349         OBJ_RELEASE(patch);
350         return OPAL_ERR_OUT_OF_RESOURCE;
351     }
352 
353     patch->super.patch_value = mca_patcher_base_addr_text (replacement);
354     patch->super.patch_restore = (mca_patcher_base_restore_fn_t) mca_patcher_linux_remove_patch;
355 
356     /* Take lock first to handle a possible race where dlopen() is called
357      * from another thread and we may end up not patching it.
358      */
359     opal_mutex_lock (&mca_patcher_linux_module.patch_list_mutex);
360     do {
361         rc = mca_patcher_base_patch_hook (&mca_patcher_linux_module, patch->super.patch_value);
362         if (OPAL_SUCCESS != rc) {
363             OBJ_RELEASE(patch);
364             break;
365         }
366 
367         rc = mca_patcher_linux_apply_patch (patch);
368         if (OPAL_SUCCESS != rc) {
369             OBJ_RELEASE(patch);
370             break;
371         }
372 
373         *orig = mca_patcher_linux_get_orig (patch->super.patch_symbol, (void *) replacement);
374 
375         opal_list_append (&mca_patcher_linux_module.patch_list, &patch->super.super);
376     } while (0);
377     opal_mutex_unlock (&mca_patcher_linux_module.patch_list_mutex);
378 
379     return rc;
380 }
381 
382 /* called with lock held */
mca_patcher_linux_install_dlopen(void)383 static int mca_patcher_linux_install_dlopen (void)
384 {
385     return mca_patcher_linux_patch_symbol ("dlopen", (uintptr_t) mca_patcher_linux_dlopen,
386                                            (uintptr_t *) &orig_dlopen);
387 }
388 
/* Module init hook: the only work needed is installing the dlopen()
 * interception. Returns its status unchanged. */
static int mca_patcher_linux_init (void)
{
    const int rc = mca_patcher_linux_install_dlopen ();

    return rc;
}
393 
394 mca_patcher_base_module_t mca_patcher_linux_module = {
395     .patch_init = mca_patcher_linux_init,
396     .patch_symbol = mca_patcher_linux_patch_symbol,
397 };
398