1 /*
2  * Copyright 2014-2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "ac_rtld.h"
25 
26 #include "ac_binary.h"
27 #include "ac_gpu_info.h"
28 #include "util/compiler.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h"
31 
32 #include <gelf.h>
33 #include <libelf.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
/* Fallback definitions for AMDGPU-specific ELF constants. Each group is only
 * defined when the ELF headers included above did not already provide it.
 */

#ifndef EM_AMDGPU
// Old distributions may not have this enum constant
#define EM_AMDGPU 224
#endif

/* Symbol type of old-style LDS symbols; their alignment is encoded in
 * st_other (see read_private_lds_symbols). */
#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
#endif

/* Section index marking an LDS symbol; st_value of such a symbol is read as
 * its alignment (see read_private_lds_symbols). */
#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

/* Relocation types. apply_relocs() implements the ABS32*, ABS64, REL32* and
 * REL64 variants and rejects every other type as unsupported. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE          0
#define R_AMDGPU_ABS32_LO      1
#define R_AMDGPU_ABS32_HI      2
#define R_AMDGPU_ABS64         3
#define R_AMDGPU_REL32         4
#define R_AMDGPU_REL64         5
#define R_AMDGPU_ABS32         6
#define R_AMDGPU_GOTPCREL      7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO      10
#define R_AMDGPU_REL32_HI      11
#define R_AMDGPU_RELATIVE64    13
#endif

/* For the UMR disassembler. */
/* ac_rtld_upload() writes DEBUGGER_NUM_MARKERS copies of the marker dword
 * directly after the pasted .text sections. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
71 
/**
 * Per-section placement information computed by ac_rtld_open().
 */
struct ac_rtld_section {
   /* Set for SHF_ALLOC sections other than SHT_NOTE; only these sections are
    * copied into the uploaded image. */
   bool is_rx : 1;
   /* Set for executable sections named ".text"; these are placed back to back
    * at the start of the image. */
   bool is_pasted_text : 1;
   /* Byte offset of the section from the start of the uploaded image
    * (meaningful only when is_rx is set). */
   uint64_t offset;
   /* Section name as returned by elf_strptr() for the section header. */
   const char *name;
};
78 
/**
 * One shader part: its ELF handle plus a table of per-section placement
 * records indexed by ELF section index.
 */
struct ac_rtld_part {
   /* Handle created by elf_memory() in ac_rtld_open() and released by
    * elf_end() in ac_rtld_close(). */
   Elf *elf;
   /* calloc'ed array with num_sections entries; freed in ac_rtld_close(). */
   struct ac_rtld_section *sections;
   /* Section count reported by elf_getshdrnum() for this part. */
   unsigned num_sections;
};
84 
/**
 * Write one error line to stderr: the "ac_rtld error: " prefix, the message
 * formatted from \p fmt and \p va, and a trailing newline.
 */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
93 
94 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
95 
report_errorf(const char * fmt,...)96 static void report_errorf(const char *fmt, ...)
97 {
98    va_list va;
99    va_start(va, fmt);
100    report_errorvf(fmt, va);
101    va_end(va);
102 }
103 
104 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
105 
report_elf_errorf(const char * fmt,...)106 static void report_elf_errorf(const char *fmt, ...)
107 {
108    va_list va;
109    va_start(va, fmt);
110    report_errorvf(fmt, va);
111    va_end(va);
112 
113    fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
114 }
115 
116 /**
117  * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
118  * \p part_idx.
119  */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)120 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
121                                                 const char *name, unsigned part_idx)
122 {
123    util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
124       if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
125          return symbol;
126    }
127    return NULL;
128 }
129 
compare_symbol_by_align(const void * lhsp,const void * rhsp)130 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
131 {
132    const struct ac_rtld_symbol *lhs = lhsp;
133    const struct ac_rtld_symbol *rhs = rhsp;
134    if (rhs->align > lhs->align)
135       return 1;
136    if (rhs->align < lhs->align)
137       return -1;
138    return 0;
139 }
140 
141 /**
142  * Sort the given symbol list by decreasing alignment and assign offsets.
143  */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)144 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
145                            uint64_t *ptotal_size)
146 {
147    qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
148 
149    uint64_t total_size = *ptotal_size;
150 
151    for (unsigned i = 0; i < num_symbols; ++i) {
152       struct ac_rtld_symbol *s = &symbols[i];
153       assert(util_is_power_of_two_nonzero(s->align));
154 
155       total_size = align64(total_size, s->align);
156       s->offset = total_size;
157 
158       if (total_size + s->size < total_size) {
159          report_errorf("%s: size overflow", __FUNCTION__);
160          return false;
161       }
162 
163       total_size += s->size;
164    }
165 
166    *ptotal_size = total_size;
167    return true;
168 }
169 
170 /**
171  * Read LDS symbols from the given \p section of the ELF of \p part and append
172  * them to the LDS symbols list.
173  *
174  * Shared LDS symbols are filtered out.
175  */
read_private_lds_symbols(struct ac_rtld_binary * binary,unsigned part_idx,Elf_Scn * section,uint32_t * lds_end_align)176 static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
177                                      Elf_Scn *section, uint32_t *lds_end_align)
178 {
179 #define report_if(cond)                                                                            \
180    do {                                                                                            \
181       if ((cond)) {                                                                                \
182          report_errorf(#cond);                                                                     \
183          return false;                                                                             \
184       }                                                                                            \
185    } while (false)
186 #define report_elf_if(cond)                                                                        \
187    do {                                                                                            \
188       if ((cond)) {                                                                                \
189          report_elf_errorf(#cond);                                                                 \
190          return false;                                                                             \
191       }                                                                                            \
192    } while (false)
193 
194    struct ac_rtld_part *part = &binary->parts[part_idx];
195    Elf64_Shdr *shdr = elf64_getshdr(section);
196    uint32_t strtabidx = shdr->sh_link;
197    Elf_Data *symbols_data = elf_getdata(section, NULL);
198    report_elf_if(!symbols_data);
199 
200    const Elf64_Sym *symbol = symbols_data->d_buf;
201    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
202 
203    for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
204       struct ac_rtld_symbol s = {0};
205 
206       if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
207          /* old-style LDS symbols from initial prototype -- remove eventually */
208          s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
209       } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
210          s.align = MIN2(symbol->st_value, 1u << 16);
211          report_if(!util_is_power_of_two_nonzero(s.align));
212       } else
213          continue;
214 
215       report_if(symbol->st_size > 1u << 29);
216 
217       s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
218       s.size = symbol->st_size;
219       s.part_idx = part_idx;
220 
221       if (!strcmp(s.name, "__lds_end")) {
222          report_elf_if(s.size != 0);
223          *lds_end_align = MAX2(*lds_end_align, s.align);
224          continue;
225       }
226 
227       const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
228       if (shared) {
229          report_elf_if(s.align > shared->align);
230          report_elf_if(s.size > shared->size);
231          continue;
232       }
233 
234       util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
235    }
236 
237    return true;
238 
239 #undef report_if
240 #undef report_elf_if
241 }
242 
243 /**
244  * Open a binary consisting of one or more shader parts.
245  *
246  * \param binary the uninitialized struct
247  * \param i binary opening parameters
248  */
ac_rtld_open(struct ac_rtld_binary * binary,struct ac_rtld_open_info i)249 bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
250 {
251    /* One of the libelf implementations
252     * (http://www.mr511.de/software/english.htm) requires calling
253     * elf_version() before elf_memory().
254     */
255    elf_version(EV_CURRENT);
256 
257    memset(binary, 0, sizeof(*binary));
258    memcpy(&binary->options, &i.options, sizeof(binary->options));
259    binary->wave_size = i.wave_size;
260    binary->num_parts = i.num_parts;
261    binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
262    if (!binary->parts)
263       return false;
264 
265    uint64_t pasted_text_size = 0;
266    uint64_t rx_align = 1;
267    uint64_t rx_size = 0;
268    uint64_t exec_size = 0;
269 
270 #define report_if(cond)                                                                            \
271    do {                                                                                            \
272       if ((cond)) {                                                                                \
273          report_errorf(#cond);                                                                     \
274          goto fail;                                                                                \
275       }                                                                                            \
276    } while (false)
277 #define report_elf_if(cond)                                                                        \
278    do {                                                                                            \
279       if ((cond)) {                                                                                \
280          report_elf_errorf(#cond);                                                                 \
281          goto fail;                                                                                \
282       }                                                                                            \
283    } while (false)
284 
285    /* Copy and layout shared LDS symbols. */
286    if (i.num_shared_lds_symbols) {
287       if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
288                                 i.num_shared_lds_symbols))
289          goto fail;
290 
291       memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
292    }
293 
294    util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
295       symbol->part_idx = ~0u;
296 
297    unsigned max_lds_size = 64 * 1024;
298 
299    if (i.info->chip_class == GFX6 ||
300        (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
301       max_lds_size = 32 * 1024;
302 
303    uint64_t shared_lds_size = 0;
304    if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
305       goto fail;
306 
307    if (shared_lds_size > max_lds_size) {
308       fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
309               (unsigned)shared_lds_size, max_lds_size);
310       goto fail;
311    }
312    binary->lds_size = shared_lds_size;
313 
314    /* First pass over all parts: open ELFs, pre-determine the placement of
315     * sections in the memory image, and collect and layout private LDS symbols. */
316    uint32_t lds_end_align = 0;
317 
318    if (binary->options.halt_at_entry)
319       pasted_text_size += 4;
320 
321    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
322       struct ac_rtld_part *part = &binary->parts[part_idx];
323       unsigned part_lds_symbols_begin =
324          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
325 
326       part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
327       report_elf_if(!part->elf);
328 
329       const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
330       report_elf_if(!ehdr);
331       report_if(ehdr->e_machine != EM_AMDGPU);
332 
333       size_t section_str_index;
334       size_t num_shdrs;
335       report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
336       report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
337 
338       part->num_sections = num_shdrs;
339       part->sections = calloc(sizeof(*part->sections), num_shdrs);
340       report_if(!part->sections);
341 
342       Elf_Scn *section = NULL;
343       while ((section = elf_nextscn(part->elf, section))) {
344          Elf64_Shdr *shdr = elf64_getshdr(section);
345          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
346          s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
347          report_elf_if(!s->name);
348 
349          /* Cannot actually handle linked objects yet */
350          report_elf_if(shdr->sh_addr != 0);
351 
352          /* Alignment must be 0 or a power of two */
353          report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
354          uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
355 
356          if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
357             report_if(shdr->sh_flags & SHF_WRITE);
358 
359             s->is_rx = true;
360 
361             if (shdr->sh_flags & SHF_EXECINSTR) {
362                report_elf_if(shdr->sh_size & 3);
363 
364                if (!strcmp(s->name, ".text"))
365                   s->is_pasted_text = true;
366 
367                exec_size += shdr->sh_size;
368             }
369 
370             if (s->is_pasted_text) {
371                s->offset = pasted_text_size;
372                pasted_text_size += shdr->sh_size;
373             } else {
374                rx_align = align(rx_align, sh_align);
375                rx_size = align(rx_size, sh_align);
376                s->offset = rx_size;
377                rx_size += shdr->sh_size;
378             }
379          } else if (shdr->sh_type == SHT_SYMTAB) {
380             if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
381                goto fail;
382          }
383       }
384 
385       uint64_t part_lds_size = shared_lds_size;
386       if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
387                                                 part_lds_symbols_begin),
388                           util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
389                              part_lds_symbols_begin,
390                           &part_lds_size))
391          goto fail;
392       binary->lds_size = MAX2(binary->lds_size, part_lds_size);
393    }
394 
395    binary->rx_end_markers = pasted_text_size;
396    pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
397 
398    /* __lds_end is a special symbol that points at the end of the memory
399     * occupied by other LDS symbols. Its alignment is taken as the
400     * maximum of its alignment over all shader parts where it occurs.
401     */
402    if (lds_end_align) {
403       binary->lds_size = align(binary->lds_size, lds_end_align);
404 
405       struct ac_rtld_symbol *lds_end =
406          util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
407       lds_end->name = "__lds_end";
408       lds_end->size = 0;
409       lds_end->align = lds_end_align;
410       lds_end->offset = binary->lds_size;
411       lds_end->part_idx = ~0u;
412    }
413 
414    if (binary->lds_size > max_lds_size) {
415       fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
416               (unsigned)binary->lds_size, max_lds_size);
417       goto fail;
418    }
419 
420    /* Second pass: Adjust offsets of non-pasted text sections. */
421    binary->rx_size = pasted_text_size;
422    binary->rx_size = align(binary->rx_size, rx_align);
423 
424    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
425       struct ac_rtld_part *part = &binary->parts[part_idx];
426       size_t num_shdrs;
427       elf_getshdrnum(part->elf, &num_shdrs);
428 
429       for (unsigned j = 0; j < num_shdrs; ++j) {
430          struct ac_rtld_section *s = &part->sections[j];
431          if (s->is_rx && !s->is_pasted_text)
432             s->offset += binary->rx_size;
433       }
434    }
435 
436    binary->rx_size += rx_size;
437    binary->exec_size = exec_size;
438 
439    /* The SQ fetches up to N cache lines of 16 dwords
440     * ahead of the PC, configurable by SH_MEM_CONFIG and
441     * S_INST_PREFETCH. This can cause two issues:
442     *
443     * (1) Crossing a page boundary to an unmapped page. The logic
444     *     does not distinguish between a required fetch and a "mere"
445     *     prefetch and will fault.
446     *
447     * (2) Prefetching instructions that will be changed for a
448     *     different shader.
449     *
450     * (2) is not currently an issue because we flush the I$ at IB
451     * boundaries, but (1) needs to be addressed. Due to buffer
452     * suballocation, we just play it safe.
453     */
454    unsigned prefetch_distance = 0;
455 
456    if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
457       prefetch_distance = 16;
458    else if (i.info->chip_class >= GFX10)
459       prefetch_distance = 3;
460 
461    if (prefetch_distance)
462       binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);
463 
464    return true;
465 
466 #undef report_if
467 #undef report_elf_if
468 
469 fail:
470    ac_rtld_close(binary);
471    return false;
472 }
473 
ac_rtld_close(struct ac_rtld_binary * binary)474 void ac_rtld_close(struct ac_rtld_binary *binary)
475 {
476    for (unsigned i = 0; i < binary->num_parts; ++i) {
477       struct ac_rtld_part *part = &binary->parts[i];
478       free(part->sections);
479       elf_end(part->elf);
480    }
481 
482    util_dynarray_fini(&binary->lds_symbols);
483    free(binary->parts);
484    binary->parts = NULL;
485    binary->num_parts = 0;
486 }
487 
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)488 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
489                                 size_t *nbytes)
490 {
491    for (unsigned i = 0; i < part->num_sections; ++i) {
492       struct ac_rtld_section *s = &part->sections[i];
493       if (s->name && !strcmp(name, s->name)) {
494          Elf_Scn *target_scn = elf_getscn(part->elf, i);
495          Elf_Data *target_data = elf_getdata(target_scn, NULL);
496          if (!target_data) {
497             report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
498             return false;
499          }
500 
501          *data = target_data->d_buf;
502          *nbytes = target_data->d_size;
503          return true;
504       }
505    }
506    return false;
507 }
508 
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)509 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
510                                  size_t *nbytes)
511 {
512    assert(binary->num_parts == 1);
513    return get_section_by_name(&binary->parts[0], name, data, nbytes);
514 }
515 
ac_rtld_read_config(const struct radeon_info * info,struct ac_rtld_binary * binary,struct ac_shader_config * config)516 bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
517                          struct ac_shader_config *config)
518 {
519    for (unsigned i = 0; i < binary->num_parts; ++i) {
520       struct ac_rtld_part *part = &binary->parts[i];
521       const char *config_data;
522       size_t config_nbytes;
523 
524       if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
525          return false;
526 
527       /* TODO: be precise about scratch use? */
528       struct ac_shader_config c = {0};
529       ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);
530 
531       config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
532       config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
533       config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
534       config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
535       config->scratch_bytes_per_wave =
536          MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
537 
538       assert(i == 0 || config->float_mode == c.float_mode);
539       config->float_mode = c.float_mode;
540 
541       /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
542        * the main shader part is used. */
543       assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
544       config->spi_ps_input_ena = c.spi_ps_input_ena;
545       config->spi_ps_input_addr = c.spi_ps_input_addr;
546 
547       /* TODO: consistently use LDS symbols for this */
548       config->lds_size = MAX2(config->lds_size, c.lds_size);
549 
550       /* TODO: Should we combine these somehow? It's currently only
551        * used for radeonsi's compute, where multiple parts aren't used. */
552       assert(config->rsrc1 == 0 && config->rsrc2 == 0);
553       config->rsrc1 = c.rsrc1;
554       config->rsrc2 = c.rsrc2;
555    }
556 
557    return true;
558 }
559 
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)560 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
561                            const Elf64_Sym *sym, const char *name, uint64_t *value)
562 {
563    /* TODO: properly disentangle the undef and the LDS cases once
564     * STT_AMDGPU_LDS is retired. */
565    if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
566       const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
567 
568       if (lds_sym) {
569          *value = lds_sym->offset;
570          return true;
571       }
572 
573       /* TODO: resolve from other parts */
574 
575       if (u->get_external_symbol(u->cb_data, name, value))
576          return true;
577 
578       report_errorf("symbol %s: unknown", name);
579       return false;
580    }
581 
582    struct ac_rtld_part *part = &u->binary->parts[part_idx];
583    if (sym->st_shndx >= part->num_sections) {
584       report_errorf("symbol %s: section out of bounds", name);
585       return false;
586    }
587 
588    struct ac_rtld_section *s = &part->sections[sym->st_shndx];
589    if (!s->is_rx) {
590       report_errorf("symbol %s: bad section", name);
591       return false;
592    }
593 
594    uint64_t section_base = u->rx_va + s->offset;
595 
596    *value = section_base + sym->st_value;
597    return true;
598 }
599 
apply_relocs(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Shdr * reloc_shdr,const Elf_Data * reloc_data)600 static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
601                          const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
602 {
603 #define report_if(cond)                                                                            \
604    do {                                                                                            \
605       if ((cond)) {                                                                                \
606          report_errorf(#cond);                                                                     \
607          return false;                                                                             \
608       }                                                                                            \
609    } while (false)
610 #define report_elf_if(cond)                                                                        \
611    do {                                                                                            \
612       if ((cond)) {                                                                                \
613          report_elf_errorf(#cond);                                                                 \
614          return false;                                                                             \
615       }                                                                                            \
616    } while (false)
617 
618    struct ac_rtld_part *part = &u->binary->parts[part_idx];
619    Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
620    report_elf_if(!target_scn);
621 
622    Elf_Data *target_data = elf_getdata(target_scn, NULL);
623    report_elf_if(!target_data);
624 
625    Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
626    report_elf_if(!symbols_scn);
627 
628    Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
629    report_elf_if(!symbols_shdr);
630    uint32_t strtabidx = symbols_shdr->sh_link;
631 
632    Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
633    report_elf_if(!symbols_data);
634 
635    const Elf64_Sym *symbols = symbols_data->d_buf;
636    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
637 
638    struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
639    report_if(!s->is_rx);
640 
641    const char *orig_base = target_data->d_buf;
642    char *dst_base = u->rx_ptr + s->offset;
643    uint64_t va_base = u->rx_va + s->offset;
644 
645    Elf64_Rel *rel = reloc_data->d_buf;
646    size_t num_relocs = reloc_data->d_size / sizeof(*rel);
647    for (size_t i = 0; i < num_relocs; ++i, ++rel) {
648       size_t r_sym = ELF64_R_SYM(rel->r_info);
649       unsigned r_type = ELF64_R_TYPE(rel->r_info);
650 
651       const char *orig_ptr = orig_base + rel->r_offset;
652       char *dst_ptr = dst_base + rel->r_offset;
653       uint64_t va = va_base + rel->r_offset;
654 
655       uint64_t symbol;
656       uint64_t addend;
657 
658       if (r_sym == STN_UNDEF) {
659          symbol = 0;
660       } else {
661          report_elf_if(r_sym >= num_symbols);
662 
663          const Elf64_Sym *sym = &symbols[r_sym];
664          const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
665          report_elf_if(!symbol_name);
666 
667          if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
668             return false;
669       }
670 
671       /* TODO: Should we also support .rela sections, where the
672        * addend is part of the relocation record? */
673 
674       /* Load the addend from the ELF instead of the destination,
675        * because the destination may be in VRAM. */
676       switch (r_type) {
677       case R_AMDGPU_ABS32:
678       case R_AMDGPU_ABS32_LO:
679       case R_AMDGPU_ABS32_HI:
680       case R_AMDGPU_REL32:
681       case R_AMDGPU_REL32_LO:
682       case R_AMDGPU_REL32_HI:
683          addend = *(const uint32_t *)orig_ptr;
684          break;
685       case R_AMDGPU_ABS64:
686       case R_AMDGPU_REL64:
687          addend = *(const uint64_t *)orig_ptr;
688          break;
689       default:
690          report_errorf("unsupported r_type == %u", r_type);
691          return false;
692       }
693 
694       uint64_t abs = symbol + addend;
695 
696       switch (r_type) {
697       case R_AMDGPU_ABS32:
698          assert((uint32_t)abs == abs);
699          FALLTHROUGH;
700       case R_AMDGPU_ABS32_LO:
701          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
702          break;
703       case R_AMDGPU_ABS32_HI:
704          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
705          break;
706       case R_AMDGPU_ABS64:
707          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
708          break;
709       case R_AMDGPU_REL32:
710          assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
711          FALLTHROUGH;
712       case R_AMDGPU_REL32_LO:
713          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
714          break;
715       case R_AMDGPU_REL32_HI:
716          *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
717          break;
718       case R_AMDGPU_REL64:
719          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
720          break;
721       default:
722          unreachable("bad r_type");
723       }
724    }
725 
726    return true;
727 
728 #undef report_if
729 #undef report_elf_if
730 }
731 
732 /**
733  * Upload the binary or binaries to the provided GPU buffers, including
734  * relocations.
735  */
ac_rtld_upload(struct ac_rtld_upload_info * u)736 int ac_rtld_upload(struct ac_rtld_upload_info *u)
737 {
738 #define report_if(cond)                                                                            \
739    do {                                                                                            \
740       if ((cond)) {                                                                                \
741          report_errorf(#cond);                                                                     \
742          return -1;                                                                             \
743       }                                                                                            \
744    } while (false)
745 #define report_elf_if(cond)                                                                        \
746    do {                                                                                            \
747       if ((cond)) {                                                                                \
748          report_errorf(#cond);                                                                     \
749          return -1;                                                                             \
750       }                                                                                            \
751    } while (false)
752 
753    int size = 0;
754    if (u->binary->options.halt_at_entry) {
755       /* s_sethalt 1 */
756       *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
757    }
758 
759    /* First pass: upload raw section data and lay out private LDS symbols. */
760    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
761       struct ac_rtld_part *part = &u->binary->parts[i];
762 
763       Elf_Scn *section = NULL;
764       while ((section = elf_nextscn(part->elf, section))) {
765          Elf64_Shdr *shdr = elf64_getshdr(section);
766          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
767 
768          if (!s->is_rx)
769             continue;
770 
771          report_if(shdr->sh_type != SHT_PROGBITS);
772 
773          Elf_Data *data = elf_getdata(section, NULL);
774          report_elf_if(!data || data->d_size != shdr->sh_size);
775          memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
776 
777          size = MAX2(size, s->offset + shdr->sh_size);
778       }
779    }
780 
781    if (u->binary->rx_end_markers) {
782       uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
783       for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
784          *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
785       size += 4 * DEBUGGER_NUM_MARKERS;
786    }
787 
788    /* Second pass: handle relocations, overwriting uploaded data where
789     * appropriate. */
790    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
791       struct ac_rtld_part *part = &u->binary->parts[i];
792       Elf_Scn *section = NULL;
793       while ((section = elf_nextscn(part->elf, section))) {
794          Elf64_Shdr *shdr = elf64_getshdr(section);
795          if (shdr->sh_type == SHT_REL) {
796             Elf_Data *relocs = elf_getdata(section, NULL);
797             report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
798             if (!apply_relocs(u, i, shdr, relocs))
799                return -1;
800          } else if (shdr->sh_type == SHT_RELA) {
801             report_errorf("SHT_RELA not supported");
802             return -1;
803          }
804       }
805    }
806 
807    return size;
808 
809 #undef report_if
810 #undef report_elf_if
811 }
812