1 /* Offload image generation tool for AMD GCN.
2
3 Copyright (C) 2014-2022 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Munges GCN assembly into a C source file defining the GCN code as a
22 string.
23
24 This is not a complete assembler. We presume the source is well
25 formed from the compiler and can die horribly if it is not. */
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "obstack.h"
31 #include "diagnostic.h"
32 #include "intl.h"
33 #include <libgen.h>
34 #include "collect-utils.h"
35 #include "gomp-constants.h"
36 #include "simple-object.h"
37 #include "elf.h"
38
/* These probably won't (all) be in elf.h for a while.  The entries that are
   #undef'd first override whatever the included headers may define, so the
   values below are always the ones in effect.  */

/* Value stored in the ELF header's e_machine field for AMDGPU objects.
   Note: no trailing semicolon, so the macro can be used in expressions.  */
#undef EM_AMDGPU
#define EM_AMDGPU 0xe0

/* Values stored in e_ident[7] and e_ident[8] respectively.  */
#undef ELFOSABI_AMDGPU_HSA
#define ELFOSABI_AMDGPU_HSA 64
#undef ELFABIVERSION_AMDGPU_HSA_V3
#define ELFABIVERSION_AMDGPU_HSA_V3 1
#undef ELFABIVERSION_AMDGPU_HSA_V4
#define ELFABIVERSION_AMDGPU_HSA_V4 2

/* Architecture numbers; one of these is combined with the feature flags
   below to form e_flags.  */
#undef EF_AMDGPU_MACH_AMDGCN_GFX803
#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
#undef EF_AMDGPU_MACH_AMDGCN_GFX900
#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
#undef EF_AMDGPU_MACH_AMDGCN_GFX906
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
#undef EF_AMDGPU_MACH_AMDGCN_GFX908
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30

/* V3 feature flags: one bit per feature.  */
#define EF_AMDGPU_XNACK_V3 0x100
#define EF_AMDGPU_SRAM_ECC_V3 0x200

/* V4 feature flags: a two-bit field per feature.  */
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask.  */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
#define EF_AMDGPU_FEATURE_XNACK_ANY_V4 0x100
#define EF_AMDGPU_FEATURE_XNACK_OFF_V4 0x200
#define EF_AMDGPU_FEATURE_XNACK_ON_V4 0x300

#define EF_AMDGPU_FEATURE_SRAMECC_V4 0xc00 /* Mask.  */
#define EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 0x000
#define EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 0x400
#define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 0x800
#define EF_AMDGPU_FEATURE_SRAMECC_ON_V4 0xc00
73
/* Helpers that set or test the XNACK and SRAM-ECC feature state held in an
   ELF flags word VAR.  The SET_* macros assign to VAR, so VAR must be a
   modifiable lvalue; the TEST_* macros yield non-zero when the state
   matches.  Every argument and every expansion is parenthesized so the
   macros behave as single expressions wherever they are used.  */

#ifdef HAVE_GCN_ASM_V3_SYNTAX
/* V3: each feature is a single on/off bit.  */
#define SET_XNACK_ON(VAR) ((VAR) |= EF_AMDGPU_XNACK_V3)
#define SET_XNACK_OFF(VAR) ((VAR) &= ~EF_AMDGPU_XNACK_V3)
#define TEST_XNACK(VAR) ((VAR) & EF_AMDGPU_XNACK_V3)

#define SET_SRAM_ECC_ON(VAR) ((VAR) |= EF_AMDGPU_SRAM_ECC_V3)
#define SET_SRAM_ECC_ANY(VAR) SET_SRAM_ECC_ON (VAR)
#define SET_SRAM_ECC_OFF(VAR) ((VAR) &= ~EF_AMDGPU_SRAM_ECC_V3)
#define SET_SRAM_ECC_UNSUPPORTED(VAR) SET_SRAM_ECC_OFF (VAR)
#define TEST_SRAM_ECC_ANY(VAR) 0 /* Not supported.  */
#define TEST_SRAM_ECC_ON(VAR) ((VAR) & EF_AMDGPU_SRAM_ECC_V3)
#endif
#ifdef HAVE_GCN_ASM_V4_SYNTAX
/* V4: each feature is a multi-bit field, so setting a state first clears
   the whole field and testing compares the whole field.  */
#define SET_XNACK_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_ON_V4))
#define SET_XNACK_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_OFF_V4))
#define TEST_XNACK(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_ON_V4)

#define SET_SRAM_ECC_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_ON_V4))
#define SET_SRAM_ECC_ANY(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4))
#define SET_SRAM_ECC_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_OFF_V4))
#define SET_SRAM_ECC_UNSUPPORTED(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4))
#define TEST_SRAM_ECC_ANY(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ANY_V4)
#define TEST_SRAM_ECC_ON(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
#endif
108
/* AMDGPU relocation type numbers, supplied here only when the included
   headers have not already defined R_AMDGPU_NONE.  The comment on each entry
   gives the relocation calculation.  Value 12 is not defined here.  */
#ifndef R_AMDGPU_NONE
# define R_AMDGPU_NONE            0
# define R_AMDGPU_ABS32_LO        1   /* (S + A) & 0xFFFFFFFF  */
# define R_AMDGPU_ABS32_HI        2   /* (S + A) >> 32  */
# define R_AMDGPU_ABS64           3   /* S + A  */
# define R_AMDGPU_REL32           4   /* S + A - P  */
# define R_AMDGPU_REL64           5   /* S + A - P  */
# define R_AMDGPU_ABS32           6   /* S + A  */
# define R_AMDGPU_GOTPCREL        7   /* G + GOT + A - P  */
# define R_AMDGPU_GOTPCREL32_LO   8   /* (G + GOT + A - P) & 0xFFFFFFFF  */
# define R_AMDGPU_GOTPCREL32_HI   9   /* (G + GOT + A - P) >> 32  */
# define R_AMDGPU_REL32_LO        10  /* (S + A - P) & 0xFFFFFFFF  */
# define R_AMDGPU_REL32_HI        11  /* (S + A - P) >> 32  */
# define R_AMDGPU_RELATIVE64      13  /* B + A  */
#endif
124
125 const char tool_name[] = "gcn mkoffload";
126
127 static const char *gcn_dumpbase;
128 static struct obstack files_to_cleanup;
129
130 enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
131 uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture.
132 uint32_t elf_flags =
133 #ifdef HAVE_GCN_ASM_V3_SYNTAX
134 0;
135 #endif
136 #ifdef HAVE_GCN_ASM_V4_SYNTAX
137 (EF_AMDGPU_FEATURE_XNACK_ANY_V4 | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4);
138 #endif
139
140 /* Delete tempfiles. */
141
142 void
tool_cleanup(bool from_signal ATTRIBUTE_UNUSED)143 tool_cleanup (bool from_signal ATTRIBUTE_UNUSED)
144 {
145 obstack_ptr_grow (&files_to_cleanup, NULL);
146 const char **files = XOBFINISH (&files_to_cleanup, const char **);
147 for (int i = 0; files[i]; i++)
148 maybe_unlink (files[i]);
149 }
150
/* Argument-less wrapper around tool_cleanup, always passing false for
   FROM_SIGNAL; main registers it with atexit.  */

static void
mkoffload_cleanup (void)
{
  tool_cleanup (false);
}
156
157 /* Unlink FILE unless requested otherwise. */
158
159 void
maybe_unlink(const char * file)160 maybe_unlink (const char *file)
161 {
162 if (!save_temps)
163 {
164 if (unlink_if_ordinary (file) && errno != ENOENT)
165 fatal_error (input_location, "deleting file %s: %m", file);
166 }
167 else if (verbose)
168 fprintf (stderr, "[Leaving %s]\n", file);
169 }
170
171 /* Add or change the value of an environment variable, outputting the
172 change to standard error if in verbose mode. */
173
174 static void
xputenv(const char * string)175 xputenv (const char *string)
176 {
177 if (verbose)
178 fprintf (stderr, "%s\n", string);
179 putenv (CONST_CAST (char *, string));
180 }
181
182 /* Read the whole input file. It will be NUL terminated (but
183 remember, there could be a NUL in the file itself. */
184
185 static const char *
read_file(FILE * stream,size_t * plen)186 read_file (FILE *stream, size_t *plen)
187 {
188 size_t alloc = 16384;
189 size_t base = 0;
190 char *buffer;
191
192 if (!fseek (stream, 0, SEEK_END))
193 {
194 /* Get the file size. */
195 long s = ftell (stream);
196 if (s >= 0)
197 alloc = s + 100;
198 fseek (stream, 0, SEEK_SET);
199 }
200 buffer = XNEWVEC (char, alloc);
201
202 for (;;)
203 {
204 size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
205
206 if (!n)
207 break;
208 base += n;
209 if (base + 1 == alloc)
210 {
211 alloc *= 2;
212 buffer = XRESIZEVEC (char, buffer, alloc);
213 }
214 }
215 buffer[base] = 0;
216 *plen = base;
217 return buffer;
218 }
219
/* Parse STR, saving found tokens into PVALUES and return their number.
   Tokens are assumed to be delimited by ':'; empty tokens are kept, so an
   empty string yields one empty token.

   If STR is NULL (for example an unset environment variable), *PVALUES is
   set to NULL and 0 is returned.  Otherwise *PVALUES receives an xmalloc'd
   array of xmalloc'd, NUL-terminated copies of the tokens, which the caller
   releases (see free_array_of_ptrs).  */

static unsigned
parse_env_var (const char *str, char ***pvalues)
{
  if (str == NULL)
    {
      *pvalues = NULL;
      return 0;
    }

  /* One token more than there are delimiters.  */
  unsigned num = 1;
  for (const char *p = strchr (str, ':'); p != NULL; p = strchr (p + 1, ':'))
    num++;

  char **values = (char **) xmalloc (num * sizeof (char *));
  const char *curval = str;
  for (unsigned i = 0; i < num; i++)
    {
      const char *nextval = strchr (curval, ':');
      if (nextval == NULL)
        nextval = curval + strlen (curval);

      size_t l = nextval - curval;
      values[i] = (char *) xmalloc (l + 1);
      memcpy (values[i], curval, l);
      values[i][l] = 0;

      /* The final token ends at the terminating NUL; stop here rather than
         stepping (and searching) past the end of STR.  */
      if (*nextval == '\0')
        break;
      curval = nextval + 1;
    }
  *pvalues = values;
  return num;
}
257
/* Auxiliary function that frees elements of PTR and PTR itself.
   N is the number of elements to be freed.  If PTR is NULL, nothing is
   freed.  The walk stops at the first NULL element, so elements after it
   are not freed.  */

static void
free_array_of_ptrs (void **ptr, unsigned n)
{
  if (ptr == NULL)
    return;

  for (void **elt = ptr, **end = ptr + n; elt != end && *elt != NULL; elt++)
    free (*elt);

  free (ptr);
}
277
/* Check whether NAME can be accessed in MODE.  This is like access,
   except that it never considers directories to be executable: for X_OK,
   a NAME that cannot be stat'ed or that is a directory yields -1.  */

static int
access_check (const char *name, int mode)
{
  struct stat st;

  if (mode == X_OK && (stat (name, &st) < 0 || S_ISDIR (st.st_mode)))
    return -1;

  return access (name, mode);
}
294
295 /* Copy the early-debug-info from the incoming LTO object to a new object
296 that will be linked into the output HSACO file. The host relocations
297 must be translated into GCN relocations, and any global undefined symbols
298 must be weakened (so as not to have the debug info try to pull in host
299 junk).
300
301 Returns true if the file was created, false otherwise. */
302
303 static bool
copy_early_debug_info(const char * infile,const char * outfile)304 copy_early_debug_info (const char *infile, const char *outfile)
305 {
306 const char *errmsg;
307 int err;
308
309 /* The simple_object code can handle extracting the debug sections.
310 This code is based on that in lto-wrapper.cc. */
311 int infd = open (infile, O_RDONLY | O_BINARY);
312 if (infd == -1)
313 return false;
314 simple_object_read *inobj = simple_object_start_read (infd, 0,
315 "__GNU_LTO",
316 &errmsg, &err);
317 if (!inobj)
318 return false;
319
320 off_t off, len;
321 if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info",
322 &off, &len, &errmsg, &err) != 1)
323 {
324 simple_object_release_read (inobj);
325 close (infd);
326 return false;
327 }
328
329 errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, true);
330 if (errmsg)
331 {
332 unlink_if_ordinary (outfile);
333 return false;
334 }
335
336 simple_object_release_read (inobj);
337 close (infd);
338
339 /* Open the file we just created for some adjustments.
340 The simple_object code can't do this, so we do it manually. */
341 FILE *outfd = fopen (outfile, "r+b");
342 if (!outfd)
343 return false;
344
345 Elf64_Ehdr ehdr;
346 if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1)
347 {
348 fclose (outfd);
349 return true;
350 }
351
352 /* We only support host relocations of x86_64, for now. */
353 gcc_assert (ehdr.e_machine == EM_X86_64);
354
355 /* Fiji devices use HSACOv3 regardless of the assembler. */
356 uint32_t elf_flags_actual = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
357 ? 0 : elf_flags);
358 /* GFX900 devices don't support the sramecc attribute even if
359 a buggy assembler thinks it does. This must match gcn-hsa.h */
360 if (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX900)
361 SET_SRAM_ECC_UNSUPPORTED (elf_flags_actual);
362
363 /* Patch the correct elf architecture flag into the file. */
364 ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
365 #ifdef HAVE_GCN_ASM_V3_SYNTAX
366 ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA_V3;
367 #endif
368 #ifdef HAVE_GCN_ASM_V4_SYNTAX
369 ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
370 ? ELFABIVERSION_AMDGPU_HSA_V3
371 : ELFABIVERSION_AMDGPU_HSA_V4);
372 #endif
373 ehdr.e_type = ET_REL;
374 ehdr.e_machine = EM_AMDGPU;
375 ehdr.e_flags = elf_arch | elf_flags_actual;
376
377 /* Load the section headers so we can walk them later. */
378 Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
379 * ehdr.e_shnum);
380 if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1
381 || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum,
382 outfd) != ehdr.e_shnum)
383 {
384 free (sections);
385 fclose (outfd);
386 return true;
387 }
388
389 /* Convert the host relocations to target relocations. */
390 for (int i = 0; i < ehdr.e_shnum; i++)
391 {
392 if (sections[i].sh_type != SHT_RELA)
393 continue;
394
395 char *data = (char *)xmalloc (sections[i].sh_size);
396 if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
397 || fread (data, sections[i].sh_size, 1, outfd) != 1)
398 {
399 free (data);
400 continue;
401 }
402
403 for (size_t offset = 0;
404 offset < sections[i].sh_size;
405 offset += sections[i].sh_entsize)
406 {
407 Elf64_Rela *reloc = (Elf64_Rela *) (data + offset);
408
409 /* Map the host relocations to GCN relocations.
410 Only relocations that can appear in DWARF need be handled. */
411 switch (ELF64_R_TYPE (reloc->r_info))
412 {
413 case R_X86_64_32:
414 case R_X86_64_32S:
415 reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
416 R_AMDGPU_ABS32);
417 break;
418 case R_X86_64_PC32:
419 reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
420 R_AMDGPU_REL32);
421 break;
422 case R_X86_64_PC64:
423 reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
424 R_AMDGPU_REL64);
425 break;
426 case R_X86_64_64:
427 reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
428 R_AMDGPU_ABS64);
429 break;
430 case R_X86_64_RELATIVE:
431 reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
432 R_AMDGPU_RELATIVE64);
433 break;
434 default:
435 gcc_unreachable ();
436 }
437 }
438
439 /* Write back our relocation changes. */
440 if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
441 fwrite (data, sections[i].sh_size, 1, outfd);
442
443 free (data);
444 }
445
446 /* Weaken any global undefined symbols that would pull in unwanted
447 objects. */
448 for (int i = 0; i < ehdr.e_shnum; i++)
449 {
450 if (sections[i].sh_type != SHT_SYMTAB)
451 continue;
452
453 char *data = (char *)xmalloc (sections[i].sh_size);
454 if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
455 || fread (data, sections[i].sh_size, 1, outfd) != 1)
456 {
457 free (data);
458 continue;
459 }
460
461 for (size_t offset = 0;
462 offset < sections[i].sh_size;
463 offset += sections[i].sh_entsize)
464 {
465 Elf64_Sym *sym = (Elf64_Sym *) (data + offset);
466 int type = ELF64_ST_TYPE (sym->st_info);
467 int bind = ELF64_ST_BIND (sym->st_info);
468
469 if (bind == STB_GLOBAL && sym->st_shndx == 0)
470 sym->st_info = ELF64_ST_INFO (STB_WEAK, type);
471 }
472
473 /* Write back our symbol changes. */
474 if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
475 fwrite (data, sections[i].sh_size, 1, outfd);
476
477 free (data);
478 }
479 free (sections);
480
481 /* Write back our header changes. */
482 rewind (outfd);
483 fwrite (&ehdr, sizeof (ehdr), 1, outfd);
484
485 fclose (outfd);
486 return true;
487 }
488
489 /* Parse an input assembler file, extract the offload tables etc.,
490 and output (1) the assembler code, minus the tables (which can contain
491 problematic relocations), and (2) a C file with the offload tables
492 encoded as structured data. */
493
494 static void
process_asm(FILE * in,FILE * out,FILE * cfile)495 process_asm (FILE *in, FILE *out, FILE *cfile)
496 {
497 int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0;
498 struct obstack fns_os, dims_os, regcounts_os;
499 obstack_init (&fns_os);
500 obstack_init (&dims_os);
501 obstack_init (®counts_os);
502
503 struct oaccdims
504 {
505 int d[3];
506 char *name;
507 } dim;
508
509 struct regcount
510 {
511 int sgpr_count;
512 int vgpr_count;
513 char *kernel_name;
514 } regcount = { -1, -1, NULL };
515
516 /* Always add _init_array and _fini_array as kernels. */
517 obstack_ptr_grow (&fns_os, xstrdup ("_init_array"));
518 obstack_ptr_grow (&fns_os, xstrdup ("_fini_array"));
519 fn_count += 2;
520
521 char buf[1000];
522 enum
523 { IN_CODE,
524 IN_METADATA,
525 IN_VARS,
526 IN_FUNCS
527 } state = IN_CODE;
528 while (fgets (buf, sizeof (buf), in))
529 {
530 switch (state)
531 {
532 case IN_CODE:
533 {
534 if (sscanf (buf, " ;; OPENACC-DIMS: %d, %d, %d : %ms\n",
535 &dim.d[0], &dim.d[1], &dim.d[2], &dim.name) == 4)
536 {
537 obstack_grow (&dims_os, &dim, sizeof (dim));
538 dims_count++;
539 }
540
541 break;
542 }
543 case IN_METADATA:
544 {
545 if (sscanf (buf, " - .name: %ms\n", ®count.kernel_name) == 1)
546 break;
547 else if (sscanf (buf, " .sgpr_count: %d\n",
548 ®count.sgpr_count) == 1)
549 {
550 gcc_assert (regcount.kernel_name);
551 break;
552 }
553 else if (sscanf (buf, " .vgpr_count: %d\n",
554 ®count.vgpr_count) == 1)
555 {
556 gcc_assert (regcount.kernel_name);
557 break;
558 }
559
560 break;
561 }
562 case IN_VARS:
563 {
564 char *varname;
565 unsigned varsize;
566 if (sscanf (buf, " .8byte %ms\n", &varname))
567 {
568 fputs (buf, out);
569 fgets (buf, sizeof (buf), in);
570 if (!sscanf (buf, " .8byte %u\n", &varsize))
571 abort ();
572 var_count++;
573 }
574 break;
575 }
576 case IN_FUNCS:
577 {
578 char *funcname;
579 if (sscanf (buf, "\t.8byte\t%ms\n", &funcname))
580 {
581 obstack_ptr_grow (&fns_os, funcname);
582 fn_count++;
583 continue;
584 }
585 break;
586 }
587 }
588
589 char dummy;
590 if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0)
591 {
592 state = IN_VARS;
593
594 /* Add a global symbol to allow plugin-gcn.c to locate the table
595 at runtime. It can't use the "offload_var_table.N" emitted by
596 the compiler because a) they're not global, and b) there's one
597 for each input file combined into the binary. */
598 fputs (buf, out);
599 fputs ("\t.global .offload_var_table\n"
600 "\t.type .offload_var_table, @object\n"
601 ".offload_var_table:\n",
602 out);
603 }
604 else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
605 state = IN_FUNCS;
606 else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
607 {
608 state = IN_METADATA;
609 regcount.kernel_name = NULL;
610 regcount.sgpr_count = regcount.vgpr_count = -1;
611 }
612 else if (sscanf (buf, " .section %c", &dummy) > 0
613 || sscanf (buf, " .text%c", &dummy) > 0
614 || sscanf (buf, " .bss%c", &dummy) > 0
615 || sscanf (buf, " .data%c", &dummy) > 0
616 || sscanf (buf, " .ident %c", &dummy) > 0)
617 state = IN_CODE;
618 else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
619 {
620 state = IN_CODE;
621 gcc_assert (regcount.kernel_name != NULL
622 && regcount.sgpr_count >= 0
623 && regcount.vgpr_count >= 0);
624 obstack_grow (®counts_os, ®count, sizeof (regcount));
625 regcount_count++;
626 regcount.kernel_name = NULL;
627 regcount.sgpr_count = regcount.vgpr_count = -1;
628 }
629
630 if (state == IN_CODE || state == IN_METADATA || state == IN_VARS)
631 fputs (buf, out);
632 }
633
634 char **fns = XOBFINISH (&fns_os, char **);
635 struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
636 struct regcount *regcounts = XOBFINISH (®counts_os, struct regcount *);
637
638 fprintf (cfile, "#include <stdlib.h>\n");
639 fprintf (cfile, "#include <stdbool.h>\n\n");
640
641 fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
642
643 /* Dump out function idents. */
644 fprintf (cfile, "static const struct hsa_kernel_description {\n"
645 " const char *name;\n"
646 " int oacc_dims[3];\n"
647 " int sgpr_count;\n"
648 " int vgpr_count;\n"
649 "} gcn_kernels[] = {\n ");
650 dim.d[0] = dim.d[1] = dim.d[2] = 0;
651 const char *comma;
652 int i;
653 for (comma = "", i = 0; i < fn_count; comma = ",\n ", i++)
654 {
655 /* Find if we recorded dimensions for this function. */
656 int *d = dim.d; /* Previously zeroed. */
657 int sgpr_count = 0;
658 int vgpr_count = 0;
659 for (int j = 0; j < dims_count; j++)
660 if (strcmp (fns[i], dims[j].name) == 0)
661 {
662 d = dims[j].d;
663 break;
664 }
665 for (int j = 0; j < regcount_count; j++)
666 if (strcmp (fns[i], regcounts[j].kernel_name) == 0)
667 {
668 sgpr_count = regcounts[j].sgpr_count;
669 vgpr_count = regcounts[j].vgpr_count;
670 break;
671 }
672
673 fprintf (cfile, "%s{\"%s\", {%d, %d, %d}, %d, %d}", comma,
674 fns[i], d[0], d[1], d[2], sgpr_count, vgpr_count);
675
676 free (fns[i]);
677 }
678 fprintf (cfile, "\n};\n\n");
679
680 obstack_free (&fns_os, NULL);
681 for (i = 0; i < dims_count; i++)
682 free (dims[i].name);
683 for (i = 0; i < regcount_count; i++)
684 free (regcounts[i].kernel_name);
685 obstack_free (&dims_os, NULL);
686 obstack_free (®counts_os, NULL);
687 }
688
689 /* Embed an object file into a C source file. */
690
691 static void
process_obj(FILE * in,FILE * cfile)692 process_obj (FILE *in, FILE *cfile)
693 {
694 size_t len = 0;
695 const char *input = read_file (in, &len);
696
697 /* Dump out an array containing the binary.
698 FIXME: do this with objcopy. */
699 fprintf (cfile, "static unsigned char gcn_code[] = {");
700 for (size_t i = 0; i < len; i += 17)
701 {
702 fprintf (cfile, "\n\t");
703 for (size_t j = i; j < i + 17 && j < len; j++)
704 fprintf (cfile, "%3u,", (unsigned char) input[j]);
705 }
706 fprintf (cfile, "\n};\n\n");
707
708 fprintf (cfile,
709 "static const struct gcn_image {\n"
710 " size_t size;\n"
711 " void *image;\n"
712 "} gcn_image = {\n"
713 " %zu,\n"
714 " gcn_code\n"
715 "};\n\n",
716 len);
717
718 fprintf (cfile,
719 "static const struct gcn_image_desc {\n"
720 " const struct gcn_image *gcn_image;\n"
721 " unsigned kernel_count;\n"
722 " const struct hsa_kernel_description *kernel_infos;\n"
723 " unsigned global_variable_count;\n"
724 "} target_data = {\n"
725 " &gcn_image,\n"
726 " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n"
727 " gcn_kernels,\n"
728 " gcn_num_vars\n"
729 "};\n\n");
730
731 fprintf (cfile,
732 "#ifdef __cplusplus\n"
733 "extern \"C\" {\n"
734 "#endif\n"
735 "extern void GOMP_offload_register_ver"
736 " (unsigned, const void *, int, const void *);\n"
737 "extern void GOMP_offload_unregister_ver"
738 " (unsigned, const void *, int, const void *);\n"
739 "#ifdef __cplusplus\n"
740 "}\n"
741 "#endif\n\n");
742
743 fprintf (cfile, "extern const void *const __OFFLOAD_TABLE__[];\n\n");
744
745 fprintf (cfile, "static __attribute__((constructor)) void init (void)\n"
746 "{\n"
747 " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
748 " %d/*GCN*/, &target_data);\n"
749 "};\n",
750 GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN),
751 GOMP_DEVICE_GCN);
752
753 fprintf (cfile, "static __attribute__((destructor)) void fini (void)\n"
754 "{\n"
755 " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
756 " %d/*GCN*/, &target_data);\n"
757 "};\n",
758 GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN),
759 GOMP_DEVICE_GCN);
760 }
761
762 /* Compile a C file using the host compiler. */
763
764 static void
compile_native(const char * infile,const char * outfile,const char * compiler,bool fPIC,bool fpic)765 compile_native (const char *infile, const char *outfile, const char *compiler,
766 bool fPIC, bool fpic)
767 {
768 const char *collect_gcc_options = getenv ("COLLECT_GCC_OPTIONS");
769 if (!collect_gcc_options)
770 fatal_error (input_location,
771 "environment variable COLLECT_GCC_OPTIONS must be set");
772
773 struct obstack argv_obstack;
774 obstack_init (&argv_obstack);
775 obstack_ptr_grow (&argv_obstack, compiler);
776 if (fPIC)
777 obstack_ptr_grow (&argv_obstack, "-fPIC");
778 if (fpic)
779 obstack_ptr_grow (&argv_obstack, "-fpic");
780 if (save_temps)
781 obstack_ptr_grow (&argv_obstack, "-save-temps");
782 if (verbose)
783 obstack_ptr_grow (&argv_obstack, "-v");
784 obstack_ptr_grow (&argv_obstack, "-dumpdir");
785 obstack_ptr_grow (&argv_obstack, "");
786 obstack_ptr_grow (&argv_obstack, "-dumpbase");
787 obstack_ptr_grow (&argv_obstack, gcn_dumpbase);
788 obstack_ptr_grow (&argv_obstack, "-dumpbase-ext");
789 obstack_ptr_grow (&argv_obstack, ".c");
790 switch (offload_abi)
791 {
792 case OFFLOAD_ABI_LP64:
793 obstack_ptr_grow (&argv_obstack, "-m64");
794 break;
795 case OFFLOAD_ABI_ILP32:
796 obstack_ptr_grow (&argv_obstack, "-m32");
797 break;
798 default:
799 gcc_unreachable ();
800 }
801 obstack_ptr_grow (&argv_obstack, infile);
802 obstack_ptr_grow (&argv_obstack, "-c");
803 obstack_ptr_grow (&argv_obstack, "-o");
804 obstack_ptr_grow (&argv_obstack, outfile);
805 obstack_ptr_grow (&argv_obstack, NULL);
806
807 const char **new_argv = XOBFINISH (&argv_obstack, const char **);
808 fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true,
809 ".gccnative_args");
810 obstack_free (&argv_obstack, NULL);
811 }
812
813 int
main(int argc,char ** argv)814 main (int argc, char **argv)
815 {
816 FILE *in = stdin;
817 FILE *out = stdout;
818 FILE *cfile = stdout;
819 const char *outname = 0;
820
821 progname = "mkoffload";
822 diagnostic_initialize (global_dc, 0);
823
824 obstack_init (&files_to_cleanup);
825 if (atexit (mkoffload_cleanup) != 0)
826 fatal_error (input_location, "atexit failed");
827
828 char *collect_gcc = getenv ("COLLECT_GCC");
829 if (collect_gcc == NULL)
830 fatal_error (input_location, "COLLECT_GCC must be set.");
831 const char *gcc_path = dirname (ASTRDUP (collect_gcc));
832 const char *gcc_exec = basename (ASTRDUP (collect_gcc));
833
834 size_t len = (strlen (gcc_path) + 1 + strlen (GCC_INSTALL_NAME) + 1);
835 char *driver = XALLOCAVEC (char, len);
836
837 if (strcmp (gcc_exec, collect_gcc) == 0)
838 /* collect_gcc has no path, so it was found in PATH. Make sure we also
839 find accel-gcc in PATH. */
840 gcc_path = NULL;
841
842 int driver_used = 0;
843 if (gcc_path != NULL)
844 driver_used = sprintf (driver, "%s/", gcc_path);
845 sprintf (driver + driver_used, "%s", GCC_INSTALL_NAME);
846
847 bool found = false;
848 if (gcc_path == NULL)
849 found = true;
850 else if (access_check (driver, X_OK) == 0)
851 found = true;
852 else
853 {
854 /* Don't use alloca pointer with XRESIZEVEC. */
855 driver = NULL;
856 /* Look in all COMPILER_PATHs for GCC_INSTALL_NAME. */
857 char **paths = NULL;
858 unsigned n_paths;
859 n_paths = parse_env_var (getenv ("COMPILER_PATH"), &paths);
860 for (unsigned i = 0; i < n_paths; i++)
861 {
862 len = strlen (paths[i]) + 1 + strlen (GCC_INSTALL_NAME) + 1;
863 driver = XRESIZEVEC (char, driver, len);
864 sprintf (driver, "%s/%s", paths[i], GCC_INSTALL_NAME);
865 if (access_check (driver, X_OK) == 0)
866 {
867 found = true;
868 break;
869 }
870 }
871 free_array_of_ptrs ((void **) paths, n_paths);
872 }
873
874 if (!found)
875 fatal_error (input_location,
876 "offload compiler %s not found", GCC_INSTALL_NAME);
877
878 /* We may be called with all the arguments stored in some file and
879 passed with @file. Expand them into argv before processing. */
880 expandargv (&argc, &argv);
881
882 /* Scan the argument vector. */
883 bool fopenmp = false;
884 bool fopenacc = false;
885 bool fPIC = false;
886 bool fpic = false;
887 bool sram_seen = false;
888 for (int i = 1; i < argc; i++)
889 {
890 #define STR "-foffload-abi="
891 if (startswith (argv[i], STR))
892 {
893 if (strcmp (argv[i] + strlen (STR), "lp64") == 0)
894 offload_abi = OFFLOAD_ABI_LP64;
895 else if (strcmp (argv[i] + strlen (STR), "ilp32") == 0)
896 offload_abi = OFFLOAD_ABI_ILP32;
897 else
898 fatal_error (input_location,
899 "unrecognizable argument of option " STR);
900 }
901 #undef STR
902 else if (strcmp (argv[i], "-fopenmp") == 0)
903 fopenmp = true;
904 else if (strcmp (argv[i], "-fopenacc") == 0)
905 fopenacc = true;
906 else if (strcmp (argv[i], "-fPIC") == 0)
907 fPIC = true;
908 else if (strcmp (argv[i], "-fpic") == 0)
909 fpic = true;
910 else if (strcmp (argv[i], "-mxnack") == 0)
911 SET_XNACK_ON (elf_flags);
912 else if (strcmp (argv[i], "-mno-xnack") == 0)
913 SET_XNACK_OFF (elf_flags);
914 else if (strcmp (argv[i], "-msram-ecc=on") == 0)
915 {
916 SET_SRAM_ECC_ON (elf_flags);
917 sram_seen = true;
918 }
919 else if (strcmp (argv[i], "-msram-ecc=any") == 0)
920 {
921 SET_SRAM_ECC_ANY (elf_flags);
922 sram_seen = true;
923 }
924 else if (strcmp (argv[i], "-msram-ecc=off") == 0)
925 {
926 SET_SRAM_ECC_OFF (elf_flags);
927 sram_seen = true;
928 }
929 else if (strcmp (argv[i], "-save-temps") == 0)
930 save_temps = true;
931 else if (strcmp (argv[i], "-v") == 0)
932 verbose = true;
933 else if (strcmp (argv[i], "-dumpbase") == 0
934 && i + 1 < argc)
935 dumppfx = argv[++i];
936 else if (strcmp (argv[i], "-march=fiji") == 0)
937 elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
938 else if (strcmp (argv[i], "-march=gfx900") == 0)
939 elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
940 else if (strcmp (argv[i], "-march=gfx906") == 0)
941 elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
942 else if (strcmp (argv[i], "-march=gfx908") == 0)
943 elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
944 }
945
946 if (!(fopenacc ^ fopenmp))
947 fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
948
949 if (!sram_seen)
950 {
951 #ifdef HAVE_GCN_ASM_V3_SYNTAX
      /* For HSACOv3, the SRAM-ECC feature defaults to "on" on GPUs where the
         feature is available.
         (HSACOv4 has elf_flags initialised to "any" in all cases.)  */
955 switch (elf_arch)
956 {
957 case EF_AMDGPU_MACH_AMDGCN_GFX803:
958 case EF_AMDGPU_MACH_AMDGCN_GFX900:
959 case EF_AMDGPU_MACH_AMDGCN_GFX906:
960 #ifndef HAVE_GCN_SRAM_ECC_GFX908
961 case EF_AMDGPU_MACH_AMDGCN_GFX908:
962 #endif
963 break;
964 default:
965 SET_SRAM_ECC_ON (elf_flags);
966 break;
967 }
968 #endif
969 }
970
971 const char *abi;
972 switch (offload_abi)
973 {
974 case OFFLOAD_ABI_LP64:
975 abi = "-m64";
976 break;
977 case OFFLOAD_ABI_ILP32:
978 abi = "-m32";
979 break;
980 default:
981 gcc_unreachable ();
982 }
983
984 /* Build arguments for compiler pass. */
985 struct obstack cc_argv_obstack;
986 obstack_init (&cc_argv_obstack);
987 obstack_ptr_grow (&cc_argv_obstack, driver);
988 obstack_ptr_grow (&cc_argv_obstack, "-S");
989
990 if (save_temps)
991 obstack_ptr_grow (&cc_argv_obstack, "-save-temps");
992 if (verbose)
993 obstack_ptr_grow (&cc_argv_obstack, "-v");
994 obstack_ptr_grow (&cc_argv_obstack, abi);
995 obstack_ptr_grow (&cc_argv_obstack, "-xlto");
996 if (fopenmp)
997 obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
998
999 for (int ix = 1; ix != argc; ix++)
1000 {
1001 if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
1002 outname = argv[++ix];
1003 else
1004 obstack_ptr_grow (&cc_argv_obstack, argv[ix]);
1005 }
1006
1007 if (!dumppfx)
1008 dumppfx = outname;
1009
1010 gcn_dumpbase = concat (dumppfx, ".c", NULL);
1011
1012 const char *gcn_cfile_name;
1013 if (save_temps)
1014 gcn_cfile_name = gcn_dumpbase;
1015 else
1016 gcn_cfile_name = make_temp_file (".c");
1017 obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name);
1018
1019 cfile = fopen (gcn_cfile_name, "w");
1020 if (!cfile)
1021 fatal_error (input_location, "cannot open '%s'", gcn_cfile_name);
1022
1023 /* Currently, we only support offloading in 64-bit configurations. */
1024 if (offload_abi == OFFLOAD_ABI_LP64)
1025 {
1026 const char *mko_dumpbase = concat (dumppfx, ".mkoffload", NULL);
1027 const char *hsaco_dumpbase = concat (dumppfx, ".mkoffload.hsaco", NULL);
1028
1029 const char *gcn_s1_name;
1030 const char *gcn_s2_name;
1031 const char *gcn_o_name;
1032 if (save_temps)
1033 {
1034 gcn_s1_name = concat (mko_dumpbase, ".1.s", NULL);
1035 gcn_s2_name = concat (mko_dumpbase, ".2.s", NULL);
1036 gcn_o_name = hsaco_dumpbase;
1037 }
1038 else
1039 {
1040 gcn_s1_name = make_temp_file (".mkoffload.1.s");
1041 gcn_s2_name = make_temp_file (".mkoffload.2.s");
1042 gcn_o_name = make_temp_file (".mkoffload.hsaco");
1043 }
1044 obstack_ptr_grow (&files_to_cleanup, gcn_s1_name);
1045 obstack_ptr_grow (&files_to_cleanup, gcn_s2_name);
1046 obstack_ptr_grow (&files_to_cleanup, gcn_o_name);
1047
1048 obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
1049 obstack_ptr_grow (&cc_argv_obstack, "");
1050 obstack_ptr_grow (&cc_argv_obstack, "-dumpbase");
1051 obstack_ptr_grow (&cc_argv_obstack, mko_dumpbase);
1052 obstack_ptr_grow (&cc_argv_obstack, "-dumpbase-ext");
1053 obstack_ptr_grow (&cc_argv_obstack, "");
1054
1055 obstack_ptr_grow (&cc_argv_obstack, "-o");
1056 obstack_ptr_grow (&cc_argv_obstack, gcn_s1_name);
1057 obstack_ptr_grow (&cc_argv_obstack, NULL);
1058 const char **cc_argv = XOBFINISH (&cc_argv_obstack, const char **);
1059
1060 /* Build arguments for assemble/link pass. */
1061 struct obstack ld_argv_obstack;
1062 obstack_init (&ld_argv_obstack);
1063 obstack_ptr_grow (&ld_argv_obstack, driver);
1064
1065 /* Extract early-debug information from the input objects.
1066 This loop finds all the inputs that end ".o" and aren't the output. */
1067 int dbgcount = 0;
1068 for (int ix = 1; ix != argc; ix++)
1069 {
1070 if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
1071 ++ix;
1072 else
1073 {
1074 if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0)
1075 {
1076 char *dbgobj;
1077 if (save_temps)
1078 {
1079 char buf[10];
1080 sprintf (buf, "%d", dbgcount++);
1081 dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL);
1082 }
1083 else
1084 dbgobj = make_temp_file (".mkoffload.dbg.o");
1085 obstack_ptr_grow (&files_to_cleanup, dbgobj);
1086
1087 /* If the copy fails then just ignore it. */
1088 if (copy_early_debug_info (argv[ix], dbgobj))
1089 {
1090 obstack_ptr_grow (&ld_argv_obstack, dbgobj);
1091 obstack_ptr_grow (&files_to_cleanup, dbgobj);
1092 }
1093 else
1094 free (dbgobj);
1095 }
1096 }
1097 }
1098 obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
1099 obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
1100 obstack_ptr_grow (&ld_argv_obstack,
1101 (TEST_XNACK (elf_flags)
1102 ? "-mxnack" : "-mno-xnack"));
1103 obstack_ptr_grow (&ld_argv_obstack,
1104 (TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on"
1105 : TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any"
1106 : "-msram-ecc=off"));
1107 if (verbose)
1108 obstack_ptr_grow (&ld_argv_obstack, "-v");
1109
1110 for (int i = 1; i < argc; i++)
1111 if (startswith (argv[i], "-l")
1112 || startswith (argv[i], "-Wl")
1113 || startswith (argv[i], "-march"))
1114 obstack_ptr_grow (&ld_argv_obstack, argv[i]);
1115
1116 obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
1117 obstack_ptr_grow (&cc_argv_obstack, "");
1118 obstack_ptr_grow (&cc_argv_obstack, "-dumpbase");
1119 obstack_ptr_grow (&cc_argv_obstack, hsaco_dumpbase);
1120 obstack_ptr_grow (&cc_argv_obstack, "-dumpbase-ext");
1121 obstack_ptr_grow (&cc_argv_obstack, "");
1122
1123 obstack_ptr_grow (&ld_argv_obstack, "-o");
1124 obstack_ptr_grow (&ld_argv_obstack, gcn_o_name);
1125 obstack_ptr_grow (&ld_argv_obstack, NULL);
1126 const char **ld_argv = XOBFINISH (&ld_argv_obstack, const char **);
1127
1128 /* Clean up unhelpful environment variables. */
1129 char *execpath = getenv ("GCC_EXEC_PREFIX");
1130 char *cpath = getenv ("COMPILER_PATH");
1131 char *lpath = getenv ("LIBRARY_PATH");
1132 unsetenv ("GCC_EXEC_PREFIX");
1133 unsetenv ("COMPILER_PATH");
1134 unsetenv ("LIBRARY_PATH");
1135
1136 /* Run the compiler pass. */
1137 fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true, ".gcc_args");
1138 obstack_free (&cc_argv_obstack, NULL);
1139
1140 in = fopen (gcn_s1_name, "r");
1141 if (!in)
1142 fatal_error (input_location, "cannot open intermediate gcn asm file");
1143
1144 out = fopen (gcn_s2_name, "w");
1145 if (!out)
1146 fatal_error (input_location, "cannot open '%s'", gcn_s2_name);
1147
1148 process_asm (in, out, cfile);
1149
1150 fclose (in);
1151 fclose (out);
1152
1153 /* Run the assemble/link pass. */
1154 fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
1155 obstack_free (&ld_argv_obstack, NULL);
1156
1157 in = fopen (gcn_o_name, "r");
1158 if (!in)
1159 fatal_error (input_location, "cannot open intermediate gcn obj file");
1160
1161 process_obj (in, cfile);
1162
1163 fclose (in);
1164
1165 xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
1166 xputenv (concat ("COMPILER_PATH=", cpath, NULL));
1167 xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
1168 }
1169
1170 fclose (cfile);
1171
1172 compile_native (gcn_cfile_name, outname, collect_gcc, fPIC, fpic);
1173
1174 return 0;
1175 }
1176