1 /* Producing binary form of HSA BRIG from our internal representation.
2    Copyright (C) 2013-2020 Free Software Foundation, Inc.
3    Contributed by Martin Jambor <mjambor@suse.cz> and
4    Martin Liska <mliska@suse.cz>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "function.h"
39 #include "cfg.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
44 #include "cgraph.h"
45 #include "dumpfile.h"
46 #include "print-tree.h"
47 #include "alloc-pool.h"
48 #include "symbol-summary.h"
49 #include "hsa-common.h"
50 #include "gomp-constants.h"
51 
/* Return VAL in little-endian byte order.  On hosts that store 16-bit
   quantities in big-endian order the two bytes are exchanged, otherwise
   VAL is returned unchanged.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
  /* Rely on the host compiler's byte-order macros and byte-swap builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Already in the requested order.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  val = __builtin_bswap16 (val);
# endif
  /* Any other byte order (__ORDER_PDP_ENDIAN__) leaves VAL untouched.  */
#elif defined (WORDS_BIGENDIAN)
  /* Safe but slower default, with shifts.  */
  val = (uint16_t) ((val >> 8) | (val << 8));
#endif
  return val;
}
74 
/* Return VAL in little-endian byte order.  Big-endian hosts get a full
   byte swap, PDP-endian hosts get their two 16-bit halves exchanged and
   little-endian hosts get VAL back unchanged.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
  /* Rely on the host compiler's byte-order macros and byte-swap builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Already in the requested order.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  val = __builtin_bswap32 (val);
# else
  /* __ORDER_PDP_ENDIAN__: exchange the 16-bit halves.  */
  val = (val >> 16) | (val << 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe but slower default, with shifts and masking: first swap the bytes
     inside each 16-bit half, then swap the halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  val = (val >> 16) | (val << 16);
#endif
  return val;
}
98 
/* Return VAL in little-endian byte order.  Big-endian hosts get a full
   byte swap, PDP-endian hosts get their four 16-bit words reversed and
   little-endian hosts get VAL back unchanged.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
  /* Rely on the host compiler's byte-order macros and byte-swap builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Already in the requested order.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  val = __builtin_bswap64 (val);
# else
  /* __ORDER_PDP_ENDIAN__: reverse the order of the 16-bit words.  */
  val = (((val & 0xffffll) << 48)
	 | ((val & 0xffff0000ll) << 16)
	 | ((val & 0xffff00000000ll) >> 16)
	 | ((val & 0xffff000000000000ll) >> 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe but slower default, with shifts and masking: swap neighbouring
     bytes, then neighbouring 16-bit words, then the 32-bit halves.  */
  val = (((val & 0xff00ff00ff00ff00ll) >> 8)
	 | ((val & 0x00ff00ff00ff00ffll) << 8));
  val = (((val & 0xffff0000ffff0000ll) >> 16)
	 | ((val & 0x0000ffff0000ffffll) << 16));
  val = (val >> 32) | (val << 32);
#endif
  return val;
}
128 
/* Name of the ELF section and of the label used for the BRIG output.
   NOTE(review): neither macro is referenced in the part of the file shown
   here; confirm their use at the output site.  */
#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
/* Names given to the data, code and operand sections when brig_init
   initializes them.  */
#define BRIG_SECTION_DATA_NAME    "hsa_data"
#define BRIG_SECTION_CODE_NAME    "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Capacity in bytes of each data chunk buffer of a hsa_brig_section.  */
#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
140 
/* Chunks of BRIG binary data.  Each chunk owns a buffer of
   BRIG_CHUNK_MAX_SIZE bytes allocated by
   hsa_brig_section::allocate_new_chunk.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk, in bytes.  */
  unsigned size;

  /* Pointer to the data; freed by hsa_brig_section::release.  */
  char *data;
};
151 
/* Structure representing a BRIG section, holding and writing its data.  The
   contents are accumulated in a vector of chunks and written out in order
   by output ().  */

struct hsa_brig_section
{
  /* Section name that will be output to the BRIG.  */
  const char *section_name;
  /* Size in bytes of all data stored in the section.  init () starts it at
     the size of the section header, so it also serves as the offset that
     add () hands out.  */
  unsigned total_size;
  /* The size of the header of the section including padding.  */
  unsigned header_byte_count;
  /* The size of the header of the section without any padding.  */
  unsigned header_byte_delta;

  /* Set up an empty section called NAME.  */
  void init (const char *name);
  /* Free all chunk buffers.  */
  void release ();
  /* Write the header and all chunks to the assembly output.  */
  void output ();
  /* Append LEN bytes of DATA, returning the offset they were stored at and
     optionally, through OUTPUT, a pointer to the stored copy.  */
  unsigned add (const void *data, unsigned len, void **output = NULL);
  /* Pad the section so that its size is a multiple of FACTOR.  */
  void round_size_up (int factor);
  /* Translate a section OFFSET into a pointer into the chunk data.  */
  void *get_ptr_by_offset (unsigned int offset);

private:
  /* Push a new empty chunk and make it the current one.  */
  void allocate_new_chunk ();

  /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes.  */
  vec <struct hsa_brig_data_chunk> chunks;

  /* More convenient access to the last chunk from the vector above.  */
  struct hsa_brig_data_chunk *cur_chunk;
};
181 
/* The data, code and operand sections of the BRIG module being built.  */
static struct hsa_brig_section brig_data, brig_code, brig_operand;
/* Counter reset by brig_init and incremented by the directive emitters in
   this file.  */
static uint32_t brig_insn_count;
/* True once brig_init has set up the sections and the string table; cleared
   again by brig_release_data.  */
static bool brig_initialized = false;

/* Mapping between emitted HSA functions and their offset in code segment.  */
static hash_map<tree, BrigCodeOffset32_t> *function_offsets;

/* Hash map of emitted function declarations.  */
static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;

/* Hash table of emitted internal function declaration offsets.  */
hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;

/* List of sbr instructions.  */
static vec <hsa_insn_sbr *> *switch_instructions;
197 
/* Pair of a called function declaration and the operand-section offset that
   refers to it.  */

class function_linkage_pair
{
public:
  /* Record that the operand at offset OFF refers to function DECL.  */
  function_linkage_pair (tree decl, unsigned int off)
    : function_decl (decl), offset (off) {}

  /* Declaration of called function.  */
  tree function_decl;

  /* Offset in operand section.  */
  unsigned int offset;
};

/* Vector of function calls where we need to resolve function offsets.  */
static auto_vec <function_linkage_pair> function_call_linkage;
213 
214 /* Add a new chunk, allocate data for it and initialize it.  */
215 
216 void
allocate_new_chunk()217 hsa_brig_section::allocate_new_chunk ()
218 {
219   struct hsa_brig_data_chunk new_chunk;
220 
221   new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
222   new_chunk.size = 0;
223   cur_chunk = chunks.safe_push (new_chunk);
224 }
225 
226 /* Initialize the brig section.  */
227 
228 void
init(const char * name)229 hsa_brig_section::init (const char *name)
230 {
231   section_name = name;
232   /* While the following computation is basically wrong, because the intent
233      certainly wasn't to have the first character of name and padding, which
234      are a part of sizeof (BrigSectionHeader), included in the first addend,
235      this is what the disassembler expects.  */
236   total_size = sizeof (BrigSectionHeader) + strlen (section_name);
237   chunks.create (1);
238   allocate_new_chunk ();
239   header_byte_delta = total_size;
240   round_size_up (4);
241   header_byte_count = total_size;
242 }
243 
244 /* Free all data in the section.  */
245 
246 void
release()247 hsa_brig_section::release ()
248 {
249   for (unsigned i = 0; i < chunks.length (); i++)
250     free (chunks[i].data);
251   chunks.release ();
252   cur_chunk = NULL;
253 }
254 
255 /* Write the section to the output file to a section with the name given at
256    initialization.  Switches the output section and does not restore it.  */
257 
258 void
output()259 hsa_brig_section::output ()
260 {
261   struct BrigSectionHeader section_header;
262   char padding[8];
263 
264   section_header.byteCount = lendian64 (total_size);
265   section_header.headerByteCount = lendian32 (header_byte_count);
266   section_header.nameLength = lendian32 (strlen (section_name));
267   assemble_string ((const char *) &section_header, 16);
268   assemble_string (section_name, (section_header.nameLength));
269   memset (&padding, 0, sizeof (padding));
270   /* This is also a consequence of the wrong header size computation described
271      in a comment in hsa_brig_section::init.  */
272   assemble_string (padding, 8);
273   for (unsigned i = 0; i < chunks.length (); i++)
274     assemble_string (chunks[i].data, chunks[i].size);
275 }
276 
277 /* Add to the stream LEN bytes of opaque binary DATA.  Return the offset at
278    which it was stored.  If OUTPUT is not NULL, store into it the pointer to
279    the place where DATA was actually stored.  */
280 
281 unsigned
add(const void * data,unsigned len,void ** output)282 hsa_brig_section::add (const void *data, unsigned len, void **output)
283 {
284   unsigned offset = total_size;
285 
286   gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
287   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
288     allocate_new_chunk ();
289 
290   char *dst = cur_chunk->data + cur_chunk->size;
291   memcpy (dst, data, len);
292   if (output)
293     *output = dst;
294   cur_chunk->size += len;
295   total_size += len;
296 
297   return offset;
298 }
299 
300 /* Add padding to section so that its size is divisible by FACTOR.  */
301 
302 void
round_size_up(int factor)303 hsa_brig_section::round_size_up (int factor)
304 {
305   unsigned padding, res = total_size % factor;
306 
307   if (res == 0)
308     return;
309 
310   padding = factor - res;
311   total_size += padding;
312   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
313     {
314       padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
315       cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
316       allocate_new_chunk ();
317     }
318 
319   cur_chunk->size += padding;
320 }
321 
322 /* Return pointer to data by global OFFSET in the section.  */
323 
324 void *
get_ptr_by_offset(unsigned int offset)325 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
326 {
327   gcc_assert (offset < total_size);
328   offset -= header_byte_delta;
329 
330   unsigned i;
331   for (i = 0; offset >= chunks[i].size; i++)
332     offset -= chunks[i].size;
333 
334   return chunks[i].data + offset;
335 }
336 
/* BRIG string data hashing.  One slot describes a string that has been
   emitted to the data section.  */

struct brig_string_slot
{
  /* The string characters; owned by the slot and freed on its removal.  */
  const char *s;
  /* Character emitted just before S, or zero when there is none.  */
  char prefix;
  /* Length of S in characters, not counting PREFIX.  */
  int len;
  /* Offset in the data section at which the string was emitted.  */
  uint32_t offset;
};
346 
/* Hash table helpers.  Traits for the table of emitted strings, whose
   entries are pointers to brig_string_slot.  */

struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
{
  /* Hash a slot from its length, characters and prefix.  */
  static inline hashval_t hash (const value_type);
  /* Compare two slots by length, prefix and characters.  */
  static inline bool equal (const value_type, const compare_type);
  /* Free a slot together with the string it owns.  */
  static inline void remove (value_type);
};
355 
356 /* Returns a hash code for DS.  Adapted from libiberty's htab_hash_string
357    to support strings that may not end in '\0'.  */
358 
359 inline hashval_t
hash(const value_type ds)360 brig_string_slot_hasher::hash (const value_type ds)
361 {
362   hashval_t r = ds->len;
363   int i;
364 
365   for (i = 0; i < ds->len; i++)
366      r = r * 67 + (unsigned) ds->s[i] - 113;
367   r = r * 67 + (unsigned) ds->prefix - 113;
368   return r;
369 }
370 
371 /* Returns nonzero if DS1 and DS2 are equal.  */
372 
373 inline bool
equal(const value_type ds1,const compare_type ds2)374 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
375 {
376   if (ds1->len == ds2->len)
377     return ds1->prefix == ds2->prefix
378       && memcmp (ds1->s, ds2->s, ds1->len) == 0;
379 
380   return 0;
381 }
382 
383 /* Deallocate memory for DS upon its removal.  */
384 
385 inline void
remove(value_type ds)386 brig_string_slot_hasher::remove (value_type ds)
387 {
388   free (const_cast<char *> (ds->s));
389   free (ds);
390 }
391 
/* Hash for strings we output in order not to duplicate them needlessly.
   Created by brig_init and deleted by brig_release_data.  */

static hash_table<brig_string_slot_hasher> *brig_string_htab;
395 
396 /* Emit a null terminated string STR to the data section and return its
397    offset in it.  If PREFIX is non-zero, output it just before STR too.
398    Sanitize the string if SANITIZE option is set to true.  */
399 
400 static unsigned
401 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
402 {
403   unsigned slen = strlen (str);
404   unsigned offset, len = slen + (prefix ? 1 : 0);
405   uint32_t hdr_len = lendian32 (len);
406   brig_string_slot s_slot;
407   brig_string_slot **slot;
408   char *str2;
409 
410   str2 = xstrdup (str);
411 
412   if (sanitize)
413     hsa_sanitize_name (str2);
414   s_slot.s = str2;
415   s_slot.len = slen;
416   s_slot.prefix = prefix;
417   s_slot.offset = 0;
418 
419   slot = brig_string_htab->find_slot (&s_slot, INSERT);
420   if (*slot == NULL)
421     {
422       brig_string_slot *new_slot = XCNEW (brig_string_slot);
423 
424       /* In theory we should fill in BrigData but that would mean copying
425 	 the string to a buffer for no reason, so we just emulate it.  */
426       offset = brig_data.add (&hdr_len, sizeof (hdr_len));
427       if (prefix)
428 	brig_data.add (&prefix, 1);
429 
430       brig_data.add (str2, slen);
431       brig_data.round_size_up (4);
432 
433       /* TODO: could use the string we just copied into
434 	 brig_string->cur_chunk */
435       new_slot->s = str2;
436       new_slot->len = slen;
437       new_slot->prefix = prefix;
438       new_slot->offset = offset;
439       *slot = new_slot;
440     }
441   else
442     {
443       offset = (*slot)->offset;
444       free (str2);
445     }
446 
447   return offset;
448 }
449 
/* Linked list of queued operands.  enqueue_op appends to it, chaining the
   operands through their m_next fields.  */

static struct operand_queue
{
  /* First and last operand of the chain of queued operands.  */
  hsa_op_base *first_op, *last_op;

  /* The offset at which the next operand will be enqueued.  */
  unsigned projected_size;

} op_queue;
461 
462 /* Unless already initialized, initialize infrastructure to produce BRIG.  */
463 
464 static void
brig_init(void)465 brig_init (void)
466 {
467   brig_insn_count = 0;
468 
469   if (brig_initialized)
470     return;
471 
472   brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
473   brig_data.init (BRIG_SECTION_DATA_NAME);
474   brig_code.init (BRIG_SECTION_CODE_NAME);
475   brig_operand.init (BRIG_SECTION_OPERAND_NAME);
476   brig_initialized = true;
477 
478   struct BrigDirectiveModule moddir;
479   memset (&moddir, 0, sizeof (moddir));
480   moddir.base.byteCount = lendian16 (sizeof (moddir));
481 
482   char *modname;
483   if (main_input_filename && *main_input_filename != '\0')
484     {
485       const char *part = strrchr (main_input_filename, '/');
486       if (!part)
487 	part = main_input_filename;
488       else
489 	part++;
490       modname = concat ("&__hsa_module_", part, NULL);
491       char *extension = strchr (modname, '.');
492       if (extension)
493 	*extension = '\0';
494 
495       /* As in LTO mode, we have to emit a different module names.  */
496       if (flag_ltrans)
497 	{
498 	  part = strrchr (asm_file_name, '/');
499 	  if (!part)
500 	    part = asm_file_name;
501 	  else
502 	    part++;
503 	  char *modname2;
504 	  modname2 = xasprintf ("%s_%s", modname, part);
505 	  free (modname);
506 	  modname = modname2;
507 	}
508 
509       hsa_sanitize_name (modname);
510       moddir.name = brig_emit_string (modname);
511       free (modname);
512     }
513   else
514     moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
515   moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
516   moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
517   moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
518   moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
519   if (hsa_machine_large_p ())
520     moddir.machineModel = BRIG_MACHINE_LARGE;
521   else
522     moddir.machineModel = BRIG_MACHINE_SMALL;
523   moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
524   brig_code.add (&moddir, sizeof (moddir));
525 }
526 
527 /* Free all BRIG data.  */
528 
529 static void
brig_release_data(void)530 brig_release_data (void)
531 {
532   delete brig_string_htab;
533   brig_data.release ();
534   brig_code.release ();
535   brig_operand.release ();
536 
537   brig_initialized = 0;
538 }
539 
540 /* Enqueue operation OP.  Return the offset at which it will be stored.  */
541 
542 static unsigned int
enqueue_op(hsa_op_base * op)543 enqueue_op (hsa_op_base *op)
544 {
545   unsigned ret;
546 
547   if (op->m_brig_op_offset)
548     return op->m_brig_op_offset;
549 
550   ret = op_queue.projected_size;
551   op->m_brig_op_offset = op_queue.projected_size;
552 
553   if (!op_queue.first_op)
554     op_queue.first_op = op;
555   else
556     op_queue.last_op->m_next = op;
557   op_queue.last_op = op;
558 
559   if (is_a <hsa_op_immed *> (op))
560     op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
561   else if (is_a <hsa_op_reg *> (op))
562     op_queue.projected_size += sizeof (struct BrigOperandRegister);
563   else if (is_a <hsa_op_address *> (op))
564     op_queue.projected_size += sizeof (struct BrigOperandAddress);
565   else if (is_a <hsa_op_code_ref *> (op))
566     op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
567   else if (is_a <hsa_op_code_list *> (op))
568     op_queue.projected_size += sizeof (struct BrigOperandCodeList);
569   else if (is_a <hsa_op_operand_list *> (op))
570     op_queue.projected_size += sizeof (struct BrigOperandOperandList);
571   else
572     gcc_unreachable ();
573   return ret;
574 }
575 
576 static void emit_immediate_operand (hsa_op_immed *imm);
577 
578 /* Emit directive describing a symbol if it has not been emitted already.
579    Return the offset of the directive.  */
580 
581 static unsigned
emit_directive_variable(class hsa_symbol * symbol)582 emit_directive_variable (class hsa_symbol *symbol)
583 {
584   struct BrigDirectiveVariable dirvar;
585   unsigned name_offset;
586   static unsigned res_name_offset;
587 
588   if (symbol->m_directive_offset)
589     return symbol->m_directive_offset;
590 
591   memset (&dirvar, 0, sizeof (dirvar));
592   dirvar.base.byteCount = lendian16 (sizeof (dirvar));
593   dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
594   dirvar.allocation = symbol->m_allocation;
595 
596   char prefix = symbol->m_global_scope_p ? '&' : '%';
597 
598   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
599     {
600       if (res_name_offset == 0)
601 	res_name_offset = brig_emit_string (symbol->m_name, '%');
602       name_offset = res_name_offset;
603     }
604   else if (symbol->m_name)
605     name_offset = brig_emit_string (symbol->m_name, prefix);
606   else
607     {
608       char buf[64];
609       snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
610 		symbol->m_name_number);
611       name_offset = brig_emit_string (buf, prefix);
612     }
613 
614   dirvar.name = lendian32 (name_offset);
615 
616   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
617     {
618       hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
619       dirvar.init = lendian32 (enqueue_op (tmp));
620     }
621   else
622     dirvar.init = 0;
623   dirvar.type = lendian16 (symbol->m_type);
624   dirvar.segment = symbol->m_segment;
625   dirvar.align = symbol->m_align;
626   dirvar.linkage = symbol->m_linkage;
627   dirvar.dim.lo = symbol->m_dim;
628   dirvar.dim.hi = symbol->m_dim >> 32;
629 
630   /* Global variables are just declared and linked via HSA runtime.  */
631   if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
632     dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
633   dirvar.reserved = 0;
634 
635   if (symbol->m_cst_value)
636     {
637       dirvar.modifier |= BRIG_VARIABLE_CONST;
638       dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
639     }
640 
641   symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
642   return symbol->m_directive_offset;
643 }
644 
645 /* Emit directives describing either a function declaration or definition F and
646    return the produced BrigDirectiveExecutable structure.  The function does
647    not take into account any instructions when calculating nextModuleEntry
648    field of the produced BrigDirectiveExecutable structure so when emitting
649    actual definitions, this field needs to be updated after all of the function
650    is actually added to the code section.  */
651 
652 static BrigDirectiveExecutable *
emit_function_directives(hsa_function_representation * f,bool is_declaration)653 emit_function_directives (hsa_function_representation *f, bool is_declaration)
654 {
655   struct BrigDirectiveExecutable fndir;
656   unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
657   int count = 0;
658   void *ptr_to_fndir;
659   hsa_symbol *sym;
660 
661   if (!f->m_declaration_p)
662     for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
663       {
664 	gcc_assert (!sym->m_emitted_to_brig);
665 	sym->m_emitted_to_brig = true;
666 	emit_directive_variable (sym);
667 	brig_insn_count++;
668       }
669 
670   name_offset = brig_emit_string (f->m_name, '&');
671   inarg_off = brig_code.total_size + sizeof (fndir)
672     + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
673   scoped_off = inarg_off
674     + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
675 
676   if (!f->m_declaration_p)
677     {
678       count += f->m_spill_symbols.length ();
679       count += f->m_private_variables.length ();
680     }
681 
682   next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
683 
684   memset (&fndir, 0, sizeof (fndir));
685   fndir.base.byteCount = lendian16 (sizeof (fndir));
686   fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
687 			       : BRIG_KIND_DIRECTIVE_FUNCTION);
688   fndir.name = lendian32 (name_offset);
689   fndir.inArgCount = lendian16 (f->m_input_args.length ());
690   fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
691   fndir.firstInArg = lendian32 (inarg_off);
692   fndir.firstCodeBlockEntry = lendian32 (scoped_off);
693   fndir.nextModuleEntry = lendian32 (next_toplev_off);
694   fndir.linkage = f->get_linkage ();
695   if (!f->m_declaration_p)
696     fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
697   memset (&fndir.reserved, 0, sizeof (fndir.reserved));
698 
699   /* Once we put a definition of function_offsets, we should not overwrite
700      it with a declaration of the function.  */
701   if (f->m_internal_fn == NULL)
702     {
703       if (!function_offsets->get (f->m_decl) || !is_declaration)
704 	function_offsets->put (f->m_decl, brig_code.total_size);
705     }
706   else
707     {
708       /* Internal function.  */
709       hsa_internal_fn **slot
710 	= hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
711       hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
712       int_fn->m_offset = brig_code.total_size;
713       *slot = int_fn;
714     }
715 
716   brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
717 
718   if (f->m_output_arg)
719     emit_directive_variable (f->m_output_arg);
720   for (unsigned i = 0; i < f->m_input_args.length (); i++)
721     emit_directive_variable (f->m_input_args[i]);
722 
723   if (!f->m_declaration_p)
724     {
725       for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
726 	{
727 	  emit_directive_variable (sym);
728 	  brig_insn_count++;
729 	}
730       for (unsigned i = 0; i < f->m_private_variables.length (); i++)
731 	{
732 	  emit_directive_variable (f->m_private_variables[i]);
733 	  brig_insn_count++;
734 	}
735     }
736 
737   return (BrigDirectiveExecutable *) ptr_to_fndir;
738 }
739 
740 /* Emit a label directive for the given HBB.  We assume it is about to start on
741    the current offset in the code section.  */
742 
743 static void
emit_bb_label_directive(hsa_bb * hbb)744 emit_bb_label_directive (hsa_bb *hbb)
745 {
746   struct BrigDirectiveLabel lbldir;
747 
748   lbldir.base.byteCount = lendian16 (sizeof (lbldir));
749   lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
750   char buf[32];
751   snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
752 	    hbb->m_index);
753   lbldir.name = lendian32 (brig_emit_string (buf, '@'));
754 
755   hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
756 						       sizeof (lbldir));
757   brig_insn_count++;
758 }
759 
760 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
761    holding such, for constants and registers.  */
762 
763 static BrigType16_t
regtype_for_type(BrigType16_t t)764 regtype_for_type (BrigType16_t t)
765 {
766   switch (t)
767     {
768     case BRIG_TYPE_B1:
769       return BRIG_TYPE_B1;
770 
771     case BRIG_TYPE_U8:
772     case BRIG_TYPE_U16:
773     case BRIG_TYPE_U32:
774     case BRIG_TYPE_S8:
775     case BRIG_TYPE_S16:
776     case BRIG_TYPE_S32:
777     case BRIG_TYPE_B8:
778     case BRIG_TYPE_B16:
779     case BRIG_TYPE_B32:
780     case BRIG_TYPE_F16:
781     case BRIG_TYPE_F32:
782     case BRIG_TYPE_U8X4:
783     case BRIG_TYPE_U16X2:
784     case BRIG_TYPE_S8X4:
785     case BRIG_TYPE_S16X2:
786     case BRIG_TYPE_F16X2:
787       return BRIG_TYPE_B32;
788 
789     case BRIG_TYPE_U64:
790     case BRIG_TYPE_S64:
791     case BRIG_TYPE_F64:
792     case BRIG_TYPE_B64:
793     case BRIG_TYPE_U8X8:
794     case BRIG_TYPE_U16X4:
795     case BRIG_TYPE_U32X2:
796     case BRIG_TYPE_S8X8:
797     case BRIG_TYPE_S16X4:
798     case BRIG_TYPE_S32X2:
799     case BRIG_TYPE_F16X4:
800     case BRIG_TYPE_F32X2:
801       return BRIG_TYPE_B64;
802 
803     case BRIG_TYPE_B128:
804     case BRIG_TYPE_U8X16:
805     case BRIG_TYPE_U16X8:
806     case BRIG_TYPE_U32X4:
807     case BRIG_TYPE_U64X2:
808     case BRIG_TYPE_S8X16:
809     case BRIG_TYPE_S16X8:
810     case BRIG_TYPE_S32X4:
811     case BRIG_TYPE_S64X2:
812     case BRIG_TYPE_F16X8:
813     case BRIG_TYPE_F32X4:
814     case BRIG_TYPE_F64X2:
815       return BRIG_TYPE_B128;
816 
817     default:
818       gcc_unreachable ();
819     }
820 }
821 
822 /* Return the length of the BRIG type TYPE that is going to be streamed out as
823    an immediate constant (so it must not be B1).  */
824 
825 unsigned
hsa_get_imm_brig_type_len(BrigType16_t type)826 hsa_get_imm_brig_type_len (BrigType16_t type)
827 {
828   BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
829   BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
830 
831   switch (pack_type)
832     {
833     case BRIG_TYPE_PACK_NONE:
834       break;
835     case BRIG_TYPE_PACK_32:
836       return 4;
837     case BRIG_TYPE_PACK_64:
838       return 8;
839     case BRIG_TYPE_PACK_128:
840       return 16;
841     default:
842       gcc_unreachable ();
843     }
844 
845   switch (base_type)
846     {
847     case BRIG_TYPE_U8:
848     case BRIG_TYPE_S8:
849     case BRIG_TYPE_B8:
850       return 1;
851     case BRIG_TYPE_U16:
852     case BRIG_TYPE_S16:
853     case BRIG_TYPE_F16:
854     case BRIG_TYPE_B16:
855       return 2;
856     case BRIG_TYPE_U32:
857     case BRIG_TYPE_S32:
858     case BRIG_TYPE_F32:
859     case BRIG_TYPE_B32:
860       return 4;
861     case BRIG_TYPE_U64:
862     case BRIG_TYPE_S64:
863     case BRIG_TYPE_F64:
864     case BRIG_TYPE_B64:
865       return 8;
866     case BRIG_TYPE_B128:
867       return 16;
868     default:
869       gcc_unreachable ();
870     }
871 }
872 
873 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
874    If NEED_LEN is not equal to zero, shrink or extend the value
875    to NEED_LEN bytes.  Return how many bytes were written.  */
876 
877 static int
emit_immediate_scalar_to_buffer(tree value,char * data,unsigned need_len)878 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
879 {
880   union hsa_bytes bytes;
881 
882   memset (&bytes, 0, sizeof (bytes));
883   tree type = TREE_TYPE (value);
884   gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
885 
886   unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
887   if (INTEGRAL_TYPE_P (type)
888       || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
889     switch (data_len)
890       {
891       case 1:
892 	bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
893 	break;
894       case 2:
895 	bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
896 	break;
897       case 4:
898 	bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
899 	break;
900       case 8:
901 	bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
902 	break;
903       default:
904 	gcc_unreachable ();
905       }
906   else if (SCALAR_FLOAT_TYPE_P (type))
907     {
908       if (data_len == 2)
909 	{
910 	  sorry ("Support for HSA does not implement immediate 16 bit FPU "
911 		 "operands");
912 	  return 2;
913 	}
914       unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
915       /* There are always 32 bits in each long, no matter the size of
916 	 the hosts long.  */
917       long tmp[6];
918 
919       real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
920 
921       if (int_len == 4)
922 	bytes.b32 = (uint32_t) tmp[0];
923       else
924 	{
925 	  bytes.b64 = (uint64_t)(uint32_t) tmp[1];
926 	  bytes.b64 <<= 32;
927 	  bytes.b64 |= (uint32_t) tmp[0];
928 	}
929     }
930   else
931     gcc_unreachable ();
932 
933   int len;
934   if (need_len == 0)
935     len = data_len;
936   else
937     len = need_len;
938 
939   memcpy (data, &bytes, len);
940   return len;
941 }
942 
943 char *
emit_to_buffer(unsigned * brig_repr_size)944 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
945 {
946   char *brig_repr;
947   *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
948 
949   if (m_tree_value != NULL_TREE)
950     {
951       /* Update brig_repr_size for special tree values.  */
952       if (TREE_CODE (m_tree_value) == STRING_CST)
953 	*brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
954       else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
955 	*brig_repr_size
956 	  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
957 
958       unsigned total_len = *brig_repr_size;
959 
960       /* As we can have a constructor with fewer elements, fill the memory
961 	 with zeros.  */
962       brig_repr = XCNEWVEC (char, total_len);
963       char *p = brig_repr;
964 
965       if (TREE_CODE (m_tree_value) == VECTOR_CST)
966 	{
967 	  /* Variable-length vectors aren't supported.  */
968 	  int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant ();
969 	  for (i = 0; i < num; i++)
970 	    {
971 	      tree v = VECTOR_CST_ELT (m_tree_value, i);
972 	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
973 	      total_len -= actual;
974 	      p += actual;
975 	    }
976 	  /* Vectors should have the exact size.  */
977 	  gcc_assert (total_len == 0);
978 	}
979       else if (TREE_CODE (m_tree_value) == STRING_CST)
980 	memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
981 		TREE_STRING_LENGTH (m_tree_value));
982       else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
983 	{
984 	  gcc_assert (total_len % 2 == 0);
985 	  unsigned actual;
986 	  actual
987 	    = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
988 					       total_len / 2);
989 
990 	  gcc_assert (actual == total_len / 2);
991 	  p += actual;
992 
993 	  actual
994 	    = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
995 					       total_len / 2);
996 	  gcc_assert (actual == total_len / 2);
997 	}
998       else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
999 	{
1000 	  unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
1001 	  for (unsigned i = 0; i < len; i++)
1002 	    {
1003 	      tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1004 	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
1005 	      total_len -= actual;
1006 	      p += actual;
1007 	    }
1008 	}
1009       else
1010 	emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1011     }
1012   else
1013     {
1014       hsa_bytes bytes;
1015 
1016       switch (*brig_repr_size)
1017 	{
1018 	case 1:
1019 	  bytes.b8 = (uint8_t) m_int_value;
1020 	  break;
1021 	case 2:
1022 	  bytes.b16 = (uint16_t) m_int_value;
1023 	  break;
1024 	case 4:
1025 	  bytes.b32 = (uint32_t) m_int_value;
1026 	  break;
1027 	case 8:
1028 	  bytes.b64 = (uint64_t) m_int_value;
1029 	  break;
1030 	default:
1031 	  gcc_unreachable ();
1032 	}
1033 
1034       brig_repr = XNEWVEC (char, *brig_repr_size);
1035       memcpy (brig_repr, &bytes, *brig_repr_size);
1036     }
1037 
1038   return brig_repr;
1039 }
1040 
1041 /* Emit an immediate BRIG operand IMM.  The BRIG type of the immediate might
1042    have been massaged to comply with various HSA/BRIG type requirements, so the
1043    only important aspect of that is the length (because HSAIL might expect
1044    smaller constants or become bit-data).  The data should be represented
1045    according to what is in the tree representation.  */
1046 
1047 static void
emit_immediate_operand(hsa_op_immed * imm)1048 emit_immediate_operand (hsa_op_immed *imm)
1049 {
1050   unsigned brig_repr_size;
1051   char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1052   struct BrigOperandConstantBytes out;
1053 
1054   memset (&out, 0, sizeof (out));
1055   out.base.byteCount = lendian16 (sizeof (out));
1056   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1057   uint32_t byteCount = lendian32 (brig_repr_size);
1058   out.type = lendian16 (imm->m_type);
1059   out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1060   brig_operand.add (&out, sizeof (out));
1061   brig_data.add (brig_repr, brig_repr_size);
1062   brig_data.round_size_up (4);
1063 
1064   free (brig_repr);
1065 }
1066 
1067 /* Emit a register BRIG operand REG.  */
1068 
1069 static void
emit_register_operand(hsa_op_reg * reg)1070 emit_register_operand (hsa_op_reg *reg)
1071 {
1072   struct BrigOperandRegister out;
1073 
1074   out.base.byteCount = lendian16 (sizeof (out));
1075   out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1076   out.regNum = lendian32 (reg->m_hard_num);
1077 
1078   switch (regtype_for_type (reg->m_type))
1079     {
1080     case BRIG_TYPE_B32:
1081       out.regKind = BRIG_REGISTER_KIND_SINGLE;
1082       break;
1083     case BRIG_TYPE_B64:
1084       out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1085       break;
1086     case BRIG_TYPE_B128:
1087       out.regKind = BRIG_REGISTER_KIND_QUAD;
1088       break;
1089     case BRIG_TYPE_B1:
1090       out.regKind = BRIG_REGISTER_KIND_CONTROL;
1091       break;
1092     default:
1093       gcc_unreachable ();
1094     }
1095 
1096   brig_operand.add (&out, sizeof (out));
1097 }
1098 
1099 /* Emit an address BRIG operand ADDR.  */
1100 
1101 static void
emit_address_operand(hsa_op_address * addr)1102 emit_address_operand (hsa_op_address *addr)
1103 {
1104   struct BrigOperandAddress out;
1105 
1106   out.base.byteCount = lendian16 (sizeof (out));
1107   out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1108   out.symbol = addr->m_symbol
1109     ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1110   out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1111 
1112   if (sizeof (addr->m_imm_offset) == 8)
1113     {
1114       out.offset.lo = lendian32 (addr->m_imm_offset);
1115       out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1116     }
1117   else
1118     {
1119       gcc_assert (sizeof (addr->m_imm_offset) == 4);
1120       out.offset.lo = lendian32 (addr->m_imm_offset);
1121       out.offset.hi = 0;
1122     }
1123 
1124   brig_operand.add (&out, sizeof (out));
1125 }
1126 
1127 /* Emit a code reference operand REF.  */
1128 
1129 static void
emit_code_ref_operand(hsa_op_code_ref * ref)1130 emit_code_ref_operand (hsa_op_code_ref *ref)
1131 {
1132   struct BrigOperandCodeRef out;
1133 
1134   out.base.byteCount = lendian16 (sizeof (out));
1135   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1136   out.ref = lendian32 (ref->m_directive_offset);
1137   brig_operand.add (&out, sizeof (out));
1138 }
1139 
1140 /* Emit a code list operand CODE_LIST.  */
1141 
1142 static void
emit_code_list_operand(hsa_op_code_list * code_list)1143 emit_code_list_operand (hsa_op_code_list *code_list)
1144 {
1145   struct BrigOperandCodeList out;
1146   unsigned args = code_list->m_offsets.length ();
1147 
1148   for (unsigned i = 0; i < args; i++)
1149     gcc_assert (code_list->m_offsets[i]);
1150 
1151   out.base.byteCount = lendian16 (sizeof (out));
1152   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1153 
1154   uint32_t byteCount = lendian32 (4 * args);
1155 
1156   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1157   brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1158   brig_data.round_size_up (4);
1159   brig_operand.add (&out, sizeof (out));
1160 }
1161 
1162 /* Emit an operand list operand OPERAND_LIST.  */
1163 
1164 static void
emit_operand_list_operand(hsa_op_operand_list * operand_list)1165 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1166 {
1167   struct BrigOperandOperandList out;
1168   unsigned args = operand_list->m_offsets.length ();
1169 
1170   for (unsigned i = 0; i < args; i++)
1171     gcc_assert (operand_list->m_offsets[i]);
1172 
1173   out.base.byteCount = lendian16 (sizeof (out));
1174   out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1175 
1176   uint32_t byteCount = lendian32 (4 * args);
1177 
1178   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1179   brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1180   brig_data.round_size_up (4);
1181   brig_operand.add (&out, sizeof (out));
1182 }
1183 
1184 /* Emit all operands queued for writing.  */
1185 
1186 static void
emit_queued_operands(void)1187 emit_queued_operands (void)
1188 {
1189   for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1190     {
1191       gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1192       if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1193 	emit_immediate_operand (imm);
1194       else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1195 	emit_register_operand (reg);
1196       else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1197 	emit_address_operand (addr);
1198       else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1199 	emit_code_ref_operand (ref);
1200       else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1201 	emit_code_list_operand (code_list);
1202       else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1203 	emit_operand_list_operand (l);
1204       else
1205 	gcc_unreachable ();
1206     }
1207 }
1208 
1209 /* Emit directives describing the function that is used for
1210    a function declaration.  */
1211 
1212 static BrigDirectiveExecutable *
emit_function_declaration(tree decl)1213 emit_function_declaration (tree decl)
1214 {
1215   hsa_function_representation *f = hsa_generate_function_declaration (decl);
1216 
1217   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1218   emit_queued_operands ();
1219 
1220   delete f;
1221 
1222   return e;
1223 }
1224 
1225 /* Emit directives describing the function that is used for
1226    an internal function declaration.  */
1227 
1228 static BrigDirectiveExecutable *
emit_internal_fn_decl(hsa_internal_fn * fn)1229 emit_internal_fn_decl (hsa_internal_fn *fn)
1230 {
1231   hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1232 
1233   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1234   emit_queued_operands ();
1235 
1236   delete f;
1237 
1238   return e;
1239 }
1240 
1241 /* Enqueue all operands of INSN and return offset to BRIG data section
1242    to list of operand offsets.  */
1243 
1244 static unsigned
emit_insn_operands(hsa_insn_basic * insn)1245 emit_insn_operands (hsa_insn_basic *insn)
1246 {
1247   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1248     operand_offsets;
1249 
1250   unsigned l = insn->operand_count ();
1251 
1252   /* We have N operands so use 4 * N for the byte_count.  */
1253   uint32_t byte_count = lendian32 (4 * l);
1254   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1255   if (l > 0)
1256     {
1257       operand_offsets.safe_grow (l);
1258       for (unsigned i = 0; i < l; i++)
1259 	operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1260 
1261       brig_data.add (operand_offsets.address (),
1262 		     l * sizeof (BrigOperandOffset32_t));
1263     }
1264   brig_data.round_size_up (4);
1265   return offset;
1266 }
1267 
1268 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1269    to BRIG data section to list of operand offsets.  */
1270 
1271 static unsigned
1272 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1273 	       hsa_op_base *op2 = NULL)
1274 {
1275   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1276     operand_offsets;
1277 
1278   gcc_checking_assert (op0 != NULL);
1279   operand_offsets.safe_push (enqueue_op (op0));
1280 
1281   if (op1 != NULL)
1282     {
1283       operand_offsets.safe_push (enqueue_op (op1));
1284       if (op2 != NULL)
1285 	operand_offsets.safe_push (enqueue_op (op2));
1286     }
1287 
1288   unsigned l = operand_offsets.length ();
1289 
1290   /* We have N operands so use 4 * N for the byte_count.  */
1291   uint32_t byte_count = lendian32 (4 * l);
1292 
1293   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1294   brig_data.add (operand_offsets.address (),
1295 		 l * sizeof (BrigOperandOffset32_t));
1296 
1297   brig_data.round_size_up (4);
1298 
1299   return offset;
1300 }
1301 
1302 /* Emit an HSA memory instruction and all necessary directives, schedule
1303    necessary operands for writing.  */
1304 
1305 static void
emit_memory_insn(hsa_insn_mem * mem)1306 emit_memory_insn (hsa_insn_mem *mem)
1307 {
1308   struct BrigInstMem repr;
1309   gcc_checking_assert (mem->operand_count () == 2);
1310 
1311   hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1312 
1313   /* This is necessary because of the erroneous typedef of
1314      BrigMemoryModifier8_t which introduces padding which may then contain
1315      random stuff (which we do not want so that we can test things don't
1316      change).  */
1317   memset (&repr, 0, sizeof (repr));
1318   repr.base.base.byteCount = lendian16 (sizeof (repr));
1319   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1320   repr.base.opcode = lendian16 (mem->m_opcode);
1321   repr.base.type = lendian16 (mem->m_type);
1322   repr.base.operands = lendian32 (emit_insn_operands (mem));
1323 
1324   if (addr->m_symbol)
1325     repr.segment = addr->m_symbol->m_segment;
1326   else
1327     repr.segment = BRIG_SEGMENT_FLAT;
1328   repr.modifier = 0;
1329   repr.equivClass = mem->m_equiv_class;
1330   repr.align = mem->m_align;
1331   if (mem->m_opcode == BRIG_OPCODE_LD)
1332     repr.width = BRIG_WIDTH_1;
1333   else
1334     repr.width = BRIG_WIDTH_NONE;
1335   memset (&repr.reserved, 0, sizeof (repr.reserved));
1336   brig_code.add (&repr, sizeof (repr));
1337   brig_insn_count++;
1338 }
1339 
1340 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1341    necessary operands for writing.  */
1342 
1343 static void
emit_signal_insn(hsa_insn_signal * mem)1344 emit_signal_insn (hsa_insn_signal *mem)
1345 {
1346   struct BrigInstSignal repr;
1347 
1348   memset (&repr, 0, sizeof (repr));
1349   repr.base.base.byteCount = lendian16 (sizeof (repr));
1350   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1351   repr.base.opcode = lendian16 (mem->m_opcode);
1352   repr.base.type = lendian16 (mem->m_type);
1353   repr.base.operands = lendian32 (emit_insn_operands (mem));
1354 
1355   repr.memoryOrder = mem->m_memory_order;
1356   repr.signalOperation = mem->m_signalop;
1357   repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1358 
1359   brig_code.add (&repr, sizeof (repr));
1360   brig_insn_count++;
1361 }
1362 
1363 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1364    necessary operands for writing.  */
1365 
1366 static void
emit_atomic_insn(hsa_insn_atomic * mem)1367 emit_atomic_insn (hsa_insn_atomic *mem)
1368 {
1369   struct BrigInstAtomic repr;
1370 
1371   /* Either operand[0] or operand[1] must be an address operand.  */
1372   hsa_op_address *addr = NULL;
1373   if (is_a <hsa_op_address *> (mem->get_op (0)))
1374     addr = as_a <hsa_op_address *> (mem->get_op (0));
1375   else
1376     addr = as_a <hsa_op_address *> (mem->get_op (1));
1377 
1378   memset (&repr, 0, sizeof (repr));
1379   repr.base.base.byteCount = lendian16 (sizeof (repr));
1380   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1381   repr.base.opcode = lendian16 (mem->m_opcode);
1382   repr.base.type = lendian16 (mem->m_type);
1383   repr.base.operands = lendian32 (emit_insn_operands (mem));
1384 
1385   if (addr->m_symbol)
1386     repr.segment = addr->m_symbol->m_segment;
1387   else
1388     repr.segment = BRIG_SEGMENT_FLAT;
1389   repr.memoryOrder = mem->m_memoryorder;
1390   repr.memoryScope = mem->m_memoryscope;
1391   repr.atomicOperation = mem->m_atomicop;
1392 
1393   brig_code.add (&repr, sizeof (repr));
1394   brig_insn_count++;
1395 }
1396 
1397 /* Emit an HSA LDA instruction and all necessary directives, schedule
1398    necessary operands for writing.  */
1399 
1400 static void
emit_addr_insn(hsa_insn_basic * insn)1401 emit_addr_insn (hsa_insn_basic *insn)
1402 {
1403   struct BrigInstAddr repr;
1404 
1405   hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1406 
1407   repr.base.base.byteCount = lendian16 (sizeof (repr));
1408   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1409   repr.base.opcode = lendian16 (insn->m_opcode);
1410   repr.base.type = lendian16 (insn->m_type);
1411   repr.base.operands = lendian32 (emit_insn_operands (insn));
1412 
1413   if (addr->m_symbol)
1414     repr.segment = addr->m_symbol->m_segment;
1415   else
1416     repr.segment = BRIG_SEGMENT_FLAT;
1417   memset (&repr.reserved, 0, sizeof (repr.reserved));
1418 
1419   brig_code.add (&repr, sizeof (repr));
1420   brig_insn_count++;
1421 }
1422 
1423 /* Emit an HSA segment conversion instruction and all necessary directives,
1424    schedule necessary operands for writing.  */
1425 
1426 static void
emit_segment_insn(hsa_insn_seg * seg)1427 emit_segment_insn (hsa_insn_seg *seg)
1428 {
1429   struct BrigInstSegCvt repr;
1430 
1431   repr.base.base.byteCount = lendian16 (sizeof (repr));
1432   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1433   repr.base.opcode = lendian16 (seg->m_opcode);
1434   repr.base.type = lendian16 (seg->m_type);
1435   repr.base.operands = lendian32 (emit_insn_operands (seg));
1436   repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1437   repr.segment = seg->m_segment;
1438   repr.modifier = 0;
1439 
1440   brig_code.add (&repr, sizeof (repr));
1441 
1442   brig_insn_count++;
1443 }
1444 
1445 /* Emit an HSA alloca instruction and all necessary directives,
1446    schedule necessary operands for writing.  */
1447 
1448 static void
emit_alloca_insn(hsa_insn_alloca * alloca)1449 emit_alloca_insn (hsa_insn_alloca *alloca)
1450 {
1451   struct BrigInstMem repr;
1452   gcc_checking_assert (alloca->operand_count () == 2);
1453 
1454   memset (&repr, 0, sizeof (repr));
1455   repr.base.base.byteCount = lendian16 (sizeof (repr));
1456   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1457   repr.base.opcode = lendian16 (alloca->m_opcode);
1458   repr.base.type = lendian16 (alloca->m_type);
1459   repr.base.operands = lendian32 (emit_insn_operands (alloca));
1460   repr.segment = BRIG_SEGMENT_PRIVATE;
1461   repr.modifier = 0;
1462   repr.equivClass = 0;
1463   repr.align = alloca->m_align;
1464   repr.width = BRIG_WIDTH_NONE;
1465   memset (&repr.reserved, 0, sizeof (repr.reserved));
1466   brig_code.add (&repr, sizeof (repr));
1467   brig_insn_count++;
1468 }
1469 
1470 /* Emit an HSA comparison instruction and all necessary directives,
1471    schedule necessary operands for writing.  */
1472 
1473 static void
emit_cmp_insn(hsa_insn_cmp * cmp)1474 emit_cmp_insn (hsa_insn_cmp *cmp)
1475 {
1476   struct BrigInstCmp repr;
1477 
1478   memset (&repr, 0, sizeof (repr));
1479   repr.base.base.byteCount = lendian16 (sizeof (repr));
1480   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1481   repr.base.opcode = lendian16 (cmp->m_opcode);
1482   repr.base.type = lendian16 (cmp->m_type);
1483   repr.base.operands = lendian32 (emit_insn_operands (cmp));
1484 
1485   if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1486     repr.sourceType
1487       = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1488   else
1489     repr.sourceType
1490       = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1491   repr.modifier = 0;
1492   repr.compare = cmp->m_compare;
1493   repr.pack = 0;
1494 
1495   brig_code.add (&repr, sizeof (repr));
1496   brig_insn_count++;
1497 }
1498 
1499 /* Emit an HSA generic branching/sycnronization instruction.  */
1500 
1501 static void
emit_generic_branch_insn(hsa_insn_br * br)1502 emit_generic_branch_insn (hsa_insn_br *br)
1503 {
1504   struct BrigInstBr repr;
1505   repr.base.base.byteCount = lendian16 (sizeof (repr));
1506   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1507   repr.base.opcode = lendian16 (br->m_opcode);
1508   repr.width = br->m_width;
1509   repr.base.type = lendian16 (br->m_type);
1510   repr.base.operands = lendian32 (emit_insn_operands (br));
1511   memset (&repr.reserved, 0, sizeof (repr.reserved));
1512 
1513   brig_code.add (&repr, sizeof (repr));
1514   brig_insn_count++;
1515 }
1516 
1517 /* Emit an HSA conditional branching instruction and all necessary directives,
1518    schedule necessary operands for writing.  */
1519 
1520 static void
emit_cond_branch_insn(hsa_insn_cbr * br)1521 emit_cond_branch_insn (hsa_insn_cbr *br)
1522 {
1523   struct BrigInstBr repr;
1524 
1525   basic_block target = NULL;
1526   edge_iterator ei;
1527   edge e;
1528 
1529   /* At the moment we only handle direct conditional jumps.  */
1530   gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1531   repr.base.base.byteCount = lendian16 (sizeof (repr));
1532   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1533   repr.base.opcode = lendian16 (br->m_opcode);
1534   repr.width = br->m_width;
1535   /* For Conditional jumps the type is always B1.  */
1536   repr.base.type = lendian16 (BRIG_TYPE_B1);
1537 
1538   FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1539     if (e->flags & EDGE_TRUE_VALUE)
1540       {
1541 	target = e->dest;
1542 	break;
1543       }
1544   gcc_assert (target);
1545 
1546   repr.base.operands
1547     = lendian32 (emit_operands (br->get_op (0),
1548 				&hsa_bb_for_bb (target)->m_label_ref));
1549   memset (&repr.reserved, 0, sizeof (repr.reserved));
1550 
1551   brig_code.add (&repr, sizeof (repr));
1552   brig_insn_count++;
1553 }
1554 
1555 /* Emit an HSA unconditional jump branching instruction that points to
1556    a label REFERENCE.  */
1557 
1558 static void
emit_unconditional_jump(hsa_op_code_ref * reference)1559 emit_unconditional_jump (hsa_op_code_ref *reference)
1560 {
1561   struct BrigInstBr repr;
1562 
1563   repr.base.base.byteCount = lendian16 (sizeof (repr));
1564   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1565   repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1566   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1567   /* Direct branches to labels must be width(all).  */
1568   repr.width = BRIG_WIDTH_ALL;
1569 
1570   repr.base.operands = lendian32 (emit_operands (reference));
1571   memset (&repr.reserved, 0, sizeof (repr.reserved));
1572   brig_code.add (&repr, sizeof (repr));
1573   brig_insn_count++;
1574 }
1575 
1576 /* Emit an HSA switch jump instruction that uses a jump table to
1577    jump to a destination label.  */
1578 
1579 static void
emit_switch_insn(hsa_insn_sbr * sbr)1580 emit_switch_insn (hsa_insn_sbr *sbr)
1581 {
1582   struct BrigInstBr repr;
1583 
1584   gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1585   repr.base.base.byteCount = lendian16 (sizeof (repr));
1586   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1587   repr.base.opcode = lendian16 (sbr->m_opcode);
1588   repr.width = BRIG_WIDTH_1;
1589   /* For Conditional jumps the type is always B1.  */
1590   hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1591   repr.base.type = lendian16 (index->m_type);
1592   repr.base.operands
1593     = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1594   memset (&repr.reserved, 0, sizeof (repr.reserved));
1595 
1596   brig_code.add (&repr, sizeof (repr));
1597   brig_insn_count++;
1598 }
1599 
1600 /* Emit a HSA convert instruction and all necessary directives, schedule
1601    necessary operands for writing.  */
1602 
1603 static void
emit_cvt_insn(hsa_insn_cvt * insn)1604 emit_cvt_insn (hsa_insn_cvt *insn)
1605 {
1606   struct BrigInstCvt repr;
1607   BrigType16_t srctype;
1608 
1609   repr.base.base.byteCount = lendian16 (sizeof (repr));
1610   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1611   repr.base.opcode = lendian16 (insn->m_opcode);
1612   repr.base.type = lendian16 (insn->m_type);
1613   repr.base.operands = lendian32 (emit_insn_operands (insn));
1614 
1615   if (is_a <hsa_op_reg *> (insn->get_op (1)))
1616     srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1617   else
1618     srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1619   repr.sourceType = lendian16 (srctype);
1620   repr.modifier = 0;
1621   /* float to smaller float requires a rounding setting (we default
1622      to 'near'.  */
1623   if (hsa_type_float_p (insn->m_type)
1624       && (!hsa_type_float_p (srctype)
1625 	  || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1626 	      < (srctype & BRIG_TYPE_BASE_MASK))))
1627     repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1628   else if (hsa_type_integer_p (insn->m_type) &&
1629 	   hsa_type_float_p (srctype))
1630     repr.round = BRIG_ROUND_INTEGER_ZERO;
1631   else
1632     repr.round = BRIG_ROUND_NONE;
1633   brig_code.add (&repr, sizeof (repr));
1634   brig_insn_count++;
1635 }
1636 
1637 /* Emit call instruction INSN, where this instruction must be closed
1638    within a call block instruction.  */
1639 
1640 static void
emit_call_insn(hsa_insn_call * call)1641 emit_call_insn (hsa_insn_call *call)
1642 {
1643   struct BrigInstBr repr;
1644 
1645   repr.base.base.byteCount = lendian16 (sizeof (repr));
1646   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1647   repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1648   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1649 
1650   repr.base.operands
1651     = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1652 				call->m_args_code_list));
1653 
1654   /* Internal functions have not set m_called_function.  */
1655   if (call->m_called_function)
1656     {
1657       function_linkage_pair pair (call->m_called_function,
1658 				  call->m_func.m_brig_op_offset);
1659       function_call_linkage.safe_push (pair);
1660     }
1661   else
1662     {
1663       hsa_internal_fn *slot
1664 	= hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1665       gcc_assert (slot);
1666       gcc_assert (slot->m_offset > 0);
1667       call->m_func.m_directive_offset = slot->m_offset;
1668     }
1669 
1670   repr.width = BRIG_WIDTH_ALL;
1671   memset (&repr.reserved, 0, sizeof (repr.reserved));
1672 
1673   brig_code.add (&repr, sizeof (repr));
1674   brig_insn_count++;
1675 }
1676 
1677 /* Emit argument block directive.  */
1678 
1679 static void
emit_arg_block_insn(hsa_insn_arg_block * insn)1680 emit_arg_block_insn (hsa_insn_arg_block *insn)
1681 {
1682   switch (insn->m_kind)
1683     {
1684     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1685       {
1686 	struct BrigDirectiveArgBlock repr;
1687 	repr.base.byteCount = lendian16 (sizeof (repr));
1688 	repr.base.kind = lendian16 (insn->m_kind);
1689 	brig_code.add (&repr, sizeof (repr));
1690 
1691 	for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1692 	  {
1693 	    insn->m_call_insn->m_args_code_list->m_offsets[i]
1694 	      = lendian32 (emit_directive_variable
1695 			   (insn->m_call_insn->m_input_args[i]));
1696 	    brig_insn_count++;
1697 	  }
1698 
1699 	if (insn->m_call_insn->m_output_arg)
1700 	  {
1701 	    insn->m_call_insn->m_result_code_list->m_offsets[0]
1702 	      = lendian32 (emit_directive_variable
1703 			   (insn->m_call_insn->m_output_arg));
1704 	    brig_insn_count++;
1705 	  }
1706 
1707 	break;
1708       }
1709     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1710       {
1711 	struct BrigDirectiveArgBlock repr;
1712 	repr.base.byteCount = lendian16 (sizeof (repr));
1713 	repr.base.kind = lendian16 (insn->m_kind);
1714 	brig_code.add (&repr, sizeof (repr));
1715 	break;
1716       }
1717     default:
1718       gcc_unreachable ();
1719     }
1720 
1721   brig_insn_count++;
1722 }
1723 
1724 /* Emit comment directive.  */
1725 
1726 static void
emit_comment_insn(hsa_insn_comment * insn)1727 emit_comment_insn (hsa_insn_comment *insn)
1728 {
1729   struct BrigDirectiveComment repr;
1730   memset (&repr, 0, sizeof (repr));
1731 
1732   repr.base.byteCount = lendian16 (sizeof (repr));
1733   repr.base.kind = lendian16 (insn->m_opcode);
1734   repr.name = brig_emit_string (insn->m_comment, '\0', false);
1735   brig_code.add (&repr, sizeof (repr));
1736 }
1737 
1738 /* Emit queue instruction INSN.  */
1739 
1740 static void
emit_queue_insn(hsa_insn_queue * insn)1741 emit_queue_insn (hsa_insn_queue *insn)
1742 {
1743   BrigInstQueue repr;
1744   memset (&repr, 0, sizeof (repr));
1745 
1746   repr.base.base.byteCount = lendian16 (sizeof (repr));
1747   repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1748   repr.base.opcode = lendian16 (insn->m_opcode);
1749   repr.base.type = lendian16 (insn->m_type);
1750   repr.segment = insn->m_segment;
1751   repr.memoryOrder = insn->m_memory_order;
1752   repr.base.operands = lendian32 (emit_insn_operands (insn));
1753   brig_data.round_size_up (4);
1754   brig_code.add (&repr, sizeof (repr));
1755 
1756   brig_insn_count++;
1757 }
1758 
1759 /* Emit source type instruction INSN.  */
1760 
1761 static void
emit_srctype_insn(hsa_insn_srctype * insn)1762 emit_srctype_insn (hsa_insn_srctype *insn)
1763 {
1764   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1765   struct BrigInstSourceType repr;
1766   unsigned operand_count = insn->operand_count ();
1767   gcc_checking_assert (operand_count >= 2);
1768 
1769   memset (&repr, 0, sizeof (repr));
1770   repr.sourceType = lendian16 (insn->m_source_type);
1771   repr.base.base.byteCount = lendian16 (sizeof (repr));
1772   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1773   repr.base.opcode = lendian16 (insn->m_opcode);
1774   repr.base.type = lendian16 (insn->m_type);
1775 
1776   repr.base.operands = lendian32 (emit_insn_operands (insn));
1777   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1778   brig_insn_count++;
1779 }
1780 
1781 /* Emit packed instruction INSN.  */
1782 
1783 static void
emit_packed_insn(hsa_insn_packed * insn)1784 emit_packed_insn (hsa_insn_packed *insn)
1785 {
1786   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1787   struct BrigInstSourceType repr;
1788   unsigned operand_count = insn->operand_count ();
1789   gcc_checking_assert (operand_count >= 2);
1790 
1791   memset (&repr, 0, sizeof (repr));
1792   repr.sourceType = lendian16 (insn->m_source_type);
1793   repr.base.base.byteCount = lendian16 (sizeof (repr));
1794   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1795   repr.base.opcode = lendian16 (insn->m_opcode);
1796   repr.base.type = lendian16 (insn->m_type);
1797 
1798   if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1799     {
1800       /* Create operand list for packed type.  */
1801       for (unsigned i = 1; i < operand_count; i++)
1802 	{
1803 	  gcc_checking_assert (insn->get_op (i));
1804 	  insn->m_operand_list->m_offsets[i - 1]
1805 	    = lendian32 (enqueue_op (insn->get_op (i)));
1806 	}
1807 
1808       repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1809 						     insn->m_operand_list));
1810     }
1811   else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1812     {
1813       /* Create operand list for packed type.  */
1814       for (unsigned i = 0; i < operand_count - 1; i++)
1815 	{
1816 	  gcc_checking_assert (insn->get_op (i));
1817 	  insn->m_operand_list->m_offsets[i]
1818 	    = lendian32 (enqueue_op (insn->get_op (i)));
1819 	}
1820 
1821       unsigned ops = emit_operands (insn->m_operand_list,
1822 				    insn->get_op (insn->operand_count () - 1));
1823       repr.base.operands = lendian32 (ops);
1824     }
1825 
1826 
1827   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1828   brig_insn_count++;
1829 }
1830 
1831 /* Emit a basic HSA instruction and all necessary directives, schedule
1832    necessary operands for writing.  */
1833 
1834 static void
emit_basic_insn(hsa_insn_basic * insn)1835 emit_basic_insn (hsa_insn_basic *insn)
1836 {
1837   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1838   struct BrigInstMod repr;
1839   BrigType16_t type;
1840 
1841   memset (&repr, 0, sizeof (repr));
1842   repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1843   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1844   repr.base.opcode = lendian16 (insn->m_opcode);
1845   switch (insn->m_opcode)
1846     {
1847       /* And the bit-logical operations need bit types and whine about
1848 	 arithmetic types :-/  */
1849       case BRIG_OPCODE_AND:
1850       case BRIG_OPCODE_OR:
1851       case BRIG_OPCODE_XOR:
1852       case BRIG_OPCODE_NOT:
1853 	type = regtype_for_type (insn->m_type);
1854 	break;
1855       default:
1856 	type = insn->m_type;
1857 	break;
1858     }
1859   repr.base.type = lendian16 (type);
1860   repr.base.operands = lendian32 (emit_insn_operands (insn));
1861 
1862   if (hsa_type_packed_p (type))
1863     {
1864       if (hsa_type_float_p (type)
1865 	  && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1866 	repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1867       else
1868 	repr.round = 0;
1869       /* We assume that destination and sources agree in packing layout.  */
1870       if (insn->num_used_ops () >= 2)
1871 	repr.pack = BRIG_PACK_PP;
1872       else
1873 	repr.pack = BRIG_PACK_P;
1874       repr.reserved = 0;
1875       repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1876       repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1877       brig_code.add (&repr, sizeof (struct BrigInstMod));
1878     }
1879   else
1880     brig_code.add (&repr, sizeof (struct BrigInstBasic));
1881   brig_insn_count++;
1882 }
1883 
1884 /* Emit an HSA instruction and all necessary directives, schedule necessary
1885    operands for writing.  */
1886 
1887 static void
emit_insn(hsa_insn_basic * insn)1888 emit_insn (hsa_insn_basic *insn)
1889 {
1890   gcc_assert (!is_a <hsa_insn_phi *> (insn));
1891 
1892   insn->m_brig_offset = brig_code.total_size;
1893 
1894   if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1895     emit_signal_insn (signal);
1896   else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1897     emit_atomic_insn (atom);
1898   else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1899     emit_memory_insn (mem);
1900   else if (insn->m_opcode == BRIG_OPCODE_LDA)
1901     emit_addr_insn (insn);
1902   else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1903     emit_segment_insn (seg);
1904   else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1905     emit_cmp_insn (cmp);
1906   else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1907     emit_cond_branch_insn (br);
1908   else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1909     {
1910       if (switch_instructions == NULL)
1911 	switch_instructions = new vec <hsa_insn_sbr *> ();
1912 
1913       switch_instructions->safe_push (sbr);
1914       emit_switch_insn (sbr);
1915     }
1916   else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1917     emit_generic_branch_insn (br);
1918   else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1919     emit_arg_block_insn (block);
1920   else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1921     emit_call_insn (call);
1922   else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1923     emit_comment_insn (comment);
1924   else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1925     emit_queue_insn (queue);
1926   else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1927     emit_srctype_insn (srctype);
1928   else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1929     emit_packed_insn (packed);
1930   else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1931     emit_cvt_insn (cvt);
1932   else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1933     emit_alloca_insn (alloca);
1934   else
1935     emit_basic_insn (insn);
1936 }
1937 
1938 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1939    or we are about to finish emitting code, if it is NULL.  If the fall through
1940    edge from BB does not lead to NEXT_BB, emit an unconditional jump.  */
1941 
1942 static void
perhaps_emit_branch(basic_block bb,basic_block next_bb)1943 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1944 {
1945   basic_block t_bb = NULL, ff = NULL;
1946 
1947   edge_iterator ei;
1948   edge e;
1949 
1950   /* If the last instruction of BB is a switch, ignore emission of all
1951      edges.  */
1952   if (hsa_bb_for_bb (bb)->m_last_insn
1953       && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1954     return;
1955 
1956   FOR_EACH_EDGE (e, ei, bb->succs)
1957     if (e->flags & EDGE_TRUE_VALUE)
1958       {
1959 	gcc_assert (!t_bb);
1960 	t_bb = e->dest;
1961       }
1962     else
1963       {
1964 	gcc_assert (!ff);
1965 	ff = e->dest;
1966       }
1967 
1968   if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1969     return;
1970 
1971   emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1972 }
1973 
1974 /* Emit the a function with name NAME to the various brig sections.  */
1975 
1976 void
hsa_brig_emit_function(void)1977 hsa_brig_emit_function (void)
1978 {
1979   basic_block bb, prev_bb;
1980   hsa_insn_basic *insn;
1981   BrigDirectiveExecutable *ptr_to_fndir;
1982 
1983   brig_init ();
1984 
1985   brig_insn_count = 0;
1986   memset (&op_queue, 0, sizeof (op_queue));
1987   op_queue.projected_size = brig_operand.total_size;
1988 
1989   if (!function_offsets)
1990     function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1991 
1992   if (!emitted_declarations)
1993     emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1994 
1995   for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1996     {
1997       tree called = hsa_cfun->m_called_functions[i];
1998 
1999       /* If the function has no definition, emit a declaration.  */
2000       if (!emitted_declarations->get (called))
2001 	{
2002 	  BrigDirectiveExecutable *e = emit_function_declaration (called);
2003 	  emitted_declarations->put (called, e);
2004 	}
2005     }
2006 
2007   for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2008     {
2009       hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2010       emit_internal_fn_decl (called);
2011     }
2012 
2013   ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2014   for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2015        insn;
2016        insn = insn->m_next)
2017     emit_insn (insn);
2018   prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2019   FOR_EACH_BB_FN (bb, cfun)
2020     {
2021       perhaps_emit_branch (prev_bb, bb);
2022       emit_bb_label_directive (hsa_bb_for_bb (bb));
2023       for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2024 	emit_insn (insn);
2025       prev_bb = bb;
2026     }
2027   perhaps_emit_branch (prev_bb, NULL);
2028   ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2029 
2030   /* Fill up label references for all sbr instructions.  */
2031   if (switch_instructions)
2032     {
2033       for (unsigned i = 0; i < switch_instructions->length (); i++)
2034 	{
2035 	  hsa_insn_sbr *sbr = (*switch_instructions)[i];
2036 	  for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2037 	    {
2038 	      hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2039 	      sbr->m_label_code_list->m_offsets[j]
2040 		= hbb->m_label_ref.m_directive_offset;
2041 	    }
2042 	}
2043 
2044       switch_instructions->release ();
2045       delete switch_instructions;
2046       switch_instructions = NULL;
2047     }
2048 
2049   if (dump_file)
2050     {
2051       fprintf (dump_file, "------- After BRIG emission: -------\n");
2052       dump_hsa_cfun (dump_file);
2053     }
2054 
2055   emit_queued_operands ();
2056 }
2057 
2058 /* Emit all OMP symbols related to OMP.  */
2059 
2060 void
hsa_brig_emit_omp_symbols(void)2061 hsa_brig_emit_omp_symbols (void)
2062 {
2063   brig_init ();
2064   emit_directive_variable (hsa_num_threads);
2065 }
2066 
2067 /* Create and return __hsa_global_variables symbol that contains
2068    all informations consumed by libgomp to link global variables
2069    with their string names used by an HSA kernel.  */
2070 
2071 static tree
hsa_output_global_variables()2072 hsa_output_global_variables ()
2073 {
2074   unsigned l = hsa_global_variable_symbols->elements ();
2075 
2076   tree variable_info_type = make_node (RECORD_TYPE);
2077   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2078 			   get_identifier ("name"), ptr_type_node);
2079   DECL_CHAIN (id_f1) = NULL_TREE;
2080   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2081 			   get_identifier ("omp_data_size"),
2082 			   ptr_type_node);
2083   DECL_CHAIN (id_f2) = id_f1;
2084   finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2085 			 NULL_TREE);
2086 
2087   tree int_num_of_global_vars;
2088   int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2089   tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2090   tree global_vars_array_type = build_array_type (variable_info_type,
2091 						  global_vars_num_index_type);
2092   TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2093 
2094   vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2095 
2096   for (hash_table <hsa_noop_symbol_hasher>::iterator it
2097        = hsa_global_variable_symbols->begin ();
2098        it != hsa_global_variable_symbols->end (); ++it)
2099     {
2100       unsigned len = strlen ((*it)->m_name);
2101       char *copy = XNEWVEC (char, len + 2);
2102       copy[0] = '&';
2103       memcpy (copy + 1, (*it)->m_name, len);
2104       copy[len + 1] = '\0';
2105       len++;
2106       hsa_sanitize_name (copy);
2107 
2108       tree var_name = build_string (len, copy);
2109       TREE_TYPE (var_name)
2110 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2111       free (copy);
2112 
2113       vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2114       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2115 			      build1 (ADDR_EXPR,
2116 				      build_pointer_type (TREE_TYPE (var_name)),
2117 				      var_name));
2118       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2119 			      build_fold_addr_expr ((*it)->m_decl));
2120 
2121       tree variable_info_ctor = build_constructor (variable_info_type,
2122 						   variable_info_vec);
2123 
2124       CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2125 			      variable_info_ctor);
2126     }
2127 
2128   tree global_vars_ctor = build_constructor (global_vars_array_type,
2129 					     global_vars_vec);
2130 
2131   char tmp_name[64];
2132   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2133   tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2134 					   get_identifier (tmp_name),
2135 					   global_vars_array_type);
2136   TREE_STATIC (global_vars_table) = 1;
2137   TREE_READONLY (global_vars_table) = 1;
2138   TREE_PUBLIC (global_vars_table) = 0;
2139   DECL_ARTIFICIAL (global_vars_table) = 1;
2140   DECL_IGNORED_P (global_vars_table) = 1;
2141   DECL_EXTERNAL (global_vars_table) = 0;
2142   TREE_CONSTANT (global_vars_table) = 1;
2143   DECL_INITIAL (global_vars_table) = global_vars_ctor;
2144   varpool_node::finalize_decl (global_vars_table);
2145 
2146   return global_vars_table;
2147 }
2148 
2149 /* Create __hsa_host_functions and __hsa_kernels that contain
2150    all informations consumed by libgomp to register all kernels
2151    in the BRIG binary.  */
2152 
2153 static void
hsa_output_kernels(tree * host_func_table,tree * kernels)2154 hsa_output_kernels (tree *host_func_table, tree *kernels)
2155 {
2156   unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2157 
2158   tree int_num_of_kernels;
2159   int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2160   tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2161   tree host_functions_array_type = build_array_type (ptr_type_node,
2162 						     kernel_num_index_type);
2163   TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2164 
2165   vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2166   for (unsigned i = 0; i < map_count; ++i)
2167     {
2168       tree decl = hsa_get_decl_kernel_mapping_decl (i);
2169       tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2170       CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2171     }
2172   tree host_functions_ctor = build_constructor (host_functions_array_type,
2173 						host_functions_vec);
2174   char tmp_name[64];
2175   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2176   tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2177 					 get_identifier (tmp_name),
2178 					 host_functions_array_type);
2179   TREE_STATIC (hsa_host_func_table) = 1;
2180   TREE_READONLY (hsa_host_func_table) = 1;
2181   TREE_PUBLIC (hsa_host_func_table) = 0;
2182   DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2183   DECL_IGNORED_P (hsa_host_func_table) = 1;
2184   DECL_EXTERNAL (hsa_host_func_table) = 0;
2185   TREE_CONSTANT (hsa_host_func_table) = 1;
2186   DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2187   varpool_node::finalize_decl (hsa_host_func_table);
2188   *host_func_table = hsa_host_func_table;
2189 
2190   /* Following code emits list of kernel_info structures.  */
2191 
2192   tree kernel_info_type = make_node (RECORD_TYPE);
2193   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2194 			   get_identifier ("name"), ptr_type_node);
2195   DECL_CHAIN (id_f1) = NULL_TREE;
2196   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2197 			   get_identifier ("omp_data_size"),
2198 			   unsigned_type_node);
2199   DECL_CHAIN (id_f2) = id_f1;
2200   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2201 			   get_identifier ("gridified_kernel_p"),
2202 			   boolean_type_node);
2203   DECL_CHAIN (id_f3) = id_f2;
2204   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2205 			   get_identifier ("kernel_dependencies_count"),
2206 			   unsigned_type_node);
2207   DECL_CHAIN (id_f4) = id_f3;
2208   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2209 			   get_identifier ("kernel_dependencies"),
2210 			   build_pointer_type (build_pointer_type
2211 					       (char_type_node)));
2212   DECL_CHAIN (id_f5) = id_f4;
2213   finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2214 			 NULL_TREE);
2215 
2216   int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2217   tree kernel_info_vector_type
2218     = build_array_type (kernel_info_type,
2219 			build_index_type (int_num_of_kernels));
2220   TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2221 
2222   vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2223   tree kernel_dependencies_vector_type = NULL;
2224 
2225   for (unsigned i = 0; i < map_count; ++i)
2226     {
2227       tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2228       char *name = hsa_get_decl_kernel_mapping_name (i);
2229       unsigned len = strlen (name);
2230       char *copy = XNEWVEC (char, len + 2);
2231       copy[0] = '&';
2232       memcpy (copy + 1, name, len);
2233       copy[len + 1] = '\0';
2234       len++;
2235 
2236       tree kern_name = build_string (len, copy);
2237       TREE_TYPE (kern_name)
2238 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2239       free (copy);
2240 
2241       unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2242       tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2243       bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2244       tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2245 						     gridified_kernel_p);
2246       unsigned count = 0;
2247       vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2248       if (hsa_decl_kernel_dependencies)
2249 	{
2250 	  vec<const char *> **slot;
2251 	  slot = hsa_decl_kernel_dependencies->get (kernel);
2252 	  if (slot)
2253 	    {
2254 	      vec <const char *> *dependencies = *slot;
2255 	      count = dependencies->length ();
2256 
2257 	      kernel_dependencies_vector_type
2258 		= build_array_type (build_pointer_type (char_type_node),
2259 				    build_index_type (size_int (count)));
2260 	      TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2261 
2262 	      for (unsigned j = 0; j < count; j++)
2263 		{
2264 		  const char *d = (*dependencies)[j];
2265 		  len = strlen (d);
2266 		  tree dependency_name = build_string (len, d);
2267 		  TREE_TYPE (dependency_name)
2268 		    = build_array_type (char_type_node,
2269 					build_index_type (size_int (len)));
2270 
2271 		  CONSTRUCTOR_APPEND_ELT
2272 		    (kernel_dependencies_vec, NULL_TREE,
2273 		     build1 (ADDR_EXPR,
2274 			     build_pointer_type (TREE_TYPE (dependency_name)),
2275 			     dependency_name));
2276 		}
2277 	    }
2278 	}
2279 
2280       tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2281 
2282       vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2283       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2284 			      build1 (ADDR_EXPR,
2285 				      build_pointer_type (TREE_TYPE
2286 							  (kern_name)),
2287 				      kern_name));
2288       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2289       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2290 			      gridified_kernel_p_tree);
2291       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2292 
2293       if (count > 0)
2294 	{
2295 	  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2296 	  gcc_checking_assert (kernel_dependencies_vector_type);
2297 	  tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2298 					       get_identifier (tmp_name),
2299 					       kernel_dependencies_vector_type);
2300 
2301 	  TREE_STATIC (dependencies_list) = 1;
2302 	  TREE_READONLY (dependencies_list) = 1;
2303 	  TREE_PUBLIC (dependencies_list) = 0;
2304 	  DECL_ARTIFICIAL (dependencies_list) = 1;
2305 	  DECL_IGNORED_P (dependencies_list) = 1;
2306 	  DECL_EXTERNAL (dependencies_list) = 0;
2307 	  TREE_CONSTANT (dependencies_list) = 1;
2308 	  DECL_INITIAL (dependencies_list)
2309 	    = build_constructor (kernel_dependencies_vector_type,
2310 				 kernel_dependencies_vec);
2311 	  varpool_node::finalize_decl (dependencies_list);
2312 
2313 	  CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2314 				  build1 (ADDR_EXPR,
2315 					  build_pointer_type
2316 					    (TREE_TYPE (dependencies_list)),
2317 					  dependencies_list));
2318 	}
2319       else
2320 	CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2321 
2322       tree kernel_info_ctor = build_constructor (kernel_info_type,
2323 						 kernel_info_vec);
2324 
2325       CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2326 			      kernel_info_ctor);
2327     }
2328 
2329   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2330   tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2331 				 get_identifier (tmp_name),
2332 				 kernel_info_vector_type);
2333 
2334   TREE_STATIC (hsa_kernels) = 1;
2335   TREE_READONLY (hsa_kernels) = 1;
2336   TREE_PUBLIC (hsa_kernels) = 0;
2337   DECL_ARTIFICIAL (hsa_kernels) = 1;
2338   DECL_IGNORED_P (hsa_kernels) = 1;
2339   DECL_EXTERNAL (hsa_kernels) = 0;
2340   TREE_CONSTANT (hsa_kernels) = 1;
2341   DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2342 						  kernel_info_vector_vec);
2343   varpool_node::finalize_decl (hsa_kernels);
2344   *kernels = hsa_kernels;
2345 }
2346 
2347 /* Create a static constructor that will register out brig stuff with
2348    libgomp.  */
2349 
2350 static void
hsa_output_libgomp_mapping(tree brig_decl)2351 hsa_output_libgomp_mapping (tree brig_decl)
2352 {
2353   unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2354   unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2355 
2356   tree kernels;
2357   tree host_func_table;
2358 
2359   hsa_output_kernels (&host_func_table, &kernels);
2360   tree global_vars = hsa_output_global_variables ();
2361 
2362   tree hsa_image_desc_type = make_node (RECORD_TYPE);
2363   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2364 			   get_identifier ("brig_module"), ptr_type_node);
2365   DECL_CHAIN (id_f1) = NULL_TREE;
2366   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2367 			   get_identifier ("kernel_count"),
2368 			   unsigned_type_node);
2369 
2370   DECL_CHAIN (id_f2) = id_f1;
2371   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2372 			   get_identifier ("hsa_kernel_infos"),
2373 			   ptr_type_node);
2374   DECL_CHAIN (id_f3) = id_f2;
2375   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2376 			   get_identifier ("global_variable_count"),
2377 			   unsigned_type_node);
2378   DECL_CHAIN (id_f4) = id_f3;
2379   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2380 			   get_identifier ("hsa_global_variable_infos"),
2381 			   ptr_type_node);
2382   DECL_CHAIN (id_f5) = id_f4;
2383   finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2384 			 NULL_TREE);
2385   TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2386 
2387   vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2388   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2389 			  build_fold_addr_expr (brig_decl));
2390   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2391 			  build_int_cstu (unsigned_type_node, kernel_count));
2392   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2393 			  build1 (ADDR_EXPR,
2394 				  build_pointer_type (TREE_TYPE (kernels)),
2395 				  kernels));
2396   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2397 			  build_int_cstu (unsigned_type_node,
2398 					  global_variable_count));
2399   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2400 			  build1 (ADDR_EXPR,
2401 				  build_pointer_type (TREE_TYPE (global_vars)),
2402 				  global_vars));
2403 
2404   tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2405 
2406   char tmp_name[64];
2407   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2408   tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2409 					get_identifier (tmp_name),
2410 					hsa_image_desc_type);
2411   TREE_STATIC (hsa_img_descriptor) = 1;
2412   TREE_READONLY (hsa_img_descriptor) = 1;
2413   TREE_PUBLIC (hsa_img_descriptor) = 0;
2414   DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2415   DECL_IGNORED_P (hsa_img_descriptor) = 1;
2416   DECL_EXTERNAL (hsa_img_descriptor) = 0;
2417   TREE_CONSTANT (hsa_img_descriptor) = 1;
2418   DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2419   varpool_node::finalize_decl (hsa_img_descriptor);
2420 
2421   /* Construct the "host_table" libgomp expects.  */
2422   tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2423   tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2424   TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2425   vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2426   tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2427   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2428 			  host_func_table_addr);
2429   offset_int func_table_size
2430     = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2431   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2432 			  fold_build2 (POINTER_PLUS_EXPR,
2433 				       TREE_TYPE (host_func_table_addr),
2434 				       host_func_table_addr,
2435 				       build_int_cst (size_type_node,
2436 						      func_table_size.to_uhwi
2437 						      ())));
2438   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2439   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2440   tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2441 						    libgomp_host_table_vec);
2442   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2443   tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2444 					    get_identifier (tmp_name),
2445 					    libgomp_host_table_type);
2446 
2447   TREE_STATIC (hsa_libgomp_host_table) = 1;
2448   TREE_READONLY (hsa_libgomp_host_table) = 1;
2449   TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2450   DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2451   DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2452   DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2453   TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2454   DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2455   varpool_node::finalize_decl (hsa_libgomp_host_table);
2456 
2457   /* Generate an initializer with a call to the registration routine.  */
2458 
2459   tree offload_register
2460     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2461   gcc_checking_assert (offload_register);
2462 
2463   tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2464   append_to_statement_list
2465     (build_call_expr (offload_register, 4,
2466 		      build_int_cstu (unsigned_type_node,
2467 				      GOMP_VERSION_PACK (GOMP_VERSION,
2468 							 GOMP_VERSION_HSA)),
2469 		      build_fold_addr_expr (hsa_libgomp_host_table),
2470 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2471 		      build_fold_addr_expr (hsa_img_descriptor)),
2472      hsa_ctor_stmts);
2473 
2474   cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2475 
2476   tree offload_unregister
2477     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2478   gcc_checking_assert (offload_unregister);
2479 
2480   tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2481   append_to_statement_list
2482     (build_call_expr (offload_unregister, 4,
2483 		      build_int_cstu (unsigned_type_node,
2484 				      GOMP_VERSION_PACK (GOMP_VERSION,
2485 							 GOMP_VERSION_HSA)),
2486 		      build_fold_addr_expr (hsa_libgomp_host_table),
2487 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2488 		      build_fold_addr_expr (hsa_img_descriptor)),
2489      hsa_dtor_stmts);
2490   cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2491 }
2492 
2493 /* Emit the brig module we have compiled to a section in the final assembly and
2494    also create a compile unit static constructor that will register the brig
2495    module with libgomp.  */
2496 
2497 void
hsa_output_brig(void)2498 hsa_output_brig (void)
2499 {
2500   section *saved_section;
2501 
2502   if (!brig_initialized)
2503     return;
2504 
2505   for (unsigned i = 0; i < function_call_linkage.length (); i++)
2506     {
2507       function_linkage_pair p = function_call_linkage[i];
2508 
2509       BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2510       gcc_assert (*func_offset);
2511       BrigOperandCodeRef *code_ref
2512 	= (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2513       gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2514       code_ref->ref = lendian32 (*func_offset);
2515     }
2516 
2517   /* Iterate all function declarations and if we meet a function that should
2518      have module linkage and we are unable to emit HSAIL for the function,
2519      then change the linkage to program linkage.  Doing so, we will emit
2520      a valid BRIG image.  */
2521   if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2522     for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2523 	 = emitted_declarations->begin ();
2524 	 it != emitted_declarations->end ();
2525 	 ++it)
2526       {
2527 	if (hsa_failed_functions->contains ((*it).first))
2528 	  (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2529       }
2530 
2531   saved_section = in_section;
2532 
2533   switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2534   char tmp_name[64];
2535   ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2536   ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2537   tree brig_id = get_identifier (tmp_name);
2538   tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2539 			       char_type_node);
2540   SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2541   TREE_ADDRESSABLE (brig_decl) = 1;
2542   TREE_READONLY (brig_decl) = 1;
2543   DECL_ARTIFICIAL (brig_decl) = 1;
2544   DECL_IGNORED_P (brig_decl) = 1;
2545   TREE_STATIC (brig_decl) = 1;
2546   TREE_PUBLIC (brig_decl) = 0;
2547   TREE_USED (brig_decl) = 1;
2548   DECL_INITIAL (brig_decl) = brig_decl;
2549   TREE_ASM_WRITTEN (brig_decl) = 1;
2550 
2551   BrigModuleHeader module_header;
2552   memcpy (&module_header.identification, "HSA BRIG",
2553 	  sizeof (module_header.identification));
2554   module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2555   module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2556   uint64_t section_index[3];
2557 
2558   int data_padding, code_padding, operand_padding;
2559   data_padding = HSA_SECTION_ALIGNMENT
2560     - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2561   code_padding = HSA_SECTION_ALIGNMENT
2562     - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2563   operand_padding = HSA_SECTION_ALIGNMENT
2564     - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2565 
2566   uint64_t module_size = sizeof (module_header)
2567     + sizeof (section_index)
2568     + brig_data.total_size
2569     + data_padding
2570     + brig_code.total_size
2571     + code_padding
2572     + brig_operand.total_size
2573     + operand_padding;
2574   gcc_assert ((module_size % 16) == 0);
2575   module_header.byteCount = lendian64 (module_size);
2576   memset (&module_header.hash, 0, sizeof (module_header.hash));
2577   module_header.reserved = 0;
2578   module_header.sectionCount = lendian32 (3);
2579   module_header.sectionIndex = lendian64 (sizeof (module_header));
2580   assemble_string ((const char *) &module_header, sizeof (module_header));
2581   uint64_t off = sizeof (module_header) + sizeof (section_index);
2582   section_index[0] = lendian64 (off);
2583   off += brig_data.total_size + data_padding;
2584   section_index[1] = lendian64 (off);
2585   off += brig_code.total_size + code_padding;
2586   section_index[2] = lendian64 (off);
2587   assemble_string ((const char *) &section_index, sizeof (section_index));
2588 
2589   char padding[HSA_SECTION_ALIGNMENT];
2590   memset (padding, 0, sizeof (padding));
2591 
2592   brig_data.output ();
2593   assemble_string (padding, data_padding);
2594   brig_code.output ();
2595   assemble_string (padding, code_padding);
2596   brig_operand.output ();
2597   assemble_string (padding, operand_padding);
2598 
2599   if (saved_section)
2600     switch_to_section (saved_section);
2601 
2602   hsa_output_libgomp_mapping (brig_decl);
2603 
2604   hsa_free_decl_kernel_mapping ();
2605   brig_release_data ();
2606   hsa_deinit_compilation_unit_data ();
2607 
2608   delete emitted_declarations;
2609   emitted_declarations = NULL;
2610   delete function_offsets;
2611   function_offsets = NULL;
2612 }
2613