1 /* Producing binary form of HSA BRIG from our internal representation.
2    Copyright (C) 2013-2019 Free Software Foundation, Inc.
3    Contributed by Martin Jambor <mjambor@suse.cz> and
4    Martin Liska <mliska@suse.cz>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "function.h"
39 #include "cfg.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
44 #include "cgraph.h"
45 #include "dumpfile.h"
46 #include "print-tree.h"
47 #include "symbol-summary.h"
48 #include "hsa-common.h"
49 #include "gomp-constants.h"
50 
/* Return the 16-bit value VAL in little-endian byte order.  Only a
   big-endian host needs to swap the two bytes; every other configuration
   hands VAL back unchanged.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
  /* The host byte order is known to the preprocessor and a byte-swap
     builtin is available.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  val = __builtin_bswap16 (val);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Portable fallback: exchange the two bytes by shifting.  */
  val = (val >> 8) | (val << 8);
#endif
  return val;
}
73 
/* Return the 32-bit value VAL in little-endian byte order.  Big-endian
   hosts reverse all four bytes, PDP-endian hosts exchange the two 16-bit
   halves, and little-endian hosts return VAL as is.  */

static uint32_t
lendian32 (uint32_t val)
{
  uint32_t res;

#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  res = val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  res = __builtin_bswap32 (val);
# else  /* __ORDER_PDP_ENDIAN__ */
  res = (val >> 16) | (val << 16);
# endif
#elif !defined (WORDS_BIGENDIAN)
  res = val;
#else
  /* Portable fallback: swap the bytes inside each half first, then swap
     the halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  res = (val >> 16) | (val << 16);
#endif

  return res;
}
97 
/* Return the 64-bit value VAL in little-endian byte order.  Big-endian
   hosts reverse all eight bytes, PDP-endian hosts reverse the order of the
   four 16-bit words, and little-endian hosts return VAL as is.  */

static uint64_t
lendian64 (uint64_t val)
{
  uint64_t res;

#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  res = val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  res = __builtin_bswap64 (val);
# else  /* __ORDER_PDP_ENDIAN__ */
  res = (((val & 0xffffll) << 48)
	 | ((val & 0xffff0000ll) << 16)
	 | ((val & 0xffff00000000ll) >> 16)
	 | ((val & 0xffff000000000000ll) >> 48));
# endif
#elif !defined (WORDS_BIGENDIAN)
  res = val;
#else
  /* Portable fallback: swap neighbouring bytes, then neighbouring 16-bit
     words, then the two 32-bit halves.  */
  val = (((val & 0xff00ff00ff00ff00ll) >> 8)
	 | ((val & 0x00ff00ff00ff00ffll) << 8));
  val = (((val & 0xffff0000ffff0000ll) >> 16)
	 | ((val & 0x0000ffff0000ffffll) << 16));
  res = (val >> 32) | (val << 32);
#endif

  return res;
}
127 
/* String constants used when producing BRIG.  The three
   BRIG_SECTION_*_NAME strings are the names passed to
   hsa_brig_section::init for the data, code and operand sections.  */

#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME    "hsa_data"
#define BRIG_SECTION_CODE_NAME    "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Number of bytes allocated for each chunk of section data; also the upper
   limit on the length of a single hsa_brig_section::add request.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
139 
/* Chunks of BRIG binary data.  Each chunk owns a buffer of
   BRIG_CHUNK_MAX_SIZE bytes (see hsa_brig_section::allocate_new_chunk) of
   which only the first SIZE bytes are in use.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk.  */
  unsigned size;

  /* Pointer to the data.  */
  char *data;
};
150 
/* Structure representing a BRIG section, holding and writing its data.  The
   payload is kept in a vector of fixed-capacity chunks; offsets handed out by
   add are relative to the start of the section, i.e. they include the
   header.  */

class hsa_brig_section
{
public:
  /* Section name that will be output to the BRIG.  */
  const char *section_name;
  /* Size in bytes of all data stored in the section.  */
  unsigned total_size;
  /* The size of the header of the section including padding.  */
  unsigned header_byte_count;
  /* The size of the header of the section without any padding.  */
  unsigned header_byte_delta;

  /* Set up an empty section called NAME.  */
  void init (const char *name);
  /* Free all chunk buffers.  */
  void release ();
  /* Write the header and all chunks to the assembly output.  */
  void output ();
  /* Append LEN bytes of DATA, return their section offset and optionally
     store the address of the copy to *OUTPUT.  */
  unsigned add (const void *data, unsigned len, void **output = NULL);
  /* Pad the section so that its size is a multiple of FACTOR.  */
  void round_size_up (int factor);
  /* Translate a section offset into a pointer into the chunk data.  */
  void *get_ptr_by_offset (unsigned int offset);

private:
  /* Push a new chunk and make it the current one.  */
  void allocate_new_chunk ();

  /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes.  */
  vec <struct hsa_brig_data_chunk> chunks;

  /* More convenient access to the last chunk from the vector above.  */
  struct hsa_brig_data_chunk *cur_chunk;
};
181 
/* The data, code and operand sections of the BRIG module being built; they
   are set up in brig_init and freed in brig_release_data.  */
static struct hsa_brig_section brig_data, brig_code, brig_operand;
/* Counter incremented as directives are emitted; reset to zero on every
   call to brig_init.  */
static uint32_t brig_insn_count;
/* True while the sections and the string hash table are set up.  */
static bool brig_initialized = false;

/* Mapping between emitted HSA functions and their offset in code segment.  */
static hash_map<tree, BrigCodeOffset32_t> *function_offsets;

/* Hash map of emitted function declarations.  */
static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;

/* Hash table of emitted internal function declaration offsets.  */
hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;

/* List of sbr instructions.  */
static vec <hsa_insn_sbr *> *switch_instructions;
197 
/* Record pairing a called function declaration with an offset in the operand
   section.  */

struct function_linkage_pair
{
  /* Build a pair from the called function DECL and operand offset OFF.  */
  function_linkage_pair (tree decl, unsigned int off)
    : function_decl (decl), offset (off) {}

  /* Declaration of called function.  */
  tree function_decl;

  /* Offset in operand section.  */
  unsigned int offset;
};

/* Vector of function calls where we need to resolve function offsets.  */
static auto_vec <function_linkage_pair> function_call_linkage;
212 
213 /* Add a new chunk, allocate data for it and initialize it.  */
214 
215 void
allocate_new_chunk()216 hsa_brig_section::allocate_new_chunk ()
217 {
218   struct hsa_brig_data_chunk new_chunk;
219 
220   new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
221   new_chunk.size = 0;
222   cur_chunk = chunks.safe_push (new_chunk);
223 }
224 
225 /* Initialize the brig section.  */
226 
227 void
init(const char * name)228 hsa_brig_section::init (const char *name)
229 {
230   section_name = name;
231   /* While the following computation is basically wrong, because the intent
232      certainly wasn't to have the first character of name and padding, which
233      are a part of sizeof (BrigSectionHeader), included in the first addend,
234      this is what the disassembler expects.  */
235   total_size = sizeof (BrigSectionHeader) + strlen (section_name);
236   chunks.create (1);
237   allocate_new_chunk ();
238   header_byte_delta = total_size;
239   round_size_up (4);
240   header_byte_count = total_size;
241 }
242 
243 /* Free all data in the section.  */
244 
245 void
release()246 hsa_brig_section::release ()
247 {
248   for (unsigned i = 0; i < chunks.length (); i++)
249     free (chunks[i].data);
250   chunks.release ();
251   cur_chunk = NULL;
252 }
253 
254 /* Write the section to the output file to a section with the name given at
255    initialization.  Switches the output section and does not restore it.  */
256 
257 void
output()258 hsa_brig_section::output ()
259 {
260   struct BrigSectionHeader section_header;
261   char padding[8];
262 
263   section_header.byteCount = lendian64 (total_size);
264   section_header.headerByteCount = lendian32 (header_byte_count);
265   section_header.nameLength = lendian32 (strlen (section_name));
266   assemble_string ((const char *) &section_header, 16);
267   assemble_string (section_name, (section_header.nameLength));
268   memset (&padding, 0, sizeof (padding));
269   /* This is also a consequence of the wrong header size computation described
270      in a comment in hsa_brig_section::init.  */
271   assemble_string (padding, 8);
272   for (unsigned i = 0; i < chunks.length (); i++)
273     assemble_string (chunks[i].data, chunks[i].size);
274 }
275 
276 /* Add to the stream LEN bytes of opaque binary DATA.  Return the offset at
277    which it was stored.  If OUTPUT is not NULL, store into it the pointer to
278    the place where DATA was actually stored.  */
279 
280 unsigned
add(const void * data,unsigned len,void ** output)281 hsa_brig_section::add (const void *data, unsigned len, void **output)
282 {
283   unsigned offset = total_size;
284 
285   gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
286   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
287     allocate_new_chunk ();
288 
289   char *dst = cur_chunk->data + cur_chunk->size;
290   memcpy (dst, data, len);
291   if (output)
292     *output = dst;
293   cur_chunk->size += len;
294   total_size += len;
295 
296   return offset;
297 }
298 
299 /* Add padding to section so that its size is divisible by FACTOR.  */
300 
301 void
round_size_up(int factor)302 hsa_brig_section::round_size_up (int factor)
303 {
304   unsigned padding, res = total_size % factor;
305 
306   if (res == 0)
307     return;
308 
309   padding = factor - res;
310   total_size += padding;
311   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
312     {
313       padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
314       cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
315       allocate_new_chunk ();
316     }
317 
318   cur_chunk->size += padding;
319 }
320 
321 /* Return pointer to data by global OFFSET in the section.  */
322 
323 void *
get_ptr_by_offset(unsigned int offset)324 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
325 {
326   gcc_assert (offset < total_size);
327   offset -= header_byte_delta;
328 
329   unsigned i;
330   for (i = 0; offset >= chunks[i].size; i++)
331     offset -= chunks[i].size;
332 
333   return chunks[i].data + offset;
334 }
335 
/* BRIG string data hashing.  One slot describes a string that has been
   emitted to the data section.  */

struct brig_string_slot
{
  /* The string characters; LEN of them are significant.  */
  const char *s;
  /* Prefix character emitted in front of the string, or zero for none.  */
  char prefix;
  /* Length of S, not counting the prefix.  */
  int len;
  /* Offset in the data section at which the string was stored.  */
  uint32_t offset;
};
345 
/* Hash table helpers.  Slots are hashed and compared by their string
   contents and prefix, and are freed when removed from the table.  */

struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
{
  static inline hashval_t hash (const value_type);
  static inline bool equal (const value_type, const compare_type);
  static inline void remove (value_type);
};
354 
355 /* Returns a hash code for DS.  Adapted from libiberty's htab_hash_string
356    to support strings that may not end in '\0'.  */
357 
358 inline hashval_t
hash(const value_type ds)359 brig_string_slot_hasher::hash (const value_type ds)
360 {
361   hashval_t r = ds->len;
362   int i;
363 
364   for (i = 0; i < ds->len; i++)
365      r = r * 67 + (unsigned) ds->s[i] - 113;
366   r = r * 67 + (unsigned) ds->prefix - 113;
367   return r;
368 }
369 
370 /* Returns nonzero if DS1 and DS2 are equal.  */
371 
372 inline bool
equal(const value_type ds1,const compare_type ds2)373 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
374 {
375   if (ds1->len == ds2->len)
376     return ds1->prefix == ds2->prefix
377       && memcmp (ds1->s, ds2->s, ds1->len) == 0;
378 
379   return 0;
380 }
381 
382 /* Deallocate memory for DS upon its removal.  */
383 
384 inline void
remove(value_type ds)385 brig_string_slot_hasher::remove (value_type ds)
386 {
387   free (const_cast<char *> (ds->s));
388   free (ds);
389 }
390 
/* Hash for strings we output in order not to duplicate them needlessly.
   Created in brig_init and deleted in brig_release_data.  */

static hash_table<brig_string_slot_hasher> *brig_string_htab;
394 
395 /* Emit a null terminated string STR to the data section and return its
396    offset in it.  If PREFIX is non-zero, output it just before STR too.
397    Sanitize the string if SANITIZE option is set to true.  */
398 
399 static unsigned
400 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
401 {
402   unsigned slen = strlen (str);
403   unsigned offset, len = slen + (prefix ? 1 : 0);
404   uint32_t hdr_len = lendian32 (len);
405   brig_string_slot s_slot;
406   brig_string_slot **slot;
407   char *str2;
408 
409   str2 = xstrdup (str);
410 
411   if (sanitize)
412     hsa_sanitize_name (str2);
413   s_slot.s = str2;
414   s_slot.len = slen;
415   s_slot.prefix = prefix;
416   s_slot.offset = 0;
417 
418   slot = brig_string_htab->find_slot (&s_slot, INSERT);
419   if (*slot == NULL)
420     {
421       brig_string_slot *new_slot = XCNEW (brig_string_slot);
422 
423       /* In theory we should fill in BrigData but that would mean copying
424 	 the string to a buffer for no reason, so we just emulate it.  */
425       offset = brig_data.add (&hdr_len, sizeof (hdr_len));
426       if (prefix)
427 	brig_data.add (&prefix, 1);
428 
429       brig_data.add (str2, slen);
430       brig_data.round_size_up (4);
431 
432       /* TODO: could use the string we just copied into
433 	 brig_string->cur_chunk */
434       new_slot->s = str2;
435       new_slot->len = slen;
436       new_slot->prefix = prefix;
437       new_slot->offset = offset;
438       *slot = new_slot;
439     }
440   else
441     {
442       offset = (*slot)->offset;
443       free (str2);
444     }
445 
446   return offset;
447 }
448 
/* Linked list of queued operands.  Operands are appended by enqueue_op and
   chained through their m_next fields.  */

static struct operand_queue
{
  /* First from the chain of queued operands.  */
  hsa_op_base *first_op, *last_op;

  /* The offset at which the next operand will be enqueued.  */
  unsigned projected_size;

} op_queue;
460 
461 /* Unless already initialized, initialize infrastructure to produce BRIG.  */
462 
463 static void
brig_init(void)464 brig_init (void)
465 {
466   brig_insn_count = 0;
467 
468   if (brig_initialized)
469     return;
470 
471   brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
472   brig_data.init (BRIG_SECTION_DATA_NAME);
473   brig_code.init (BRIG_SECTION_CODE_NAME);
474   brig_operand.init (BRIG_SECTION_OPERAND_NAME);
475   brig_initialized = true;
476 
477   struct BrigDirectiveModule moddir;
478   memset (&moddir, 0, sizeof (moddir));
479   moddir.base.byteCount = lendian16 (sizeof (moddir));
480 
481   char *modname;
482   if (main_input_filename && *main_input_filename != '\0')
483     {
484       const char *part = strrchr (main_input_filename, '/');
485       if (!part)
486 	part = main_input_filename;
487       else
488 	part++;
489       modname = concat ("&__hsa_module_", part, NULL);
490       char *extension = strchr (modname, '.');
491       if (extension)
492 	*extension = '\0';
493 
494       /* As in LTO mode, we have to emit a different module names.  */
495       if (flag_ltrans)
496 	{
497 	  part = strrchr (asm_file_name, '/');
498 	  if (!part)
499 	    part = asm_file_name;
500 	  else
501 	    part++;
502 	  char *modname2;
503 	  modname2 = xasprintf ("%s_%s", modname, part);
504 	  free (modname);
505 	  modname = modname2;
506 	}
507 
508       hsa_sanitize_name (modname);
509       moddir.name = brig_emit_string (modname);
510       free (modname);
511     }
512   else
513     moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
514   moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
515   moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
516   moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
517   moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
518   if (hsa_machine_large_p ())
519     moddir.machineModel = BRIG_MACHINE_LARGE;
520   else
521     moddir.machineModel = BRIG_MACHINE_SMALL;
522   moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
523   brig_code.add (&moddir, sizeof (moddir));
524 }
525 
526 /* Free all BRIG data.  */
527 
528 static void
brig_release_data(void)529 brig_release_data (void)
530 {
531   delete brig_string_htab;
532   brig_data.release ();
533   brig_code.release ();
534   brig_operand.release ();
535 
536   brig_initialized = 0;
537 }
538 
539 /* Enqueue operation OP.  Return the offset at which it will be stored.  */
540 
541 static unsigned int
enqueue_op(hsa_op_base * op)542 enqueue_op (hsa_op_base *op)
543 {
544   unsigned ret;
545 
546   if (op->m_brig_op_offset)
547     return op->m_brig_op_offset;
548 
549   ret = op_queue.projected_size;
550   op->m_brig_op_offset = op_queue.projected_size;
551 
552   if (!op_queue.first_op)
553     op_queue.first_op = op;
554   else
555     op_queue.last_op->m_next = op;
556   op_queue.last_op = op;
557 
558   if (is_a <hsa_op_immed *> (op))
559     op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
560   else if (is_a <hsa_op_reg *> (op))
561     op_queue.projected_size += sizeof (struct BrigOperandRegister);
562   else if (is_a <hsa_op_address *> (op))
563     op_queue.projected_size += sizeof (struct BrigOperandAddress);
564   else if (is_a <hsa_op_code_ref *> (op))
565     op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
566   else if (is_a <hsa_op_code_list *> (op))
567     op_queue.projected_size += sizeof (struct BrigOperandCodeList);
568   else if (is_a <hsa_op_operand_list *> (op))
569     op_queue.projected_size += sizeof (struct BrigOperandOperandList);
570   else
571     gcc_unreachable ();
572   return ret;
573 }
574 
575 static void emit_immediate_operand (hsa_op_immed *imm);
576 
577 /* Emit directive describing a symbol if it has not been emitted already.
578    Return the offset of the directive.  */
579 
580 static unsigned
emit_directive_variable(struct hsa_symbol * symbol)581 emit_directive_variable (struct hsa_symbol *symbol)
582 {
583   struct BrigDirectiveVariable dirvar;
584   unsigned name_offset;
585   static unsigned res_name_offset;
586 
587   if (symbol->m_directive_offset)
588     return symbol->m_directive_offset;
589 
590   memset (&dirvar, 0, sizeof (dirvar));
591   dirvar.base.byteCount = lendian16 (sizeof (dirvar));
592   dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
593   dirvar.allocation = symbol->m_allocation;
594 
595   char prefix = symbol->m_global_scope_p ? '&' : '%';
596 
597   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
598     {
599       if (res_name_offset == 0)
600 	res_name_offset = brig_emit_string (symbol->m_name, '%');
601       name_offset = res_name_offset;
602     }
603   else if (symbol->m_name)
604     name_offset = brig_emit_string (symbol->m_name, prefix);
605   else
606     {
607       char buf[64];
608       snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
609 		symbol->m_name_number);
610       name_offset = brig_emit_string (buf, prefix);
611     }
612 
613   dirvar.name = lendian32 (name_offset);
614 
615   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
616     {
617       hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
618       dirvar.init = lendian32 (enqueue_op (tmp));
619     }
620   else
621     dirvar.init = 0;
622   dirvar.type = lendian16 (symbol->m_type);
623   dirvar.segment = symbol->m_segment;
624   dirvar.align = symbol->m_align;
625   dirvar.linkage = symbol->m_linkage;
626   dirvar.dim.lo = symbol->m_dim;
627   dirvar.dim.hi = symbol->m_dim >> 32;
628 
629   /* Global variables are just declared and linked via HSA runtime.  */
630   if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
631     dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
632   dirvar.reserved = 0;
633 
634   if (symbol->m_cst_value)
635     {
636       dirvar.modifier |= BRIG_VARIABLE_CONST;
637       dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
638     }
639 
640   symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
641   return symbol->m_directive_offset;
642 }
643 
/* Emit directives describing either a function declaration or definition F and
   return the produced BrigDirectiveExecutable structure.  The function does
   not take into account any instructions when calculating nextModuleEntry
   field of the produced BrigDirectiveExecutable structure so when emitting
   actual definitions, this field needs to be updated after all of the function
   is actually added to the code section.

   The offsets stored in the directive are computed up front from the current
   size of the code section and the number of variable directives that are
   going to follow, so the order of the emission below has to match that
   computation.  IS_DECLARATION only controls whether an existing entry in
   function_offsets may be overwritten.  */

static BrigDirectiveExecutable *
emit_function_directives (hsa_function_representation *f, bool is_declaration)
{
  struct BrigDirectiveExecutable fndir;
  unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
  int count = 0;
  void *ptr_to_fndir;
  hsa_symbol *sym;

  /* Global symbols of a definition are emitted in front of the function
     directive itself.  */
  if (!f->m_declaration_p)
    for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
      {
	gcc_assert (!sym->m_emitted_to_brig);
	sym->m_emitted_to_brig = true;
	emit_directive_variable (sym);
	brig_insn_count++;
      }

  name_offset = brig_emit_string (f->m_name, '&');
  /* Input arguments follow the function directive and the output argument,
     if there is one.  */
  inarg_off = brig_code.total_size + sizeof (fndir)
    + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
  scoped_off = inarg_off
    + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);

  /* Only definitions have spill symbols and private variables emitted.  */
  if (!f->m_declaration_p)
    {
      count += f->m_spill_symbols.length ();
      count += f->m_private_variables.length ();
    }

  next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);

  memset (&fndir, 0, sizeof (fndir));
  fndir.base.byteCount = lendian16 (sizeof (fndir));
  fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
			       : BRIG_KIND_DIRECTIVE_FUNCTION);
  fndir.name = lendian32 (name_offset);
  fndir.inArgCount = lendian16 (f->m_input_args.length ());
  fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
  fndir.firstInArg = lendian32 (inarg_off);
  fndir.firstCodeBlockEntry = lendian32 (scoped_off);
  fndir.nextModuleEntry = lendian32 (next_toplev_off);
  fndir.linkage = f->get_linkage ();
  if (!f->m_declaration_p)
    fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
  memset (&fndir.reserved, 0, sizeof (fndir.reserved));

  /* Once we put a definition of function_offsets, we should not overwrite
     it with a declaration of the function.  */
  if (f->m_internal_fn == NULL)
    {
      if (!function_offsets->get (f->m_decl) || !is_declaration)
	function_offsets->put (f->m_decl, brig_code.total_size);
    }
  else
    {
      /* Internal function.  */
      /* NOTE(review): the slot is overwritten without looking at its
	 previous contents -- confirm an earlier entry cannot be lost.  */
      hsa_internal_fn **slot
	= hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
      hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
      int_fn->m_offset = brig_code.total_size;
      *slot = int_fn;
    }

  /* Remember where the directive ends up in the section so that the caller
     can patch it later.  */
  brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);

  if (f->m_output_arg)
    emit_directive_variable (f->m_output_arg);
  for (unsigned i = 0; i < f->m_input_args.length (); i++)
    emit_directive_variable (f->m_input_args[i]);

  if (!f->m_declaration_p)
    {
      for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
	{
	  emit_directive_variable (sym);
	  brig_insn_count++;
	}
      for (unsigned i = 0; i < f->m_private_variables.length (); i++)
	{
	  emit_directive_variable (f->m_private_variables[i]);
	  brig_insn_count++;
	}
    }

  return (BrigDirectiveExecutable *) ptr_to_fndir;
}
738 
739 /* Emit a label directive for the given HBB.  We assume it is about to start on
740    the current offset in the code section.  */
741 
742 static void
emit_bb_label_directive(hsa_bb * hbb)743 emit_bb_label_directive (hsa_bb *hbb)
744 {
745   struct BrigDirectiveLabel lbldir;
746 
747   lbldir.base.byteCount = lendian16 (sizeof (lbldir));
748   lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
749   char buf[32];
750   snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
751 	    hbb->m_index);
752   lbldir.name = lendian32 (brig_emit_string (buf, '@'));
753 
754   hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
755 						       sizeof (lbldir));
756   brig_insn_count++;
757 }
758 
759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
760    holding such, for constants and registers.  */
761 
762 static BrigType16_t
regtype_for_type(BrigType16_t t)763 regtype_for_type (BrigType16_t t)
764 {
765   switch (t)
766     {
767     case BRIG_TYPE_B1:
768       return BRIG_TYPE_B1;
769 
770     case BRIG_TYPE_U8:
771     case BRIG_TYPE_U16:
772     case BRIG_TYPE_U32:
773     case BRIG_TYPE_S8:
774     case BRIG_TYPE_S16:
775     case BRIG_TYPE_S32:
776     case BRIG_TYPE_B8:
777     case BRIG_TYPE_B16:
778     case BRIG_TYPE_B32:
779     case BRIG_TYPE_F16:
780     case BRIG_TYPE_F32:
781     case BRIG_TYPE_U8X4:
782     case BRIG_TYPE_U16X2:
783     case BRIG_TYPE_S8X4:
784     case BRIG_TYPE_S16X2:
785     case BRIG_TYPE_F16X2:
786       return BRIG_TYPE_B32;
787 
788     case BRIG_TYPE_U64:
789     case BRIG_TYPE_S64:
790     case BRIG_TYPE_F64:
791     case BRIG_TYPE_B64:
792     case BRIG_TYPE_U8X8:
793     case BRIG_TYPE_U16X4:
794     case BRIG_TYPE_U32X2:
795     case BRIG_TYPE_S8X8:
796     case BRIG_TYPE_S16X4:
797     case BRIG_TYPE_S32X2:
798     case BRIG_TYPE_F16X4:
799     case BRIG_TYPE_F32X2:
800       return BRIG_TYPE_B64;
801 
802     case BRIG_TYPE_B128:
803     case BRIG_TYPE_U8X16:
804     case BRIG_TYPE_U16X8:
805     case BRIG_TYPE_U32X4:
806     case BRIG_TYPE_U64X2:
807     case BRIG_TYPE_S8X16:
808     case BRIG_TYPE_S16X8:
809     case BRIG_TYPE_S32X4:
810     case BRIG_TYPE_S64X2:
811     case BRIG_TYPE_F16X8:
812     case BRIG_TYPE_F32X4:
813     case BRIG_TYPE_F64X2:
814       return BRIG_TYPE_B128;
815 
816     default:
817       gcc_unreachable ();
818     }
819 }
820 
821 /* Return the length of the BRIG type TYPE that is going to be streamed out as
822    an immediate constant (so it must not be B1).  */
823 
824 unsigned
hsa_get_imm_brig_type_len(BrigType16_t type)825 hsa_get_imm_brig_type_len (BrigType16_t type)
826 {
827   BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
828   BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
829 
830   switch (pack_type)
831     {
832     case BRIG_TYPE_PACK_NONE:
833       break;
834     case BRIG_TYPE_PACK_32:
835       return 4;
836     case BRIG_TYPE_PACK_64:
837       return 8;
838     case BRIG_TYPE_PACK_128:
839       return 16;
840     default:
841       gcc_unreachable ();
842     }
843 
844   switch (base_type)
845     {
846     case BRIG_TYPE_U8:
847     case BRIG_TYPE_S8:
848     case BRIG_TYPE_B8:
849       return 1;
850     case BRIG_TYPE_U16:
851     case BRIG_TYPE_S16:
852     case BRIG_TYPE_F16:
853     case BRIG_TYPE_B16:
854       return 2;
855     case BRIG_TYPE_U32:
856     case BRIG_TYPE_S32:
857     case BRIG_TYPE_F32:
858     case BRIG_TYPE_B32:
859       return 4;
860     case BRIG_TYPE_U64:
861     case BRIG_TYPE_S64:
862     case BRIG_TYPE_F64:
863     case BRIG_TYPE_B64:
864       return 8;
865     case BRIG_TYPE_B128:
866       return 16;
867     default:
868       gcc_unreachable ();
869     }
870 }
871 
/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
   If NEED_LEN is not equal to zero, shrink or extend the value
   to NEED_LEN bytes.  Return how many bytes were written.

   VALUE must be of an integral type, an INTEGER_CST of a pointer type, or of
   a scalar floating-point type; vector types are not accepted.  For 16-bit
   floats an error is reported with sorry, nothing is written and 2 is
   returned.  */

static int
emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
{
  union hsa_bytes bytes;

  /* Start from all zeros so that extending to NEED_LEN pads with zeros.  */
  memset (&bytes, 0, sizeof (bytes));
  tree type = TREE_TYPE (value);
  gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);

  unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
  if (INTEGRAL_TYPE_P (type)
      || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
    switch (data_len)
      {
      case 1:
	bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
	break;
      case 2:
	bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
	break;
      case 4:
	bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
	break;
      case 8:
	bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
	break;
      default:
	gcc_unreachable ();
      }
  else if (SCALAR_FLOAT_TYPE_P (type))
    {
      if (data_len == 2)
	{
	  sorry ("Support for HSA does not implement immediate 16 bit FPU "
		 "operands");
	  return 2;
	}
      unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
      /* There are always 32 bits in each long, no matter the size of
	 the hosts long.  */
      long tmp[6];

      real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));

      if (int_len == 4)
	bytes.b32 = (uint32_t) tmp[0];
      else
	{
	  /* Combine the first two 32-bit words into one 64-bit value, with
	     tmp[1] as the upper half.  */
	  bytes.b64 = (uint64_t)(uint32_t) tmp[1];
	  bytes.b64 <<= 32;
	  bytes.b64 |= (uint32_t) tmp[0];
	}
    }
  else
    gcc_unreachable ();

  int len;
  if (need_len == 0)
    len = data_len;
  else
    len = need_len;

  /* NOTE(review): LEN bytes are read from BYTES even when NEED_LEN is larger
     than sizeof (bytes); presumably callers never request more -- confirm.  */
  memcpy (data, &bytes, len);
  return len;
}
941 
/* Produce the binary representation of the immediate in a newly allocated
   buffer and return it; the length of the representation is stored to
   *BRIG_REPR_SIZE.  The representation is built from m_tree_value if there is
   one and from m_int_value otherwise.  The buffer is allocated with
   XCNEWVEC/XNEWVEC; presumably the caller is expected to free it -- confirm
   against the callers.  */

char *
hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
{
  char *brig_repr;
  *brig_repr_size = hsa_get_imm_brig_type_len (m_type);

  if (m_tree_value != NULL_TREE)
    {
      /* Update brig_repr_size for special tree values.  */
      if (TREE_CODE (m_tree_value) == STRING_CST)
	*brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
      else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
	*brig_repr_size
	  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));

      unsigned total_len = *brig_repr_size;

      /* As we can have a constructor with fewer elements, fill the memory
	 with zeros.  */
      brig_repr = XCNEWVEC (char, total_len);
      char *p = brig_repr;

      if (TREE_CODE (m_tree_value) == VECTOR_CST)
	{
	  /* Variable-length vectors aren't supported.  */
	  int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant ();
	  for (i = 0; i < num; i++)
	    {
	      tree v = VECTOR_CST_ELT (m_tree_value, i);
	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
	      total_len -= actual;
	      p += actual;
	    }
	  /* Vectors should have the exact size.  */
	  gcc_assert (total_len == 0);
	}
      else if (TREE_CODE (m_tree_value) == STRING_CST)
	memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
		TREE_STRING_LENGTH (m_tree_value));
      else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
	{
	  /* The real and the imaginary part each take one half of the
	     representation.  */
	  gcc_assert (total_len % 2 == 0);
	  unsigned actual;
	  actual
	    = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
					       total_len / 2);

	  gcc_assert (actual == total_len / 2);
	  p += actual;

	  actual
	    = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
					       total_len / 2);
	  gcc_assert (actual == total_len / 2);
	}
      else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
	{
	  /* Elements are stored one after another; any bytes that are left
	     over stay zero.  NOTE(review): nothing here checks that the
	     elements fit into the buffer -- confirm the elements never
	     exceed the size of the constructor type.  */
	  unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
	  for (unsigned i = 0; i < len; i++)
	    {
	      tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
	      total_len -= actual;
	      p += actual;
	    }
	}
      else
	emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
    }
  else
    {
      hsa_bytes bytes;

      /* Only the union member matching the size is set; exactly that many
	 bytes are copied out below.  */
      switch (*brig_repr_size)
	{
	case 1:
	  bytes.b8 = (uint8_t) m_int_value;
	  break;
	case 2:
	  bytes.b16 = (uint16_t) m_int_value;
	  break;
	case 4:
	  bytes.b32 = (uint32_t) m_int_value;
	  break;
	case 8:
	  bytes.b64 = (uint64_t) m_int_value;
	  break;
	default:
	  gcc_unreachable ();
	}

      brig_repr = XNEWVEC (char, *brig_repr_size);
      memcpy (brig_repr, &bytes, *brig_repr_size);
    }

  return brig_repr;
}
1039 
1040 /* Emit an immediate BRIG operand IMM.  The BRIG type of the immediate might
1041    have been massaged to comply with various HSA/BRIG type requirements, so the
1042    only important aspect of that is the length (because HSAIL might expect
1043    smaller constants or become bit-data).  The data should be represented
1044    according to what is in the tree representation.  */
1045 
1046 static void
emit_immediate_operand(hsa_op_immed * imm)1047 emit_immediate_operand (hsa_op_immed *imm)
1048 {
1049   unsigned brig_repr_size;
1050   char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1051   struct BrigOperandConstantBytes out;
1052 
1053   memset (&out, 0, sizeof (out));
1054   out.base.byteCount = lendian16 (sizeof (out));
1055   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1056   uint32_t byteCount = lendian32 (brig_repr_size);
1057   out.type = lendian16 (imm->m_type);
1058   out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1059   brig_operand.add (&out, sizeof (out));
1060   brig_data.add (brig_repr, brig_repr_size);
1061   brig_data.round_size_up (4);
1062 
1063   free (brig_repr);
1064 }
1065 
1066 /* Emit a register BRIG operand REG.  */
1067 
1068 static void
emit_register_operand(hsa_op_reg * reg)1069 emit_register_operand (hsa_op_reg *reg)
1070 {
1071   struct BrigOperandRegister out;
1072 
1073   out.base.byteCount = lendian16 (sizeof (out));
1074   out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1075   out.regNum = lendian32 (reg->m_hard_num);
1076 
1077   switch (regtype_for_type (reg->m_type))
1078     {
1079     case BRIG_TYPE_B32:
1080       out.regKind = BRIG_REGISTER_KIND_SINGLE;
1081       break;
1082     case BRIG_TYPE_B64:
1083       out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1084       break;
1085     case BRIG_TYPE_B128:
1086       out.regKind = BRIG_REGISTER_KIND_QUAD;
1087       break;
1088     case BRIG_TYPE_B1:
1089       out.regKind = BRIG_REGISTER_KIND_CONTROL;
1090       break;
1091     default:
1092       gcc_unreachable ();
1093     }
1094 
1095   brig_operand.add (&out, sizeof (out));
1096 }
1097 
1098 /* Emit an address BRIG operand ADDR.  */
1099 
1100 static void
emit_address_operand(hsa_op_address * addr)1101 emit_address_operand (hsa_op_address *addr)
1102 {
1103   struct BrigOperandAddress out;
1104 
1105   out.base.byteCount = lendian16 (sizeof (out));
1106   out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1107   out.symbol = addr->m_symbol
1108     ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1109   out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1110 
1111   if (sizeof (addr->m_imm_offset) == 8)
1112     {
1113       out.offset.lo = lendian32 (addr->m_imm_offset);
1114       out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1115     }
1116   else
1117     {
1118       gcc_assert (sizeof (addr->m_imm_offset) == 4);
1119       out.offset.lo = lendian32 (addr->m_imm_offset);
1120       out.offset.hi = 0;
1121     }
1122 
1123   brig_operand.add (&out, sizeof (out));
1124 }
1125 
1126 /* Emit a code reference operand REF.  */
1127 
1128 static void
emit_code_ref_operand(hsa_op_code_ref * ref)1129 emit_code_ref_operand (hsa_op_code_ref *ref)
1130 {
1131   struct BrigOperandCodeRef out;
1132 
1133   out.base.byteCount = lendian16 (sizeof (out));
1134   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1135   out.ref = lendian32 (ref->m_directive_offset);
1136   brig_operand.add (&out, sizeof (out));
1137 }
1138 
1139 /* Emit a code list operand CODE_LIST.  */
1140 
1141 static void
emit_code_list_operand(hsa_op_code_list * code_list)1142 emit_code_list_operand (hsa_op_code_list *code_list)
1143 {
1144   struct BrigOperandCodeList out;
1145   unsigned args = code_list->m_offsets.length ();
1146 
1147   for (unsigned i = 0; i < args; i++)
1148     gcc_assert (code_list->m_offsets[i]);
1149 
1150   out.base.byteCount = lendian16 (sizeof (out));
1151   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1152 
1153   uint32_t byteCount = lendian32 (4 * args);
1154 
1155   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1156   brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1157   brig_data.round_size_up (4);
1158   brig_operand.add (&out, sizeof (out));
1159 }
1160 
1161 /* Emit an operand list operand OPERAND_LIST.  */
1162 
1163 static void
emit_operand_list_operand(hsa_op_operand_list * operand_list)1164 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1165 {
1166   struct BrigOperandOperandList out;
1167   unsigned args = operand_list->m_offsets.length ();
1168 
1169   for (unsigned i = 0; i < args; i++)
1170     gcc_assert (operand_list->m_offsets[i]);
1171 
1172   out.base.byteCount = lendian16 (sizeof (out));
1173   out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1174 
1175   uint32_t byteCount = lendian32 (4 * args);
1176 
1177   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1178   brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1179   brig_data.round_size_up (4);
1180   brig_operand.add (&out, sizeof (out));
1181 }
1182 
1183 /* Emit all operands queued for writing.  */
1184 
1185 static void
emit_queued_operands(void)1186 emit_queued_operands (void)
1187 {
1188   for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1189     {
1190       gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1191       if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1192 	emit_immediate_operand (imm);
1193       else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1194 	emit_register_operand (reg);
1195       else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1196 	emit_address_operand (addr);
1197       else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1198 	emit_code_ref_operand (ref);
1199       else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1200 	emit_code_list_operand (code_list);
1201       else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1202 	emit_operand_list_operand (l);
1203       else
1204 	gcc_unreachable ();
1205     }
1206 }
1207 
1208 /* Emit directives describing the function that is used for
1209    a function declaration.  */
1210 
1211 static BrigDirectiveExecutable *
emit_function_declaration(tree decl)1212 emit_function_declaration (tree decl)
1213 {
1214   hsa_function_representation *f = hsa_generate_function_declaration (decl);
1215 
1216   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1217   emit_queued_operands ();
1218 
1219   delete f;
1220 
1221   return e;
1222 }
1223 
1224 /* Emit directives describing the function that is used for
1225    an internal function declaration.  */
1226 
1227 static BrigDirectiveExecutable *
emit_internal_fn_decl(hsa_internal_fn * fn)1228 emit_internal_fn_decl (hsa_internal_fn *fn)
1229 {
1230   hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1231 
1232   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1233   emit_queued_operands ();
1234 
1235   delete f;
1236 
1237   return e;
1238 }
1239 
1240 /* Enqueue all operands of INSN and return offset to BRIG data section
1241    to list of operand offsets.  */
1242 
1243 static unsigned
emit_insn_operands(hsa_insn_basic * insn)1244 emit_insn_operands (hsa_insn_basic *insn)
1245 {
1246   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1247     operand_offsets;
1248 
1249   unsigned l = insn->operand_count ();
1250 
1251   /* We have N operands so use 4 * N for the byte_count.  */
1252   uint32_t byte_count = lendian32 (4 * l);
1253   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1254   if (l > 0)
1255     {
1256       operand_offsets.safe_grow (l);
1257       for (unsigned i = 0; i < l; i++)
1258 	operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1259 
1260       brig_data.add (operand_offsets.address (),
1261 		     l * sizeof (BrigOperandOffset32_t));
1262     }
1263   brig_data.round_size_up (4);
1264   return offset;
1265 }
1266 
1267 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1268    to BRIG data section to list of operand offsets.  */
1269 
1270 static unsigned
1271 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1272 	       hsa_op_base *op2 = NULL)
1273 {
1274   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1275     operand_offsets;
1276 
1277   gcc_checking_assert (op0 != NULL);
1278   operand_offsets.safe_push (enqueue_op (op0));
1279 
1280   if (op1 != NULL)
1281     {
1282       operand_offsets.safe_push (enqueue_op (op1));
1283       if (op2 != NULL)
1284 	operand_offsets.safe_push (enqueue_op (op2));
1285     }
1286 
1287   unsigned l = operand_offsets.length ();
1288 
1289   /* We have N operands so use 4 * N for the byte_count.  */
1290   uint32_t byte_count = lendian32 (4 * l);
1291 
1292   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1293   brig_data.add (operand_offsets.address (),
1294 		 l * sizeof (BrigOperandOffset32_t));
1295 
1296   brig_data.round_size_up (4);
1297 
1298   return offset;
1299 }
1300 
1301 /* Emit an HSA memory instruction and all necessary directives, schedule
1302    necessary operands for writing.  */
1303 
1304 static void
emit_memory_insn(hsa_insn_mem * mem)1305 emit_memory_insn (hsa_insn_mem *mem)
1306 {
1307   struct BrigInstMem repr;
1308   gcc_checking_assert (mem->operand_count () == 2);
1309 
1310   hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1311 
1312   /* This is necessary because of the erroneous typedef of
1313      BrigMemoryModifier8_t which introduces padding which may then contain
1314      random stuff (which we do not want so that we can test things don't
1315      change).  */
1316   memset (&repr, 0, sizeof (repr));
1317   repr.base.base.byteCount = lendian16 (sizeof (repr));
1318   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1319   repr.base.opcode = lendian16 (mem->m_opcode);
1320   repr.base.type = lendian16 (mem->m_type);
1321   repr.base.operands = lendian32 (emit_insn_operands (mem));
1322 
1323   if (addr->m_symbol)
1324     repr.segment = addr->m_symbol->m_segment;
1325   else
1326     repr.segment = BRIG_SEGMENT_FLAT;
1327   repr.modifier = 0;
1328   repr.equivClass = mem->m_equiv_class;
1329   repr.align = mem->m_align;
1330   if (mem->m_opcode == BRIG_OPCODE_LD)
1331     repr.width = BRIG_WIDTH_1;
1332   else
1333     repr.width = BRIG_WIDTH_NONE;
1334   memset (&repr.reserved, 0, sizeof (repr.reserved));
1335   brig_code.add (&repr, sizeof (repr));
1336   brig_insn_count++;
1337 }
1338 
1339 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1340    necessary operands for writing.  */
1341 
1342 static void
emit_signal_insn(hsa_insn_signal * mem)1343 emit_signal_insn (hsa_insn_signal *mem)
1344 {
1345   struct BrigInstSignal repr;
1346 
1347   memset (&repr, 0, sizeof (repr));
1348   repr.base.base.byteCount = lendian16 (sizeof (repr));
1349   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1350   repr.base.opcode = lendian16 (mem->m_opcode);
1351   repr.base.type = lendian16 (mem->m_type);
1352   repr.base.operands = lendian32 (emit_insn_operands (mem));
1353 
1354   repr.memoryOrder = mem->m_memory_order;
1355   repr.signalOperation = mem->m_signalop;
1356   repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1357 
1358   brig_code.add (&repr, sizeof (repr));
1359   brig_insn_count++;
1360 }
1361 
1362 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1363    necessary operands for writing.  */
1364 
1365 static void
emit_atomic_insn(hsa_insn_atomic * mem)1366 emit_atomic_insn (hsa_insn_atomic *mem)
1367 {
1368   struct BrigInstAtomic repr;
1369 
1370   /* Either operand[0] or operand[1] must be an address operand.  */
1371   hsa_op_address *addr = NULL;
1372   if (is_a <hsa_op_address *> (mem->get_op (0)))
1373     addr = as_a <hsa_op_address *> (mem->get_op (0));
1374   else
1375     addr = as_a <hsa_op_address *> (mem->get_op (1));
1376 
1377   memset (&repr, 0, sizeof (repr));
1378   repr.base.base.byteCount = lendian16 (sizeof (repr));
1379   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1380   repr.base.opcode = lendian16 (mem->m_opcode);
1381   repr.base.type = lendian16 (mem->m_type);
1382   repr.base.operands = lendian32 (emit_insn_operands (mem));
1383 
1384   if (addr->m_symbol)
1385     repr.segment = addr->m_symbol->m_segment;
1386   else
1387     repr.segment = BRIG_SEGMENT_FLAT;
1388   repr.memoryOrder = mem->m_memoryorder;
1389   repr.memoryScope = mem->m_memoryscope;
1390   repr.atomicOperation = mem->m_atomicop;
1391 
1392   brig_code.add (&repr, sizeof (repr));
1393   brig_insn_count++;
1394 }
1395 
1396 /* Emit an HSA LDA instruction and all necessary directives, schedule
1397    necessary operands for writing.  */
1398 
1399 static void
emit_addr_insn(hsa_insn_basic * insn)1400 emit_addr_insn (hsa_insn_basic *insn)
1401 {
1402   struct BrigInstAddr repr;
1403 
1404   hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1405 
1406   repr.base.base.byteCount = lendian16 (sizeof (repr));
1407   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1408   repr.base.opcode = lendian16 (insn->m_opcode);
1409   repr.base.type = lendian16 (insn->m_type);
1410   repr.base.operands = lendian32 (emit_insn_operands (insn));
1411 
1412   if (addr->m_symbol)
1413     repr.segment = addr->m_symbol->m_segment;
1414   else
1415     repr.segment = BRIG_SEGMENT_FLAT;
1416   memset (&repr.reserved, 0, sizeof (repr.reserved));
1417 
1418   brig_code.add (&repr, sizeof (repr));
1419   brig_insn_count++;
1420 }
1421 
1422 /* Emit an HSA segment conversion instruction and all necessary directives,
1423    schedule necessary operands for writing.  */
1424 
1425 static void
emit_segment_insn(hsa_insn_seg * seg)1426 emit_segment_insn (hsa_insn_seg *seg)
1427 {
1428   struct BrigInstSegCvt repr;
1429 
1430   repr.base.base.byteCount = lendian16 (sizeof (repr));
1431   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1432   repr.base.opcode = lendian16 (seg->m_opcode);
1433   repr.base.type = lendian16 (seg->m_type);
1434   repr.base.operands = lendian32 (emit_insn_operands (seg));
1435   repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1436   repr.segment = seg->m_segment;
1437   repr.modifier = 0;
1438 
1439   brig_code.add (&repr, sizeof (repr));
1440 
1441   brig_insn_count++;
1442 }
1443 
1444 /* Emit an HSA alloca instruction and all necessary directives,
1445    schedule necessary operands for writing.  */
1446 
1447 static void
emit_alloca_insn(hsa_insn_alloca * alloca)1448 emit_alloca_insn (hsa_insn_alloca *alloca)
1449 {
1450   struct BrigInstMem repr;
1451   gcc_checking_assert (alloca->operand_count () == 2);
1452 
1453   memset (&repr, 0, sizeof (repr));
1454   repr.base.base.byteCount = lendian16 (sizeof (repr));
1455   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1456   repr.base.opcode = lendian16 (alloca->m_opcode);
1457   repr.base.type = lendian16 (alloca->m_type);
1458   repr.base.operands = lendian32 (emit_insn_operands (alloca));
1459   repr.segment = BRIG_SEGMENT_PRIVATE;
1460   repr.modifier = 0;
1461   repr.equivClass = 0;
1462   repr.align = alloca->m_align;
1463   repr.width = BRIG_WIDTH_NONE;
1464   memset (&repr.reserved, 0, sizeof (repr.reserved));
1465   brig_code.add (&repr, sizeof (repr));
1466   brig_insn_count++;
1467 }
1468 
1469 /* Emit an HSA comparison instruction and all necessary directives,
1470    schedule necessary operands for writing.  */
1471 
1472 static void
emit_cmp_insn(hsa_insn_cmp * cmp)1473 emit_cmp_insn (hsa_insn_cmp *cmp)
1474 {
1475   struct BrigInstCmp repr;
1476 
1477   memset (&repr, 0, sizeof (repr));
1478   repr.base.base.byteCount = lendian16 (sizeof (repr));
1479   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1480   repr.base.opcode = lendian16 (cmp->m_opcode);
1481   repr.base.type = lendian16 (cmp->m_type);
1482   repr.base.operands = lendian32 (emit_insn_operands (cmp));
1483 
1484   if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1485     repr.sourceType
1486       = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1487   else
1488     repr.sourceType
1489       = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1490   repr.modifier = 0;
1491   repr.compare = cmp->m_compare;
1492   repr.pack = 0;
1493 
1494   brig_code.add (&repr, sizeof (repr));
1495   brig_insn_count++;
1496 }
1497 
1498 /* Emit an HSA generic branching/sycnronization instruction.  */
1499 
1500 static void
emit_generic_branch_insn(hsa_insn_br * br)1501 emit_generic_branch_insn (hsa_insn_br *br)
1502 {
1503   struct BrigInstBr repr;
1504   repr.base.base.byteCount = lendian16 (sizeof (repr));
1505   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1506   repr.base.opcode = lendian16 (br->m_opcode);
1507   repr.width = br->m_width;
1508   repr.base.type = lendian16 (br->m_type);
1509   repr.base.operands = lendian32 (emit_insn_operands (br));
1510   memset (&repr.reserved, 0, sizeof (repr.reserved));
1511 
1512   brig_code.add (&repr, sizeof (repr));
1513   brig_insn_count++;
1514 }
1515 
1516 /* Emit an HSA conditional branching instruction and all necessary directives,
1517    schedule necessary operands for writing.  */
1518 
1519 static void
emit_cond_branch_insn(hsa_insn_cbr * br)1520 emit_cond_branch_insn (hsa_insn_cbr *br)
1521 {
1522   struct BrigInstBr repr;
1523 
1524   basic_block target = NULL;
1525   edge_iterator ei;
1526   edge e;
1527 
1528   /* At the moment we only handle direct conditional jumps.  */
1529   gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1530   repr.base.base.byteCount = lendian16 (sizeof (repr));
1531   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1532   repr.base.opcode = lendian16 (br->m_opcode);
1533   repr.width = br->m_width;
1534   /* For Conditional jumps the type is always B1.  */
1535   repr.base.type = lendian16 (BRIG_TYPE_B1);
1536 
1537   FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1538     if (e->flags & EDGE_TRUE_VALUE)
1539       {
1540 	target = e->dest;
1541 	break;
1542       }
1543   gcc_assert (target);
1544 
1545   repr.base.operands
1546     = lendian32 (emit_operands (br->get_op (0),
1547 				&hsa_bb_for_bb (target)->m_label_ref));
1548   memset (&repr.reserved, 0, sizeof (repr.reserved));
1549 
1550   brig_code.add (&repr, sizeof (repr));
1551   brig_insn_count++;
1552 }
1553 
1554 /* Emit an HSA unconditional jump branching instruction that points to
1555    a label REFERENCE.  */
1556 
1557 static void
emit_unconditional_jump(hsa_op_code_ref * reference)1558 emit_unconditional_jump (hsa_op_code_ref *reference)
1559 {
1560   struct BrigInstBr repr;
1561 
1562   repr.base.base.byteCount = lendian16 (sizeof (repr));
1563   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1564   repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1565   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1566   /* Direct branches to labels must be width(all).  */
1567   repr.width = BRIG_WIDTH_ALL;
1568 
1569   repr.base.operands = lendian32 (emit_operands (reference));
1570   memset (&repr.reserved, 0, sizeof (repr.reserved));
1571   brig_code.add (&repr, sizeof (repr));
1572   brig_insn_count++;
1573 }
1574 
1575 /* Emit an HSA switch jump instruction that uses a jump table to
1576    jump to a destination label.  */
1577 
1578 static void
emit_switch_insn(hsa_insn_sbr * sbr)1579 emit_switch_insn (hsa_insn_sbr *sbr)
1580 {
1581   struct BrigInstBr repr;
1582 
1583   gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1584   repr.base.base.byteCount = lendian16 (sizeof (repr));
1585   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1586   repr.base.opcode = lendian16 (sbr->m_opcode);
1587   repr.width = BRIG_WIDTH_1;
1588   /* For Conditional jumps the type is always B1.  */
1589   hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1590   repr.base.type = lendian16 (index->m_type);
1591   repr.base.operands
1592     = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1593   memset (&repr.reserved, 0, sizeof (repr.reserved));
1594 
1595   brig_code.add (&repr, sizeof (repr));
1596   brig_insn_count++;
1597 }
1598 
1599 /* Emit a HSA convert instruction and all necessary directives, schedule
1600    necessary operands for writing.  */
1601 
1602 static void
emit_cvt_insn(hsa_insn_cvt * insn)1603 emit_cvt_insn (hsa_insn_cvt *insn)
1604 {
1605   struct BrigInstCvt repr;
1606   BrigType16_t srctype;
1607 
1608   repr.base.base.byteCount = lendian16 (sizeof (repr));
1609   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1610   repr.base.opcode = lendian16 (insn->m_opcode);
1611   repr.base.type = lendian16 (insn->m_type);
1612   repr.base.operands = lendian32 (emit_insn_operands (insn));
1613 
1614   if (is_a <hsa_op_reg *> (insn->get_op (1)))
1615     srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1616   else
1617     srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1618   repr.sourceType = lendian16 (srctype);
1619   repr.modifier = 0;
1620   /* float to smaller float requires a rounding setting (we default
1621      to 'near'.  */
1622   if (hsa_type_float_p (insn->m_type)
1623       && (!hsa_type_float_p (srctype)
1624 	  || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1625 	      < (srctype & BRIG_TYPE_BASE_MASK))))
1626     repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1627   else if (hsa_type_integer_p (insn->m_type) &&
1628 	   hsa_type_float_p (srctype))
1629     repr.round = BRIG_ROUND_INTEGER_ZERO;
1630   else
1631     repr.round = BRIG_ROUND_NONE;
1632   brig_code.add (&repr, sizeof (repr));
1633   brig_insn_count++;
1634 }
1635 
1636 /* Emit call instruction INSN, where this instruction must be closed
1637    within a call block instruction.  */
1638 
1639 static void
emit_call_insn(hsa_insn_call * call)1640 emit_call_insn (hsa_insn_call *call)
1641 {
1642   struct BrigInstBr repr;
1643 
1644   repr.base.base.byteCount = lendian16 (sizeof (repr));
1645   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1646   repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1647   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1648 
1649   repr.base.operands
1650     = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1651 				call->m_args_code_list));
1652 
1653   /* Internal functions have not set m_called_function.  */
1654   if (call->m_called_function)
1655     {
1656       function_linkage_pair pair (call->m_called_function,
1657 				  call->m_func.m_brig_op_offset);
1658       function_call_linkage.safe_push (pair);
1659     }
1660   else
1661     {
1662       hsa_internal_fn *slot
1663 	= hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1664       gcc_assert (slot);
1665       gcc_assert (slot->m_offset > 0);
1666       call->m_func.m_directive_offset = slot->m_offset;
1667     }
1668 
1669   repr.width = BRIG_WIDTH_ALL;
1670   memset (&repr.reserved, 0, sizeof (repr.reserved));
1671 
1672   brig_code.add (&repr, sizeof (repr));
1673   brig_insn_count++;
1674 }
1675 
1676 /* Emit argument block directive.  */
1677 
1678 static void
emit_arg_block_insn(hsa_insn_arg_block * insn)1679 emit_arg_block_insn (hsa_insn_arg_block *insn)
1680 {
1681   switch (insn->m_kind)
1682     {
1683     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1684       {
1685 	struct BrigDirectiveArgBlock repr;
1686 	repr.base.byteCount = lendian16 (sizeof (repr));
1687 	repr.base.kind = lendian16 (insn->m_kind);
1688 	brig_code.add (&repr, sizeof (repr));
1689 
1690 	for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1691 	  {
1692 	    insn->m_call_insn->m_args_code_list->m_offsets[i]
1693 	      = lendian32 (emit_directive_variable
1694 			   (insn->m_call_insn->m_input_args[i]));
1695 	    brig_insn_count++;
1696 	  }
1697 
1698 	if (insn->m_call_insn->m_output_arg)
1699 	  {
1700 	    insn->m_call_insn->m_result_code_list->m_offsets[0]
1701 	      = lendian32 (emit_directive_variable
1702 			   (insn->m_call_insn->m_output_arg));
1703 	    brig_insn_count++;
1704 	  }
1705 
1706 	break;
1707       }
1708     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1709       {
1710 	struct BrigDirectiveArgBlock repr;
1711 	repr.base.byteCount = lendian16 (sizeof (repr));
1712 	repr.base.kind = lendian16 (insn->m_kind);
1713 	brig_code.add (&repr, sizeof (repr));
1714 	break;
1715       }
1716     default:
1717       gcc_unreachable ();
1718     }
1719 
1720   brig_insn_count++;
1721 }
1722 
1723 /* Emit comment directive.  */
1724 
1725 static void
emit_comment_insn(hsa_insn_comment * insn)1726 emit_comment_insn (hsa_insn_comment *insn)
1727 {
1728   struct BrigDirectiveComment repr;
1729   memset (&repr, 0, sizeof (repr));
1730 
1731   repr.base.byteCount = lendian16 (sizeof (repr));
1732   repr.base.kind = lendian16 (insn->m_opcode);
1733   repr.name = brig_emit_string (insn->m_comment, '\0', false);
1734   brig_code.add (&repr, sizeof (repr));
1735 }
1736 
1737 /* Emit queue instruction INSN.  */
1738 
1739 static void
emit_queue_insn(hsa_insn_queue * insn)1740 emit_queue_insn (hsa_insn_queue *insn)
1741 {
1742   BrigInstQueue repr;
1743   memset (&repr, 0, sizeof (repr));
1744 
1745   repr.base.base.byteCount = lendian16 (sizeof (repr));
1746   repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1747   repr.base.opcode = lendian16 (insn->m_opcode);
1748   repr.base.type = lendian16 (insn->m_type);
1749   repr.segment = insn->m_segment;
1750   repr.memoryOrder = insn->m_memory_order;
1751   repr.base.operands = lendian32 (emit_insn_operands (insn));
1752   brig_data.round_size_up (4);
1753   brig_code.add (&repr, sizeof (repr));
1754 
1755   brig_insn_count++;
1756 }
1757 
1758 /* Emit source type instruction INSN.  */
1759 
1760 static void
emit_srctype_insn(hsa_insn_srctype * insn)1761 emit_srctype_insn (hsa_insn_srctype *insn)
1762 {
1763   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1764   struct BrigInstSourceType repr;
1765   unsigned operand_count = insn->operand_count ();
1766   gcc_checking_assert (operand_count >= 2);
1767 
1768   memset (&repr, 0, sizeof (repr));
1769   repr.sourceType = lendian16 (insn->m_source_type);
1770   repr.base.base.byteCount = lendian16 (sizeof (repr));
1771   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1772   repr.base.opcode = lendian16 (insn->m_opcode);
1773   repr.base.type = lendian16 (insn->m_type);
1774 
1775   repr.base.operands = lendian32 (emit_insn_operands (insn));
1776   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1777   brig_insn_count++;
1778 }
1779 
1780 /* Emit packed instruction INSN.  */
1781 
1782 static void
emit_packed_insn(hsa_insn_packed * insn)1783 emit_packed_insn (hsa_insn_packed *insn)
1784 {
1785   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1786   struct BrigInstSourceType repr;
1787   unsigned operand_count = insn->operand_count ();
1788   gcc_checking_assert (operand_count >= 2);
1789 
1790   memset (&repr, 0, sizeof (repr));
1791   repr.sourceType = lendian16 (insn->m_source_type);
1792   repr.base.base.byteCount = lendian16 (sizeof (repr));
1793   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1794   repr.base.opcode = lendian16 (insn->m_opcode);
1795   repr.base.type = lendian16 (insn->m_type);
1796 
1797   if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1798     {
1799       /* Create operand list for packed type.  */
1800       for (unsigned i = 1; i < operand_count; i++)
1801 	{
1802 	  gcc_checking_assert (insn->get_op (i));
1803 	  insn->m_operand_list->m_offsets[i - 1]
1804 	    = lendian32 (enqueue_op (insn->get_op (i)));
1805 	}
1806 
1807       repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1808 						     insn->m_operand_list));
1809     }
1810   else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1811     {
1812       /* Create operand list for packed type.  */
1813       for (unsigned i = 0; i < operand_count - 1; i++)
1814 	{
1815 	  gcc_checking_assert (insn->get_op (i));
1816 	  insn->m_operand_list->m_offsets[i]
1817 	    = lendian32 (enqueue_op (insn->get_op (i)));
1818 	}
1819 
1820       unsigned ops = emit_operands (insn->m_operand_list,
1821 				    insn->get_op (insn->operand_count () - 1));
1822       repr.base.operands = lendian32 (ops);
1823     }
1824 
1825 
1826   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1827   brig_insn_count++;
1828 }
1829 
1830 /* Emit a basic HSA instruction and all necessary directives, schedule
1831    necessary operands for writing.  */
1832 
1833 static void
emit_basic_insn(hsa_insn_basic * insn)1834 emit_basic_insn (hsa_insn_basic *insn)
1835 {
1836   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1837   struct BrigInstMod repr;
1838   BrigType16_t type;
1839 
1840   memset (&repr, 0, sizeof (repr));
1841   repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1842   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1843   repr.base.opcode = lendian16 (insn->m_opcode);
1844   switch (insn->m_opcode)
1845     {
1846       /* And the bit-logical operations need bit types and whine about
1847 	 arithmetic types :-/  */
1848       case BRIG_OPCODE_AND:
1849       case BRIG_OPCODE_OR:
1850       case BRIG_OPCODE_XOR:
1851       case BRIG_OPCODE_NOT:
1852 	type = regtype_for_type (insn->m_type);
1853 	break;
1854       default:
1855 	type = insn->m_type;
1856 	break;
1857     }
1858   repr.base.type = lendian16 (type);
1859   repr.base.operands = lendian32 (emit_insn_operands (insn));
1860 
1861   if (hsa_type_packed_p (type))
1862     {
1863       if (hsa_type_float_p (type)
1864 	  && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1865 	repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1866       else
1867 	repr.round = 0;
1868       /* We assume that destination and sources agree in packing layout.  */
1869       if (insn->num_used_ops () >= 2)
1870 	repr.pack = BRIG_PACK_PP;
1871       else
1872 	repr.pack = BRIG_PACK_P;
1873       repr.reserved = 0;
1874       repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1875       repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1876       brig_code.add (&repr, sizeof (struct BrigInstMod));
1877     }
1878   else
1879     brig_code.add (&repr, sizeof (struct BrigInstBasic));
1880   brig_insn_count++;
1881 }
1882 
1883 /* Emit an HSA instruction and all necessary directives, schedule necessary
1884    operands for writing.  */
1885 
1886 static void
emit_insn(hsa_insn_basic * insn)1887 emit_insn (hsa_insn_basic *insn)
1888 {
1889   gcc_assert (!is_a <hsa_insn_phi *> (insn));
1890 
1891   insn->m_brig_offset = brig_code.total_size;
1892 
1893   if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1894     emit_signal_insn (signal);
1895   else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1896     emit_atomic_insn (atom);
1897   else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1898     emit_memory_insn (mem);
1899   else if (insn->m_opcode == BRIG_OPCODE_LDA)
1900     emit_addr_insn (insn);
1901   else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1902     emit_segment_insn (seg);
1903   else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1904     emit_cmp_insn (cmp);
1905   else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1906     emit_cond_branch_insn (br);
1907   else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1908     {
1909       if (switch_instructions == NULL)
1910 	switch_instructions = new vec <hsa_insn_sbr *> ();
1911 
1912       switch_instructions->safe_push (sbr);
1913       emit_switch_insn (sbr);
1914     }
1915   else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1916     emit_generic_branch_insn (br);
1917   else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1918     emit_arg_block_insn (block);
1919   else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1920     emit_call_insn (call);
1921   else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1922     emit_comment_insn (comment);
1923   else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1924     emit_queue_insn (queue);
1925   else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1926     emit_srctype_insn (srctype);
1927   else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1928     emit_packed_insn (packed);
1929   else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1930     emit_cvt_insn (cvt);
1931   else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1932     emit_alloca_insn (alloca);
1933   else
1934     emit_basic_insn (insn);
1935 }
1936 
1937 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1938    or we are about to finish emitting code, if it is NULL.  If the fall through
1939    edge from BB does not lead to NEXT_BB, emit an unconditional jump.  */
1940 
1941 static void
perhaps_emit_branch(basic_block bb,basic_block next_bb)1942 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1943 {
1944   basic_block t_bb = NULL, ff = NULL;
1945 
1946   edge_iterator ei;
1947   edge e;
1948 
1949   /* If the last instruction of BB is a switch, ignore emission of all
1950      edges.  */
1951   if (hsa_bb_for_bb (bb)->m_last_insn
1952       && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1953     return;
1954 
1955   FOR_EACH_EDGE (e, ei, bb->succs)
1956     if (e->flags & EDGE_TRUE_VALUE)
1957       {
1958 	gcc_assert (!t_bb);
1959 	t_bb = e->dest;
1960       }
1961     else
1962       {
1963 	gcc_assert (!ff);
1964 	ff = e->dest;
1965       }
1966 
1967   if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1968     return;
1969 
1970   emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1971 }
1972 
1973 /* Emit the a function with name NAME to the various brig sections.  */
1974 
1975 void
hsa_brig_emit_function(void)1976 hsa_brig_emit_function (void)
1977 {
1978   basic_block bb, prev_bb;
1979   hsa_insn_basic *insn;
1980   BrigDirectiveExecutable *ptr_to_fndir;
1981 
1982   brig_init ();
1983 
1984   brig_insn_count = 0;
1985   memset (&op_queue, 0, sizeof (op_queue));
1986   op_queue.projected_size = brig_operand.total_size;
1987 
1988   if (!function_offsets)
1989     function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1990 
1991   if (!emitted_declarations)
1992     emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1993 
1994   for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1995     {
1996       tree called = hsa_cfun->m_called_functions[i];
1997 
1998       /* If the function has no definition, emit a declaration.  */
1999       if (!emitted_declarations->get (called))
2000 	{
2001 	  BrigDirectiveExecutable *e = emit_function_declaration (called);
2002 	  emitted_declarations->put (called, e);
2003 	}
2004     }
2005 
2006   for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2007     {
2008       hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2009       emit_internal_fn_decl (called);
2010     }
2011 
2012   ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2013   for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2014        insn;
2015        insn = insn->m_next)
2016     emit_insn (insn);
2017   prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2018   FOR_EACH_BB_FN (bb, cfun)
2019     {
2020       perhaps_emit_branch (prev_bb, bb);
2021       emit_bb_label_directive (hsa_bb_for_bb (bb));
2022       for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2023 	emit_insn (insn);
2024       prev_bb = bb;
2025     }
2026   perhaps_emit_branch (prev_bb, NULL);
2027   ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2028 
2029   /* Fill up label references for all sbr instructions.  */
2030   if (switch_instructions)
2031     {
2032       for (unsigned i = 0; i < switch_instructions->length (); i++)
2033 	{
2034 	  hsa_insn_sbr *sbr = (*switch_instructions)[i];
2035 	  for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2036 	    {
2037 	      hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2038 	      sbr->m_label_code_list->m_offsets[j]
2039 		= hbb->m_label_ref.m_directive_offset;
2040 	    }
2041 	}
2042 
2043       switch_instructions->release ();
2044       delete switch_instructions;
2045       switch_instructions = NULL;
2046     }
2047 
2048   if (dump_file)
2049     {
2050       fprintf (dump_file, "------- After BRIG emission: -------\n");
2051       dump_hsa_cfun (dump_file);
2052     }
2053 
2054   emit_queued_operands ();
2055 }
2056 
/* Emit the symbols needed for OpenMP support: call brig_init and then emit
   a variable directive for hsa_num_threads.  */

void
hsa_brig_emit_omp_symbols (void)
{
  brig_init ();
  emit_directive_variable (hsa_num_threads);
}
2065 
2066 /* Create and return __hsa_global_variables symbol that contains
2067    all informations consumed by libgomp to link global variables
2068    with their string names used by an HSA kernel.  */
2069 
2070 static tree
hsa_output_global_variables()2071 hsa_output_global_variables ()
2072 {
2073   unsigned l = hsa_global_variable_symbols->elements ();
2074 
2075   tree variable_info_type = make_node (RECORD_TYPE);
2076   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2077 			   get_identifier ("name"), ptr_type_node);
2078   DECL_CHAIN (id_f1) = NULL_TREE;
2079   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2080 			   get_identifier ("omp_data_size"),
2081 			   ptr_type_node);
2082   DECL_CHAIN (id_f2) = id_f1;
2083   finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2084 			 NULL_TREE);
2085 
2086   tree int_num_of_global_vars;
2087   int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2088   tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2089   tree global_vars_array_type = build_array_type (variable_info_type,
2090 						  global_vars_num_index_type);
2091   TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2092 
2093   vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2094 
2095   for (hash_table <hsa_noop_symbol_hasher>::iterator it
2096        = hsa_global_variable_symbols->begin ();
2097        it != hsa_global_variable_symbols->end (); ++it)
2098     {
2099       unsigned len = strlen ((*it)->m_name);
2100       char *copy = XNEWVEC (char, len + 2);
2101       copy[0] = '&';
2102       memcpy (copy + 1, (*it)->m_name, len);
2103       copy[len + 1] = '\0';
2104       len++;
2105       hsa_sanitize_name (copy);
2106 
2107       tree var_name = build_string (len, copy);
2108       TREE_TYPE (var_name)
2109 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2110       free (copy);
2111 
2112       vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2113       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2114 			      build1 (ADDR_EXPR,
2115 				      build_pointer_type (TREE_TYPE (var_name)),
2116 				      var_name));
2117       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2118 			      build_fold_addr_expr ((*it)->m_decl));
2119 
2120       tree variable_info_ctor = build_constructor (variable_info_type,
2121 						   variable_info_vec);
2122 
2123       CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2124 			      variable_info_ctor);
2125     }
2126 
2127   tree global_vars_ctor = build_constructor (global_vars_array_type,
2128 					     global_vars_vec);
2129 
2130   char tmp_name[64];
2131   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2132   tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2133 					   get_identifier (tmp_name),
2134 					   global_vars_array_type);
2135   TREE_STATIC (global_vars_table) = 1;
2136   TREE_READONLY (global_vars_table) = 1;
2137   TREE_PUBLIC (global_vars_table) = 0;
2138   DECL_ARTIFICIAL (global_vars_table) = 1;
2139   DECL_IGNORED_P (global_vars_table) = 1;
2140   DECL_EXTERNAL (global_vars_table) = 0;
2141   TREE_CONSTANT (global_vars_table) = 1;
2142   DECL_INITIAL (global_vars_table) = global_vars_ctor;
2143   varpool_node::finalize_decl (global_vars_table);
2144 
2145   return global_vars_table;
2146 }
2147 
2148 /* Create __hsa_host_functions and __hsa_kernels that contain
2149    all informations consumed by libgomp to register all kernels
2150    in the BRIG binary.  */
2151 
2152 static void
hsa_output_kernels(tree * host_func_table,tree * kernels)2153 hsa_output_kernels (tree *host_func_table, tree *kernels)
2154 {
2155   unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2156 
2157   tree int_num_of_kernels;
2158   int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2159   tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2160   tree host_functions_array_type = build_array_type (ptr_type_node,
2161 						     kernel_num_index_type);
2162   TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2163 
2164   vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2165   for (unsigned i = 0; i < map_count; ++i)
2166     {
2167       tree decl = hsa_get_decl_kernel_mapping_decl (i);
2168       tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2169       CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2170     }
2171   tree host_functions_ctor = build_constructor (host_functions_array_type,
2172 						host_functions_vec);
2173   char tmp_name[64];
2174   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2175   tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2176 					 get_identifier (tmp_name),
2177 					 host_functions_array_type);
2178   TREE_STATIC (hsa_host_func_table) = 1;
2179   TREE_READONLY (hsa_host_func_table) = 1;
2180   TREE_PUBLIC (hsa_host_func_table) = 0;
2181   DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2182   DECL_IGNORED_P (hsa_host_func_table) = 1;
2183   DECL_EXTERNAL (hsa_host_func_table) = 0;
2184   TREE_CONSTANT (hsa_host_func_table) = 1;
2185   DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2186   varpool_node::finalize_decl (hsa_host_func_table);
2187   *host_func_table = hsa_host_func_table;
2188 
2189   /* Following code emits list of kernel_info structures.  */
2190 
2191   tree kernel_info_type = make_node (RECORD_TYPE);
2192   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2193 			   get_identifier ("name"), ptr_type_node);
2194   DECL_CHAIN (id_f1) = NULL_TREE;
2195   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2196 			   get_identifier ("omp_data_size"),
2197 			   unsigned_type_node);
2198   DECL_CHAIN (id_f2) = id_f1;
2199   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2200 			   get_identifier ("gridified_kernel_p"),
2201 			   boolean_type_node);
2202   DECL_CHAIN (id_f3) = id_f2;
2203   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2204 			   get_identifier ("kernel_dependencies_count"),
2205 			   unsigned_type_node);
2206   DECL_CHAIN (id_f4) = id_f3;
2207   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2208 			   get_identifier ("kernel_dependencies"),
2209 			   build_pointer_type (build_pointer_type
2210 					       (char_type_node)));
2211   DECL_CHAIN (id_f5) = id_f4;
2212   finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2213 			 NULL_TREE);
2214 
2215   int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2216   tree kernel_info_vector_type
2217     = build_array_type (kernel_info_type,
2218 			build_index_type (int_num_of_kernels));
2219   TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2220 
2221   vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2222   tree kernel_dependencies_vector_type = NULL;
2223 
2224   for (unsigned i = 0; i < map_count; ++i)
2225     {
2226       tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2227       char *name = hsa_get_decl_kernel_mapping_name (i);
2228       unsigned len = strlen (name);
2229       char *copy = XNEWVEC (char, len + 2);
2230       copy[0] = '&';
2231       memcpy (copy + 1, name, len);
2232       copy[len + 1] = '\0';
2233       len++;
2234 
2235       tree kern_name = build_string (len, copy);
2236       TREE_TYPE (kern_name)
2237 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2238       free (copy);
2239 
2240       unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2241       tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2242       bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2243       tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2244 						     gridified_kernel_p);
2245       unsigned count = 0;
2246       vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2247       if (hsa_decl_kernel_dependencies)
2248 	{
2249 	  vec<const char *> **slot;
2250 	  slot = hsa_decl_kernel_dependencies->get (kernel);
2251 	  if (slot)
2252 	    {
2253 	      vec <const char *> *dependencies = *slot;
2254 	      count = dependencies->length ();
2255 
2256 	      kernel_dependencies_vector_type
2257 		= build_array_type (build_pointer_type (char_type_node),
2258 				    build_index_type (size_int (count)));
2259 	      TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2260 
2261 	      for (unsigned j = 0; j < count; j++)
2262 		{
2263 		  const char *d = (*dependencies)[j];
2264 		  len = strlen (d);
2265 		  tree dependency_name = build_string (len, d);
2266 		  TREE_TYPE (dependency_name)
2267 		    = build_array_type (char_type_node,
2268 					build_index_type (size_int (len)));
2269 
2270 		  CONSTRUCTOR_APPEND_ELT
2271 		    (kernel_dependencies_vec, NULL_TREE,
2272 		     build1 (ADDR_EXPR,
2273 			     build_pointer_type (TREE_TYPE (dependency_name)),
2274 			     dependency_name));
2275 		}
2276 	    }
2277 	}
2278 
2279       tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2280 
2281       vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2282       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2283 			      build1 (ADDR_EXPR,
2284 				      build_pointer_type (TREE_TYPE
2285 							  (kern_name)),
2286 				      kern_name));
2287       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2288       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2289 			      gridified_kernel_p_tree);
2290       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2291 
2292       if (count > 0)
2293 	{
2294 	  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2295 	  gcc_checking_assert (kernel_dependencies_vector_type);
2296 	  tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2297 					       get_identifier (tmp_name),
2298 					       kernel_dependencies_vector_type);
2299 
2300 	  TREE_STATIC (dependencies_list) = 1;
2301 	  TREE_READONLY (dependencies_list) = 1;
2302 	  TREE_PUBLIC (dependencies_list) = 0;
2303 	  DECL_ARTIFICIAL (dependencies_list) = 1;
2304 	  DECL_IGNORED_P (dependencies_list) = 1;
2305 	  DECL_EXTERNAL (dependencies_list) = 0;
2306 	  TREE_CONSTANT (dependencies_list) = 1;
2307 	  DECL_INITIAL (dependencies_list)
2308 	    = build_constructor (kernel_dependencies_vector_type,
2309 				 kernel_dependencies_vec);
2310 	  varpool_node::finalize_decl (dependencies_list);
2311 
2312 	  CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2313 				  build1 (ADDR_EXPR,
2314 					  build_pointer_type
2315 					    (TREE_TYPE (dependencies_list)),
2316 					  dependencies_list));
2317 	}
2318       else
2319 	CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2320 
2321       tree kernel_info_ctor = build_constructor (kernel_info_type,
2322 						 kernel_info_vec);
2323 
2324       CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2325 			      kernel_info_ctor);
2326     }
2327 
2328   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2329   tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2330 				 get_identifier (tmp_name),
2331 				 kernel_info_vector_type);
2332 
2333   TREE_STATIC (hsa_kernels) = 1;
2334   TREE_READONLY (hsa_kernels) = 1;
2335   TREE_PUBLIC (hsa_kernels) = 0;
2336   DECL_ARTIFICIAL (hsa_kernels) = 1;
2337   DECL_IGNORED_P (hsa_kernels) = 1;
2338   DECL_EXTERNAL (hsa_kernels) = 0;
2339   TREE_CONSTANT (hsa_kernels) = 1;
2340   DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2341 						  kernel_info_vector_vec);
2342   varpool_node::finalize_decl (hsa_kernels);
2343   *kernels = hsa_kernels;
2344 }
2345 
2346 /* Create a static constructor that will register out brig stuff with
2347    libgomp.  */
2348 
2349 static void
hsa_output_libgomp_mapping(tree brig_decl)2350 hsa_output_libgomp_mapping (tree brig_decl)
2351 {
2352   unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2353   unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2354 
2355   tree kernels;
2356   tree host_func_table;
2357 
2358   hsa_output_kernels (&host_func_table, &kernels);
2359   tree global_vars = hsa_output_global_variables ();
2360 
2361   tree hsa_image_desc_type = make_node (RECORD_TYPE);
2362   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2363 			   get_identifier ("brig_module"), ptr_type_node);
2364   DECL_CHAIN (id_f1) = NULL_TREE;
2365   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2366 			   get_identifier ("kernel_count"),
2367 			   unsigned_type_node);
2368 
2369   DECL_CHAIN (id_f2) = id_f1;
2370   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2371 			   get_identifier ("hsa_kernel_infos"),
2372 			   ptr_type_node);
2373   DECL_CHAIN (id_f3) = id_f2;
2374   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2375 			   get_identifier ("global_variable_count"),
2376 			   unsigned_type_node);
2377   DECL_CHAIN (id_f4) = id_f3;
2378   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2379 			   get_identifier ("hsa_global_variable_infos"),
2380 			   ptr_type_node);
2381   DECL_CHAIN (id_f5) = id_f4;
2382   finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2383 			 NULL_TREE);
2384   TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2385 
2386   vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2387   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2388 			  build_fold_addr_expr (brig_decl));
2389   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2390 			  build_int_cstu (unsigned_type_node, kernel_count));
2391   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2392 			  build1 (ADDR_EXPR,
2393 				  build_pointer_type (TREE_TYPE (kernels)),
2394 				  kernels));
2395   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2396 			  build_int_cstu (unsigned_type_node,
2397 					  global_variable_count));
2398   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2399 			  build1 (ADDR_EXPR,
2400 				  build_pointer_type (TREE_TYPE (global_vars)),
2401 				  global_vars));
2402 
2403   tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2404 
2405   char tmp_name[64];
2406   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2407   tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2408 					get_identifier (tmp_name),
2409 					hsa_image_desc_type);
2410   TREE_STATIC (hsa_img_descriptor) = 1;
2411   TREE_READONLY (hsa_img_descriptor) = 1;
2412   TREE_PUBLIC (hsa_img_descriptor) = 0;
2413   DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2414   DECL_IGNORED_P (hsa_img_descriptor) = 1;
2415   DECL_EXTERNAL (hsa_img_descriptor) = 0;
2416   TREE_CONSTANT (hsa_img_descriptor) = 1;
2417   DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2418   varpool_node::finalize_decl (hsa_img_descriptor);
2419 
2420   /* Construct the "host_table" libgomp expects.  */
2421   tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2422   tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2423   TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2424   vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2425   tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2426   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2427 			  host_func_table_addr);
2428   offset_int func_table_size
2429     = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2430   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2431 			  fold_build2 (POINTER_PLUS_EXPR,
2432 				       TREE_TYPE (host_func_table_addr),
2433 				       host_func_table_addr,
2434 				       build_int_cst (size_type_node,
2435 						      func_table_size.to_uhwi
2436 						      ())));
2437   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2438   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2439   tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2440 						    libgomp_host_table_vec);
2441   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2442   tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2443 					    get_identifier (tmp_name),
2444 					    libgomp_host_table_type);
2445 
2446   TREE_STATIC (hsa_libgomp_host_table) = 1;
2447   TREE_READONLY (hsa_libgomp_host_table) = 1;
2448   TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2449   DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2450   DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2451   DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2452   TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2453   DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2454   varpool_node::finalize_decl (hsa_libgomp_host_table);
2455 
2456   /* Generate an initializer with a call to the registration routine.  */
2457 
2458   tree offload_register
2459     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2460   gcc_checking_assert (offload_register);
2461 
2462   tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2463   append_to_statement_list
2464     (build_call_expr (offload_register, 4,
2465 		      build_int_cstu (unsigned_type_node,
2466 				      GOMP_VERSION_PACK (GOMP_VERSION,
2467 							 GOMP_VERSION_HSA)),
2468 		      build_fold_addr_expr (hsa_libgomp_host_table),
2469 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2470 		      build_fold_addr_expr (hsa_img_descriptor)),
2471      hsa_ctor_stmts);
2472 
2473   cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2474 
2475   tree offload_unregister
2476     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2477   gcc_checking_assert (offload_unregister);
2478 
2479   tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2480   append_to_statement_list
2481     (build_call_expr (offload_unregister, 4,
2482 		      build_int_cstu (unsigned_type_node,
2483 				      GOMP_VERSION_PACK (GOMP_VERSION,
2484 							 GOMP_VERSION_HSA)),
2485 		      build_fold_addr_expr (hsa_libgomp_host_table),
2486 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2487 		      build_fold_addr_expr (hsa_img_descriptor)),
2488      hsa_dtor_stmts);
2489   cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2490 }
2491 
2492 /* Emit the brig module we have compiled to a section in the final assembly and
2493    also create a compile unit static constructor that will register the brig
2494    module with libgomp.  */
2495 
2496 void
hsa_output_brig(void)2497 hsa_output_brig (void)
2498 {
2499   section *saved_section;
2500 
2501   if (!brig_initialized)
2502     return;
2503 
2504   for (unsigned i = 0; i < function_call_linkage.length (); i++)
2505     {
2506       function_linkage_pair p = function_call_linkage[i];
2507 
2508       BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2509       gcc_assert (*func_offset);
2510       BrigOperandCodeRef *code_ref
2511 	= (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2512       gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2513       code_ref->ref = lendian32 (*func_offset);
2514     }
2515 
2516   /* Iterate all function declarations and if we meet a function that should
2517      have module linkage and we are unable to emit HSAIL for the function,
2518      then change the linkage to program linkage.  Doing so, we will emit
2519      a valid BRIG image.  */
2520   if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2521     for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2522 	 = emitted_declarations->begin ();
2523 	 it != emitted_declarations->end ();
2524 	 ++it)
2525       {
2526 	if (hsa_failed_functions->contains ((*it).first))
2527 	  (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2528       }
2529 
2530   saved_section = in_section;
2531 
2532   switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2533   char tmp_name[64];
2534   ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2535   ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2536   tree brig_id = get_identifier (tmp_name);
2537   tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2538 			       char_type_node);
2539   SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2540   TREE_ADDRESSABLE (brig_decl) = 1;
2541   TREE_READONLY (brig_decl) = 1;
2542   DECL_ARTIFICIAL (brig_decl) = 1;
2543   DECL_IGNORED_P (brig_decl) = 1;
2544   TREE_STATIC (brig_decl) = 1;
2545   TREE_PUBLIC (brig_decl) = 0;
2546   TREE_USED (brig_decl) = 1;
2547   DECL_INITIAL (brig_decl) = brig_decl;
2548   TREE_ASM_WRITTEN (brig_decl) = 1;
2549 
2550   BrigModuleHeader module_header;
2551   memcpy (&module_header.identification, "HSA BRIG",
2552 	  sizeof (module_header.identification));
2553   module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2554   module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2555   uint64_t section_index[3];
2556 
2557   int data_padding, code_padding, operand_padding;
2558   data_padding = HSA_SECTION_ALIGNMENT
2559     - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2560   code_padding = HSA_SECTION_ALIGNMENT
2561     - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2562   operand_padding = HSA_SECTION_ALIGNMENT
2563     - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2564 
2565   uint64_t module_size = sizeof (module_header)
2566     + sizeof (section_index)
2567     + brig_data.total_size
2568     + data_padding
2569     + brig_code.total_size
2570     + code_padding
2571     + brig_operand.total_size
2572     + operand_padding;
2573   gcc_assert ((module_size % 16) == 0);
2574   module_header.byteCount = lendian64 (module_size);
2575   memset (&module_header.hash, 0, sizeof (module_header.hash));
2576   module_header.reserved = 0;
2577   module_header.sectionCount = lendian32 (3);
2578   module_header.sectionIndex = lendian64 (sizeof (module_header));
2579   assemble_string ((const char *) &module_header, sizeof (module_header));
2580   uint64_t off = sizeof (module_header) + sizeof (section_index);
2581   section_index[0] = lendian64 (off);
2582   off += brig_data.total_size + data_padding;
2583   section_index[1] = lendian64 (off);
2584   off += brig_code.total_size + code_padding;
2585   section_index[2] = lendian64 (off);
2586   assemble_string ((const char *) &section_index, sizeof (section_index));
2587 
2588   char padding[HSA_SECTION_ALIGNMENT];
2589   memset (padding, 0, sizeof (padding));
2590 
2591   brig_data.output ();
2592   assemble_string (padding, data_padding);
2593   brig_code.output ();
2594   assemble_string (padding, code_padding);
2595   brig_operand.output ();
2596   assemble_string (padding, operand_padding);
2597 
2598   if (saved_section)
2599     switch_to_section (saved_section);
2600 
2601   hsa_output_libgomp_mapping (brig_decl);
2602 
2603   hsa_free_decl_kernel_mapping ();
2604   brig_release_data ();
2605   hsa_deinit_compilation_unit_data ();
2606 
2607   delete emitted_declarations;
2608   emitted_declarations = NULL;
2609   delete function_offsets;
2610   function_offsets = NULL;
2611 }
2612