1 /* Producing binary form of HSA BRIG from our internal representation.
2    Copyright (C) 2013-2016 Free Software Foundation, Inc.
3    Contributed by Martin Jambor <mjambor@suse.cz> and
4    Martin Liska <mliska@suse.cz>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "tm_p.h"
28 #include "is-a.h"
29 #include "vec.h"
30 #include "hash-table.h"
31 #include "hash-map.h"
32 #include "tree.h"
33 #include "tree-iterator.h"
34 #include "stor-layout.h"
35 #include "output.h"
36 #include "cfg.h"
37 #include "function.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "gimple-pretty-print.h"
41 #include "diagnostic-core.h"
42 #include "cgraph.h"
43 #include "dumpfile.h"
44 #include "print-tree.h"
45 #include "symbol-summary.h"
46 #include "hsa.h"
47 #include "gomp-constants.h"
48 
/* Return the 16-bit VAL in little-endian byte order, swapping its two bytes
   when this file is compiled for a big-endian host.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* The builtin is only relied upon when GCC_VERSION is at least 4008.  */
  return __builtin_bswap16 (val);
#elif GCC_VERSION < 4008 && defined (WORDS_BIGENDIAN)
  /* Safe but slower default for other compilers: shifts only.  */
  return (val << 8) | (val >> 8);
#else
  /* Little-endian and PDP-endian hosts: nothing to do for 16 bits.  */
  return val;
#endif
}
71 
/* Return the 32-bit VAL in little-endian byte order, rearranging its bytes
   when this file is compiled for a host with a different byte order.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#elif GCC_VERSION >= 4006
  /* __ORDER_PDP_ENDIAN__: only the two 16-bit halves trade places.  */
  return (val << 16) | (val >> 16);
#elif defined (WORDS_BIGENDIAN)
  /* Safe but slower default: swap adjacent bytes first, then the two
     16-bit halves.  */
  uint32_t swapped = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (swapped << 16) | (swapped >> 16);
#else
  return val;
#endif
}
95 
/* Return the 64-bit VAL in little-endian byte order, rearranging its bytes
   when this file is compiled for a host with a different byte order.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#elif GCC_VERSION >= 4006
  /* __ORDER_PDP_ENDIAN__: reverse the order of the four 16-bit words.  */
  return (((val & 0xffffull) << 48)
	  | ((val & 0xffff0000ull) << 16)
	  | ((val & 0xffff00000000ull) >> 16)
	  | ((val & 0xffff000000000000ull) >> 48));
#elif defined (WORDS_BIGENDIAN)
  /* Safe but slower default: swap adjacent bytes, then adjacent 16-bit
     words, then the two 32-bit halves.  */
  uint64_t swapped = (((val & 0xff00ff00ff00ff00ull) >> 8)
		      | ((val & 0x00ff00ff00ff00ffull) << 8));
  swapped = (((swapped & 0xffff0000ffff0000ull) >> 16)
	     | ((swapped & 0x0000ffff0000ffffull) << 16));
  return (swapped << 32) | (swapped >> 32);
#else
  return val;
#endif
}
125 
/* Names used when emitting BRIG: the ELF section and label names, and the
   names of the three BRIG sections (passed to hsa_brig_section::init in
   brig_init).  */

#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME    "hsa_data"
#define BRIG_SECTION_CODE_NAME    "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Capacity in bytes of each chunk buffer allocated for a section.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
137 
/* One chunk of BRIG binary data.  A section keeps its contents in a vector
   of these buffers.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk, in bytes.  */
  unsigned size;

  /* Pointer to the data.  hsa_brig_section::allocate_new_chunk allocates
     BRIG_CHUNK_MAX_SIZE bytes for it.  */
  char *data;
};
148 
/* Structure representing a BRIG section, holding and writing its data.  The
   contents are accumulated in a vector of chunks and addressed by offsets
   that also count the section header.  */

class hsa_brig_section
{
public:
  /* Section name that will be output to the BRIG.  */
  const char *section_name;
  /* Size in bytes of all data stored in the section.  It starts out as the
     header size computed in init, so offsets returned by add include the
     header.  */
  unsigned total_size;
  /* The size of the header of the section including padding.  */
  unsigned header_byte_count;
  /* The size of the header of the section without any padding.  This is the
     section offset that corresponds to the first byte of the first chunk.  */
  unsigned header_byte_delta;

  /* Buffers of binary data, each with room for BRIG_CHUNK_MAX_SIZE bytes.  */
  vec <struct hsa_brig_data_chunk> chunks;

  /* More convenient access to the last chunk from the vector above.  */
  struct hsa_brig_data_chunk *cur_chunk;

  /* Append a new empty chunk and make it the current one.  */
  void allocate_new_chunk ();
  /* Set up an empty section called NAME.  */
  void init (const char *name);
  /* Free all chunk buffers.  */
  void release ();
  /* Write the header and all chunks to the assembly output.  */
  void output ();
  /* Append LEN bytes of DATA and return the offset they were stored at.  */
  unsigned add (const void *data, unsigned len);
  /* Pad the section so that its size is divisible by FACTOR.  */
  void round_size_up (int factor);
  /* Return a pointer to the stored byte at section offset OFFSET.  */
  void *get_ptr_by_offset (unsigned int offset);
};
177 
/* The BRIG data, code and operand sections that are being built.  */
static struct hsa_brig_section brig_data, brig_code, brig_operand;

/* Counter that is reset to zero by brig_init and incremented as directives
   are emitted into the code section.  */
static uint32_t brig_insn_count;

/* True once brig_init has set up the sections; cleared again by
   brig_release_data.  */
static bool brig_initialized = false;

/* Mapping between emitted HSA functions and their offset in code segment.  */
static hash_map<tree, BrigCodeOffset32_t> *function_offsets;

/* Hash map of emitted function declarations.  */
static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;

/* Hash table of emitted internal function declaration offsets.  */
hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;

/* List of sbr instructions.  */
static vec <hsa_insn_sbr *> *switch_instructions;
193 
/* Record of one function call whose callee offset has to be resolved later:
   the called declaration together with an offset into the operand
   section.  */

struct function_linkage_pair
{
  /* Construct a pair describing a call to DECL recorded at operand section
     offset OFF.  */
  function_linkage_pair (tree decl, unsigned int off)
    : function_decl (decl), offset (off) {}

  /* Declaration of called function.  */
  tree function_decl;

  /* Offset in operand section.  */
  unsigned int offset;
};
205 
/* Vector of function calls for which we still need to resolve the offsets of
   the called functions.  */
static auto_vec <function_linkage_pair> function_call_linkage;
208 
209 /* Add a new chunk, allocate data for it and initialize it.  */
210 
211 void
allocate_new_chunk()212 hsa_brig_section::allocate_new_chunk ()
213 {
214   struct hsa_brig_data_chunk new_chunk;
215 
216   new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
217   new_chunk.size = 0;
218   cur_chunk = chunks.safe_push (new_chunk);
219 }
220 
221 /* Initialize the brig section.  */
222 
223 void
init(const char * name)224 hsa_brig_section::init (const char *name)
225 {
226   section_name = name;
227   /* While the following computation is basically wrong, because the intent
228      certainly wasn't to have the first character of name and padding, which
229      are a part of sizeof (BrigSectionHeader), included in the first addend,
230      this is what the disassembler expects.  */
231   total_size = sizeof (BrigSectionHeader) + strlen (section_name);
232   chunks.create (1);
233   allocate_new_chunk ();
234   header_byte_delta = total_size;
235   round_size_up (4);
236   header_byte_count = total_size;
237 }
238 
239 /* Free all data in the section.  */
240 
241 void
release()242 hsa_brig_section::release ()
243 {
244   for (unsigned i = 0; i < chunks.length (); i++)
245     free (chunks[i].data);
246   chunks.release ();
247   cur_chunk = NULL;
248 }
249 
250 /* Write the section to the output file to a section with the name given at
251    initialization.  Switches the output section and does not restore it.  */
252 
253 void
output()254 hsa_brig_section::output ()
255 {
256   struct BrigSectionHeader section_header;
257   char padding[8];
258 
259   section_header.byteCount = lendian64 (total_size);
260   section_header.headerByteCount = lendian32 (header_byte_count);
261   section_header.nameLength = lendian32 (strlen (section_name));
262   assemble_string ((const char *) &section_header, 16);
263   assemble_string (section_name, (section_header.nameLength));
264   memset (&padding, 0, sizeof (padding));
265   /* This is also a consequence of the wrong header size computation described
266      in a comment in hsa_brig_section::init.  */
267   assemble_string (padding, 8);
268   for (unsigned i = 0; i < chunks.length (); i++)
269     assemble_string (chunks[i].data, chunks[i].size);
270 }
271 
272 /* Add to the stream LEN bytes of opaque binary DATA.  Return the offset at
273    which it was stored.  */
274 
275 unsigned
add(const void * data,unsigned len)276 hsa_brig_section::add (const void *data, unsigned len)
277 {
278   unsigned offset = total_size;
279 
280   gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
281   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
282     allocate_new_chunk ();
283 
284   memcpy (cur_chunk->data + cur_chunk->size, data, len);
285   cur_chunk->size += len;
286   total_size += len;
287 
288   return offset;
289 }
290 
291 /* Add padding to section so that its size is divisible by FACTOR.  */
292 
293 void
round_size_up(int factor)294 hsa_brig_section::round_size_up (int factor)
295 {
296   unsigned padding, res = total_size % factor;
297 
298   if (res == 0)
299     return;
300 
301   padding = factor - res;
302   total_size += padding;
303   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
304     {
305       padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
306       cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
307       allocate_new_chunk ();
308     }
309 
310   cur_chunk->size += padding;
311 }
312 
313 /* Return pointer to data by global OFFSET in the section.  */
314 
315 void *
get_ptr_by_offset(unsigned int offset)316 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
317 {
318   gcc_assert (offset < total_size);
319   offset -= header_byte_delta;
320 
321   unsigned i;
322   for (i = 0; offset >= chunks[i].size; i++)
323     offset -= chunks[i].size;
324 
325   return chunks[i].data + offset;
326 }
327 
/* BRIG string data hashing.  One entry of the table of strings that have
   already been emitted to the data section.  */

struct brig_string_slot
{
  /* The string itself, without the prefix character.  */
  const char *s;
  /* Character output just before the string, or zero if there is none.  */
  char prefix;
  /* Length of S, not counting the prefix.  */
  int len;
  /* Offset of the emitted string in the data section.  */
  uint32_t offset;
};
337 
/* Hash table helpers.  Hashing and equality take the string contents, its
   length and the prefix character into account.  */

struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
{
  /* Compute the hash of a slot.  */
  static inline hashval_t hash (const value_type);
  /* Return true if two slots describe the same prefixed string.  */
  static inline bool equal (const value_type, const compare_type);
  /* Free a slot and the string it holds.  */
  static inline void remove (value_type);
};
346 
347 /* Returns a hash code for DS.  Adapted from libiberty's htab_hash_string
348    to support strings that may not end in '\0'.  */
349 
350 inline hashval_t
hash(const value_type ds)351 brig_string_slot_hasher::hash (const value_type ds)
352 {
353   hashval_t r = ds->len;
354   int i;
355 
356   for (i = 0; i < ds->len; i++)
357      r = r * 67 + (unsigned) ds->s[i] - 113;
358   r = r * 67 + (unsigned) ds->prefix - 113;
359   return r;
360 }
361 
362 /* Returns nonzero if DS1 and DS2 are equal.  */
363 
364 inline bool
equal(const value_type ds1,const compare_type ds2)365 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
366 {
367   if (ds1->len == ds2->len)
368     return ds1->prefix == ds2->prefix
369       && memcmp (ds1->s, ds2->s, ds1->len) == 0;
370 
371   return 0;
372 }
373 
374 /* Deallocate memory for DS upon its removal.  */
375 
376 inline void
remove(value_type ds)377 brig_string_slot_hasher::remove (value_type ds)
378 {
379   free (const_cast<char *> (ds->s));
380   free (ds);
381 }
382 
/* Hash table of the strings we have already output, used so that we do not
   duplicate them needlessly.  Created in brig_init and deleted in
   brig_release_data.  */

static hash_table<brig_string_slot_hasher> *brig_string_htab;
386 
387 /* Emit a null terminated string STR to the data section and return its
388    offset in it.  If PREFIX is non-zero, output it just before STR too.
389    Sanitize the string if SANITIZE option is set to true.  */
390 
391 static unsigned
392 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
393 {
394   unsigned slen = strlen (str);
395   unsigned offset, len = slen + (prefix ? 1 : 0);
396   uint32_t hdr_len = lendian32 (len);
397   brig_string_slot s_slot;
398   brig_string_slot **slot;
399   char *str2;
400 
401   str2 = xstrdup (str);
402 
403   if (sanitize)
404     hsa_sanitize_name (str2);
405   s_slot.s = str2;
406   s_slot.len = slen;
407   s_slot.prefix = prefix;
408   s_slot.offset = 0;
409 
410   slot = brig_string_htab->find_slot (&s_slot, INSERT);
411   if (*slot == NULL)
412     {
413       brig_string_slot *new_slot = XCNEW (brig_string_slot);
414 
415       /* In theory we should fill in BrigData but that would mean copying
416 	 the string to a buffer for no reason, so we just emulate it.  */
417       offset = brig_data.add (&hdr_len, sizeof (hdr_len));
418       if (prefix)
419 	brig_data.add (&prefix, 1);
420 
421       brig_data.add (str2, slen);
422       brig_data.round_size_up (4);
423 
424       /* TODO: could use the string we just copied into
425 	 brig_string->cur_chunk */
426       new_slot->s = str2;
427       new_slot->len = slen;
428       new_slot->prefix = prefix;
429       new_slot->offset = offset;
430       *slot = new_slot;
431     }
432   else
433     {
434       offset = (*slot)->offset;
435       free (str2);
436     }
437 
438   return offset;
439 }
440 
/* Linked list of queued operands, i.e. operands that have been assigned an
   offset by enqueue_op.  */

static struct operand_queue
{
  /* First and last operand from the chain of queued operands; the chain is
     linked through m_next.  */
  hsa_op_base *first_op, *last_op;

  /* The offset at which the next operand will be enqueued.  */
  unsigned projected_size;

} op_queue;
452 
453 /* Unless already initialized, initialize infrastructure to produce BRIG.  */
454 
455 static void
brig_init(void)456 brig_init (void)
457 {
458   brig_insn_count = 0;
459 
460   if (brig_initialized)
461     return;
462 
463   brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
464   brig_data.init (BRIG_SECTION_DATA_NAME);
465   brig_code.init (BRIG_SECTION_CODE_NAME);
466   brig_operand.init (BRIG_SECTION_OPERAND_NAME);
467   brig_initialized = true;
468 
469   struct BrigDirectiveModule moddir;
470   memset (&moddir, 0, sizeof (moddir));
471   moddir.base.byteCount = lendian16 (sizeof (moddir));
472 
473   char *modname;
474   if (main_input_filename && *main_input_filename != '\0')
475     {
476       const char *part = strrchr (main_input_filename, '/');
477       if (!part)
478 	part = main_input_filename;
479       else
480 	part++;
481       modname = concat ("&__hsa_module_", part, NULL);
482       char *extension = strchr (modname, '.');
483       if (extension)
484 	*extension = '\0';
485 
486       /* As in LTO mode, we have to emit a different module names.  */
487       if (flag_ltrans)
488 	{
489 	  part = strrchr (asm_file_name, '/');
490 	  if (!part)
491 	    part = asm_file_name;
492 	  else
493 	    part++;
494 	  char *modname2;
495 	  modname2 = xasprintf ("%s_%s", modname, part);
496 	  free (modname);
497 	  modname = modname2;
498 	}
499 
500       hsa_sanitize_name (modname);
501       moddir.name = brig_emit_string (modname);
502       free (modname);
503     }
504   else
505     moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
506   moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
507   moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
508   moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
509   moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
510   if (hsa_machine_large_p ())
511     moddir.machineModel = BRIG_MACHINE_LARGE;
512   else
513     moddir.machineModel = BRIG_MACHINE_SMALL;
514   moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
515   brig_code.add (&moddir, sizeof (moddir));
516 }
517 
518 /* Free all BRIG data.  */
519 
520 static void
brig_release_data(void)521 brig_release_data (void)
522 {
523   delete brig_string_htab;
524   brig_data.release ();
525   brig_code.release ();
526   brig_operand.release ();
527 
528   brig_initialized = 0;
529 }
530 
531 /* Enqueue operation OP.  Return the offset at which it will be stored.  */
532 
533 static unsigned int
enqueue_op(hsa_op_base * op)534 enqueue_op (hsa_op_base *op)
535 {
536   unsigned ret;
537 
538   if (op->m_brig_op_offset)
539     return op->m_brig_op_offset;
540 
541   ret = op_queue.projected_size;
542   op->m_brig_op_offset = op_queue.projected_size;
543 
544   if (!op_queue.first_op)
545     op_queue.first_op = op;
546   else
547     op_queue.last_op->m_next = op;
548   op_queue.last_op = op;
549 
550   if (is_a <hsa_op_immed *> (op))
551     op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
552   else if (is_a <hsa_op_reg *> (op))
553     op_queue.projected_size += sizeof (struct BrigOperandRegister);
554   else if (is_a <hsa_op_address *> (op))
555     op_queue.projected_size += sizeof (struct BrigOperandAddress);
556   else if (is_a <hsa_op_code_ref *> (op))
557     op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
558   else if (is_a <hsa_op_code_list *> (op))
559     op_queue.projected_size += sizeof (struct BrigOperandCodeList);
560   else if (is_a <hsa_op_operand_list *> (op))
561     op_queue.projected_size += sizeof (struct BrigOperandOperandList);
562   else
563     gcc_unreachable ();
564   return ret;
565 }
566 
567 
568 /* Emit directive describing a symbol if it has not been emitted already.
569    Return the offset of the directive.  */
570 
571 static unsigned
emit_directive_variable(struct hsa_symbol * symbol)572 emit_directive_variable (struct hsa_symbol *symbol)
573 {
574   struct BrigDirectiveVariable dirvar;
575   unsigned name_offset;
576   static unsigned res_name_offset;
577 
578   if (symbol->m_directive_offset)
579     return symbol->m_directive_offset;
580 
581   memset (&dirvar, 0, sizeof (dirvar));
582   dirvar.base.byteCount = lendian16 (sizeof (dirvar));
583   dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
584   dirvar.allocation = symbol->m_allocation;
585 
586   char prefix = symbol->m_global_scope_p ? '&' : '%';
587 
588   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
589     {
590       if (res_name_offset == 0)
591 	res_name_offset = brig_emit_string (symbol->m_name, '%');
592       name_offset = res_name_offset;
593     }
594   else if (symbol->m_name)
595     name_offset = brig_emit_string (symbol->m_name, prefix);
596   else
597     {
598       char buf[64];
599       snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
600 		symbol->m_name_number);
601       name_offset = brig_emit_string (buf, prefix);
602     }
603 
604   dirvar.name = lendian32 (name_offset);
605   dirvar.init = 0;
606   dirvar.type = lendian16 (symbol->m_type);
607   dirvar.segment = symbol->m_segment;
608   dirvar.align = symbol->m_align;
609   dirvar.linkage = symbol->m_linkage;
610   dirvar.dim.lo = symbol->m_dim;
611   dirvar.dim.hi = symbol->m_dim >> 32;
612 
613   /* Global variables are just declared and linked via HSA runtime.  */
614   if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
615     dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
616   dirvar.reserved = 0;
617 
618   if (symbol->m_cst_value)
619     {
620       dirvar.modifier |= BRIG_VARIABLE_CONST;
621       dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
622     }
623 
624   symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
625   return symbol->m_directive_offset;
626 }
627 
/* Emit directives describing either a function declaration or
   definition F: the executable directive itself followed by the variable
   directives of its output argument, its input arguments and, for
   definitions, its spill symbols and private variables.  IS_DECLARATION
   controls whether an offset already recorded in function_offsets may be
   overwritten.  Return a pointer to the emitted executable directive inside
   the code section so that the caller can update it later.  */

static BrigDirectiveExecutable *
emit_function_directives (hsa_function_representation *f, bool is_declaration)
{
  struct BrigDirectiveExecutable fndir;
  unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
  int count = 0;
  BrigDirectiveExecutable *ptr_to_fndir;
  hsa_symbol *sym;

  /* Global symbols of a definition are emitted before the executable
     directive itself.  */
  if (!f->m_declaration_p)
    for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
      {
	gcc_assert (!sym->m_emitted_to_brig);
	sym->m_emitted_to_brig = true;
	emit_directive_variable (sym);
	brig_insn_count++;
      }

  name_offset = brig_emit_string (f->m_name, '&');
  /* Compute in advance where the directives that follow the executable
     directive will end up: first the optional output argument, then the
     input arguments...  */
  inarg_off = brig_code.total_size + sizeof (fndir)
    + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
  scoped_off = inarg_off
    + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);

  /* ...and then, for definitions only, spill symbols and private
     variables.  */
  if (!f->m_declaration_p)
    {
      count += f->m_spill_symbols.length ();
      count += f->m_private_variables.length ();
    }

  next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);

  memset (&fndir, 0, sizeof (fndir));
  fndir.base.byteCount = lendian16 (sizeof (fndir));
  fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
			       : BRIG_KIND_DIRECTIVE_FUNCTION);
  fndir.name = lendian32 (name_offset);
  fndir.inArgCount = lendian16 (f->m_input_args.length ());
  fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
  fndir.firstInArg = lendian32 (inarg_off);
  fndir.firstCodeBlockEntry = lendian32 (scoped_off);
  fndir.nextModuleEntry = lendian32 (next_toplev_off);
  fndir.linkage = f->get_linkage ();
  if (!f->m_declaration_p)
    fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
  memset (&fndir.reserved, 0, sizeof (fndir.reserved));

  /* Once we put the offset of a definition into function_offsets, we should
     not overwrite it with the offset of a declaration of the function.  */
  if (f->m_internal_fn == NULL)
    {
      if (!function_offsets->get (f->m_decl) || !is_declaration)
	function_offsets->put (f->m_decl, brig_code.total_size);
    }
  else
    {
      /* Internal function.  NOTE(review): the slot is overwritten without
	 checking whether it was already occupied - confirm callers never
	 emit the same internal function twice.  */
      hsa_internal_fn **slot
	= hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
      hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
      int_fn->m_offset = brig_code.total_size;
      *slot = int_fn;
    }

  brig_code.add (&fndir, sizeof (fndir));
  /* terrible hack: we need to set instCount after we emit all
     insns, but we need to emit directive in order, and we emit directives
     during insn emitting.  So we need to emit the FUNCTION directive
     early, then the insns, and then we need to set instCount, so remember
     a pointer to it, in some horrible way.  cur_chunk.data+size points
     directly to after fndir here.  */
  ptr_to_fndir
      = (BrigDirectiveExecutable *)(brig_code.cur_chunk->data
				    + brig_code.cur_chunk->size
				    - sizeof (fndir));

  /* The order of the following directives must match the offsets computed
     above.  */
  if (f->m_output_arg)
    emit_directive_variable (f->m_output_arg);
  for (unsigned i = 0; i < f->m_input_args.length (); i++)
    emit_directive_variable (f->m_input_args[i]);

  if (!f->m_declaration_p)
    {
      for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
	{
	  emit_directive_variable (sym);
	  brig_insn_count++;
	}
      for (unsigned i = 0; i < f->m_private_variables.length (); i++)
	{
	  emit_directive_variable (f->m_private_variables[i]);
	  brig_insn_count++;
	}
    }

  return ptr_to_fndir;
}
728 
729 /* Emit a label directive for the given HBB.  We assume it is about to start on
730    the current offset in the code section.  */
731 
732 static void
emit_bb_label_directive(hsa_bb * hbb)733 emit_bb_label_directive (hsa_bb *hbb)
734 {
735   struct BrigDirectiveLabel lbldir;
736 
737   lbldir.base.byteCount = lendian16 (sizeof (lbldir));
738   lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
739   char buf[32];
740   snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
741 	    hbb->m_index);
742   lbldir.name = lendian32 (brig_emit_string (buf, '@'));
743 
744   hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
745 						       sizeof (lbldir));
746   brig_insn_count++;
747 }
748 
749 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
750    holding such, for constants and registers.  */
751 
752 static BrigType16_t
regtype_for_type(BrigType16_t t)753 regtype_for_type (BrigType16_t t)
754 {
755   switch (t)
756     {
757     case BRIG_TYPE_B1:
758       return BRIG_TYPE_B1;
759 
760     case BRIG_TYPE_U8:
761     case BRIG_TYPE_U16:
762     case BRIG_TYPE_U32:
763     case BRIG_TYPE_S8:
764     case BRIG_TYPE_S16:
765     case BRIG_TYPE_S32:
766     case BRIG_TYPE_B8:
767     case BRIG_TYPE_B16:
768     case BRIG_TYPE_B32:
769     case BRIG_TYPE_F16:
770     case BRIG_TYPE_F32:
771     case BRIG_TYPE_U8X4:
772     case BRIG_TYPE_U16X2:
773     case BRIG_TYPE_S8X4:
774     case BRIG_TYPE_S16X2:
775     case BRIG_TYPE_F16X2:
776       return BRIG_TYPE_B32;
777 
778     case BRIG_TYPE_U64:
779     case BRIG_TYPE_S64:
780     case BRIG_TYPE_F64:
781     case BRIG_TYPE_B64:
782     case BRIG_TYPE_U8X8:
783     case BRIG_TYPE_U16X4:
784     case BRIG_TYPE_U32X2:
785     case BRIG_TYPE_S8X8:
786     case BRIG_TYPE_S16X4:
787     case BRIG_TYPE_S32X2:
788     case BRIG_TYPE_F16X4:
789     case BRIG_TYPE_F32X2:
790       return BRIG_TYPE_B64;
791 
792     case BRIG_TYPE_B128:
793     case BRIG_TYPE_U8X16:
794     case BRIG_TYPE_U16X8:
795     case BRIG_TYPE_U32X4:
796     case BRIG_TYPE_U64X2:
797     case BRIG_TYPE_S8X16:
798     case BRIG_TYPE_S16X8:
799     case BRIG_TYPE_S32X4:
800     case BRIG_TYPE_S64X2:
801     case BRIG_TYPE_F16X8:
802     case BRIG_TYPE_F32X4:
803     case BRIG_TYPE_F64X2:
804       return BRIG_TYPE_B128;
805 
806     default:
807       gcc_unreachable ();
808     }
809 }
810 
811 /* Return the length of the BRIG type TYPE that is going to be streamed out as
812    an immediate constant (so it must not be B1).  */
813 
814 unsigned
hsa_get_imm_brig_type_len(BrigType16_t type)815 hsa_get_imm_brig_type_len (BrigType16_t type)
816 {
817   BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
818   BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
819 
820   switch (pack_type)
821     {
822     case BRIG_TYPE_PACK_NONE:
823       break;
824     case BRIG_TYPE_PACK_32:
825       return 4;
826     case BRIG_TYPE_PACK_64:
827       return 8;
828     case BRIG_TYPE_PACK_128:
829       return 16;
830     default:
831       gcc_unreachable ();
832     }
833 
834   switch (base_type)
835     {
836     case BRIG_TYPE_U8:
837     case BRIG_TYPE_S8:
838     case BRIG_TYPE_B8:
839       return 1;
840     case BRIG_TYPE_U16:
841     case BRIG_TYPE_S16:
842     case BRIG_TYPE_F16:
843     case BRIG_TYPE_B16:
844       return 2;
845     case BRIG_TYPE_U32:
846     case BRIG_TYPE_S32:
847     case BRIG_TYPE_F32:
848     case BRIG_TYPE_B32:
849       return 4;
850     case BRIG_TYPE_U64:
851     case BRIG_TYPE_S64:
852     case BRIG_TYPE_F64:
853     case BRIG_TYPE_B64:
854       return 8;
855     case BRIG_TYPE_B128:
856       return 16;
857     default:
858       gcc_unreachable ();
859     }
860 }
861 
862 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
863    If NEED_LEN is not equal to zero, shrink or extend the value
864    to NEED_LEN bytes.  Return how many bytes were written.  */
865 
866 static int
emit_immediate_scalar_to_buffer(tree value,char * data,unsigned need_len)867 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
868 {
869   union hsa_bytes bytes;
870 
871   memset (&bytes, 0, sizeof (bytes));
872   tree type = TREE_TYPE (value);
873   gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
874 
875   unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
876   if (INTEGRAL_TYPE_P (type)
877       || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
878     switch (data_len)
879       {
880       case 1:
881 	bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
882 	break;
883       case 2:
884 	bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
885 	break;
886       case 4:
887 	bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
888 	break;
889       case 8:
890 	bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
891 	break;
892       default:
893 	gcc_unreachable ();
894       }
895   else if (SCALAR_FLOAT_TYPE_P (type))
896     {
897       if (data_len == 2)
898 	{
899 	  sorry ("Support for HSA does not implement immediate 16 bit FPU "
900 		 "operands");
901 	  return 2;
902 	}
903       unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
904       /* There are always 32 bits in each long, no matter the size of
905 	 the hosts long.  */
906       long tmp[6];
907 
908       real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
909 
910       if (int_len == 4)
911 	bytes.b32 = (uint32_t) tmp[0];
912       else
913 	{
914 	  bytes.b64 = (uint64_t)(uint32_t) tmp[1];
915 	  bytes.b64 <<= 32;
916 	  bytes.b64 |= (uint32_t) tmp[0];
917 	}
918     }
919   else
920     gcc_unreachable ();
921 
922   int len;
923   if (need_len == 0)
924     len = data_len;
925   else
926     len = need_len;
927 
928   memcpy (data, &bytes, len);
929   return len;
930 }
931 
/* Produce the byte representation of this immediate operand for BRIG.
   Return a newly allocated buffer holding it and store its size in bytes to
   *BRIG_REPR_SIZE.  The buffer is owned by the caller, which should free it
   (as emit_immediate_operand does).  The value is taken from m_tree_value
   if there is one and from m_int_value otherwise.  */

char *
hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
{
  char *brig_repr;
  /* The default size is given by the BRIG type of the operand.  */
  *brig_repr_size = hsa_get_imm_brig_type_len (m_type);

  if (m_tree_value != NULL_TREE)
    {
      /* Update brig_repr_size for special tree values.  */
      if (TREE_CODE (m_tree_value) == STRING_CST)
	*brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
      else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
	*brig_repr_size
	  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));

      /* Number of bytes of the buffer that have not been filled yet.  */
      unsigned total_len = *brig_repr_size;

      /* As we can have a constructor with fewer elements, fill the memory
	 with zeros.  */
      brig_repr = XCNEWVEC (char, total_len);
      char *p = brig_repr;

      if (TREE_CODE (m_tree_value) == VECTOR_CST)
	{
	  /* Emit the elements one after another, each in its natural
	     size.  */
	  int i, num = VECTOR_CST_NELTS (m_tree_value);
	  for (i = 0; i < num; i++)
	    {
	      tree v = VECTOR_CST_ELT (m_tree_value, i);
	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
	      total_len -= actual;
	      p += actual;
	    }
	  /* Vectors should have the exact size.  */
	  gcc_assert (total_len == 0);
	}
      else if (TREE_CODE (m_tree_value) == STRING_CST)
	memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
		TREE_STRING_LENGTH (m_tree_value));
      else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
	{
	  /* The real part occupies the first half of the buffer and the
	     imaginary part the second one.  */
	  gcc_assert (total_len % 2 == 0);
	  unsigned actual;
	  actual
	    = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
					       total_len / 2);

	  gcc_assert (actual == total_len / 2);
	  p += actual;

	  actual
	    = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
					       total_len / 2);
	  gcc_assert (actual == total_len / 2);
	}
      else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
	{
	  /* NOTE(review): unlike for vectors, nothing here checks that the
	     emitted elements fit into the buffer - presumably guaranteed by
	     the callers, confirm.  */
	  unsigned len = vec_safe_length (CONSTRUCTOR_ELTS (m_tree_value));
	  for (unsigned i = 0; i < len; i++)
	    {
	      tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
	      total_len -= actual;
	      p += actual;
	    }
	}
      else
	/* A plain scalar, shrunk or extended to the size of the BRIG
	   type.  */
	emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
    }
  else
    {
      /* No tree value, emit the integer value in the size of the BRIG
	 type.  */
      hsa_bytes bytes;

      switch (*brig_repr_size)
	{
	case 1:
	  bytes.b8 = (uint8_t) m_int_value;
	  break;
	case 2:
	  bytes.b16 = (uint16_t) m_int_value;
	  break;
	case 4:
	  bytes.b32 = (uint32_t) m_int_value;
	  break;
	case 8:
	  bytes.b64 = (uint64_t) m_int_value;
	  break;
	default:
	  gcc_unreachable ();
	}

      brig_repr = XNEWVEC (char, *brig_repr_size);
      memcpy (brig_repr, &bytes, *brig_repr_size);
    }

  return brig_repr;
}
1028 
1029 /* Emit an immediate BRIG operand IMM.  The BRIG type of the immediate might
1030    have been massaged to comply with various HSA/BRIG type requirements, so the
1031    only important aspect of that is the length (because HSAIL might expect
1032    smaller constants or become bit-data).  The data should be represented
1033    according to what is in the tree representation.  */
1034 
1035 static void
emit_immediate_operand(hsa_op_immed * imm)1036 emit_immediate_operand (hsa_op_immed *imm)
1037 {
1038   unsigned brig_repr_size;
1039   char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1040   struct BrigOperandConstantBytes out;
1041 
1042   memset (&out, 0, sizeof (out));
1043   out.base.byteCount = lendian16 (sizeof (out));
1044   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1045   uint32_t byteCount = lendian32 (brig_repr_size);
1046   out.type = lendian16 (imm->m_type);
1047   out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1048   brig_operand.add (&out, sizeof (out));
1049   brig_data.add (brig_repr, brig_repr_size);
1050   brig_data.round_size_up (4);
1051 
1052   free (brig_repr);
1053 }
1054 
1055 /* Emit a register BRIG operand REG.  */
1056 
1057 static void
emit_register_operand(hsa_op_reg * reg)1058 emit_register_operand (hsa_op_reg *reg)
1059 {
1060   struct BrigOperandRegister out;
1061 
1062   out.base.byteCount = lendian16 (sizeof (out));
1063   out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1064   out.regNum = lendian32 (reg->m_hard_num);
1065 
1066   switch (regtype_for_type (reg->m_type))
1067     {
1068     case BRIG_TYPE_B32:
1069       out.regKind = BRIG_REGISTER_KIND_SINGLE;
1070       break;
1071     case BRIG_TYPE_B64:
1072       out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1073       break;
1074     case BRIG_TYPE_B128:
1075       out.regKind = BRIG_REGISTER_KIND_QUAD;
1076       break;
1077     case BRIG_TYPE_B1:
1078       out.regKind = BRIG_REGISTER_KIND_CONTROL;
1079       break;
1080     default:
1081       gcc_unreachable ();
1082     }
1083 
1084   brig_operand.add (&out, sizeof (out));
1085 }
1086 
1087 /* Emit an address BRIG operand ADDR.  */
1088 
1089 static void
emit_address_operand(hsa_op_address * addr)1090 emit_address_operand (hsa_op_address *addr)
1091 {
1092   struct BrigOperandAddress out;
1093 
1094   out.base.byteCount = lendian16 (sizeof (out));
1095   out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1096   out.symbol = addr->m_symbol
1097     ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1098   out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1099 
1100   if (sizeof (addr->m_imm_offset) == 8)
1101     {
1102       out.offset.lo = lendian32 (addr->m_imm_offset);
1103       out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1104     }
1105   else
1106     {
1107       gcc_assert (sizeof (addr->m_imm_offset) == 4);
1108       out.offset.lo = lendian32 (addr->m_imm_offset);
1109       out.offset.hi = 0;
1110     }
1111 
1112   brig_operand.add (&out, sizeof (out));
1113 }
1114 
1115 /* Emit a code reference operand REF.  */
1116 
1117 static void
emit_code_ref_operand(hsa_op_code_ref * ref)1118 emit_code_ref_operand (hsa_op_code_ref *ref)
1119 {
1120   struct BrigOperandCodeRef out;
1121 
1122   out.base.byteCount = lendian16 (sizeof (out));
1123   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1124   out.ref = lendian32 (ref->m_directive_offset);
1125   brig_operand.add (&out, sizeof (out));
1126 }
1127 
1128 /* Emit a code list operand CODE_LIST.  */
1129 
1130 static void
emit_code_list_operand(hsa_op_code_list * code_list)1131 emit_code_list_operand (hsa_op_code_list *code_list)
1132 {
1133   struct BrigOperandCodeList out;
1134   unsigned args = code_list->m_offsets.length ();
1135 
1136   for (unsigned i = 0; i < args; i++)
1137     gcc_assert (code_list->m_offsets[i]);
1138 
1139   out.base.byteCount = lendian16 (sizeof (out));
1140   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1141 
1142   uint32_t byteCount = lendian32 (4 * args);
1143 
1144   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1145   brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1146   brig_data.round_size_up (4);
1147   brig_operand.add (&out, sizeof (out));
1148 }
1149 
1150 /* Emit an operand list operand OPERAND_LIST.  */
1151 
1152 static void
emit_operand_list_operand(hsa_op_operand_list * operand_list)1153 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1154 {
1155   struct BrigOperandOperandList out;
1156   unsigned args = operand_list->m_offsets.length ();
1157 
1158   for (unsigned i = 0; i < args; i++)
1159     gcc_assert (operand_list->m_offsets[i]);
1160 
1161   out.base.byteCount = lendian16 (sizeof (out));
1162   out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1163 
1164   uint32_t byteCount = lendian32 (4 * args);
1165 
1166   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1167   brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1168   brig_data.round_size_up (4);
1169   brig_operand.add (&out, sizeof (out));
1170 }
1171 
1172 /* Emit all operands queued for writing.  */
1173 
1174 static void
emit_queued_operands(void)1175 emit_queued_operands (void)
1176 {
1177   for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1178     {
1179       gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1180       if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1181 	emit_immediate_operand (imm);
1182       else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1183 	emit_register_operand (reg);
1184       else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1185 	emit_address_operand (addr);
1186       else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1187 	emit_code_ref_operand (ref);
1188       else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1189 	emit_code_list_operand (code_list);
1190       else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1191 	emit_operand_list_operand (l);
1192       else
1193 	gcc_unreachable ();
1194     }
1195 }
1196 
1197 /* Emit directives describing the function that is used for
1198    a function declaration.  */
1199 
1200 static BrigDirectiveExecutable *
emit_function_declaration(tree decl)1201 emit_function_declaration (tree decl)
1202 {
1203   hsa_function_representation *f = hsa_generate_function_declaration (decl);
1204 
1205   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1206   emit_queued_operands ();
1207 
1208   delete f;
1209 
1210   return e;
1211 }
1212 
1213 /* Emit directives describing the function that is used for
1214    an internal function declaration.  */
1215 
1216 static BrigDirectiveExecutable *
emit_internal_fn_decl(hsa_internal_fn * fn)1217 emit_internal_fn_decl (hsa_internal_fn *fn)
1218 {
1219   hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1220 
1221   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1222   emit_queued_operands ();
1223 
1224   delete f;
1225 
1226   return e;
1227 }
1228 
1229 /* Enqueue all operands of INSN and return offset to BRIG data section
1230    to list of operand offsets.  */
1231 
1232 static unsigned
emit_insn_operands(hsa_insn_basic * insn)1233 emit_insn_operands (hsa_insn_basic *insn)
1234 {
1235   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1236     operand_offsets;
1237 
1238   unsigned l = insn->operand_count ();
1239   operand_offsets.safe_grow (l);
1240 
1241   for (unsigned i = 0; i < l; i++)
1242     operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1243 
1244   /* We have N operands so use 4 * N for the byte_count.  */
1245   uint32_t byte_count = lendian32 (4 * l);
1246 
1247   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1248   brig_data.add (operand_offsets.address (),
1249 		 l * sizeof (BrigOperandOffset32_t));
1250 
1251   brig_data.round_size_up (4);
1252 
1253   return offset;
1254 }
1255 
1256 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1257    to BRIG data section to list of operand offsets.  */
1258 
1259 static unsigned
1260 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1261 	       hsa_op_base *op2 = NULL)
1262 {
1263   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1264     operand_offsets;
1265 
1266   gcc_checking_assert (op0 != NULL);
1267   operand_offsets.safe_push (enqueue_op (op0));
1268 
1269   if (op1 != NULL)
1270     {
1271       operand_offsets.safe_push (enqueue_op (op1));
1272       if (op2 != NULL)
1273 	operand_offsets.safe_push (enqueue_op (op2));
1274     }
1275 
1276   unsigned l = operand_offsets.length ();
1277 
1278   /* We have N operands so use 4 * N for the byte_count.  */
1279   uint32_t byte_count = lendian32 (4 * l);
1280 
1281   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1282   brig_data.add (operand_offsets.address (),
1283 		 l * sizeof (BrigOperandOffset32_t));
1284 
1285   brig_data.round_size_up (4);
1286 
1287   return offset;
1288 }
1289 
1290 /* Emit an HSA memory instruction and all necessary directives, schedule
1291    necessary operands for writing.  */
1292 
1293 static void
emit_memory_insn(hsa_insn_mem * mem)1294 emit_memory_insn (hsa_insn_mem *mem)
1295 {
1296   struct BrigInstMem repr;
1297   gcc_checking_assert (mem->operand_count () == 2);
1298 
1299   hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1300 
1301   /* This is necessary because of the erroneous typedef of
1302      BrigMemoryModifier8_t which introduces padding which may then contain
1303      random stuff (which we do not want so that we can test things don't
1304      change).  */
1305   memset (&repr, 0, sizeof (repr));
1306   repr.base.base.byteCount = lendian16 (sizeof (repr));
1307   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1308   repr.base.opcode = lendian16 (mem->m_opcode);
1309   repr.base.type = lendian16 (mem->m_type);
1310   repr.base.operands = lendian32 (emit_insn_operands (mem));
1311 
1312   if (addr->m_symbol)
1313     repr.segment = addr->m_symbol->m_segment;
1314   else
1315     repr.segment = BRIG_SEGMENT_FLAT;
1316   repr.modifier = 0;
1317   repr.equivClass = mem->m_equiv_class;
1318   repr.align = mem->m_align;
1319   if (mem->m_opcode == BRIG_OPCODE_LD)
1320     repr.width = BRIG_WIDTH_1;
1321   else
1322     repr.width = BRIG_WIDTH_NONE;
1323   memset (&repr.reserved, 0, sizeof (repr.reserved));
1324   brig_code.add (&repr, sizeof (repr));
1325   brig_insn_count++;
1326 }
1327 
1328 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1329    necessary operands for writing.  */
1330 
1331 static void
emit_signal_insn(hsa_insn_signal * mem)1332 emit_signal_insn (hsa_insn_signal *mem)
1333 {
1334   struct BrigInstSignal repr;
1335 
1336   /* This is necessary because of the erroneous typedef of
1337      BrigMemoryModifier8_t which introduces padding which may then contain
1338      random stuff (which we do not want so that we can test things don't
1339      change).  */
1340   memset (&repr, 0, sizeof (repr));
1341   repr.base.base.byteCount = lendian16 (sizeof (repr));
1342   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1343   repr.base.opcode = lendian16 (mem->m_opcode);
1344   repr.base.type = lendian16 (mem->m_type);
1345   repr.base.operands = lendian32 (emit_insn_operands (mem));
1346 
1347   repr.memoryOrder = mem->m_memoryorder;
1348   repr.signalOperation = mem->m_atomicop;
1349   repr.signalType = BRIG_TYPE_SIG64;
1350 
1351   brig_code.add (&repr, sizeof (repr));
1352   brig_insn_count++;
1353 }
1354 
1355 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1356    necessary operands for writing.  */
1357 
1358 static void
emit_atomic_insn(hsa_insn_atomic * mem)1359 emit_atomic_insn (hsa_insn_atomic *mem)
1360 {
1361   struct BrigInstAtomic repr;
1362 
1363   /* Either operand[0] or operand[1] must be an address operand.  */
1364   hsa_op_address *addr = NULL;
1365   if (is_a <hsa_op_address *> (mem->get_op (0)))
1366     addr = as_a <hsa_op_address *> (mem->get_op (0));
1367   else
1368     addr = as_a <hsa_op_address *> (mem->get_op (1));
1369 
1370   /* This is necessary because of the erroneous typedef of
1371      BrigMemoryModifier8_t which introduces padding which may then contain
1372      random stuff (which we do not want so that we can test things don't
1373      change).  */
1374   memset (&repr, 0, sizeof (repr));
1375   repr.base.base.byteCount = lendian16 (sizeof (repr));
1376   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1377   repr.base.opcode = lendian16 (mem->m_opcode);
1378   repr.base.type = lendian16 (mem->m_type);
1379   repr.base.operands = lendian32 (emit_insn_operands (mem));
1380 
1381   if (addr->m_symbol)
1382     repr.segment = addr->m_symbol->m_segment;
1383   else
1384     repr.segment = BRIG_SEGMENT_FLAT;
1385   repr.memoryOrder = mem->m_memoryorder;
1386   repr.memoryScope = mem->m_memoryscope;
1387   repr.atomicOperation = mem->m_atomicop;
1388 
1389   brig_code.add (&repr, sizeof (repr));
1390   brig_insn_count++;
1391 }
1392 
1393 /* Emit an HSA LDA instruction and all necessary directives, schedule
1394    necessary operands for writing.  */
1395 
1396 static void
emit_addr_insn(hsa_insn_basic * insn)1397 emit_addr_insn (hsa_insn_basic *insn)
1398 {
1399   struct BrigInstAddr repr;
1400 
1401   hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1402 
1403   repr.base.base.byteCount = lendian16 (sizeof (repr));
1404   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1405   repr.base.opcode = lendian16 (insn->m_opcode);
1406   repr.base.type = lendian16 (insn->m_type);
1407   repr.base.operands = lendian32 (emit_insn_operands (insn));
1408 
1409   if (addr->m_symbol)
1410     repr.segment = addr->m_symbol->m_segment;
1411   else
1412     repr.segment = BRIG_SEGMENT_FLAT;
1413   memset (&repr.reserved, 0, sizeof (repr.reserved));
1414 
1415   brig_code.add (&repr, sizeof (repr));
1416   brig_insn_count++;
1417 }
1418 
1419 /* Emit an HSA segment conversion instruction and all necessary directives,
1420    schedule necessary operands for writing.  */
1421 
1422 static void
emit_segment_insn(hsa_insn_seg * seg)1423 emit_segment_insn (hsa_insn_seg *seg)
1424 {
1425   struct BrigInstSegCvt repr;
1426 
1427   repr.base.base.byteCount = lendian16 (sizeof (repr));
1428   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1429   repr.base.opcode = lendian16 (seg->m_opcode);
1430   repr.base.type = lendian16 (seg->m_type);
1431   repr.base.operands = lendian32 (emit_insn_operands (seg));
1432   repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1433   repr.segment = seg->m_segment;
1434   repr.modifier = 0;
1435 
1436   brig_code.add (&repr, sizeof (repr));
1437 
1438   brig_insn_count++;
1439 }
1440 
1441 /* Emit an HSA alloca instruction and all necessary directives,
1442    schedule necessary operands for writing.  */
1443 
1444 static void
emit_alloca_insn(hsa_insn_alloca * alloca)1445 emit_alloca_insn (hsa_insn_alloca *alloca)
1446 {
1447   struct BrigInstMem repr;
1448   gcc_checking_assert (alloca->operand_count () == 2);
1449 
1450   /* This is necessary because of the erroneous typedef of
1451      BrigMemoryModifier8_t which introduces padding which may then contain
1452      random stuff (which we do not want so that we can test things don't
1453      change).  */
1454   memset (&repr, 0, sizeof (repr));
1455   repr.base.base.byteCount = lendian16 (sizeof (repr));
1456   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1457   repr.base.opcode = lendian16 (alloca->m_opcode);
1458   repr.base.type = lendian16 (alloca->m_type);
1459   repr.base.operands = lendian32 (emit_insn_operands (alloca));
1460   repr.segment = BRIG_SEGMENT_PRIVATE;
1461   repr.modifier = 0;
1462   repr.equivClass = 0;
1463   repr.align = alloca->m_align;
1464   repr.width = BRIG_WIDTH_NONE;
1465   memset (&repr.reserved, 0, sizeof (repr.reserved));
1466   brig_code.add (&repr, sizeof (repr));
1467   brig_insn_count++;
1468 }
1469 
1470 /* Emit an HSA comparison instruction and all necessary directives,
1471    schedule necessary operands for writing.  */
1472 
1473 static void
emit_cmp_insn(hsa_insn_cmp * cmp)1474 emit_cmp_insn (hsa_insn_cmp *cmp)
1475 {
1476   struct BrigInstCmp repr;
1477 
1478   memset (&repr, 0, sizeof (repr));
1479   repr.base.base.byteCount = lendian16 (sizeof (repr));
1480   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1481   repr.base.opcode = lendian16 (cmp->m_opcode);
1482   repr.base.type = lendian16 (cmp->m_type);
1483   repr.base.operands = lendian32 (emit_insn_operands (cmp));
1484 
1485   if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1486     repr.sourceType
1487       = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1488   else
1489     repr.sourceType
1490       = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1491   repr.modifier = 0;
1492   repr.compare = cmp->m_compare;
1493   repr.pack = 0;
1494 
1495   brig_code.add (&repr, sizeof (repr));
1496   brig_insn_count++;
1497 }
1498 
1499 /* Emit an HSA branching instruction and all necessary directives, schedule
1500    necessary operands for writing.  */
1501 
1502 static void
emit_branch_insn(hsa_insn_br * br)1503 emit_branch_insn (hsa_insn_br *br)
1504 {
1505   struct BrigInstBr repr;
1506 
1507   basic_block target = NULL;
1508   edge_iterator ei;
1509   edge e;
1510 
1511   /* At the moment we only handle direct conditional jumps.  */
1512   gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1513   repr.base.base.byteCount = lendian16 (sizeof (repr));
1514   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1515   repr.base.opcode = lendian16 (br->m_opcode);
1516   repr.width = BRIG_WIDTH_1;
1517   /* For Conditional jumps the type is always B1.  */
1518   repr.base.type = lendian16 (BRIG_TYPE_B1);
1519 
1520   FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1521     if (e->flags & EDGE_TRUE_VALUE)
1522       {
1523 	target = e->dest;
1524 	break;
1525       }
1526   gcc_assert (target);
1527 
1528   repr.base.operands
1529     = lendian32 (emit_operands (br->get_op (0),
1530 				&hsa_bb_for_bb (target)->m_label_ref));
1531   memset (&repr.reserved, 0, sizeof (repr.reserved));
1532 
1533   brig_code.add (&repr, sizeof (repr));
1534   brig_insn_count++;
1535 }
1536 
1537 /* Emit an HSA unconditional jump branching instruction that points to
1538    a label REFERENCE.  */
1539 
1540 static void
emit_unconditional_jump(hsa_op_code_ref * reference)1541 emit_unconditional_jump (hsa_op_code_ref *reference)
1542 {
1543   struct BrigInstBr repr;
1544 
1545   repr.base.base.byteCount = lendian16 (sizeof (repr));
1546   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1547   repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1548   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1549   /* Direct branches to labels must be width(all).  */
1550   repr.width = BRIG_WIDTH_ALL;
1551 
1552   repr.base.operands = lendian32 (emit_operands (reference));
1553   memset (&repr.reserved, 0, sizeof (repr.reserved));
1554   brig_code.add (&repr, sizeof (repr));
1555   brig_insn_count++;
1556 }
1557 
1558 /* Emit an HSA switch jump instruction that uses a jump table to
1559    jump to a destination label.  */
1560 
1561 static void
emit_switch_insn(hsa_insn_sbr * sbr)1562 emit_switch_insn (hsa_insn_sbr *sbr)
1563 {
1564   struct BrigInstBr repr;
1565 
1566   gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1567   repr.base.base.byteCount = lendian16 (sizeof (repr));
1568   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1569   repr.base.opcode = lendian16 (sbr->m_opcode);
1570   repr.width = BRIG_WIDTH_1;
1571   /* For Conditional jumps the type is always B1.  */
1572   hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1573   repr.base.type = lendian16 (index->m_type);
1574   repr.base.operands
1575     = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1576   memset (&repr.reserved, 0, sizeof (repr.reserved));
1577 
1578   brig_code.add (&repr, sizeof (repr));
1579   brig_insn_count++;
1580 }
1581 
1582 /* Emit a HSA convert instruction and all necessary directives, schedule
1583    necessary operands for writing.  */
1584 
1585 static void
emit_cvt_insn(hsa_insn_cvt * insn)1586 emit_cvt_insn (hsa_insn_cvt *insn)
1587 {
1588   struct BrigInstCvt repr;
1589   BrigType16_t srctype;
1590 
1591   repr.base.base.byteCount = lendian16 (sizeof (repr));
1592   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1593   repr.base.opcode = lendian16 (insn->m_opcode);
1594   repr.base.type = lendian16 (insn->m_type);
1595   repr.base.operands = lendian32 (emit_insn_operands (insn));
1596 
1597   if (is_a <hsa_op_reg *> (insn->get_op (1)))
1598     srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1599   else
1600     srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1601   repr.sourceType = lendian16 (srctype);
1602   repr.modifier = 0;
1603   /* float to smaller float requires a rounding setting (we default
1604      to 'near'.  */
1605   if (hsa_type_float_p (insn->m_type)
1606       && (!hsa_type_float_p (srctype)
1607 	  || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1608 	      < (srctype & BRIG_TYPE_BASE_MASK))))
1609     repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1610   else if (hsa_type_integer_p (insn->m_type) &&
1611 	   hsa_type_float_p (srctype))
1612     repr.round = BRIG_ROUND_INTEGER_ZERO;
1613   else
1614     repr.round = BRIG_ROUND_NONE;
1615   brig_code.add (&repr, sizeof (repr));
1616   brig_insn_count++;
1617 }
1618 
1619 /* Emit call instruction INSN, where this instruction must be closed
1620    within a call block instruction.  */
1621 
1622 static void
emit_call_insn(hsa_insn_call * call)1623 emit_call_insn (hsa_insn_call *call)
1624 {
1625   struct BrigInstBr repr;
1626 
1627   repr.base.base.byteCount = lendian16 (sizeof (repr));
1628   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1629   repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1630   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1631 
1632   repr.base.operands
1633     = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1634 				call->m_args_code_list));
1635 
1636   /* Internal functions have not set m_called_function.  */
1637   if (call->m_called_function)
1638     {
1639       function_linkage_pair pair (call->m_called_function,
1640 				  call->m_func.m_brig_op_offset);
1641       function_call_linkage.safe_push (pair);
1642     }
1643   else
1644     {
1645       hsa_internal_fn *slot
1646 	= hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1647       gcc_assert (slot);
1648       gcc_assert (slot->m_offset > 0);
1649       call->m_func.m_directive_offset = slot->m_offset;
1650     }
1651 
1652   repr.width = BRIG_WIDTH_ALL;
1653   memset (&repr.reserved, 0, sizeof (repr.reserved));
1654 
1655   brig_code.add (&repr, sizeof (repr));
1656   brig_insn_count++;
1657 }
1658 
1659 /* Emit argument block directive.  */
1660 
1661 static void
emit_arg_block_insn(hsa_insn_arg_block * insn)1662 emit_arg_block_insn (hsa_insn_arg_block *insn)
1663 {
1664   switch (insn->m_kind)
1665     {
1666     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1667       {
1668 	struct BrigDirectiveArgBlock repr;
1669 	repr.base.byteCount = lendian16 (sizeof (repr));
1670 	repr.base.kind = lendian16 (insn->m_kind);
1671 	brig_code.add (&repr, sizeof (repr));
1672 
1673 	for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1674 	  {
1675 	    insn->m_call_insn->m_args_code_list->m_offsets[i]
1676 	      = lendian32 (emit_directive_variable
1677 			   (insn->m_call_insn->m_input_args[i]));
1678 	    brig_insn_count++;
1679 	  }
1680 
1681 	if (insn->m_call_insn->m_output_arg)
1682 	  {
1683 	    insn->m_call_insn->m_result_code_list->m_offsets[0]
1684 	      = lendian32 (emit_directive_variable
1685 			   (insn->m_call_insn->m_output_arg));
1686 	    brig_insn_count++;
1687 	  }
1688 
1689 	break;
1690       }
1691     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1692       {
1693 	struct BrigDirectiveArgBlock repr;
1694 	repr.base.byteCount = lendian16 (sizeof (repr));
1695 	repr.base.kind = lendian16 (insn->m_kind);
1696 	brig_code.add (&repr, sizeof (repr));
1697 	break;
1698       }
1699     default:
1700       gcc_unreachable ();
1701     }
1702 
1703   brig_insn_count++;
1704 }
1705 
1706 /* Emit comment directive.  */
1707 
1708 static void
emit_comment_insn(hsa_insn_comment * insn)1709 emit_comment_insn (hsa_insn_comment *insn)
1710 {
1711   struct BrigDirectiveComment repr;
1712   memset (&repr, 0, sizeof (repr));
1713 
1714   repr.base.byteCount = lendian16 (sizeof (repr));
1715   repr.base.kind = lendian16 (insn->m_opcode);
1716   repr.name = brig_emit_string (insn->m_comment, '\0', false);
1717   brig_code.add (&repr, sizeof (repr));
1718 }
1719 
1720 /* Emit queue instruction INSN.  */
1721 
1722 static void
emit_queue_insn(hsa_insn_queue * insn)1723 emit_queue_insn (hsa_insn_queue *insn)
1724 {
1725   BrigInstQueue repr;
1726   memset (&repr, 0, sizeof (repr));
1727 
1728   repr.base.base.byteCount = lendian16 (sizeof (repr));
1729   repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1730   repr.base.opcode = lendian16 (insn->m_opcode);
1731   repr.base.type = lendian16 (insn->m_type);
1732   repr.segment = BRIG_SEGMENT_GLOBAL;
1733   repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE;
1734   repr.base.operands = lendian32 (emit_insn_operands (insn));
1735   brig_data.round_size_up (4);
1736   brig_code.add (&repr, sizeof (repr));
1737 
1738   brig_insn_count++;
1739 }
1740 
1741 /* Emit source type instruction INSN.  */
1742 
1743 static void
emit_srctype_insn(hsa_insn_srctype * insn)1744 emit_srctype_insn (hsa_insn_srctype *insn)
1745 {
1746   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1747   struct BrigInstSourceType repr;
1748   unsigned operand_count = insn->operand_count ();
1749   gcc_checking_assert (operand_count >= 2);
1750 
1751   memset (&repr, 0, sizeof (repr));
1752   repr.sourceType = lendian16 (insn->m_source_type);
1753   repr.base.base.byteCount = lendian16 (sizeof (repr));
1754   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1755   repr.base.opcode = lendian16 (insn->m_opcode);
1756   repr.base.type = lendian16 (insn->m_type);
1757 
1758   repr.base.operands = lendian32 (emit_insn_operands (insn));
1759   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1760   brig_insn_count++;
1761 }
1762 
1763 /* Emit packed instruction INSN.  */
1764 
1765 static void
emit_packed_insn(hsa_insn_packed * insn)1766 emit_packed_insn (hsa_insn_packed *insn)
1767 {
1768   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1769   struct BrigInstSourceType repr;
1770   unsigned operand_count = insn->operand_count ();
1771   gcc_checking_assert (operand_count >= 2);
1772 
1773   memset (&repr, 0, sizeof (repr));
1774   repr.sourceType = lendian16 (insn->m_source_type);
1775   repr.base.base.byteCount = lendian16 (sizeof (repr));
1776   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1777   repr.base.opcode = lendian16 (insn->m_opcode);
1778   repr.base.type = lendian16 (insn->m_type);
1779 
1780   if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1781     {
1782       /* Create operand list for packed type.  */
1783       for (unsigned i = 1; i < operand_count; i++)
1784 	{
1785 	  gcc_checking_assert (insn->get_op (i));
1786 	  insn->m_operand_list->m_offsets[i - 1]
1787 	    = lendian32 (enqueue_op (insn->get_op (i)));
1788 	}
1789 
1790       repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1791 						     insn->m_operand_list));
1792     }
1793   else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1794     {
1795       /* Create operand list for packed type.  */
1796       for (unsigned i = 0; i < operand_count - 1; i++)
1797 	{
1798 	  gcc_checking_assert (insn->get_op (i));
1799 	  insn->m_operand_list->m_offsets[i]
1800 	    = lendian32 (enqueue_op (insn->get_op (i)));
1801 	}
1802 
1803       unsigned ops = emit_operands (insn->m_operand_list,
1804 				    insn->get_op (insn->operand_count () - 1));
1805       repr.base.operands = lendian32 (ops);
1806     }
1807 
1808 
1809   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1810   brig_insn_count++;
1811 }
1812 
1813 /* Emit a basic HSA instruction and all necessary directives, schedule
1814    necessary operands for writing.  */
1815 
1816 static void
emit_basic_insn(hsa_insn_basic * insn)1817 emit_basic_insn (hsa_insn_basic *insn)
1818 {
1819   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1820   struct BrigInstMod repr;
1821   BrigType16_t type;
1822 
1823   memset (&repr, 0, sizeof (repr));
1824   repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1825   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1826   repr.base.opcode = lendian16 (insn->m_opcode);
1827   switch (insn->m_opcode)
1828     {
1829       /* And the bit-logical operations need bit types and whine about
1830 	 arithmetic types :-/  */
1831       case BRIG_OPCODE_AND:
1832       case BRIG_OPCODE_OR:
1833       case BRIG_OPCODE_XOR:
1834       case BRIG_OPCODE_NOT:
1835 	type = regtype_for_type (insn->m_type);
1836 	break;
1837       default:
1838 	type = insn->m_type;
1839 	break;
1840     }
1841   repr.base.type = lendian16 (type);
1842   repr.base.operands = lendian32 (emit_insn_operands (insn));
1843 
1844   if (hsa_type_packed_p (type))
1845     {
1846       if (hsa_type_float_p (type)
1847 	  && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1848 	repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1849       else
1850 	repr.round = 0;
1851       /* We assume that destination and sources agree in packing layout.  */
1852       if (insn->num_used_ops () >= 2)
1853 	repr.pack = BRIG_PACK_PP;
1854       else
1855 	repr.pack = BRIG_PACK_P;
1856       repr.reserved = 0;
1857       repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1858       repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1859       brig_code.add (&repr, sizeof (struct BrigInstMod));
1860     }
1861   else
1862     brig_code.add (&repr, sizeof (struct BrigInstBasic));
1863   brig_insn_count++;
1864 }
1865 
1866 /* Emit an HSA instruction and all necessary directives, schedule necessary
1867    operands for writing.  */
1868 
1869 static void
emit_insn(hsa_insn_basic * insn)1870 emit_insn (hsa_insn_basic *insn)
1871 {
1872   gcc_assert (!is_a <hsa_insn_phi *> (insn));
1873 
1874   insn->m_brig_offset = brig_code.total_size;
1875 
1876   if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1877     emit_signal_insn (signal);
1878   else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1879     emit_atomic_insn (atom);
1880   else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1881     emit_memory_insn (mem);
1882   else if (insn->m_opcode == BRIG_OPCODE_LDA)
1883     emit_addr_insn (insn);
1884   else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1885     emit_segment_insn (seg);
1886   else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1887     emit_cmp_insn (cmp);
1888   else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1889     emit_branch_insn (br);
1890   else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1891     {
1892       if (switch_instructions == NULL)
1893 	switch_instructions = new vec <hsa_insn_sbr *> ();
1894 
1895       switch_instructions->safe_push (sbr);
1896       emit_switch_insn (sbr);
1897     }
1898   else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1899     emit_arg_block_insn (block);
1900   else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1901     emit_call_insn (call);
1902   else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1903     emit_comment_insn (comment);
1904   else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1905     emit_queue_insn (queue);
1906   else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1907     emit_srctype_insn (srctype);
1908   else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1909     emit_packed_insn (packed);
1910   else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1911     emit_cvt_insn (cvt);
1912   else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1913     emit_alloca_insn (alloca);
1914   else
1915     emit_basic_insn (insn);
1916 }
1917 
1918 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1919    or we are about to finish emitting code, if it is NULL.  If the fall through
1920    edge from BB does not lead to NEXT_BB, emit an unconditional jump.  */
1921 
1922 static void
perhaps_emit_branch(basic_block bb,basic_block next_bb)1923 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1924 {
1925   basic_block t_bb = NULL, ff = NULL;
1926 
1927   edge_iterator ei;
1928   edge e;
1929 
1930   /* If the last instruction of BB is a switch, ignore emission of all
1931      edges.  */
1932   if (hsa_bb_for_bb (bb)->m_last_insn
1933       && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1934     return;
1935 
1936   FOR_EACH_EDGE (e, ei, bb->succs)
1937     if (e->flags & EDGE_TRUE_VALUE)
1938       {
1939 	gcc_assert (!t_bb);
1940 	t_bb = e->dest;
1941       }
1942     else
1943       {
1944 	gcc_assert (!ff);
1945 	ff = e->dest;
1946       }
1947 
1948   if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1949     return;
1950 
1951   emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1952 }
1953 
1954 /* Emit the a function with name NAME to the various brig sections.  */
1955 
1956 void
hsa_brig_emit_function(void)1957 hsa_brig_emit_function (void)
1958 {
1959   basic_block bb, prev_bb;
1960   hsa_insn_basic *insn;
1961   BrigDirectiveExecutable *ptr_to_fndir;
1962 
1963   brig_init ();
1964 
1965   brig_insn_count = 0;
1966   memset (&op_queue, 0, sizeof (op_queue));
1967   op_queue.projected_size = brig_operand.total_size;
1968 
1969   if (!function_offsets)
1970     function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1971 
1972   if (!emitted_declarations)
1973     emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1974 
1975   for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1976     {
1977       tree called = hsa_cfun->m_called_functions[i];
1978 
1979       /* If the function has no definition, emit a declaration.  */
1980       if (!emitted_declarations->get (called))
1981 	{
1982 	  BrigDirectiveExecutable *e = emit_function_declaration (called);
1983 	  emitted_declarations->put (called, e);
1984 	}
1985     }
1986 
1987   for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
1988     {
1989       hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
1990       emit_internal_fn_decl (called);
1991     }
1992 
1993   ptr_to_fndir = emit_function_directives (hsa_cfun, false);
1994   for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
1995        insn;
1996        insn = insn->m_next)
1997     emit_insn (insn);
1998   prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1999   FOR_EACH_BB_FN (bb, cfun)
2000     {
2001       perhaps_emit_branch (prev_bb, bb);
2002       emit_bb_label_directive (hsa_bb_for_bb (bb));
2003       for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2004 	emit_insn (insn);
2005       prev_bb = bb;
2006     }
2007   perhaps_emit_branch (prev_bb, NULL);
2008   ptr_to_fndir->nextModuleEntry = brig_code.total_size;
2009 
2010   /* Fill up label references for all sbr instructions.  */
2011   if (switch_instructions)
2012     {
2013       for (unsigned i = 0; i < switch_instructions->length (); i++)
2014 	{
2015 	  hsa_insn_sbr *sbr = (*switch_instructions)[i];
2016 	  for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2017 	    {
2018 	      hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2019 	      sbr->m_label_code_list->m_offsets[j]
2020 		= hbb->m_label_ref.m_directive_offset;
2021 	    }
2022 	}
2023 
2024       switch_instructions->release ();
2025       delete switch_instructions;
2026       switch_instructions = NULL;
2027     }
2028 
2029   if (dump_file)
2030     {
2031       fprintf (dump_file, "------- After BRIG emission: -------\n");
2032       dump_hsa_cfun (dump_file);
2033     }
2034 
2035   emit_queued_operands ();
2036 }
2037 
2038 /* Emit all OMP symbols related to OMP.  */
2039 
2040 void
hsa_brig_emit_omp_symbols(void)2041 hsa_brig_emit_omp_symbols (void)
2042 {
2043   brig_init ();
2044   emit_directive_variable (hsa_num_threads);
2045 }
2046 
2047 /* Create and return __hsa_global_variables symbol that contains
2048    all informations consumed by libgomp to link global variables
2049    with their string names used by an HSA kernel.  */
2050 
2051 static tree
hsa_output_global_variables()2052 hsa_output_global_variables ()
2053 {
2054   unsigned l = hsa_global_variable_symbols->elements ();
2055 
2056   tree variable_info_type = make_node (RECORD_TYPE);
2057   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2058 			   get_identifier ("name"), ptr_type_node);
2059   DECL_CHAIN (id_f1) = NULL_TREE;
2060   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2061 			   get_identifier ("omp_data_size"),
2062 			   ptr_type_node);
2063   DECL_CHAIN (id_f2) = id_f1;
2064   finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2065 			 NULL_TREE);
2066 
2067   tree int_num_of_global_vars;
2068   int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2069   tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2070   tree global_vars_array_type = build_array_type (variable_info_type,
2071 						  global_vars_num_index_type);
2072   TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2073 
2074   vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2075 
2076   for (hash_table <hsa_noop_symbol_hasher>::iterator it
2077        = hsa_global_variable_symbols->begin ();
2078        it != hsa_global_variable_symbols->end (); ++it)
2079     {
2080       unsigned len = strlen ((*it)->m_name);
2081       char *copy = XNEWVEC (char, len + 2);
2082       copy[0] = '&';
2083       memcpy (copy + 1, (*it)->m_name, len);
2084       copy[len + 1] = '\0';
2085       len++;
2086       hsa_sanitize_name (copy);
2087 
2088       tree var_name = build_string (len, copy);
2089       TREE_TYPE (var_name)
2090 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2091       free (copy);
2092 
2093       vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2094       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2095 			      build1 (ADDR_EXPR,
2096 				      build_pointer_type (TREE_TYPE (var_name)),
2097 				      var_name));
2098       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2099 			      build_fold_addr_expr ((*it)->m_decl));
2100 
2101       tree variable_info_ctor = build_constructor (variable_info_type,
2102 						   variable_info_vec);
2103 
2104       CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2105 			      variable_info_ctor);
2106     }
2107 
2108   tree global_vars_ctor = build_constructor (global_vars_array_type,
2109 					     global_vars_vec);
2110 
2111   char tmp_name[64];
2112   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2113   tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2114 					   get_identifier (tmp_name),
2115 					   global_vars_array_type);
2116   TREE_STATIC (global_vars_table) = 1;
2117   TREE_READONLY (global_vars_table) = 1;
2118   TREE_PUBLIC (global_vars_table) = 0;
2119   DECL_ARTIFICIAL (global_vars_table) = 1;
2120   DECL_IGNORED_P (global_vars_table) = 1;
2121   DECL_EXTERNAL (global_vars_table) = 0;
2122   TREE_CONSTANT (global_vars_table) = 1;
2123   DECL_INITIAL (global_vars_table) = global_vars_ctor;
2124   varpool_node::finalize_decl (global_vars_table);
2125 
2126   return global_vars_table;
2127 }
2128 
2129 /* Create __hsa_host_functions and __hsa_kernels that contain
2130    all informations consumed by libgomp to register all kernels
2131    in the BRIG binary.  */
2132 
2133 static void
hsa_output_kernels(tree * host_func_table,tree * kernels)2134 hsa_output_kernels (tree *host_func_table, tree *kernels)
2135 {
2136   unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2137 
2138   tree int_num_of_kernels;
2139   int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2140   tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2141   tree host_functions_array_type = build_array_type (ptr_type_node,
2142 						     kernel_num_index_type);
2143   TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2144 
2145   vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2146   for (unsigned i = 0; i < map_count; ++i)
2147     {
2148       tree decl = hsa_get_decl_kernel_mapping_decl (i);
2149       tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2150       CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2151     }
2152   tree host_functions_ctor = build_constructor (host_functions_array_type,
2153 						host_functions_vec);
2154   char tmp_name[64];
2155   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2156   tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2157 					 get_identifier (tmp_name),
2158 					 host_functions_array_type);
2159   TREE_STATIC (hsa_host_func_table) = 1;
2160   TREE_READONLY (hsa_host_func_table) = 1;
2161   TREE_PUBLIC (hsa_host_func_table) = 0;
2162   DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2163   DECL_IGNORED_P (hsa_host_func_table) = 1;
2164   DECL_EXTERNAL (hsa_host_func_table) = 0;
2165   TREE_CONSTANT (hsa_host_func_table) = 1;
2166   DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2167   varpool_node::finalize_decl (hsa_host_func_table);
2168   *host_func_table = hsa_host_func_table;
2169 
2170   /* Following code emits list of kernel_info structures.  */
2171 
2172   tree kernel_info_type = make_node (RECORD_TYPE);
2173   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2174 			   get_identifier ("name"), ptr_type_node);
2175   DECL_CHAIN (id_f1) = NULL_TREE;
2176   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2177 			   get_identifier ("omp_data_size"),
2178 			   unsigned_type_node);
2179   DECL_CHAIN (id_f2) = id_f1;
2180   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2181 			   get_identifier ("gridified_kernel_p"),
2182 			   boolean_type_node);
2183   DECL_CHAIN (id_f3) = id_f2;
2184   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2185 			   get_identifier ("kernel_dependencies_count"),
2186 			   unsigned_type_node);
2187   DECL_CHAIN (id_f4) = id_f3;
2188   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2189 			   get_identifier ("kernel_dependencies"),
2190 			   build_pointer_type (build_pointer_type
2191 					       (char_type_node)));
2192   DECL_CHAIN (id_f5) = id_f4;
2193   finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2194 			 NULL_TREE);
2195 
2196   int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2197   tree kernel_info_vector_type
2198     = build_array_type (kernel_info_type,
2199 			build_index_type (int_num_of_kernels));
2200   TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2201 
2202   vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2203   tree kernel_dependencies_vector_type = NULL;
2204 
2205   for (unsigned i = 0; i < map_count; ++i)
2206     {
2207       tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2208       char *name = hsa_get_decl_kernel_mapping_name (i);
2209       unsigned len = strlen (name);
2210       char *copy = XNEWVEC (char, len + 2);
2211       copy[0] = '&';
2212       memcpy (copy + 1, name, len);
2213       copy[len + 1] = '\0';
2214       len++;
2215 
2216       tree kern_name = build_string (len, copy);
2217       TREE_TYPE (kern_name)
2218 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2219       free (copy);
2220 
2221       unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2222       tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2223       bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2224       tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2225 						     gridified_kernel_p);
2226       unsigned count = 0;
2227 
2228       kernel_dependencies_vector_type
2229 	= build_array_type (build_pointer_type (char_type_node),
2230 			    build_index_type (size_int (0)));
2231 
2232       vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2233       if (hsa_decl_kernel_dependencies)
2234 	{
2235 	  vec<const char *> **slot;
2236 	  slot = hsa_decl_kernel_dependencies->get (kernel);
2237 	  if (slot)
2238 	    {
2239 	      vec <const char *> *dependencies = *slot;
2240 	      count = dependencies->length ();
2241 
2242 	      kernel_dependencies_vector_type
2243 		= build_array_type (build_pointer_type (char_type_node),
2244 				    build_index_type (size_int (count)));
2245 	      TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2246 
2247 	      for (unsigned j = 0; j < count; j++)
2248 		{
2249 		  const char *d = (*dependencies)[j];
2250 		  len = strlen (d);
2251 		  tree dependency_name = build_string (len, d);
2252 		  TREE_TYPE (dependency_name)
2253 		    = build_array_type (char_type_node,
2254 					build_index_type (size_int (len)));
2255 
2256 		  CONSTRUCTOR_APPEND_ELT
2257 		    (kernel_dependencies_vec, NULL_TREE,
2258 		     build1 (ADDR_EXPR,
2259 			     build_pointer_type (TREE_TYPE (dependency_name)),
2260 			     dependency_name));
2261 		}
2262 	    }
2263 	}
2264 
2265       tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2266 
2267       vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2268       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2269 			      build1 (ADDR_EXPR,
2270 				      build_pointer_type (TREE_TYPE
2271 							  (kern_name)),
2272 				      kern_name));
2273       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2274       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2275 			      gridified_kernel_p_tree);
2276       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2277 
2278       if (count > 0)
2279 	{
2280 	  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2281 	  tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2282 					       get_identifier (tmp_name),
2283 					       kernel_dependencies_vector_type);
2284 
2285 	  TREE_STATIC (dependencies_list) = 1;
2286 	  TREE_READONLY (dependencies_list) = 1;
2287 	  TREE_PUBLIC (dependencies_list) = 0;
2288 	  DECL_ARTIFICIAL (dependencies_list) = 1;
2289 	  DECL_IGNORED_P (dependencies_list) = 1;
2290 	  DECL_EXTERNAL (dependencies_list) = 0;
2291 	  TREE_CONSTANT (dependencies_list) = 1;
2292 	  DECL_INITIAL (dependencies_list)
2293 	    = build_constructor (kernel_dependencies_vector_type,
2294 				 kernel_dependencies_vec);
2295 	  varpool_node::finalize_decl (dependencies_list);
2296 
2297 	  CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2298 				  build1 (ADDR_EXPR,
2299 					  build_pointer_type
2300 					    (TREE_TYPE (dependencies_list)),
2301 					  dependencies_list));
2302 	}
2303       else
2304 	CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2305 
2306       tree kernel_info_ctor = build_constructor (kernel_info_type,
2307 						 kernel_info_vec);
2308 
2309       CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2310 			      kernel_info_ctor);
2311     }
2312 
2313   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2314   tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2315 				 get_identifier (tmp_name),
2316 				 kernel_info_vector_type);
2317 
2318   TREE_STATIC (hsa_kernels) = 1;
2319   TREE_READONLY (hsa_kernels) = 1;
2320   TREE_PUBLIC (hsa_kernels) = 0;
2321   DECL_ARTIFICIAL (hsa_kernels) = 1;
2322   DECL_IGNORED_P (hsa_kernels) = 1;
2323   DECL_EXTERNAL (hsa_kernels) = 0;
2324   TREE_CONSTANT (hsa_kernels) = 1;
2325   DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2326 						  kernel_info_vector_vec);
2327   varpool_node::finalize_decl (hsa_kernels);
2328   *kernels = hsa_kernels;
2329 }
2330 
2331 /* Create a static constructor that will register out brig stuff with
2332    libgomp.  */
2333 
2334 static void
hsa_output_libgomp_mapping(tree brig_decl)2335 hsa_output_libgomp_mapping (tree brig_decl)
2336 {
2337   unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2338   unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2339 
2340   tree kernels;
2341   tree host_func_table;
2342 
2343   hsa_output_kernels (&host_func_table, &kernels);
2344   tree global_vars = hsa_output_global_variables ();
2345 
2346   tree hsa_image_desc_type = make_node (RECORD_TYPE);
2347   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2348 			   get_identifier ("brig_module"), ptr_type_node);
2349   DECL_CHAIN (id_f1) = NULL_TREE;
2350   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2351 			   get_identifier ("kernel_count"),
2352 			   unsigned_type_node);
2353 
2354   DECL_CHAIN (id_f2) = id_f1;
2355   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2356 			   get_identifier ("hsa_kernel_infos"),
2357 			   ptr_type_node);
2358   DECL_CHAIN (id_f3) = id_f2;
2359   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2360 			   get_identifier ("global_variable_count"),
2361 			   unsigned_type_node);
2362   DECL_CHAIN (id_f4) = id_f3;
2363   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2364 			   get_identifier ("hsa_global_variable_infos"),
2365 			   ptr_type_node);
2366   DECL_CHAIN (id_f5) = id_f4;
2367   finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2368 			 NULL_TREE);
2369   TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2370 
2371   vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2372   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2373 			  build_fold_addr_expr (brig_decl));
2374   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2375 			  build_int_cstu (unsigned_type_node, kernel_count));
2376   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2377 			  build1 (ADDR_EXPR,
2378 				  build_pointer_type (TREE_TYPE (kernels)),
2379 				  kernels));
2380   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2381 			  build_int_cstu (unsigned_type_node,
2382 					  global_variable_count));
2383   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2384 			  build1 (ADDR_EXPR,
2385 				  build_pointer_type (TREE_TYPE (global_vars)),
2386 				  global_vars));
2387 
2388   tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2389 
2390   char tmp_name[64];
2391   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2392   tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2393 					get_identifier (tmp_name),
2394 					hsa_image_desc_type);
2395   TREE_STATIC (hsa_img_descriptor) = 1;
2396   TREE_READONLY (hsa_img_descriptor) = 1;
2397   TREE_PUBLIC (hsa_img_descriptor) = 0;
2398   DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2399   DECL_IGNORED_P (hsa_img_descriptor) = 1;
2400   DECL_EXTERNAL (hsa_img_descriptor) = 0;
2401   TREE_CONSTANT (hsa_img_descriptor) = 1;
2402   DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2403   varpool_node::finalize_decl (hsa_img_descriptor);
2404 
2405   /* Construct the "host_table" libgomp expects.  */
2406   tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2407   tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2408   TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2409   vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2410   tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2411   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2412 			  host_func_table_addr);
2413   offset_int func_table_size
2414     = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2415   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2416 			  fold_build2 (POINTER_PLUS_EXPR,
2417 				       TREE_TYPE (host_func_table_addr),
2418 				       host_func_table_addr,
2419 				       build_int_cst (size_type_node,
2420 						      func_table_size.to_uhwi
2421 						      ())));
2422   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2423   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2424   tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2425 						    libgomp_host_table_vec);
2426   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2427   tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2428 					    get_identifier (tmp_name),
2429 					    libgomp_host_table_type);
2430 
2431   TREE_STATIC (hsa_libgomp_host_table) = 1;
2432   TREE_READONLY (hsa_libgomp_host_table) = 1;
2433   TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2434   DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2435   DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2436   DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2437   TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2438   DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2439   varpool_node::finalize_decl (hsa_libgomp_host_table);
2440 
2441   /* Generate an initializer with a call to the registration routine.  */
2442 
2443   tree offload_register
2444     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2445   gcc_checking_assert (offload_register);
2446 
2447   tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2448   append_to_statement_list
2449     (build_call_expr (offload_register, 4,
2450 		      build_int_cstu (unsigned_type_node,
2451 				      GOMP_VERSION_PACK (GOMP_VERSION,
2452 							 GOMP_VERSION_HSA)),
2453 		      build_fold_addr_expr (hsa_libgomp_host_table),
2454 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2455 		      build_fold_addr_expr (hsa_img_descriptor)),
2456      hsa_ctor_stmts);
2457 
2458   cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2459 
2460   tree offload_unregister
2461     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2462   gcc_checking_assert (offload_unregister);
2463 
2464   tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2465   append_to_statement_list
2466     (build_call_expr (offload_unregister, 4,
2467 		      build_int_cstu (unsigned_type_node,
2468 				      GOMP_VERSION_PACK (GOMP_VERSION,
2469 							 GOMP_VERSION_HSA)),
2470 		      build_fold_addr_expr (hsa_libgomp_host_table),
2471 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2472 		      build_fold_addr_expr (hsa_img_descriptor)),
2473      hsa_dtor_stmts);
2474   cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2475 }
2476 
2477 /* Emit the brig module we have compiled to a section in the final assembly and
2478    also create a compile unit static constructor that will register the brig
2479    module with libgomp.  */
2480 
2481 void
hsa_output_brig(void)2482 hsa_output_brig (void)
2483 {
2484   section *saved_section;
2485 
2486   if (!brig_initialized)
2487     return;
2488 
2489   for (unsigned i = 0; i < function_call_linkage.length (); i++)
2490     {
2491       function_linkage_pair p = function_call_linkage[i];
2492 
2493       BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2494       gcc_assert (*func_offset);
2495       BrigOperandCodeRef *code_ref
2496 	= (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2497       gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2498       code_ref->ref = lendian32 (*func_offset);
2499     }
2500 
2501   /* Iterate all function declarations and if we meet a function that should
2502      have module linkage and we are unable to emit HSAIL for the function,
2503      then change the linkage to program linkage.  Doing so, we will emit
2504      a valid BRIG image.  */
2505   if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2506     for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2507 	 = emitted_declarations->begin ();
2508 	 it != emitted_declarations->end ();
2509 	 ++it)
2510       {
2511 	if (hsa_failed_functions->contains ((*it).first))
2512 	  (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2513       }
2514 
2515   saved_section = in_section;
2516 
2517   switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2518   char tmp_name[64];
2519   ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2520   ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2521   tree brig_id = get_identifier (tmp_name);
2522   tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2523 			       char_type_node);
2524   SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2525   TREE_ADDRESSABLE (brig_decl) = 1;
2526   TREE_READONLY (brig_decl) = 1;
2527   DECL_ARTIFICIAL (brig_decl) = 1;
2528   DECL_IGNORED_P (brig_decl) = 1;
2529   TREE_STATIC (brig_decl) = 1;
2530   TREE_PUBLIC (brig_decl) = 0;
2531   TREE_USED (brig_decl) = 1;
2532   DECL_INITIAL (brig_decl) = brig_decl;
2533   TREE_ASM_WRITTEN (brig_decl) = 1;
2534 
2535   BrigModuleHeader module_header;
2536   memcpy (&module_header.identification, "HSA BRIG",
2537 	  sizeof (module_header.identification));
2538   module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2539   module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2540   uint64_t section_index[3];
2541 
2542   int data_padding, code_padding, operand_padding;
2543   data_padding = HSA_SECTION_ALIGNMENT
2544     - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2545   code_padding = HSA_SECTION_ALIGNMENT
2546     - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2547   operand_padding = HSA_SECTION_ALIGNMENT
2548     - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2549 
2550   uint64_t module_size = sizeof (module_header)
2551     + sizeof (section_index)
2552     + brig_data.total_size
2553     + data_padding
2554     + brig_code.total_size
2555     + code_padding
2556     + brig_operand.total_size
2557     + operand_padding;
2558   gcc_assert ((module_size % 16) == 0);
2559   module_header.byteCount = lendian64 (module_size);
2560   memset (&module_header.hash, 0, sizeof (module_header.hash));
2561   module_header.reserved = 0;
2562   module_header.sectionCount = lendian32 (3);
2563   module_header.sectionIndex = lendian64 (sizeof (module_header));
2564   assemble_string ((const char *) &module_header, sizeof (module_header));
2565   uint64_t off = sizeof (module_header) + sizeof (section_index);
2566   section_index[0] = lendian64 (off);
2567   off += brig_data.total_size + data_padding;
2568   section_index[1] = lendian64 (off);
2569   off += brig_code.total_size + code_padding;
2570   section_index[2] = lendian64 (off);
2571   assemble_string ((const char *) &section_index, sizeof (section_index));
2572 
2573   char padding[HSA_SECTION_ALIGNMENT];
2574   memset (padding, 0, sizeof (padding));
2575 
2576   brig_data.output ();
2577   assemble_string (padding, data_padding);
2578   brig_code.output ();
2579   assemble_string (padding, code_padding);
2580   brig_operand.output ();
2581   assemble_string (padding, operand_padding);
2582 
2583   if (saved_section)
2584     switch_to_section (saved_section);
2585 
2586   hsa_output_libgomp_mapping (brig_decl);
2587 
2588   hsa_free_decl_kernel_mapping ();
2589   brig_release_data ();
2590   hsa_deinit_compilation_unit_data ();
2591 
2592   delete emitted_declarations;
2593   emitted_declarations = NULL;
2594   delete function_offsets;
2595   function_offsets = NULL;
2596 }
2597