/* brig2tree.cc -- brig to gcc generic/gimple tree conversion
   Copyright (C) 2016-2021 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include <cassert>
#include <iostream>
#include <iomanip>
#include <sstream>

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "function.h"
#include "brig-to-generic.h"
#include "stringpool.h"
#include "tree-iterator.h"
#include "toplev.h"
#include "gimplify.h"
#include "gimple-expr.h"
#include "print-tree.h"
#include "hsa-brig-format.h"
#include "stor-layout.h"
#include "diagnostic-core.h"
#include "brig-code-entry-handler.h"
#include "brig-machine.h"
#include "brig-util.h"
#include "phsa.h"
#include "tree-pretty-print.h"
#include "dumpfile.h"
#include "profile-count.h"
#include "tree-cfg.h"
#include "errors.h"
#include "fold-const.h"
#include "cgraph.h"
#include "attribs.h"

extern int gccbrig_verbose;

tree brig_to_generic::s_fp16_type;
tree brig_to_generic::s_fp32_type;
tree brig_to_generic::s_fp64_type;

brig_to_generic::brig_to_generic ()
  : m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0),
    m_brig (NULL), m_next_private_offset (0)
{
  m_globals = NULL_TREE;

  /* Initialize the basic REAL types.
     fp16 cannot be handled directly because most targets do not
     support it natively.  By default, treat it as a storage format
     only: convert to fp32 and back around each instruction, and later
     add an optimization pass that removes the redundant converts
     (in case of multiple fp16 ops in a row).  */
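  /* For illustration (a conceptual sketch, not the exact trees built
     here): an fp16 add is expected to expand roughly to
       tmp_a = (float) a_f16; tmp_b = (float) b_f16;
       d_f16 = (fp16 storage) (tmp_a + tmp_b);
     with the widening/narrowing converts inserted by the instruction
     handlers.  */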
  s_fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (s_fp16_type) = 16;
  TYPE_SIZE (s_fp16_type) = bitsize_int (16);
  TYPE_SIZE_UNIT (s_fp16_type) = size_int (2);
  SET_TYPE_ALIGN (s_fp16_type, 16);
  layout_type (s_fp16_type);

  s_fp32_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F32);
  s_fp64_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F64);

  /* TODO: query the preferred rounding mode the machine sets by
     default.  This can be redefined by each BRIG module header.  */
  m_default_float_rounding_mode = BRIG_ROUND_FLOAT_ZERO;

  m_dump_file = dump_begin (TDI_original, &m_dump_flags);
}

class unimplemented_entry_handler : public brig_code_entry_handler
{
public:
  unimplemented_entry_handler (brig_to_generic &parent)
    : brig_code_entry_handler (parent)
  {
  }

  size_t
  operator () (const BrigBase *base)
  {
    gcc_unreachable ();
    return base->byteCount;
  }
};

/* Handler for entries that can be (and are) safely skipped for the purposes
   of GENERIC generation.  */

class skipped_entry_handler : public brig_code_entry_handler
{
public:
  skipped_entry_handler (brig_to_generic &parent)
    : brig_code_entry_handler (parent)
  {
  }

  size_t
  operator () (const BrigBase *base)
  {
    return base->byteCount;
  }
};

class brig_reg_use_analyzer : public brig_code_entry_handler
{
public:
  brig_reg_use_analyzer (brig_to_generic &parent)
    : brig_code_entry_handler (parent)
  {
  }

  size_t
  operator () (const BrigBase *base)
  {
    const BrigInstBase *brig_inst = (const BrigInstBase *) base;
    analyze_operands (*brig_inst);
    return base->byteCount;
  }
};

/* Helper struct for pairing a BrigKind and the brig_code_entry_handler that
   should handle its data.  */

struct code_entry_handler_info
{
  BrigKind kind;
  brig_code_entry_handler *handler;
};


/* Finds the BRIG file sections in the currently processed file.  */

void
brig_to_generic::find_brig_sections ()
{
  m_data = m_code = m_operand = NULL;
  const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig;

  /* Find the positions of the different sections.  */
  for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec)
    {
      uint64_t offset
	= ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec];

      const BrigSectionHeader *section_header
	= (const BrigSectionHeader *) (m_brig + offset);

      std::string name ((const char *) (&section_header->name),
			section_header->nameLength);

      if (sec == BRIG_SECTION_INDEX_DATA && name == "hsa_data")
	{
	  m_data = (const char *) section_header;
	  m_data_size = section_header->byteCount;
	}
      else if (sec == BRIG_SECTION_INDEX_CODE && name == "hsa_code")
	{
	  m_code = (const char *) section_header;
	  m_code_size = section_header->byteCount;
	}
      else if (sec == BRIG_SECTION_INDEX_OPERAND && name == "hsa_operand")
	{
	  m_operand = (const char *) section_header;
	  m_operand_size = section_header->byteCount;
	}
      else
	{
	  gcc_unreachable ();
	}
    }

  if (m_code == NULL)
    gcc_unreachable ();
  if (m_data == NULL)
    gcc_unreachable ();
  if (m_operand == NULL)
    gcc_unreachable ();
}

/* Does a first pass over the given BRIG to collect data needed for the
   actual parsing.  Currently this only collects the group segment
   variable usage to support the experimental HSA PRM feature where group
   variables can also be declared in module and function scope (in
   addition to kernel scope).  */

void
brig_to_generic::analyze (const char *brig_blob)
{
  const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;

  if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
    fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
		 "Unrecognized file format.");
  if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
    fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
		 "BRIG version not supported. BRIG 1.0 required.");

  m_brig = brig_blob;

  find_brig_sections ();

  brig_directive_variable_handler var_handler (*this);
  brig_directive_fbarrier_handler fbar_handler (*this);
  brig_directive_function_handler func_handler (*this);
  brig_reg_use_analyzer reg_use_analyzer (*this);

  /* Need this for grabbing the module names for mangling the
     group variable names.  */
  brig_directive_module_handler module_handler (*this);
  skipped_entry_handler skipped_handler (*this);

  const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;

  code_entry_handler_info handlers[]
    = {{BRIG_KIND_INST_BASIC, &reg_use_analyzer},
       {BRIG_KIND_INST_MOD, &reg_use_analyzer},
       {BRIG_KIND_INST_CMP, &reg_use_analyzer},
       {BRIG_KIND_INST_MEM, &reg_use_analyzer},
       {BRIG_KIND_INST_CVT, &reg_use_analyzer},
       {BRIG_KIND_INST_SEG_CVT, &reg_use_analyzer},
       {BRIG_KIND_INST_SEG, &reg_use_analyzer},
       {BRIG_KIND_INST_ADDR, &reg_use_analyzer},
       {BRIG_KIND_INST_SOURCE_TYPE, &reg_use_analyzer},
       {BRIG_KIND_INST_ATOMIC, &reg_use_analyzer},
       {BRIG_KIND_INST_SIGNAL, &reg_use_analyzer},
       {BRIG_KIND_INST_BR, &reg_use_analyzer},
       {BRIG_KIND_INST_LANE, &reg_use_analyzer},
       {BRIG_KIND_INST_QUEUE, &reg_use_analyzer},
       {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
       {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
       {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
       {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
       {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}};

  m_analyzing = true;
  for (size_t b = csection_header->headerByteCount; b < m_code_size;)
    {
      const BrigBase *entry = (const BrigBase *) (m_code + b);

      brig_code_entry_handler *handler = &skipped_handler;

      if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
	{
	  /* The function definition ended.  We can just discard the
	     placeholder function.  */
	  m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
	  delete m_cf;
	  m_cf = NULL;
	}

      /* Find a handler.  */
      for (size_t i = 0;
	   i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
	{
	  if (handlers[i].kind == entry->kind)
	    handler = handlers[i].handler;
	}

      int bytes_processed = (*handler) (entry);
      if (bytes_processed == 0)
	fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_CORRUPTED_MODULE
		     "Element with 0 bytes.");
      b += bytes_processed;
    }

  if (m_cf != NULL)
    {
      m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
      delete m_cf;
      m_cf = NULL;
    }

  m_total_group_segment_usage += m_module_group_variables.size ();
  m_analyzing = false;
}

/* Parses the given BRIG blob.  */

void
brig_to_generic::parse (const char *brig_blob)
{
  m_brig = brig_blob;
  find_brig_sections ();

  brig_basic_inst_handler inst_handler (*this);
  brig_branch_inst_handler branch_inst_handler (*this);
  brig_cvt_inst_handler cvt_inst_handler (*this);
  brig_seg_inst_handler seg_inst_handler (*this);
  brig_copy_move_inst_handler copy_move_inst_handler (*this);
  brig_signal_inst_handler signal_inst_handler (*this);
  brig_atomic_inst_handler atomic_inst_handler (*this);
  brig_cmp_inst_handler cmp_inst_handler (*this);
  brig_mem_inst_handler mem_inst_handler (*this);
  brig_inst_mod_handler inst_mod_handler (*this);
  brig_directive_label_handler label_handler (*this);
  brig_directive_variable_handler var_handler (*this);
  brig_directive_fbarrier_handler fbar_handler (*this);
  brig_directive_comment_handler comment_handler (*this);
  brig_directive_function_handler func_handler (*this);
  brig_directive_control_handler control_handler (*this);
  brig_directive_arg_block_handler arg_block_handler (*this);
  brig_directive_module_handler module_handler (*this);
  brig_lane_inst_handler lane_inst_handler (*this);
  brig_queue_inst_handler queue_inst_handler (*this);
  skipped_entry_handler skipped_handler (*this);
  unimplemented_entry_handler unimplemented_handler (*this);

  struct code_entry_handler_info
  {
    BrigKind kind;
    brig_code_entry_handler *handler;
  };

  /* TODO: Convert to a hash table / map.  For now, put the more common
     entries at the top to keep the scan fast on average.  */
  code_entry_handler_info handlers[]
    = {{BRIG_KIND_INST_BASIC, &inst_handler},
       {BRIG_KIND_INST_CMP, &cmp_inst_handler},
       {BRIG_KIND_INST_MEM, &mem_inst_handler},
       {BRIG_KIND_INST_MOD, &inst_mod_handler},
       {BRIG_KIND_INST_CVT, &cvt_inst_handler},
       {BRIG_KIND_INST_SEG_CVT, &seg_inst_handler},
       {BRIG_KIND_INST_SEG, &seg_inst_handler},
       {BRIG_KIND_INST_ADDR, &copy_move_inst_handler},
       {BRIG_KIND_INST_SOURCE_TYPE, &copy_move_inst_handler},
       {BRIG_KIND_INST_ATOMIC, &atomic_inst_handler},
       {BRIG_KIND_INST_SIGNAL, &signal_inst_handler},
       {BRIG_KIND_INST_BR, &branch_inst_handler},
       {BRIG_KIND_INST_LANE, &lane_inst_handler},
       {BRIG_KIND_INST_QUEUE, &queue_inst_handler},
       /* Assuming fences are not needed.  FIXME: call builtins
	  when porting to a platform where they are.  */
       {BRIG_KIND_INST_MEM_FENCE, &skipped_handler},
       {BRIG_KIND_DIRECTIVE_LABEL, &label_handler},
       {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
       {BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, &arg_block_handler},
       {BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, &arg_block_handler},
       {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
       {BRIG_KIND_DIRECTIVE_COMMENT, &comment_handler},
       {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
       {BRIG_KIND_DIRECTIVE_SIGNATURE, &func_handler},
       {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler},
       {BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION, &func_handler},
       {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
       /* Skipping debug locations for now as not needed for conformance.  */
       {BRIG_KIND_DIRECTIVE_LOC, &skipped_handler},
       /* There are no supported pragmas at this moment.  */
       {BRIG_KIND_DIRECTIVE_PRAGMA, &skipped_handler},
       {BRIG_KIND_DIRECTIVE_CONTROL, &control_handler},
       {BRIG_KIND_DIRECTIVE_EXTENSION, &skipped_handler},
       /* BRIG_KIND_NONE entries are valid anywhere.  They can be used
	  for patching BRIGs before finalization.  */
       {BRIG_KIND_NONE, &skipped_handler}};

  const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;

  for (size_t b = csection_header->headerByteCount; b < m_code_size;)
    {
      const BrigBase *entry = (const BrigBase *) (m_code + b);

      brig_code_entry_handler *handler = &unimplemented_handler;

      if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
	finish_function (); /* The function definition ended.  */

      /* Find a handler.  */
      for (size_t i = 0;
	   i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
	{
	  if (handlers[i].kind == entry->kind)
	    handler = handlers[i].handler;
	}
      b += (*handler) (entry);
    }

  finish_function ();
}

const BrigData *
brig_to_generic::get_brig_data_entry (size_t entry_offset) const
{
  return (const BrigData *) (m_data + entry_offset);
}

const BrigBase *
brig_to_generic::get_brig_operand_entry (size_t entry_offset) const
{
  return (const BrigBase *) (m_operand + entry_offset);
}

const BrigBase *
brig_to_generic::get_brig_code_entry (size_t entry_offset) const
{
  return (const BrigBase *) (m_code + entry_offset);
}

void
brig_to_generic::append_global (tree g)
{
  if (m_globals == NULL_TREE)
    {
      m_globals = g;
      return;
    }
  else
    {
      tree last = tree_last (m_globals);
      TREE_CHAIN (last) = g;
    }
}

tree
brig_to_generic::global_variable (const std::string &name) const
{
  label_index::const_iterator i = m_global_variables.find (name);
  if (i == m_global_variables.end ())
    return NULL_TREE;
  else
    return (*i).second;
}

/* Returns a function declaration with the given name.  Assumes it has been
   created previously via a DirectiveFunction or similar.  */

tree
brig_to_generic::function_decl (const std::string &name)
{
  label_index::const_iterator i = m_function_index.find (name);
  if (i == m_function_index.end ())
    return NULL_TREE;
  return (*i).second;
}

void
brig_to_generic::add_function_decl (const std::string &name, tree func_decl)
{
  m_function_index[name] = func_decl;
}

/* Adds a GENERIC global variable VAR_DECL with the given NAME to the
   current module.  If we have generated a host def var ptr (a placeholder
   for variables that are defined by the HSA host code) for this global
   variable definition (because an earlier declaration looked like it
   might have been a host-defined variable), we now have to assign its
   address and make it externally visible so that the references point to
   the defined variable instead.  */
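/* For illustration: defining a global "foo" (a hypothetical name) looks up
   a possible earlier placeholder pointer named PHSA_HOST_DEF_PTR_PREFIX
   + "foo"; if one exists, its DECL_INITIAL is set to &foo below.  */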

void
brig_to_generic::add_global_variable (const std::string &name, tree var_decl)
{
  append_global (var_decl);
  m_global_variables[name] = var_decl;

  std::string host_def_var_name
    = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name;
  tree host_def_var = global_variable (host_def_var_name);
  if (host_def_var == NULL_TREE)
    return;

  tree ptype = build_pointer_type (TREE_TYPE (var_decl));
  tree var_addr = build1 (ADDR_EXPR, ptype, var_decl);

  DECL_INITIAL (host_def_var) = var_addr;
  TREE_PUBLIC (host_def_var) = 1;

  set_externally_visible (host_def_var);
}

/* Adds an indirection pointer for a potential host-defined program scope
   variable declaration.  */

void
brig_to_generic::add_host_def_var_ptr (const std::string &name, tree var_decl)
{
  std::string var_name = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name;

  tree name_identifier = get_identifier (var_name.c_str ());

  tree ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier,
			     build_pointer_type (TREE_TYPE (var_decl)));
  DECL_EXTERNAL (ptr_var) = 0;
  DECL_ARTIFICIAL (ptr_var) = 0;

  TREE_PUBLIC (ptr_var) = 1;
  TREE_USED (ptr_var) = 1;
  TREE_ADDRESSABLE (ptr_var) = 1;
  TREE_STATIC (ptr_var) = 1;

  set_externally_visible (ptr_var);

  append_global (ptr_var);
  m_global_variables[var_name] = ptr_var;
}

void
brig_to_generic::add_decl_call (tree call)
{
  m_decl_call.push_back (call);
}

/* Produce a "mangled name" for the given brig function or kernel.
   The mangling is used to create a unique global symbol name for
   module scope functions.  Program scope functions are not mangled
   (except for dropping the leading &), which makes them directly
   visible for linking using the original function name.  */
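/* For illustration (hypothetical names): a module scope function "&foo"
   in a module named "mod" becomes "gccbrig.mod.foo", while a program
   scope "&bar" simply becomes "bar".  */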

std::string
brig_to_generic::get_mangled_name (const BrigDirectiveExecutable *func) const
{
  /* Strip the leading &.  */
  std::string func_name = get_string (func->name).substr (1);
  if (func->linkage == BRIG_LINKAGE_MODULE)
    {
      /* Mangle the module scope function names with the module name and
	 make them public so they can be queried by the HSA runtime from
	 the produced binary.  Assume the name refers to the currently
	 processed module.  */
      func_name = "gccbrig." + m_module_name + "." + func_name;
    }
  return func_name;
}

std::string
brig_to_generic::get_string (size_t entry_offset) const
{
  const BrigData *data_item = get_brig_data_entry (entry_offset);
  return std::string ((const char *) &data_item->bytes, data_item->byteCount);
}

/* Adapted from c-semantics.c.  */

tree
build_stmt (enum tree_code code, ...)
{
  tree ret;
  int length, i;
  va_list p;
  bool side_effects;

  /* This function cannot be used to construct variably-sized nodes.  */
  gcc_assert (TREE_CODE_CLASS (code) != tcc_vl_exp);

  va_start (p, code);

  ret = make_node (code);
  TREE_TYPE (ret) = void_type_node;
  length = TREE_CODE_LENGTH (code);

  /* TREE_SIDE_EFFECTS will already be set for statements with
     implicit side effects.  Here we make sure it is set for other
     expressions by checking whether the parameters have side
     effects.  */

  side_effects = false;
  for (i = 0; i < length; i++)
    {
      tree t = va_arg (p, tree);
      if (t && !TYPE_P (t))
	side_effects |= TREE_SIDE_EFFECTS (t);
      TREE_OPERAND (ret, i) = t;
    }

  TREE_SIDE_EFFECTS (ret) |= side_effects;

  va_end (p);
  return ret;
}

/* BRIG regs are untyped, but GENERIC is not.  We need to add implicit casts
   when an instruction treats an operand with a type different from the
   created reg var's type, in order to select the correct instruction type
   later on.  This function creates the necessary reinterpreting type cast
   from the source variable to the destination type.  If no cast is needed
   (the types match), SOURCE is returned directly.

   In case of mismatched type sizes, casting
   - to a narrower type clips the upper bits, and
   - to a wider type zero extends the source value.  */
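/* For illustration (hypothetical operands): converting an f16 value held
   in a 32-bit register variable to a 32-bit destination type (say float)
   goes through build_reinterpret_to_uint (f16 -> u16), a widening convert
   (u16 -> u32), and finally a VIEW_CONVERT_EXPR to the destination type;
   equal-sized types take the single VIEW_CONVERT_EXPR path below.  */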

tree
build_resize_convert_view (tree destination_type, tree source)
{
  gcc_assert (source && destination_type && TREE_TYPE (source) != NULL_TREE
	      && destination_type != NULL_TREE);

  tree source_type = TREE_TYPE (source);
  if (TREE_CODE (source) == CALL_EXPR)
    {
      tree func_decl = TREE_OPERAND (TREE_OPERAND (source, 1), 0);
      source_type = TREE_TYPE (TREE_TYPE (func_decl));
    }

  if (destination_type == source_type)
    return source;

  size_t src_size = int_size_in_bytes (source_type);
  size_t dst_size = int_size_in_bytes (destination_type);
  if (src_size == dst_size)
    return build1 (VIEW_CONVERT_EXPR, destination_type, source);
  else /* src_size != dst_size  */
    {
      /* The src_size can be smaller at least with f16 scalars which are
	 stored to 32b register variables.  First convert to an equivalent
	 size unsigned type, then extend to an unsigned type of the
	 target width, after which VIEW_CONVERT_EXPR can be used to
	 force to the target type.  */
      tree resized = convert (get_scalar_unsigned_int_type (destination_type),
			      build_reinterpret_to_uint (source));
      gcc_assert ((size_t) int_size_in_bytes (TREE_TYPE (resized)) == dst_size);
      return build_resize_convert_view (destination_type, resized);
    }
}

/* Reinterprets SOURCE as a scalar unsigned int with the size
   corresponding to the original.  */

tree
build_reinterpret_to_uint (tree source)
{
  tree src_type = TREE_TYPE (source);
  if (INTEGRAL_TYPE_P (src_type) && TYPE_UNSIGNED (src_type))
    return source;
  tree dest_type = get_scalar_unsigned_int_type (src_type);
  return build1 (VIEW_CONVERT_EXPR, dest_type, source);
}

/* Returns the finished brig_function for the given generic FUNC_DECL,
   or NULL, if not found.  */

brig_function *
brig_to_generic::get_finished_function (tree func_decl)
{
  std::string func_name
    = identifier_to_locale (IDENTIFIER_POINTER (DECL_NAME (func_decl)));
  std::map<std::string, brig_function *>::iterator i
    = m_finished_functions.find (func_name);
  if (i != m_finished_functions.end ())
    return (*i).second;
  else
    return NULL;
}

/* Adds a group variable to the correct bookkeeping structure depending
   on its scope.  */

void
brig_to_generic::add_group_variable (const std::string &name, size_t size,
				     size_t alignment, bool function_scope)
{
  /* Module and function scope group region variables are an experimental
     feature.  We implement module scope group variables with separate
     bookkeeping inside brig_to_generic which is populated in the 'analyze()'
     prepass.  This is to ensure we know the group segment offsets when
     processing the functions that might refer to them.  */
  if (!function_scope)
    {
      if (!m_module_group_variables.has_variable (name))
	m_module_group_variables.add (name, size, alignment);
      return;
    }

  if (!m_cf->m_local_group_variables.has_variable (name))
    m_cf->m_local_group_variables.add (name, size, alignment);
}

/* Finalizes the currently handled function.  Should be called before
   setting a new function.  */

void
brig_to_generic::finish_function ()
{
  if (m_cf == NULL || m_cf->m_func_decl == NULL_TREE)
    {
      /* It can be a finished function declaration; in that case we
	 don't have an m_func_decl.  */
      m_cf = NULL;
      return;
    }

  if (!m_cf->m_is_kernel)
    {
      tree bind_expr = m_cf->m_current_bind_expr;
      tree stmts = BIND_EXPR_BODY (bind_expr);
      m_cf->finish ();
      m_cf->emit_metadata (stmts);
      dump_function (m_dump_file, m_cf);
    }
  else
    /* Emit the kernel only at the very end so we can analyze the total
       group and private memory usage.  */
    m_kernels.push_back (m_cf);

  pop_cfun ();

  m_finished_functions[m_cf->m_name] = m_cf;
  m_cf = NULL;
}

/* Initializes a new currently handled function.  */

void
brig_to_generic::start_function (tree f)
{
  if (DECL_STRUCT_FUNCTION (f) == NULL)
    push_struct_function (f);
  else
    push_cfun (DECL_STRUCT_FUNCTION (f));

  m_cf->m_func_decl = f;
}

/* Appends a new variable to the current kernel's private segment.  */

void
brig_to_generic::append_private_variable (const std::string &name,
					  size_t size, size_t alignment)
{
  /* We need to take care of two cases of alignment with private
     variables because of the layout where the same variable for
     each work-item is laid out in successive addresses.

     1) Ensure the first work-item's variable is at an aligned
     offset:  */
  size_t align_padding = m_next_private_offset % alignment == 0 ?
    0 : (alignment - m_next_private_offset % alignment);

  /* 2) Each successive per-work-item copy should be aligned.
     If the variable has wider alignment than size then we need
     to add extra padding to ensure it.  The padding must be
     included in the size to allow the per-work-item offset computation
     to find its own aligned copy.  */
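  /* For example (hypothetical values): with m_next_private_offset == 10,
     size == 2 and alignment == 8, align_padding == 6 so the first copy
     starts at offset 16, and per_var_padding == 6 so each work-item's
     copy occupies a full 8-byte slot.  */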

  size_t per_var_padding = size % alignment == 0 ?
    0 : (alignment - size % alignment);
  m_private_data_sizes[name] = size + per_var_padding;

  m_next_private_offset += align_padding;
  m_private_offsets[name] = m_next_private_offset;
  m_next_private_offset += size + per_var_padding;
}

size_t
brig_to_generic::private_variable_segment_offset
  (const std::string &name) const
{
  var_offset_table::const_iterator i = m_private_offsets.find (name);
  gcc_assert (i != m_private_offsets.end ());
  return (*i).second;
}

bool
brig_to_generic::has_private_variable (const std::string &name) const
{
  std::map<std::string, size_t>::const_iterator i
    = m_private_data_sizes.find (name);
  return i != m_private_data_sizes.end ();
}

size_t
brig_to_generic::private_variable_size (const std::string &name) const
{
  std::map<std::string, size_t>::const_iterator i
    = m_private_data_sizes.find (name);
  gcc_assert (i != m_private_data_sizes.end ());
  return (*i).second;
}


/* The size of the private segment required by a single work-item executing
   the currently processed kernel.  */

size_t
brig_to_generic::private_segment_size () const
{
  return m_next_private_offset;
}

/* Cached builtins indexed by name.  */

typedef std::map<std::string, tree> builtin_index;
builtin_index builtin_cache_;

/* Build a call to a builtin function.  PDECL is the builtin function to
   call, NARGS is the number of input arguments and RETTYPE the builtin
   function's return value type.  ... is the list of call arguments, each
   given as a type followed by the value.  */
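/* A minimal usage sketch (hypothetical callee and operand):

     tree call = call_builtin (fabsf_decl, 1, float_type_node,
			       float_type_node, operand);

   i.e. the return type comes first, followed by a (type, value) pair for
   each argument.  */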

tree
call_builtin (tree pdecl, int nargs, tree rettype, ...)
{
  if (rettype == error_mark_node)
    return error_mark_node;

  tree *types = new tree[nargs];
  tree *args = new tree[nargs];

  va_list ap;
  va_start (ap, rettype);
  for (int i = 0; i < nargs; ++i)
    {
      types[i] = va_arg (ap, tree);
      tree arg = va_arg (ap, tree);
      args[i] = build_resize_convert_view (types[i], arg);
      if (types[i] == error_mark_node || args[i] == error_mark_node)
	{
	  delete[] types;
	  delete[] args;
	  va_end (ap);
	  return error_mark_node;
	}
    }
  va_end (ap);

  tree fnptr = build_fold_addr_expr (pdecl);

  tree ret = build_call_array (rettype, fnptr, nargs, args);

  delete[] types;
  delete[] args;

  return ret;
}

/* Generate all global declarations.  Should be called after the last
   BRIG has been fed in.  */

void
brig_to_generic::write_globals ()
{
  /* Replace calls to declarations with calls to definitions.  Otherwise
     inlining will fail to find the definition to inline from.  */
  for (size_t i = 0; i < m_decl_call.size (); ++i)
    {
      tree decl_call = m_decl_call.at (i);
      tree func_decl = get_callee_fndecl (decl_call);
      brig_function *brig_function = get_finished_function (func_decl);

      if (brig_function && brig_function->m_func_decl
	  && DECL_EXTERNAL (brig_function->m_func_decl) == 0
	  && brig_function->m_func_decl != func_decl)
	{
	  decl_call = CALL_EXPR_FN (decl_call);
	  STRIP_NOPS (decl_call);
	  if (TREE_CODE (decl_call) == ADDR_EXPR
	      && TREE_CODE (TREE_OPERAND (decl_call, 0)) == FUNCTION_DECL)
	    TREE_OPERAND (decl_call, 0) = brig_function->m_func_decl;
	}
    }

  for (std::map<std::string, brig_function *>::iterator i
	 = m_finished_functions.begin (), e = m_finished_functions.end ();
       i != e; ++i)
    {
      brig_function *brig_f = (*i).second;
      if (brig_f->m_is_kernel)
	continue;

      /* Finalize only at this point to allow the cgraph analysis to
	 see definitions for calls to functions defined later.  */
      gimplify_function_tree (brig_f->m_func_decl);
      cgraph_node::finalize_function (brig_f->m_func_decl, true);
    }

  /* Now that the whole BRIG module has been processed, build a launcher
     and a metadata section for each built kernel.  */
  for (size_t i = 0; i < m_kernels.size (); ++i)
    {
      brig_function *f = m_kernels[i];

      /* Finish kernels now that we know the call graphs and their barrier
	 usage.  */
      f->finish_kernel ();

      dump_function (m_dump_file, f);
      gimplify_function_tree (f->m_func_decl);
      cgraph_node::finalize_function (f->m_func_decl, true);

      f->m_descriptor.is_kernel = 1;
      /* TODO: analyze the kernel's actual private and group segment usage
	 using the call graph.  For now the memory size is overly
	 pessimistic in case of multiple kernels in the same module.  */
      f->m_descriptor.group_segment_size = m_total_group_segment_usage;
      f->m_descriptor.private_segment_size = private_segment_size ();

      /* The kernarg size is rounded up to a multiple of 16 according to
	 the PRM specs.  */
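      /* For example (hypothetical size): a 24-byte kernarg segment is
	 rounded up to 32 bytes.  */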
      f->m_descriptor.kernarg_segment_size = f->m_next_kernarg_offset;
      if (f->m_descriptor.kernarg_segment_size % 16 > 0)
	f->m_descriptor.kernarg_segment_size
	  += 16 - f->m_next_kernarg_offset % 16;
      f->m_descriptor.kernarg_max_align = f->m_kernarg_max_align;

      tree launcher = f->emit_launcher_and_metadata ();

      append_global (launcher);

      if (m_dump_file)
	{
	  std::string kern_name = f->m_name.substr (1);
	  fprintf (m_dump_file, "\n;; Function %s", kern_name.c_str ());
	  fprintf (m_dump_file, "\n;; enabled by -%s\n\n",
		   dump_flag_name (TDI_original));
	  print_generic_decl (m_dump_file, launcher, TDF_NONE);
	  print_generic_expr (m_dump_file, DECL_SAVED_TREE (launcher),
			      TDF_NONE);
	  fprintf (m_dump_file, "\n");
	}

      gimplify_function_tree (launcher);
      cgraph_node::finalize_function (launcher, true);
      pop_cfun ();
    }

  int no_globals = list_length (m_globals);
  tree *vec = new tree[no_globals];

  int i = 0;
  tree global = m_globals;
  while (global)
    {
      vec[i] = global;
      ++i;
      global = TREE_CHAIN (global);
    }

  wrapup_global_declarations (vec, no_globals);

  delete[] vec;
}

/* Returns a type with unsigned int elements corresponding to the
   size and element count of ORIGINAL_TYPE.  */
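/* For example, a vector of four floats maps to a vector of four 32-bit
   unsigned ints, and a scalar double maps to a 64-bit unsigned int
   type.  */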

tree
get_unsigned_int_type (tree original_type)
{
  if (VECTOR_TYPE_P (original_type))
    {
      size_t esize
	= int_size_in_bytes (TREE_TYPE (original_type)) * BITS_PER_UNIT;
      poly_uint64 ecount = TYPE_VECTOR_SUBPARTS (original_type);
      return build_vector_type (build_nonstandard_integer_type (esize, true),
				ecount);
    }
  else
    return build_nonstandard_integer_type (int_size_in_bytes (original_type)
					   * BITS_PER_UNIT,
					   true);
}

/* Returns a scalar unsigned int type corresponding to the size of
   ORIGINAL_TYPE.  */

tree
get_scalar_unsigned_int_type (tree original_type)
{
  return build_nonstandard_integer_type (int_size_in_bytes (original_type)
					 * BITS_PER_UNIT, true);
}

/* Set the declaration externally visible so it won't get removed by
   whole program optimizations.  */

void
set_externally_visible (tree decl)
{
  if (!lookup_attribute ("externally_visible", DECL_ATTRIBUTES (decl)))
    DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("externally_visible"),
					NULL, DECL_ATTRIBUTES (decl));
}

void
set_inline (tree decl)
{
  if (!lookup_attribute ("inline", DECL_ATTRIBUTES (decl)))
    DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("inline"),
					NULL, DECL_ATTRIBUTES (decl));
}

void
dump_function (FILE *dump_file, brig_function *f)
{
  /* Dump the BRIG-specific tree IR.  */
  if (dump_file)
    {
      fprintf (dump_file, "\n;; Function %s", f->m_name.c_str ());
      fprintf (dump_file, "\n;; enabled by -%s\n\n",
	       dump_flag_name (TDI_original));
      print_generic_decl (dump_file, f->m_func_decl, TDF_NONE);
      print_generic_expr (dump_file, f->m_current_bind_expr, TDF_NONE);
      fprintf (dump_file, "\n");
    }
}

/* Records use of the BRIG_REG as a TYPE in the current function.  */

void
brig_to_generic::add_reg_used_as_type (const BrigOperandRegister &brig_reg,
				       tree type)
{
  gcc_assert (m_cf);
  reg_use_info &info
    = m_fn_regs_use_index[m_cf->m_name][gccbrig_hsa_reg_id (brig_reg)];

  if (info.m_type_refs_lookup.count (type))
    info.m_type_refs[info.m_type_refs_lookup[type]].second++;
  else
    {
      info.m_type_refs.push_back (std::make_pair (type, 1));
      info.m_type_refs_lookup[type] = info.m_type_refs.size () - 1;
    }
}