1 /* brig-function.cc -- declaration of brig_function class.
2    Copyright (C) 2016-2020 Free Software Foundation, Inc.
3    Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4    for General Processor Tech.
5 
6    This file is part of GCC.
7 
8    GCC is free software; you can redistribute it and/or modify it under
9    the terms of the GNU General Public License as published by the Free
10    Software Foundation; either version 3, or (at your option) any later
11    version.
12 
13    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14    WARRANTY; without even the implied warranty of MERCHANTABILITY or
15    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16    for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with GCC; see the file COPYING3.  If not see
20    <http://www.gnu.org/licenses/>.  */
21 
22 #include <sstream>
23 #include <iomanip>
24 
25 #include "brig-function.h"
26 #include "stringpool.h"
27 #include "tree-iterator.h"
28 #include "toplev.h"
29 #include "gimplify.h"
30 #include "gimple-expr.h"
31 #include "print-tree.h"
32 #include "hsa-brig-format.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-code-entry-handler.h"
36 #include "brig-machine.h"
37 #include "brig-util.h"
38 #include "phsa.h"
39 #include "tree-pretty-print.h"
40 #include "dumpfile.h"
41 #include "profile-count.h"
42 #include "tree-cfg.h"
43 #include "errors.h"
44 #include "function.h"
45 #include "brig-to-generic.h"
46 #include "brig-builtins.h"
47 #include "options.h"
48 #include "fold-const.h"
49 #include "target.h"
50 #include "builtins.h"
51 
/* Definition of the static builtin index.  The brig_function constructor
   fills it once, while it is still empty, with one entry per
   DEF_HSAIL_BUILTIN expansion keyed by the (HSAIL opcode, HSAIL type)
   pair.  */
brig_function::builtin_map brig_function::s_custom_builtins;
53 
brig_function(const BrigDirectiveExecutable * exec,brig_to_generic * parent)54 brig_function::brig_function (const BrigDirectiveExecutable *exec,
55 			      brig_to_generic *parent)
56   : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""),
57     m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE),
58     m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
59     m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
60     m_next_kernarg_offset (0), m_kernarg_max_align (0),
61     m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
62     m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
63     m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
64     m_generating_arg_block (false), m_parent (parent)
65 {
66   memset (m_regs, 0,
67 	  BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
68   memset (&m_descriptor, 0, sizeof (phsa_descriptor));
69 
70   if (s_custom_builtins.size () > 0) return;
71 
72   /* Populate the builtin index.  */
73 #undef DEF_HSAIL_ATOMIC_BUILTIN
74 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
75 #undef DEF_HSAIL_INTR_BUILTIN
76 #undef DEF_HSAIL_SAT_BUILTIN
77 #undef DEF_HSAIL_BUILTIN
78 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
79   s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)]		\
80     = builtin_decl_explicit (ENUM);
81 
82 #include "brig-builtins.def"
83 }
84 
~brig_function()85 brig_function::~brig_function ()
86 {
87   for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i)
88     {
89       if (m_regs[i] != NULL)
90 	{
91 	  delete m_regs[i];
92 	  m_regs[i] = NULL;
93 	}
94     }
95 }
96 
97 /* Returns a GENERIC label with the given name in the given function.
98    Creates it, if not yet found.  */
99 
100 tree
label(const std::string & name)101 brig_function::label (const std::string &name)
102 {
103   label_index::const_iterator i = m_label_index.find (name);
104   if (i == m_label_index.end ())
105     {
106       tree name_identifier
107 	= get_identifier_with_length (name.c_str (), name.size ());
108 
109       tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
110 				    name_identifier, void_type_node);
111 
112       DECL_CONTEXT (label_decl) = m_func_decl;
113       DECL_ARTIFICIAL (label_decl) = 0;
114 
115       m_label_index[name] = label_decl;
116       return label_decl;
117     }
118   else
119     return (*i).second;
120 }
121 
122 /* Record an argument variable for later use.  This includes both local
123    variables inside arg blocks and incoming function arguments.  */
124 
125 void
add_arg_variable(const BrigDirectiveVariable * brigVar,tree treeDecl)126 brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar,
127 				 tree treeDecl)
128 {
129   m_arg_variables[brigVar] = treeDecl;
130 }
131 
132 tree
arg_variable(const BrigDirectiveVariable * var) const133 brig_function::arg_variable (const BrigDirectiveVariable *var) const
134 {
135   variable_index::const_iterator i = m_arg_variables.find (var);
136   if (i == m_arg_variables.end ())
137     return NULL_TREE;
138   else
139     return (*i).second;
140 }
141 
142 /* Appends a new kernel argument descriptor for the current kernel's
143    arg space.  */
144 
145 void
append_kernel_arg(const BrigDirectiveVariable * var,size_t size,size_t alignment)146 brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size,
147 				  size_t alignment)
148 {
149   gcc_assert (m_func_decl != NULL_TREE);
150   gcc_assert (m_is_kernel);
151 
152   size_t align_padding = m_next_kernarg_offset % alignment == 0 ?
153     0 : (alignment - m_next_kernarg_offset % alignment);
154   m_next_kernarg_offset += align_padding;
155   m_kernarg_offsets[var] = m_next_kernarg_offset;
156   m_next_kernarg_offset += size;
157 
158   m_kernarg_max_align
159     = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align;
160 }
161 
162 size_t
kernel_arg_offset(const BrigDirectiveVariable * var) const163 brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const
164 {
165   var_offset_table::const_iterator i = m_kernarg_offsets.find (var);
166   gcc_assert (i != m_kernarg_offsets.end ());
167   return (*i).second;
168 }
169 
/* Add work-item ID variables to the beginning of the kernel function
   which can be used for address computation as kernel dispatch packet
   instructions can be expanded to GENERIC nodes referring to them.

   For each of the three dimensions (suffix x, y and z) the following
   local variables are declared and initialized, in this order:
   __local_<d>, __cur_wg_size_<d>, __workgroupid_<d>, __workgroupsize_<d>,
   __gridsize_<d>, __abs_id_base_<d> and __abs_id_<d>.  The initializers
   are appended to the current bind expression with append_statement.  */

void
brig_function::add_id_variables ()
{
  tree bind_expr = m_current_bind_expr;
  tree stmts = BIND_EXPR_BODY (bind_expr);

  /* Initialize the WG limits and local ids.  The start of the statement
     list is remembered as the kernel entry; convert_to_wg_function and
     create_alloca_frame use it as their insertion point.  */
  m_kernel_entry = tsi_start (stmts);

  for (int i = 0; i < 3; ++i)
    {
      /* 'x', 'y' or 'z' depending on the dimension.  */
      char dim_char = (char) ((int) 'x' + i);

      /* The work-item id builtin is called with a 32b return type; its
	 result is converted to the type of the variable, which is
	 declared with long_long_integer_type_node.  */
      m_local_id_vars[i]
	= add_local_variable (std::string ("__local_") + dim_char,
			      long_long_integer_type_node);

      tree workitemid_call
	= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2,
			uint32_type_node, uint32_type_node,
			build_int_cst (uint32_type_node, i), ptr_type_node,
			m_context_arg);

      tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]),
			     m_local_id_vars[i],
			     convert (TREE_TYPE (m_local_id_vars[i]),
				      workitemid_call));

      append_statement (id_init);

      /* The current work-group size; add_wi_loop uses it as the upper
	 bound of the work-item loop.  */
      m_cur_wg_size_vars[i]
	= add_local_variable (std::string ("__cur_wg_size_") + dim_char,
			      long_long_integer_type_node);

      /* With flag_assume_phsa the value goes through
	 expand_or_call_builtin, otherwise the builtin is called with
	 the context argument.  */
      tree cwgz_call;
      if (flag_assume_phsa)
	{
	  tree_stl_vec operands
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
	  cwgz_call
	    = expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE,
				      BRIG_TYPE_U32, uint32_type_node,
				      operands);
	}
      else
	cwgz_call = call_builtin
	  (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE),
	   2, uint32_type_node, uint32_type_node,
	   build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg);

      tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]),
				m_cur_wg_size_vars[i],
				convert (TREE_TYPE (m_cur_wg_size_vars[i]),
					 cwgz_call));

      append_statement (limit_init);

      /* The work-group id, kept as a 32b variable.  */
      m_wg_id_vars[i]
	= add_local_variable (std::string ("__workgroupid_") + dim_char,
			      uint32_type_node);

      tree wgid_call;
      if (flag_assume_phsa)
	{
	  tree_stl_vec operands
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
	  wgid_call
	    = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID, BRIG_TYPE_U32,
				      uint32_type_node, operands);
	}
      else
	wgid_call
	  = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID),
			  2, uint32_type_node, uint32_type_node,
			  build_int_cst (uint32_type_node, i), ptr_type_node,
			  m_context_arg);

      tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]),
			       m_wg_id_vars[i], wgid_call);

      append_statement (wgid_init);

      /* The work-group size, kept as a 32b variable.  */
      m_wg_size_vars[i]
	= add_local_variable (std::string ("__workgroupsize_") + dim_char,
			      uint32_type_node);

      tree wgsize_call;
      if (flag_assume_phsa)
	{
	  tree_stl_vec operands
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
	  wgsize_call
	    = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
				      uint32_type_node, operands);
	}
      else
	wgsize_call
	  = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE),
			  2, uint32_type_node, uint32_type_node,
			  build_int_cst (uint32_type_node, i), ptr_type_node,
			  m_context_arg);

      tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]),
				 m_wg_size_vars[i], wgsize_call);

      append_statement (wgsize_init);

      /* The grid size.  Unlike the values above, it is always obtained
	 by calling the builtin, regardless of flag_assume_phsa.  */
      m_grid_size_vars[i]
	= add_local_variable (std::string ("__gridsize_") + dim_char,
			      uint32_type_node);

      tree gridsize_call
	= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2,
			uint32_type_node, uint32_type_node,
			build_int_cst (uint32_type_node, i), ptr_type_node,
			m_context_arg);

      tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]),
				   m_grid_size_vars[i], gridsize_call);

      append_statement (gridsize_init);

      m_abs_id_base_vars[i]
	= add_local_variable (std::string ("__abs_id_base_") + dim_char,
			      long_long_integer_type_node);

      m_abs_id_vars[i]
	= add_local_variable (std::string ("__abs_id_") + dim_char,
			      long_long_integer_type_node);

      /* abs id base = work-group id * work-group size, and
	 abs id = abs id base + local id, both computed in the long long
	 type.  */
      tree abs_id_base
	= build2 (MULT_EXPR, long_long_integer_type_node,
		  convert (long_long_integer_type_node, m_wg_id_vars[i]),
		  convert (long_long_integer_type_node, m_wg_size_vars[i]));
      tree abs_id
	= build2 (PLUS_EXPR, long_long_integer_type_node, abs_id_base,
		  convert (long_long_integer_type_node, m_local_id_vars[i]));

      tree abs_id_base_init
	= build2 (MODIFY_EXPR, TREE_TYPE (m_abs_id_base_vars[i]),
		  m_abs_id_base_vars[i], abs_id_base);
      append_statement (abs_id_base_init);

      tree abs_id_init = build2 (MODIFY_EXPR,
				 TREE_TYPE (m_abs_id_vars[i]),
				 m_abs_id_vars[i], abs_id);
      append_statement (abs_id_init);
    }
}
325 
326 /* Creates a new local variable with the given NAME and given GENERIC
327    TYPE.  */
328 
329 tree
add_local_variable(std::string name,tree type)330 brig_function::add_local_variable (std::string name, tree type)
331 {
332   tree name_identifier
333     = get_identifier_with_length (name.c_str (), name.size ());
334   tree variable
335     = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type);
336 
337   DECL_NONLOCAL (variable) = 0;
338   TREE_ADDRESSABLE (variable) = 0;
339   TREE_STATIC (variable) = 0;
340   TREE_USED (variable) = 1;
341   DECL_ARTIFICIAL (variable) = 0;
342 
343   tree bind_expr = DECL_SAVED_TREE (m_func_decl);
344 
345   DECL_CONTEXT (variable) = m_func_decl;
346 
347   DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr);
348   BIND_EXPR_VARS (bind_expr) = variable;
349   return variable;
350 }
351 
352 /* Return tree type for an HSA register.
353 
354    The tree type can be anything (scalar, vector, int, float, etc.)
355    but its size is guaranteed to match the HSA register size.
356 
357    HSA registers are untyped but we select a type based on their use
358    to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
359    to occur when use or def reaches over current BB).  */
360 
361 tree
get_tree_type_for_hsa_reg(const BrigOperandRegister * reg) const362 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
363 {
364   size_t reg_size = gccbrig_reg_size (reg);
365 
366   /* The default type.  */
367   tree type = build_nonstandard_integer_type (reg_size, true);
368 
369   if (m_parent->m_fn_regs_use_index.count (m_name) == 0)
370     return type;
371 
372   const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name];
373   size_t reg_id = gccbrig_hsa_reg_id (*reg);
374   if (index.count (reg_id) == 0)
375     return type;
376 
377   const reg_use_info &info = index.find (reg_id)->second;
378   std::vector<std::pair<tree, size_t> >::const_iterator it
379     = info.m_type_refs.begin ();
380   std::vector<std::pair<tree, size_t> >::const_iterator it_end
381     = info.m_type_refs.end ();
382   size_t max_refs_as_type_count = 0;
383   for (; it != it_end; it++)
384     {
385       size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT;
386       if (type_bit_size != reg_size) continue;
387       if (it->second > max_refs_as_type_count)
388 	{
389 	  type = it->first;
390 	  max_refs_as_type_count = it->second;
391 	}
392     }
393 
394   return type;
395 }
396 
397 /* Returns a DECL_VAR for the given HSAIL operand register.
398    If it has not been created yet for the function being generated,
399    creates it as a type determined by analysis phase.  */
400 
401 tree
get_m_var_declfor_reg(const BrigOperandRegister * reg)402 brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
403 {
404   size_t offset = gccbrig_hsa_reg_id (*reg);
405 
406   reg_decl_index_entry *regEntry = m_regs[offset];
407   if (regEntry == NULL)
408     {
409       size_t reg_size = gccbrig_reg_size (reg);
410       tree type;
411       if (reg_size > 1)
412 	type = get_tree_type_for_hsa_reg (reg);
413       else
414 	type = boolean_type_node;
415 
416       /* Drop the const qualifier so we do not end up with a read only
417 	 register variable which cannot be written to later.  */
418       tree nonconst_type = build_type_variant (type, false, false);
419 
420       regEntry = new reg_decl_index_entry;
421 
422       regEntry->m_var_decl
423 	= add_local_variable (gccbrig_reg_name (reg), nonconst_type);
424       m_regs[offset] = regEntry;
425     }
426   return regEntry->m_var_decl;
427 }
428 
429 /* Builds a work-item do..while loop for a single DIM.  HEADER_ENTRY is
430    a statement after which the iteration variables should be initialized and
431    the loop body starts.  BRANCH_AFTER is the statement after which the loop
432    predicate check and the back edge goto will be appended.  */
433 
434 void
add_wi_loop(int dim,tree_stmt_iterator * header_entry,tree_stmt_iterator * branch_after)435 brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry,
436 			    tree_stmt_iterator *branch_after)
437 {
438   tree ivar = m_local_id_vars[dim];
439   tree abs_id_base_var = m_abs_id_base_vars[dim];
440   tree abs_id_var = m_abs_id_vars[dim];
441   tree ivar_max = m_cur_wg_size_vars[dim];
442   tree_stmt_iterator entry = *header_entry;
443 
444   /* TODO: this is not a parallel loop as we share the "register variables"
445      across work-items.  Should create a copy of them per WI instance.  That
446      is, declare temporaries for new definitions inside the loop body, not at
447      function scope.  */
448 
449   tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar,
450 			   build_zero_cst (TREE_TYPE (ivar)));
451   tsi_link_after (&entry, ivar_init, TSI_NEW_STMT);
452 
453   tree abs_id_var_init = build2 (MODIFY_EXPR, TREE_TYPE (abs_id_var),
454 				 abs_id_var,
455 				 convert (TREE_TYPE (abs_id_var),
456 					  abs_id_base_var));
457   tsi_link_after (&entry, abs_id_var_init, TSI_NEW_STMT);
458 
459   tree loop_body_label
460     = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim));
461   tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label);
462 
463   tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT);
464 
465   if (m_has_unexpanded_dp_builtins)
466     {
467       if (!flag_assume_phsa)
468 	{
469 	  tree id_set_builtin
470 	    = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID);
471 	  /* Set the local ID to the current wi-loop iteration variable value
472 	     to ensure the builtins see the correct values.  */
473 	  tree id_set_call
474 	    = call_builtin (id_set_builtin, 3,
475 			    void_type_node, uint32_type_node,
476 			    build_int_cst (uint32_type_node, dim),
477 			    uint32_type_node, convert (uint32_type_node, ivar),
478 			    ptr_type_node, m_context_arg);
479 	  tsi_link_after (&entry, id_set_call, TSI_NEW_STMT);
480 	}
481       else
482 	{
483 	  tree ptr_type = build_pointer_type (uint32_type_node);
484 	  tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
485 			     build_int_cst (ptr_type, dim * 4));
486 	  tree assign = build2 (MODIFY_EXPR, uint32_type_node, ctx,
487 				convert (uint32_type_node, ivar));
488 
489 	  tsi_link_after (&entry, assign, TSI_NEW_STMT);
490 	}
491     }
492 
493   /* Increment the WI iteration variable.  */
494   tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar,
495 		      build_one_cst (TREE_TYPE (ivar)));
496 
497   tsi_link_after (branch_after, incr, TSI_NEW_STMT);
498 
499   /* ...and the abs id variable.  */
500   tree abs_id_incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (abs_id_var),
501 			     abs_id_var,
502 			     build_one_cst (TREE_TYPE (abs_id_var)));
503 
504   tsi_link_after (branch_after, abs_id_incr, TSI_NEW_STMT);
505 
506   /* Append the predicate check with the back edge goto.  */
507   tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max);
508   tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label);
509   tree if_stmt
510     = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE);
511   tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT);
512 }
513 
514 /* Recursively analyzes the function and its callees for barrier usage.  */
515 
516 void
analyze_calls()517 brig_function::analyze_calls ()
518 {
519   if (m_calls_analyzed)
520     return;
521 
522   /* Set this early to not get stuck in case of recursive call graphs.
523      This is safe because if the function calls itself, either the function
524      has barrier calls which implies a call to a function with barrier calls,
525      or it doesn't in which case the result depends on the later called
526      functions.  */
527   m_calls_analyzed = true;
528 
529   for (size_t i = 0; i < m_called_functions.size (); ++i)
530     {
531       tree f = m_called_functions[i];
532       brig_function *called_f = m_parent->get_finished_function (f);
533       if (called_f == NULL)
534 	{
535 	  /* Unfinished function (only declaration within the set of BRIGs)
536 	     found.  Cannot finish the CG analysis.  Have to assume it does have
537 	     a barrier for safety.  */
538 	  m_has_function_calls_with_barriers = true;
539 	  m_has_unexpanded_dp_builtins = true;
540 	  break;
541 	}
542       called_f->analyze_calls ();
543       /* We can assume m_has_barriers has been correctly set during the
544 	 construction of the function decl.  No need to reanalyze it.  */
545       m_has_function_calls_with_barriers |= called_f->m_has_barriers;
546 
547       /* If the function or any of its called functions has dispatch
548 	 packet builtin calls that require the local id, we need to
549 	 set the local id to the context in the work item loop before
550 	 the functions are called.  If we analyze the opposite, these
551 	 function calls can be omitted.  */
552       m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins;
553     }
554 }
555 
556 /* Tries to convert the current kernel to a work-group function that executes
557    all work-items using loops.  Returns true in case the conversion was
558    successful.  */
559 
560 bool
convert_to_wg_function()561 brig_function::convert_to_wg_function ()
562 {
563   if (!m_calls_analyzed)
564     analyze_calls ();
565 
566   if (m_has_barriers || m_has_function_calls_with_barriers)
567     return false;
568 
569   /* The most trivial case: No barriers at all in the kernel.
570      We can create one big work-item loop around the whole kernel.  */
571   tree bind_expr = m_current_bind_expr;
572   tree stmts = BIND_EXPR_BODY (bind_expr);
573 
574   for (int i = 0; i < 3; ++i)
575     {
576       /* The previous loop has added a new label to the end of the function,
577 	 the next level loop should wrap around it also.  */
578       tree_stmt_iterator function_exit = tsi_last (stmts);
579       add_wi_loop (i, &m_kernel_entry, &function_exit);
580     }
581 
582   m_is_wg_function = true;
583   return false;
584 }
585 
586 /* Emits a kernel description to a special ELF section so it can be
587    utilized by an HSA runtime implementation.  The assembly block
588    must be emitted to a statement list of an function, which is given
589    as an argument.  Returns the assembly block used to emit the section. */
590 
591 tree
emit_metadata(tree stmt_list)592 brig_function::emit_metadata (tree stmt_list)
593 {
594   /* Emit an ELF section via an assembly directive that generates a special
595      ELF section for each kernel that contains raw bytes of a descriptor
596      object.  This is pretty disgusting, but life is never perfect ;)  */
597 
598   /* Use the original kernel name without the '_' prefix in the section name.  */
599   std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name;
600 
601   std::ostringstream strstr;
602   strstr << std::endl
603 	 << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name
604 	 << std::endl
605 	 << "\t.p2align 1, 1, 1" << std::endl
606 	 << "\t.byte ";
607 
608   for (size_t i = 0; i < sizeof (phsa_descriptor); ++i)
609     {
610       strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex
611 	     << (unsigned) *((unsigned char *) &m_descriptor + i);
612       if (i + 1 < sizeof (phsa_descriptor))
613 	strstr << ", ";
614     }
615 
616   strstr << std::endl << ".popsection" << std::endl << std::endl;
617 
618   tree metadata_asm
619     = build_stmt (ASM_EXPR,
620 		  build_string (strstr.str ().size (), strstr.str ().c_str ()),
621 		  NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE);
622 
623   append_to_statement_list_force (metadata_asm, &stmt_list);
624   return metadata_asm;
625 }
626 
627 /* Emits the kernel launcher function.  Also emits the metadata section
628    creation statements in it.
629 
630    The launcher function calls the device-side runtime
631    that runs the kernel for all work-items.  In C:
632 
633    void KernelName (void* context, void* group_base_addr)
634    {
635      __hsail_launch_kernel (_KernelName, context, group_base_addr);
636    }
637 
638    or, in case of a successful conversion to a work-group function:
639 
640    void KernelName (void* context, void* group_base_addr)
641    {
642      __hsail_launch_wg_function (_KernelName, context, group_base_addr);
643    }
644 
645    The user/host sees this function as the kernel to call from the
646    outside.  The actual kernel generated from HSAIL was named _KernelName.
647 */
648 
649 tree
emit_launcher_and_metadata()650 brig_function::emit_launcher_and_metadata ()
651 {
652   /* The original kernel name without the '_' prefix.  */
653   std::string kern_name = m_name.substr (1);
654 
655   tree name_identifier
656     = get_identifier_with_length (kern_name.c_str (), kern_name.size ());
657 
658   tree restrict_void_ptr
659     = build_qualified_type (build_pointer_type (void_type_node),
660 			    TYPE_QUAL_RESTRICT);
661   tree restrict_char_ptr
662     = build_qualified_type (build_pointer_type (char_type_node),
663 			    TYPE_QUAL_RESTRICT);
664   tree launcher
665     = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
666 		  build_function_type_list (void_type_node, restrict_void_ptr,
667 					    restrict_char_ptr, NULL_TREE));
668 
669   TREE_USED (launcher) = 1;
670   DECL_ARTIFICIAL (launcher) = 1;
671 
672   tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
673 				 get_identifier ("__context"),
674 				 restrict_void_ptr);
675 
676   DECL_ARGUMENTS (launcher) = context_arg;
677   DECL_ARG_TYPE (context_arg) = restrict_void_ptr;
678   DECL_CONTEXT (context_arg) = launcher;
679   TREE_USED (context_arg) = 1;
680   DECL_ARTIFICIAL (context_arg) = 1;
681 
682   tree group_base_addr_arg
683     = build_decl (UNKNOWN_LOCATION, PARM_DECL,
684 		  get_identifier ("__group_base_addr"), restrict_char_ptr);
685 
686   chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg);
687   DECL_ARG_TYPE (group_base_addr_arg) = restrict_char_ptr;
688   DECL_CONTEXT (group_base_addr_arg) = launcher;
689   TREE_USED (group_base_addr_arg) = 1;
690   DECL_ARTIFICIAL (group_base_addr_arg) = 1;
691 
692   tree resdecl
693     = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
694 
695   DECL_RESULT (launcher) = resdecl;
696   DECL_CONTEXT (resdecl) = launcher;
697 
698   DECL_INITIAL (launcher) = make_node (BLOCK);
699   TREE_USED (DECL_INITIAL (launcher)) = 1;
700 
701   tree stmt_list = alloc_stmt_list ();
702 
703   tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
704 
705   TREE_STATIC (launcher) = 1;
706   TREE_PUBLIC (launcher) = 1;
707 
708   DECL_SAVED_TREE (launcher) = bind_expr;
709 
710   if (DECL_STRUCT_FUNCTION (launcher) == NULL)
711     push_struct_function (launcher);
712   else
713     push_cfun (DECL_STRUCT_FUNCTION (launcher));
714 
715   tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl);
716 
717   tree phsail_launch_kernel_call;
718 
719   /* Compute the local group segment frame start pointer.  */
720   tree group_local_offset_temp
721     = create_tmp_var (uint32_type_node, "group_local_offset");
722   tree group_local_offset_arg
723     = build2 (MODIFY_EXPR, uint32_type_node,
724 	      group_local_offset_temp,
725 	      build_int_cst (uint32_type_node,
726 			     m_parent->m_module_group_variables.size()));
727 
728   /* Emit a launcher depending whether we converted the kernel function to
729      a work group function or not.  */
730   if (m_is_wg_function)
731     phsail_launch_kernel_call
732       = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
733 		      4, void_type_node,
734 		      ptr_type_node, kernel_func_ptr, restrict_void_ptr,
735 		      context_arg, restrict_char_ptr, group_base_addr_arg,
736 		      uint32_type_node, group_local_offset_arg);
737   else
738     phsail_launch_kernel_call
739       = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
740 		      4, void_type_node,
741 		      ptr_type_node, kernel_func_ptr, restrict_void_ptr,
742 		      context_arg, restrict_char_ptr, group_base_addr_arg,
743 		      uint32_type_node, group_local_offset_arg);
744 
745   append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
746 
747   emit_metadata (stmt_list);
748 
749   set_externally_visible (launcher);
750 
751   return launcher;
752 }
753 
754 tree
append_statement(tree stmt)755 brig_function::append_statement (tree stmt)
756 {
757   gcc_assert (m_func_decl != NULL);
758 
759   tree bind_expr = m_current_bind_expr;
760   tree stmts = BIND_EXPR_BODY (bind_expr);
761 
762   append_to_statement_list_force (stmt, &stmts);
763   return stmt;
764 }
765 
766 /* Creates a new "alloca frame" for the current function by
767    injecting an alloca frame push in the beginning of the function
768    and an alloca frame pop before all function exit points.  */
769 
770 void
create_alloca_frame()771 brig_function::create_alloca_frame ()
772 {
773   tree_stmt_iterator entry;
774 
775   /* Adds the alloca push only after the ids have been initialized
776      in case of a kernel function.  */
777   if (m_is_kernel)
778     entry = m_kernel_entry;
779   else
780     {
781       tree bind_expr = m_current_bind_expr;
782       tree stmts = BIND_EXPR_BODY (bind_expr);
783       entry = tsi_start (stmts);
784     }
785 
786   tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME);
787   tree push_frame_call
788     = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node,
789 		    m_context_arg);
790 
791   tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT);
792 
793   tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME);
794 
795   do
796     {
797       tree stmt = tsi_stmt (entry);
798       if (TREE_CODE (stmt) == RETURN_EXPR)
799 	{
800 	  tree pop_frame_call
801 	    = call_builtin (pop_frame_builtin, 1, void_type_node,
802 			    ptr_type_node, m_context_arg);
803 
804 	  tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT);
805 	}
806       tsi_next (&entry);
807     }
808   while (!tsi_end_p (entry));
809 }
810 
811 /* Finishes the currently built function.  After calling this, no new
812    statements should be appeneded to the function.  */
813 void
finish()814 brig_function::finish ()
815 {
816   append_return_stmt ();
817 
818   /* Currently assume single alloca frame per WG.  */
819   if (m_has_allocas)
820     create_alloca_frame ();
821 }
822 
823 void
finish_kernel()824 brig_function::finish_kernel ()
825 {
826   /* Kernel functions should have a single exit point.
827      Let's create one.  The return instructions should have
828      been converted to branches to this label.  */
829   append_statement (build_stmt (LABEL_EXPR, m_exit_label));
830   /* Attempt to convert the kernel to a work-group function that
831      executes all work-items of the WG using a loop.  */
832   convert_to_wg_function ();
833 
834   append_return_stmt ();
835 
836   /* Currently assume single alloca frame per WG.  */
837   if (m_has_allocas)
838     create_alloca_frame ();
839 }
840 
841 void
append_return_stmt()842 brig_function::append_return_stmt ()
843 {
844   gcc_assert (m_current_bind_expr != NULL_TREE);
845   tree stmts = BIND_EXPR_BODY (m_current_bind_expr);
846 
847   if (STATEMENT_LIST_TAIL (stmts) == NULL)
848     return; /* Empty function.  */
849 
850   tree last_stmt = tsi_stmt (tsi_last (stmts));
851 
852   if (TREE_CODE (last_stmt) == RETURN_EXPR)
853     return;
854 
855   if (m_ret_value != NULL_TREE)
856     {
857       tree result_assign
858 	= build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value,
859 		  m_ret_temp);
860 
861       tree return_expr
862 	= build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign);
863       append_to_statement_list_force (return_expr, &stmts);
864     }
865   else
866     {
867       tree return_stmt = build_stmt (RETURN_EXPR, NULL);
868       append_to_statement_list_force (return_stmt, &stmts);
869     }
870 }
871 
872 bool
has_function_scope_var(const BrigBase * var) const873 brig_function::has_function_scope_var (const BrigBase* var) const
874 {
875   return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
876 }
877 
878 size_t
group_variable_segment_offset(const std::string & name) const879 brig_function::group_variable_segment_offset (const std::string &name) const
880 {
881   if (m_local_group_variables.has_variable (name))
882     return m_local_group_variables.segment_offset (name);
883 
884   gcc_assert (m_parent->m_module_group_variables.has_variable (name));
885   return m_parent->m_module_group_variables.segment_offset (name);
886 }
887 
888 /* Try to expand the given builtin call to reuse a previously generated
889    variable, if possible.  If not, just call the given builtin.
890    BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
891    ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
892    input operands.  */
893 
894 tree
expand_or_call_builtin(BrigOpcode16_t brig_opcode,BrigType16_t brig_type,tree arith_type,tree_stl_vec & operands)895 brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
896 				       BrigType16_t brig_type,
897 				       tree arith_type,
898 				       tree_stl_vec &operands)
899 {
900   if (needs_workitem_context_data (brig_opcode))
901     m_has_unexpanded_dp_builtins = true;
902 
903   if (can_expand_builtin (brig_opcode))
904     return expand_builtin (brig_opcode, operands);
905 
906   tree built_in
907     = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
908 
909   if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
910       && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
911       && brig_opcode != BRIG_OPCODE_LERP
912       && brig_opcode != BRIG_OPCODE_PACKCVT
913       && brig_opcode != BRIG_OPCODE_SAD
914       && brig_opcode != BRIG_OPCODE_SADHI)
915     {
916       /* Call the scalar built-in for all elements in the vector.  */
917       tree_stl_vec operand0_elements;
918       if (operands.size () > 0)
919 	unpack (operands[0], operand0_elements);
920 
921       tree_stl_vec operand1_elements;
922       if (operands.size () > 1)
923 	unpack (operands[1], operand1_elements);
924 
925       tree_stl_vec result_elements;
926 
927       size_t element_count = gccbrig_type_vector_subparts (arith_type);
928       for (size_t i = 0; i < element_count; ++i)
929 	{
930 	  tree_stl_vec call_operands;
931 	  if (operand0_elements.size () > 0)
932 	    call_operands.push_back (operand0_elements.at (i));
933 
934 	  if (operand1_elements.size () > 0)
935 	    call_operands.push_back (operand1_elements.at (i));
936 
937 	  result_elements.push_back
938 	    (expand_or_call_builtin (brig_opcode, brig_type,
939 				     TREE_TYPE (arith_type),
940 				     call_operands));
941 	}
942       return pack (result_elements);
943     }
944 
945   tree_stl_vec call_operands;
946   tree_stl_vec operand_types;
947 
948   tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
949 
950   for (size_t i = 0; i < operands.size (); ++i)
951     {
952       tree operand_type = TREE_VALUE (arg_type_chain);
953       call_operands.push_back (convert (operand_type, operands[i]));
954       operand_types.push_back (operand_type);
955       arg_type_chain = TREE_CHAIN (arg_type_chain);
956     }
957 
958   if (needs_workitem_context_data (brig_opcode))
959     {
960       call_operands.push_back (m_context_arg);
961       operand_types.push_back (ptr_type_node);
962     }
963 
964   size_t operand_count = call_operands.size ();
965 
966   call_operands.resize (4, NULL_TREE);
967   operand_types.resize (4, NULL_TREE);
968   for (size_t i = 0; i < operand_count; ++i)
969     call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
970 						      call_operands.at (i));
971 
972   tree fnptr = build_fold_addr_expr (built_in);
973   return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
974 			   operand_count, &call_operands[0]);
975 }
976 
977 /* Instead of calling a built-in function, use a more efficient mechanism
978    such as reuse a previously returned value known to be still valid, or
979    access the work-item context struct directly.  This is beneficial especially
980    for the work-item identification related builtins as not having them as
981    unanalyzable black box calls can lead to more easily vectorizable parallel
982    loops for multi work-item work-groups.  BRIG_OPCODE identifies the builtin
983    and OPERANDS store the operands.  */
984 
985 tree
expand_builtin(BrigOpcode16_t brig_opcode,tree_stl_vec & operands)986 brig_function::expand_builtin (BrigOpcode16_t brig_opcode,
987 			       tree_stl_vec &operands)
988 {
989   tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
990 
991   tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
992 
993   tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
994 
995   if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
996     {
997       tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
998       id0 = convert (uint64_type_node, id0);
999 
1000       tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
1001       id1 = convert (uint64_type_node, id1);
1002 
1003       tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
1004       id2 = convert (uint64_type_node, id2);
1005 
1006       tree max0 = convert (uint64_type_node, m_grid_size_vars[0]);
1007       tree max1 = convert (uint64_type_node, m_grid_size_vars[1]);
1008 
1009       tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
1010       id2_x_max0_x_max1
1011 	= build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
1012 
1013       tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
1014 
1015       tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
1016       sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
1017 
1018       return add_temp_var ("workitemflatabsid", sum);
1019     }
1020   else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
1021     {
1022       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1023       return m_abs_id_vars[dim];
1024     }
1025   else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
1026     {
1027 
1028       tree wg_size_x = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_0);
1029       tree wg_size_y = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_1);
1030       tree z_x_wgsx_wgsy
1031 	= build2 (MULT_EXPR, uint32_type_node,
1032 		  convert (uint32_type_node,
1033 			   expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_2)),
1034 		  wg_size_x);
1035       z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
1036 			      wg_size_y);
1037 
1038       tree y_x_wgsx
1039 	= build2 (MULT_EXPR, uint32_type_node,
1040 		  convert (uint32_type_node,
1041 			   expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_1)),
1042 		  wg_size_x);
1043 
1044       tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
1045       sum = build2 (PLUS_EXPR, uint32_type_node,
1046 		    convert (uint32_type_node,
1047 			     expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_0)),
1048 		    sum);
1049       return add_temp_var ("workitemflatid", sum);
1050     }
1051   else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
1052     {
1053       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1054       if (flag_assume_phsa)
1055 	{
1056 	  tree ptr_type = build_pointer_type (uint32_type_node);
1057 	  tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1058 			     build_int_cst (ptr_type,
1059 					    PHSA_CONTEXT_WG_SIZES
1060 					    + dim * 4));
1061 	  std::string name ("wgsize_x");
1062 	  name [name.length() - 1] += dim;
1063 	  return add_temp_var (name.c_str(), ctx);
1064 	}
1065       else if (m_is_kernel)
1066 	{
1067 	  /* For kernels without phsa we generate certain temps before
1068 	     the WI loop, which means we don't need to rely on LICM to get
1069 	     them moved out.  */
1070 	  return m_wg_size_vars[dim];
1071 	}
1072       else
1073 	gcc_unreachable ();
1074     }
1075   else if (brig_opcode == BRIG_OPCODE_WORKITEMID)
1076     {
1077       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1078       if (m_is_kernel)
1079 	{
1080 	  return m_local_id_vars [dim];
1081 	}
1082       else if (flag_assume_phsa)
1083 	{
1084 	  tree ptr_type = build_pointer_type (uint32_type_node);
1085 	  tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1086 			     build_int_cst (ptr_type,
1087 					    PHSA_CONTEXT_OFFS_WI_IDS
1088 					    + dim * 4));
1089 	  std::string name ("wiid_x");
1090 	  name [name.length() - 1] += dim;
1091 	  return add_temp_var (name.c_str(), ctx);
1092 	}
1093       else
1094 	gcc_unreachable ();
1095     }
1096   else if (brig_opcode == BRIG_OPCODE_WORKGROUPID)
1097     {
1098       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1099       if (flag_assume_phsa)
1100 	{
1101 	  tree ptr_type = build_pointer_type (uint32_type_node);
1102 	  tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1103 			     build_int_cst (ptr_type,
1104 					    PHSA_CONTEXT_OFFS_WG_IDS
1105 					    + dim * 4));
1106 	  std::string name ("wgid_x");
1107 	  name [name.length() - 1] += dim;
1108 	  return add_temp_var (name.c_str(), ctx);
1109 	} else if (m_is_kernel)
1110 	return m_wg_id_vars [dim];
1111       else
1112 	gcc_unreachable ();
1113     }
1114   else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
1115     {
1116       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1117       if (flag_assume_phsa)
1118 	{
1119 	  tree ptr_type = build_pointer_type (uint32_type_node);
1120 	  tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1121 			     build_int_cst (ptr_type,
1122 					    PHSA_CONTEXT_CURRENT_WG_SIZES
1123 					    + dim * 4));
1124 	  std::string name ("curwgsize_x");
1125 	  name [name.length() - 1] += dim;
1126 	  return add_temp_var (name.c_str(), ctx);
1127 	} else if (m_is_kernel)
1128 	return m_cur_wg_size_vars[dim];
1129       else
1130 	gcc_unreachable ();
1131     }
1132   else
1133     gcc_unreachable ();
1134 
1135   return NULL_TREE;
1136 }
1137 
1138 /* Returns true in case the given opcode that would normally be generated
1139    as a builtin call can be expanded to tree nodes.  */
1140 
1141 bool
can_expand_builtin(BrigOpcode16_t brig_opcode) const1142 brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode) const
1143 {
1144   switch (brig_opcode)
1145     {
1146     case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
1147     case BRIG_OPCODE_WORKITEMFLATID:
1148     case BRIG_OPCODE_WORKITEMID:
1149     case BRIG_OPCODE_WORKGROUPID:
1150     case BRIG_OPCODE_WORKGROUPSIZE:
1151       return m_is_kernel || flag_assume_phsa;
1152     case BRIG_OPCODE_WORKITEMFLATABSID:
1153     case BRIG_OPCODE_WORKITEMABSID:
1154       return m_is_kernel;
1155     default:
1156       return false;
1157     };
1158 }
1159 
1160 /* In case the HSA instruction must be implemented using a builtin,
1161    this function is called to get the correct builtin function.
1162    TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
1163    brig instruction and BRIG_TYPE the brig instruction's type.  */
1164 
1165 tree
get_builtin_for_hsa_opcode(tree type,BrigOpcode16_t brig_opcode,BrigType16_t brig_type) const1166 brig_function::get_builtin_for_hsa_opcode
1167   (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
1168 {
1169   tree builtin = NULL_TREE;
1170   tree builtin_type = type;
1171 
1172   /* For vector types, first find the scalar version of the builtin.  */
1173   if (type != NULL_TREE && VECTOR_TYPE_P (type))
1174     builtin_type = TREE_TYPE (type);
1175   BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
1176 
1177   /* Some BRIG opcodes can use the same builtins for unsigned and
1178      signed types.  Force these cases to unsigned types.  */
1179 
1180   if (brig_opcode == BRIG_OPCODE_BORROW
1181       || brig_opcode == BRIG_OPCODE_CARRY
1182       || brig_opcode == BRIG_OPCODE_LASTBIT
1183       || brig_opcode == BRIG_OPCODE_BITINSERT)
1184     {
1185       if (brig_type == BRIG_TYPE_S32)
1186 	brig_type = BRIG_TYPE_U32;
1187       else if (brig_type == BRIG_TYPE_S64)
1188 	brig_type = BRIG_TYPE_U64;
1189     }
1190 
1191   switch (brig_opcode)
1192     {
1193     case BRIG_OPCODE_FLOOR:
1194       builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
1195       break;
1196     case BRIG_OPCODE_CEIL:
1197       builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
1198       break;
1199     case BRIG_OPCODE_SQRT:
1200     case BRIG_OPCODE_NSQRT:
1201       builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
1202       break;
1203     case BRIG_OPCODE_RINT:
1204       builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
1205       break;
1206     case BRIG_OPCODE_TRUNC:
1207       builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
1208       break;
1209     case BRIG_OPCODE_COPYSIGN:
1210       builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
1211       break;
1212     case BRIG_OPCODE_NSIN:
1213       builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
1214       break;
1215     case BRIG_OPCODE_NLOG2:
1216       builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
1217       break;
1218     case BRIG_OPCODE_NEXP2:
1219       builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
1220       break;
1221     case BRIG_OPCODE_FMA:
1222     case BRIG_OPCODE_NFMA:
1223       builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
1224       break;
1225     case BRIG_OPCODE_NCOS:
1226       builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
1227       break;
1228     case BRIG_OPCODE_POPCOUNT:
1229       /* Popcount should be typed by its argument type (the return value
1230 	 is always u32).  Let's use a b64 version for also for b32 for now.  */
1231       return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
1232     case BRIG_OPCODE_BORROW:
1233       /* Borrow uses the same builtin for unsigned and signed types.  */
1234       if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
1235 	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
1236       else
1237 	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
1238     case BRIG_OPCODE_CARRY:
1239       /* Carry also uses the same builtin for unsigned and signed types.  */
1240       if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
1241 	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
1242       else
1243 	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
1244     default:
1245 
1246       /* Use our builtin index for finding a proper builtin for the BRIG
1247 	 opcode and BRIG type.  This takes care most of the builtin cases,
1248 	 the special cases are handled in the separate 'case' statements
1249 	 above.  */
1250       builtin_map::const_iterator i
1251 	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
1252       if (i != s_custom_builtins.end ())
1253 	return (*i).second;
1254 
1255       if (brig_inner_type != brig_type)
1256 	{
1257 	  /* Try to find a scalar built-in we could use.  */
1258 	  i = s_custom_builtins.find
1259 	    (std::make_pair (brig_opcode, brig_inner_type));
1260 	  if (i != s_custom_builtins.end ())
1261 	    return (*i).second;
1262 	}
1263 
1264       /* In case this is an fp16 operation that is promoted to fp32,
1265 	 try to find a fp32 scalar built-in.  */
1266       if (brig_inner_type == BRIG_TYPE_F16)
1267 	{
1268 	  i = s_custom_builtins.find
1269 	    (std::make_pair (brig_opcode, BRIG_TYPE_F32));
1270 	  if (i != s_custom_builtins.end ())
1271 	    return (*i).second;
1272 	}
1273       gcc_unreachable ();
1274     }
1275 
1276   if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
1277     {
1278       /* Try to find a vectorized version of the built-in.
1279 	 TODO: properly assert that builtin is a mathfn builtin? */
1280       tree vec_builtin
1281 	= targetm.vectorize.builtin_vectorized_function
1282 	(builtin_mathfn_code (builtin), type, type);
1283       if (vec_builtin != NULL_TREE)
1284 	return vec_builtin;
1285       else
1286 	return builtin;
1287     }
1288   if (builtin == NULL_TREE)
1289     gcc_unreachable ();
1290   return builtin;
1291 }
1292 
1293 /* Unpacks the elements of the vector in VALUE to scalars (bit field
1294    references) in ELEMENTS.  */
1295 
1296 void
unpack(tree value,tree_stl_vec & elements)1297 brig_function::unpack (tree value, tree_stl_vec &elements)
1298 {
1299   size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
1300   size_t element_size
1301     = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
1302   size_t element_count
1303     = vec_size * BITS_PER_UNIT / element_size;
1304 
1305   tree input_element_type = TREE_TYPE (TREE_TYPE (value));
1306 
1307   value = add_temp_var ("unpack_input", value);
1308 
1309   for (size_t i = 0; i < element_count; ++i)
1310     {
1311       tree element
1312 	= build3 (BIT_FIELD_REF, input_element_type, value,
1313 		  TYPE_SIZE (input_element_type),
1314 		  bitsize_int(i * element_size));
1315 
1316       element = add_temp_var ("scalar", element);
1317       elements.push_back (element);
1318     }
1319 }
1320 
1321 /* Pack the elements of the scalars in ELEMENTS to the returned vector.  */
1322 
1323 tree
pack(tree_stl_vec & elements)1324 brig_function::pack (tree_stl_vec &elements)
1325 {
1326   size_t element_count = elements.size ();
1327 
1328   gcc_assert (element_count > 1);
1329 
1330   tree output_element_type = TREE_TYPE (elements.at (0));
1331 
1332   vec<constructor_elt, va_gc> *constructor_vals = NULL;
1333   for (size_t i = 0; i < element_count; ++i)
1334     CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
1335 
1336   tree vec_type = build_vector_type (output_element_type, element_count);
1337 
1338   /* build_constructor creates a vector type which is not a vector_cst
1339      that requires compile time constant elements.  */
1340   tree vec = build_constructor (vec_type, constructor_vals);
1341 
1342   /* Add a temp variable for readability.  */
1343   tree tmp_var = create_tmp_var (vec_type, "vec_out");
1344   tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1345   append_statement (vec_tmp_assign);
1346   return tmp_var;
1347 }
1348 
1349 /* Returns true in case the given opcode needs to know about work-item context
1350    data.  In such case the context data is passed as a pointer to a work-item
1351    context object, as the last argument in the builtin call.  */
1352 
1353 bool
needs_workitem_context_data(BrigOpcode16_t brig_opcode)1354 brig_function::needs_workitem_context_data
1355 (BrigOpcode16_t brig_opcode)
1356 {
1357   switch (brig_opcode)
1358     {
1359     case BRIG_OPCODE_WORKITEMABSID:
1360     case BRIG_OPCODE_WORKITEMFLATABSID:
1361     case BRIG_OPCODE_WORKITEMFLATID:
1362     case BRIG_OPCODE_CURRENTWORKITEMFLATID:
1363     case BRIG_OPCODE_WORKITEMID:
1364     case BRIG_OPCODE_WORKGROUPID:
1365     case BRIG_OPCODE_WORKGROUPSIZE:
1366     case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
1367     case BRIG_OPCODE_GRIDGROUPS:
1368     case BRIG_OPCODE_GRIDSIZE:
1369     case BRIG_OPCODE_DIM:
1370     case BRIG_OPCODE_PACKETID:
1371     case BRIG_OPCODE_PACKETCOMPLETIONSIG:
1372     case BRIG_OPCODE_BARRIER:
1373     case BRIG_OPCODE_WAVEBARRIER:
1374     case BRIG_OPCODE_ARRIVEFBAR:
1375     case BRIG_OPCODE_INITFBAR:
1376     case BRIG_OPCODE_JOINFBAR:
1377     case BRIG_OPCODE_LEAVEFBAR:
1378     case BRIG_OPCODE_RELEASEFBAR:
1379     case BRIG_OPCODE_WAITFBAR:
1380     case BRIG_OPCODE_CUID:
1381     case BRIG_OPCODE_MAXCUID:
1382     case BRIG_OPCODE_DEBUGTRAP:
1383     case BRIG_OPCODE_GROUPBASEPTR:
1384     case BRIG_OPCODE_KERNARGBASEPTR:
1385     case BRIG_OPCODE_ALLOCA:
1386       return true;
1387     default:
1388       return false;
1389     };
1390 }
1391 
1392 /* Appends and returns a new temp variable and an accompanying assignment
1393    statement that stores the value of the given EXPR and has the given NAME.  */
1394 
1395 tree
add_temp_var(std::string name,tree expr)1396 brig_function::add_temp_var (std::string name, tree expr)
1397 {
1398   tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
1399   tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
1400   append_statement (assign);
1401   return temp_var;
1402 }
1403 
1404 /* Returns the integer constant value of the given node.
1405    If it's a cast, looks into the source of the cast.  */
1406 
1407 HOST_WIDE_INT
int_constant_value(tree node)1408 brig_function::int_constant_value (tree node)
1409 {
1410   tree n = node;
1411   if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
1412     n = TREE_OPERAND (n, 0);
1413   return int_cst_value (n);
1414 }
1415 
1416 /* Returns the tree code that should be used to implement the given
1417    HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
1418    (BRIG_TYPE).  In case the opcode cannot be mapped to a TREE node directly,
1419    returns TREE_LIST (if it can be emulated with a simple chain of tree
1420    nodes) or CALL_EXPR if the opcode should be implemented using a builtin
1421    call.  */
1422 
1423 tree_code
get_tree_code_for_hsa_opcode(BrigOpcode16_t brig_opcode,BrigType16_t brig_type)1424 brig_function::get_tree_code_for_hsa_opcode
1425   (BrigOpcode16_t brig_opcode, BrigType16_t brig_type)
1426 {
1427   BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
1428   switch (brig_opcode)
1429     {
1430     case BRIG_OPCODE_NOP:
1431       return NOP_EXPR;
1432     case BRIG_OPCODE_ADD:
1433       return PLUS_EXPR;
1434     case BRIG_OPCODE_CMOV:
1435       if (brig_inner_type == brig_type)
1436 	return COND_EXPR;
1437       else
1438 	return VEC_COND_EXPR;
1439     case BRIG_OPCODE_SUB:
1440       return MINUS_EXPR;
1441     case BRIG_OPCODE_MUL:
1442     case BRIG_OPCODE_MUL24:
1443       return MULT_EXPR;
1444     case BRIG_OPCODE_MULHI:
1445     case BRIG_OPCODE_MUL24HI:
1446       return MULT_HIGHPART_EXPR;
1447     case BRIG_OPCODE_DIV:
1448       if (gccbrig_is_float_type (brig_inner_type))
1449 	return RDIV_EXPR;
1450       else
1451 	return TRUNC_DIV_EXPR;
1452     case BRIG_OPCODE_NEG:
1453       return NEGATE_EXPR;
1454     case BRIG_OPCODE_MIN:
1455       if (gccbrig_is_float_type (brig_inner_type))
1456 	return CALL_EXPR;
1457       else
1458 	return MIN_EXPR;
1459     case BRIG_OPCODE_MAX:
1460       if (gccbrig_is_float_type (brig_inner_type))
1461 	return CALL_EXPR;
1462       else
1463 	return MAX_EXPR;
1464     case BRIG_OPCODE_ABS:
1465       return ABS_EXPR;
1466     case BRIG_OPCODE_SHL:
1467       return LSHIFT_EXPR;
1468     case BRIG_OPCODE_SHR:
1469       return RSHIFT_EXPR;
1470     case BRIG_OPCODE_OR:
1471       return BIT_IOR_EXPR;
1472     case BRIG_OPCODE_XOR:
1473       return BIT_XOR_EXPR;
1474     case BRIG_OPCODE_AND:
1475       return BIT_AND_EXPR;
1476     case BRIG_OPCODE_NOT:
1477       return BIT_NOT_EXPR;
1478     case BRIG_OPCODE_RET:
1479       return RETURN_EXPR;
1480     case BRIG_OPCODE_MOV:
1481     case BRIG_OPCODE_LDF:
1482       return MODIFY_EXPR;
1483     case BRIG_OPCODE_LD:
1484     case BRIG_OPCODE_ST:
1485       return MEM_REF;
1486     case BRIG_OPCODE_BR:
1487       return GOTO_EXPR;
1488     case BRIG_OPCODE_REM:
1489       if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
1490 	return TRUNC_MOD_EXPR;
1491       else
1492 	return CALL_EXPR;
1493     case BRIG_OPCODE_NRCP:
1494     case BRIG_OPCODE_NRSQRT:
1495       /* Implement as 1/f (x).  gcc should pattern detect that and
1496 	 use a native instruction, if available, for it.  */
1497       return TREE_LIST;
1498     case BRIG_OPCODE_FMA:
1499     case BRIG_OPCODE_FLOOR:
1500     case BRIG_OPCODE_CEIL:
1501     case BRIG_OPCODE_SQRT:
1502     case BRIG_OPCODE_NSQRT:
1503     case BRIG_OPCODE_RINT:
1504     case BRIG_OPCODE_TRUNC:
1505     case BRIG_OPCODE_POPCOUNT:
1506     case BRIG_OPCODE_COPYSIGN:
1507     case BRIG_OPCODE_NCOS:
1508     case BRIG_OPCODE_NSIN:
1509     case BRIG_OPCODE_NLOG2:
1510     case BRIG_OPCODE_NEXP2:
1511     case BRIG_OPCODE_NFMA:
1512       /* Class has type B1 regardless of the float type, thus
1513 	 the below builtin map search cannot find it.  */
1514     case BRIG_OPCODE_CLASS:
1515     case BRIG_OPCODE_WORKITEMABSID:
1516       return CALL_EXPR;
1517     default:
1518 
1519       /* Some BRIG opcodes can use the same builtins for unsigned and
1520 	 signed types.  Force these cases to unsigned types.
1521       */
1522 
1523       if (brig_opcode == BRIG_OPCODE_BORROW
1524 	  || brig_opcode == BRIG_OPCODE_CARRY
1525 	  || brig_opcode == BRIG_OPCODE_LASTBIT
1526 	  || brig_opcode == BRIG_OPCODE_BITINSERT)
1527 	{
1528 	  if (brig_type == BRIG_TYPE_S32)
1529 	    brig_type = BRIG_TYPE_U32;
1530 	  else if (brig_type == BRIG_TYPE_S64)
1531 	    brig_type = BRIG_TYPE_U64;
1532 	}
1533 
1534 
1535       builtin_map::const_iterator i
1536 	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
1537       if (i != s_custom_builtins.end ())
1538 	return CALL_EXPR;
1539       else if (s_custom_builtins.find
1540 	       (std::make_pair (brig_opcode, brig_inner_type))
1541 	       != s_custom_builtins.end ())
1542 	return CALL_EXPR;
1543       if (brig_inner_type == BRIG_TYPE_F16
1544 	  && s_custom_builtins.find
1545 	  (std::make_pair (brig_opcode, BRIG_TYPE_F32))
1546 	  != s_custom_builtins.end ())
1547 	return CALL_EXPR;
1548       break;
1549     }
1550   return TREE_LIST; /* Emulate using a chain of nodes.  */
1551 }
1552 
1553 /* Inform of an update to the REG_VAR.  */
1554 
1555 void
add_reg_var_update(tree reg_var,tree var)1556 brig_function::add_reg_var_update (tree reg_var, tree var)
1557 {
1558   if (var == m_abs_id_vars[0] || var == m_abs_id_vars[1]
1559       || var == m_abs_id_vars[2] || var == m_local_id_vars[0]
1560       || var == m_local_id_vars[1] || var == m_local_id_vars[2])
1561     m_id_val_defs [reg_var] = var;
1562   else
1563     {
1564       /* Possible overwrite of an ID value.  */
1565 
1566       id_val_map::iterator i = m_id_val_defs.find (reg_var);
1567       if (i != m_id_val_defs.end())
1568 	m_id_val_defs.erase (i);
1569     }
1570 }
1571 
1572 /* If the REG_VAR is known to contain an ID value at this point in
1573    the basic block, return true.  */
1574 
1575 bool
is_id_val(tree reg_var)1576 brig_function::is_id_val (tree reg_var)
1577 {
1578   id_val_map::iterator i = m_id_val_defs.find (reg_var);
1579   return i != m_id_val_defs.end();
1580 }
1581 
1582 /* Return an ID value for the given REG_VAR if its known to contain
1583    one at this point in the BB, NULL_TREE otherwise.  */
1584 
1585 tree
id_val(tree reg_var)1586 brig_function::id_val (tree reg_var)
1587 {
1588   id_val_map::iterator i = m_id_val_defs.find (reg_var);
1589   if (i != m_id_val_defs.end())
1590     return (*i).second;
1591   else
1592     return NULL_TREE;
1593 }
1594 
1595 /* Informs of starting a new basic block.  Called when generating
1596    a label, a call, a jump, or a return.  */
1597 
1598 void
start_new_bb()1599 brig_function::start_new_bb ()
1600 {
1601   m_id_val_defs.clear ();
1602 }
1603