1 /* brig-function.cc -- declaration of brig_function class.
2    Copyright (C) 2016-2018 Free Software Foundation, Inc.
3    Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4    for General Processor Tech.
5 
6    This file is part of GCC.
7 
8    GCC is free software; you can redistribute it and/or modify it under
9    the terms of the GNU General Public License as published by the Free
10    Software Foundation; either version 3, or (at your option) any later
11    version.
12 
13    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14    WARRANTY; without even the implied warranty of MERCHANTABILITY or
15    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16    for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with GCC; see the file COPYING3.  If not see
20    <http://www.gnu.org/licenses/>.  */
21 
22 #include <sstream>
23 #include <iomanip>
24 
25 #include "brig-function.h"
26 #include "stringpool.h"
27 #include "tree-iterator.h"
28 #include "toplev.h"
29 #include "gimplify.h"
30 #include "gimple-expr.h"
31 #include "print-tree.h"
32 #include "hsa-brig-format.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-code-entry-handler.h"
36 #include "brig-machine.h"
37 #include "brig-util.h"
38 #include "phsa.h"
39 #include "tree-pretty-print.h"
40 #include "dumpfile.h"
41 #include "profile-count.h"
42 #include "tree-cfg.h"
43 #include "errors.h"
44 #include "function.h"
45 #include "brig-to-generic.h"
46 #include "brig-builtins.h"
47 
brig_function(const BrigDirectiveExecutable * exec,brig_to_generic * parent)48 brig_function::brig_function (const BrigDirectiveExecutable *exec,
49 			      brig_to_generic *parent)
50   : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""),
51     m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE),
52     m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
53     m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
54     m_next_kernarg_offset (0), m_kernarg_max_align (0),
55     m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
56     m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
57     m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
58     m_generating_arg_block (false), m_parent (parent)
59 {
60   memset (m_regs, 0,
61 	  BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
62   memset (&m_descriptor, 0, sizeof (phsa_descriptor));
63 }
64 
~brig_function()65 brig_function::~brig_function ()
66 {
67   for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i)
68     {
69       if (m_regs[i] != NULL)
70 	{
71 	  delete m_regs[i];
72 	  m_regs[i] = NULL;
73 	}
74     }
75 }
76 
77 /* Returns a GENERIC label with the given name in the given function.
78    Creates it, if not yet found.  */
79 
80 tree
label(const std::string & name)81 brig_function::label (const std::string &name)
82 {
83   label_index::const_iterator i = m_label_index.find (name);
84   if (i == m_label_index.end ())
85     {
86       tree name_identifier
87 	= get_identifier_with_length (name.c_str (), name.size ());
88 
89       tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
90 				    name_identifier, void_type_node);
91 
92       DECL_CONTEXT (label_decl) = m_func_decl;
93       DECL_ARTIFICIAL (label_decl) = 0;
94 
95       m_label_index[name] = label_decl;
96       return label_decl;
97     }
98   else
99     return (*i).second;
100 }
101 
102 /* Record an argument variable for later use.  This includes both local
103    variables inside arg blocks and incoming function arguments.  */
104 
105 void
add_arg_variable(const BrigDirectiveVariable * brigVar,tree treeDecl)106 brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar,
107 				 tree treeDecl)
108 {
109   m_arg_variables[brigVar] = treeDecl;
110 }
111 
112 tree
arg_variable(const BrigDirectiveVariable * var) const113 brig_function::arg_variable (const BrigDirectiveVariable *var) const
114 {
115   variable_index::const_iterator i = m_arg_variables.find (var);
116   if (i == m_arg_variables.end ())
117     return NULL_TREE;
118   else
119     return (*i).second;
120 }
121 
122 /* Appends a new kernel argument descriptor for the current kernel's
123    arg space.  */
124 
125 void
append_kernel_arg(const BrigDirectiveVariable * var,size_t size,size_t alignment)126 brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size,
127 				  size_t alignment)
128 {
129   gcc_assert (m_func_decl != NULL_TREE);
130   gcc_assert (m_is_kernel);
131 
132   size_t align_padding = m_next_kernarg_offset % alignment == 0 ?
133     0 : (alignment - m_next_kernarg_offset % alignment);
134   m_next_kernarg_offset += align_padding;
135   m_kernarg_offsets[var] = m_next_kernarg_offset;
136   m_next_kernarg_offset += size;
137 
138   m_kernarg_max_align
139     = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align;
140 }
141 
142 size_t
kernel_arg_offset(const BrigDirectiveVariable * var) const143 brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const
144 {
145   var_offset_table::const_iterator i = m_kernarg_offsets.find (var);
146   gcc_assert (i != m_kernarg_offsets.end ());
147   return (*i).second;
148 }
149 
150 /* Add work-item ID variables to the beginning of the kernel function
151    which can be used for address computation as kernel dispatch packet
152    instructions can be expanded to GENERIC nodes referring to them.  */
153 
154 void
add_id_variables()155 brig_function::add_id_variables ()
156 {
157   tree bind_expr = m_current_bind_expr;
158   tree stmts = BIND_EXPR_BODY (bind_expr);
159 
160   /* Initialize the WG limits and local ids.  */
161 
162   tree_stmt_iterator entry = tsi_start (stmts);
163 
164   for (int i = 0; i < 3; ++i)
165     {
166       char dim_char = (char) ((int) 'x' + i);
167 
168       /* The local sizes are limited to 16b values, but let's still use 32b
169 	 to avoid unnecessary casts (the ID functions are 32b).  */
170       m_local_id_vars[i]
171 	= add_local_variable (std::string ("__local_") + dim_char,
172 			      uint32_type_node);
173 
174       tree workitemid_call
175 	= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2,
176 			uint32_type_node, uint32_type_node,
177 			build_int_cst (uint32_type_node, i), ptr_type_node,
178 			m_context_arg);
179 
180       tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]),
181 			     m_local_id_vars[i], workitemid_call);
182 
183       tsi_link_after (&entry, id_init, TSI_NEW_STMT);
184 
185       m_cur_wg_size_vars[i]
186 	= add_local_variable (std::string ("__cur_wg_size_") + dim_char,
187 			      uint32_type_node);
188 
189       tree cwgz_call
190 	= call_builtin
191 	(builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE),
192 	 2, uint32_type_node, uint32_type_node,
193 	 build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg);
194 
195       tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]),
196 				m_cur_wg_size_vars[i], cwgz_call);
197 
198       tsi_link_after (&entry, limit_init, TSI_NEW_STMT);
199 
200       m_wg_id_vars[i]
201 	= add_local_variable (std::string ("__workgroupid_") + dim_char,
202 			      uint32_type_node);
203 
204       tree wgid_call
205 	= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID),
206 			2, uint32_type_node, uint32_type_node,
207 			build_int_cst (uint32_type_node, i), ptr_type_node,
208 			m_context_arg);
209 
210       tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]),
211 			       m_wg_id_vars[i], wgid_call);
212 
213       tsi_link_after (&entry, wgid_init, TSI_NEW_STMT);
214 
215       m_wg_size_vars[i]
216 	= add_local_variable (std::string ("__workgroupsize_") + dim_char,
217 			      uint32_type_node);
218 
219       tree wgsize_call
220 	= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE),
221 			2, uint32_type_node, uint32_type_node,
222 			build_int_cst (uint32_type_node, i), ptr_type_node,
223 			m_context_arg);
224 
225       tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]),
226 				 m_wg_size_vars[i], wgsize_call);
227 
228       tsi_link_after (&entry, wgsize_init, TSI_NEW_STMT);
229 
230       m_grid_size_vars[i]
231 	= add_local_variable (std::string ("__gridsize_") + dim_char,
232 			      uint32_type_node);
233 
234       tree gridsize_call
235 	= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2,
236 			uint32_type_node, uint32_type_node,
237 			build_int_cst (uint32_type_node, i), ptr_type_node,
238 			m_context_arg);
239 
240       tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]),
241 				   m_grid_size_vars[i], gridsize_call);
242 
243       tsi_link_after (&entry, gridsize_init, TSI_NEW_STMT);
244     }
245 
246   m_kernel_entry = entry;
247 }
248 
249 /* Creates a new local variable with the given NAME and given GENERIC
250    TYPE.  */
251 
252 tree
add_local_variable(std::string name,tree type)253 brig_function::add_local_variable (std::string name, tree type)
254 {
255   tree name_identifier
256     = get_identifier_with_length (name.c_str (), name.size ());
257   tree variable
258     = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type);
259 
260   DECL_NONLOCAL (variable) = 0;
261   TREE_ADDRESSABLE (variable) = 0;
262   TREE_STATIC (variable) = 0;
263   TREE_USED (variable) = 1;
264   DECL_ARTIFICIAL (variable) = 0;
265 
266   tree bind_expr = DECL_SAVED_TREE (m_func_decl);
267 
268   DECL_CONTEXT (variable) = m_func_decl;
269 
270   DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr);
271   BIND_EXPR_VARS (bind_expr) = variable;
272   return variable;
273 }
274 
275 /* Return tree type for an HSA register.
276 
277    The tree type can be anything (scalar, vector, int, float, etc.)
278    but its size is guaranteed to match the HSA register size.
279 
280    HSA registers are untyped but we select a type based on their use
281    to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
282    to occur when use or def reaches over current BB).  */
283 
284 tree
get_tree_type_for_hsa_reg(const BrigOperandRegister * reg) const285 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
286 {
287   size_t reg_size = gccbrig_reg_size (reg);
288 
289   /* The default type.  */
290   tree type = build_nonstandard_integer_type (reg_size, true);
291 
292   if (m_parent->m_fn_regs_use_index.count (m_name) == 0)
293     return type;
294 
295   const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name];
296   size_t reg_id = gccbrig_hsa_reg_id (*reg);
297   if (index.count (reg_id) == 0)
298     return type;
299 
300   const reg_use_info &info = index.find (reg_id)->second;
301   std::vector<std::pair<tree, size_t> >::const_iterator it
302     = info.m_type_refs.begin ();
303   std::vector<std::pair<tree, size_t> >::const_iterator it_end
304     = info.m_type_refs.end ();
305   size_t max_refs_as_type_count = 0;
306   for (; it != it_end; it++)
307     {
308       size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT;
309       if (type_bit_size != reg_size) continue;
310       if (it->second > max_refs_as_type_count)
311 	{
312 	  type = it->first;
313 	  max_refs_as_type_count = it->second;
314 	}
315     }
316 
317   return type;
318 }
319 
320 /* Returns a DECL_VAR for the given HSAIL operand register.
321    If it has not been created yet for the function being generated,
322    creates it as a type determined by analysis phase.  */
323 
324 tree
get_m_var_declfor_reg(const BrigOperandRegister * reg)325 brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
326 {
327   size_t offset = gccbrig_hsa_reg_id (*reg);
328 
329   reg_decl_index_entry *regEntry = m_regs[offset];
330   if (regEntry == NULL)
331     {
332       size_t reg_size = gccbrig_reg_size (reg);
333       tree type;
334       if (reg_size > 1)
335 	type = get_tree_type_for_hsa_reg (reg);
336       else
337 	type = boolean_type_node;
338 
339       /* Drop the const qualifier so we do not end up with a read only
340 	 register variable which cannot be written to later.  */
341       tree nonconst_type = build_type_variant (type, false, false);
342 
343       regEntry = new reg_decl_index_entry;
344 
345       regEntry->m_var_decl
346 	= add_local_variable (gccbrig_reg_name (reg), nonconst_type);
347       m_regs[offset] = regEntry;
348     }
349   return regEntry->m_var_decl;
350 }
351 
352 /* Builds a work-item do..while loop for a single DIM.  HEADER_ENTRY is
353    a statement after which the iteration variables should be initialized and
354    the loop body starts.  BRANCH_AFTER is the statement after which the loop
355    predicate check and the back edge goto will be appended.  */
356 
357 void
add_wi_loop(int dim,tree_stmt_iterator * header_entry,tree_stmt_iterator * branch_after)358 brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry,
359 			    tree_stmt_iterator *branch_after)
360 {
361   tree ivar = m_local_id_vars[dim];
362   tree ivar_max = m_cur_wg_size_vars[dim];
363   tree_stmt_iterator entry = *header_entry;
364 
365   /* TODO: this is not a parallel loop as we share the "register variables"
366      across work-items.  Should create a copy of them per WI instance.  That
367      is, declare temporaries for new definitions inside the loop body, not at
368      function scope.  */
369 
370   tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar,
371 			   build_zero_cst (TREE_TYPE (ivar)));
372   tsi_link_after (&entry, ivar_init, TSI_NEW_STMT);
373 
374   tree loop_body_label
375     = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim));
376   tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label);
377 
378   tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT);
379 
380   if (m_has_unexpanded_dp_builtins)
381     {
382       tree id_set_builtin
383 	= builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID);
384       /* Set the local ID to the current wi-loop iteration variable value to
385 	 ensure the builtins see the correct values.  */
386       tree id_set_call
387 	= call_builtin (id_set_builtin, 3,
388 			void_type_node, uint32_type_node,
389 			build_int_cst (uint32_type_node, dim), uint32_type_node,
390 			ivar, ptr_type_node, m_context_arg);
391       tsi_link_after (&entry, id_set_call, TSI_NEW_STMT);
392     }
393 
394   /* Increment the WI iteration variable.  */
395   tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar,
396 		      build_one_cst (TREE_TYPE (ivar)));
397 
398   tsi_link_after (branch_after, incr, TSI_NEW_STMT);
399 
400   /* Append the predicate check with the back edge goto.  */
401   tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max);
402   tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label);
403   tree if_stmt
404     = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE);
405   tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT);
406 }
407 
408 /* Recursively analyzes the function and its callees for barrier usage.  */
409 
410 void
analyze_calls()411 brig_function::analyze_calls ()
412 {
413   if (m_calls_analyzed)
414     return;
415 
416   /* Set this early to not get stuck in case of recursive call graphs.
417      This is safe because if the function calls itself, either the function
418      has barrier calls which implies a call to a function with barrier calls,
419      or it doesn't in which case the result depends on the later called
420      functions.  */
421   m_calls_analyzed = true;
422 
423   for (size_t i = 0; i < m_called_functions.size (); ++i)
424     {
425       tree f = m_called_functions[i];
426       brig_function *called_f = m_parent->get_finished_function (f);
427       if (called_f == NULL)
428 	{
429 	  /* Unfinished function (only declaration within the set of BRIGs)
430 	     found.  Cannot finish the CG analysis.  Have to assume it does have
431 	     a barrier for safety.  */
432 	  m_has_function_calls_with_barriers = true;
433 	  m_has_unexpanded_dp_builtins = true;
434 	  break;
435 	}
436       called_f->analyze_calls ();
437       /* We can assume m_has_barriers has been correctly set during the
438 	 construction of the function decl.  No need to reanalyze it.  */
439       m_has_function_calls_with_barriers |= called_f->m_has_barriers;
440 
441       /* If the function or any of its called functions has dispatch
442 	 packet builtin calls that require the local id, we need to
443 	 set the local id to the context in the work item loop before
444 	 the functions are called.  If we analyze the opposite, these
445 	 function calls can be omitted.  */
446       m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins;
447     }
448 }
449 
450 /* Tries to convert the current kernel to a work-group function that executes
451    all work-items using loops.  Returns true in case the conversion was
452    successful.  */
453 
454 bool
convert_to_wg_function()455 brig_function::convert_to_wg_function ()
456 {
457   if (!m_calls_analyzed)
458     analyze_calls ();
459 
460   if (m_has_barriers || m_has_function_calls_with_barriers)
461     return false;
462 
463   /* The most trivial case: No barriers at all in the kernel.
464      We can create one big work-item loop around the whole kernel.  */
465   tree bind_expr = m_current_bind_expr;
466   tree stmts = BIND_EXPR_BODY (bind_expr);
467 
468   for (int i = 0; i < 3; ++i)
469     {
470       /* The previous loop has added a new label to the end of the function,
471 	 the next level loop should wrap around it also.  */
472       tree_stmt_iterator function_exit = tsi_last (stmts);
473       add_wi_loop (i, &m_kernel_entry, &function_exit);
474     }
475 
476   m_is_wg_function = true;
477   return false;
478 }
479 
480 /* Emits a kernel description to a special ELF section so it can be
481    utilized by an HSA runtime implementation.  The assembly block
482    must be emitted to a statement list of an function, which is given
483    as an argument.  Returns the assembly block used to emit the section. */
484 
485 tree
emit_metadata(tree stmt_list)486 brig_function::emit_metadata (tree stmt_list)
487 {
488   /* Emit an ELF section via an assembly directive that generates a special
489      ELF section for each kernel that contains raw bytes of a descriptor
490      object.  This is pretty disgusting, but life is never perfect ;)  */
491 
492   /* Use the original kernel name without the '_' prefix in the section name.  */
493   std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name;
494 
495   std::ostringstream strstr;
496   strstr << std::endl
497 	 << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name
498 	 << std::endl
499 	 << "\t.p2align 1, 1, 1" << std::endl
500 	 << "\t.byte ";
501 
502   for (size_t i = 0; i < sizeof (phsa_descriptor); ++i)
503     {
504       strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex
505 	     << (unsigned) *((unsigned char *) &m_descriptor + i);
506       if (i + 1 < sizeof (phsa_descriptor))
507 	strstr << ", ";
508     }
509 
510   strstr << std::endl << ".popsection" << std::endl << std::endl;
511 
512   tree metadata_asm
513     = build_stmt (ASM_EXPR,
514 		  build_string (strstr.str ().size (), strstr.str ().c_str ()),
515 		  NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE);
516 
517   append_to_statement_list_force (metadata_asm, &stmt_list);
518   return metadata_asm;
519 }
520 
521 /* Emits the kernel launcher function.  Also emits the metadata section
522    creation statements in it.
523 
524    The launcher function calls the device-side runtime
525    that runs the kernel for all work-items.  In C:
526 
527    void KernelName (void* context, void* group_base_addr)
528    {
529      __hsail_launch_kernel (_KernelName, context, group_base_addr);
530    }
531 
532    or, in case of a successful conversion to a work-group function:
533 
534    void KernelName (void* context, void* group_base_addr)
535    {
536      __hsail_launch_wg_function (_KernelName, context, group_base_addr);
537    }
538 
539    The user/host sees this function as the kernel to call from the
540    outside.  The actual kernel generated from HSAIL was named _KernelName.
541 */
542 
543 tree
emit_launcher_and_metadata()544 brig_function::emit_launcher_and_metadata ()
545 {
546   /* The original kernel name without the '_' prefix.  */
547   std::string kern_name = m_name.substr (1);
548 
549   tree name_identifier
550     = get_identifier_with_length (kern_name.c_str (), kern_name.size ());
551 
552   tree launcher
553     = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
554 		  build_function_type_list (void_type_node, ptr_type_node,
555 					    ptr_type_node, NULL_TREE));
556 
557   TREE_USED (launcher) = 1;
558   DECL_ARTIFICIAL (launcher) = 1;
559 
560   tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
561 				 get_identifier ("__context"), ptr_type_node);
562 
563   DECL_ARGUMENTS (launcher) = context_arg;
564   DECL_ARG_TYPE (context_arg) = ptr_type_node;
565   DECL_CONTEXT (context_arg) = launcher;
566   TREE_USED (context_arg) = 1;
567   DECL_ARTIFICIAL (context_arg) = 1;
568 
569   tree group_base_addr_arg
570     = build_decl (UNKNOWN_LOCATION, PARM_DECL,
571 		  get_identifier ("__group_base_addr"), ptr_type_node);
572 
573   chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg);
574   DECL_ARG_TYPE (group_base_addr_arg) = ptr_type_node;
575   DECL_CONTEXT (group_base_addr_arg) = launcher;
576   TREE_USED (group_base_addr_arg) = 1;
577   DECL_ARTIFICIAL (group_base_addr_arg) = 1;
578 
579   tree resdecl
580     = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
581 
582   DECL_RESULT (launcher) = resdecl;
583   DECL_CONTEXT (resdecl) = launcher;
584 
585   DECL_INITIAL (launcher) = make_node (BLOCK);
586   TREE_USED (DECL_INITIAL (launcher)) = 1;
587 
588   tree stmt_list = alloc_stmt_list ();
589 
590   tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
591 
592   TREE_STATIC (launcher) = 0;
593   TREE_PUBLIC (launcher) = 1;
594 
595   DECL_SAVED_TREE (launcher) = bind_expr;
596 
597   if (DECL_STRUCT_FUNCTION (launcher) == NULL)
598     push_struct_function (launcher);
599   else
600     push_cfun (DECL_STRUCT_FUNCTION (launcher));
601 
602   tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl);
603 
604   tree phsail_launch_kernel_call;
605 
606   /* Compute the local group segment frame start pointer.  */
607   tree group_local_offset_temp
608     = create_tmp_var (uint32_type_node, "group_local_offset");
609   tree group_local_offset_arg
610     = build2 (MODIFY_EXPR, uint32_type_node,
611 	      group_local_offset_temp,
612 	      build_int_cst (uint32_type_node,
613 			     m_parent->m_module_group_variables.size()));
614 
615   /* Emit a launcher depending whether we converted the kernel function to
616      a work group function or not.  */
617   if (m_is_wg_function)
618     phsail_launch_kernel_call
619       = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
620 		      4, void_type_node,
621 		      ptr_type_node, kernel_func_ptr, ptr_type_node,
622 		      context_arg, ptr_type_node, group_base_addr_arg,
623 		      uint32_type_node, group_local_offset_arg);
624   else
625     phsail_launch_kernel_call
626       = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
627 		      4, void_type_node,
628 		      ptr_type_node, kernel_func_ptr, ptr_type_node,
629 		      context_arg, ptr_type_node, group_base_addr_arg,
630 		      uint32_type_node, group_local_offset_arg);
631 
632   append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
633 
634   emit_metadata (stmt_list);
635 
636   return launcher;
637 }
638 
639 tree
append_statement(tree stmt)640 brig_function::append_statement (tree stmt)
641 {
642   gcc_assert (m_func_decl != NULL);
643 
644   tree bind_expr = m_current_bind_expr;
645   tree stmts = BIND_EXPR_BODY (bind_expr);
646 
647   append_to_statement_list_force (stmt, &stmts);
648   return stmt;
649 }
650 
651 /* Creates a new "alloca frame" for the current function by
652    injecting an alloca frame push in the beginning of the function
653    and an alloca frame pop before all function exit points.  */
654 
655 void
create_alloca_frame()656 brig_function::create_alloca_frame ()
657 {
658   tree_stmt_iterator entry;
659 
660   /* Adds the alloca push only after the ids have been initialized
661      in case of a kernel function.  */
662   if (m_is_kernel)
663     entry = m_kernel_entry;
664   else
665     {
666       tree bind_expr = m_current_bind_expr;
667       tree stmts = BIND_EXPR_BODY (bind_expr);
668       entry = tsi_start (stmts);
669     }
670 
671   tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME);
672   tree push_frame_call
673     = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node,
674 		    m_context_arg);
675 
676   tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT);
677 
678   tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME);
679 
680   do
681     {
682       tree stmt = tsi_stmt (entry);
683       if (TREE_CODE (stmt) == RETURN_EXPR)
684 	{
685 	  tree pop_frame_call
686 	    = call_builtin (pop_frame_builtin, 1, void_type_node,
687 			    ptr_type_node, m_context_arg);
688 
689 	  tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT);
690 	}
691       tsi_next (&entry);
692     }
693   while (!tsi_end_p (entry));
694 }
695 
696 /* Finishes the currently built function.  After calling this, no new
697    statements should be appeneded to the function.  */
698 void
finish()699 brig_function::finish ()
700 {
701   append_return_stmt ();
702 
703   /* Currently assume single alloca frame per WG.  */
704   if (m_has_allocas)
705     create_alloca_frame ();
706 }
707 
708 void
finish_kernel()709 brig_function::finish_kernel ()
710 {
711   /* Kernel functions should have a single exit point.
712      Let's create one.  The return instructions should have
713      been converted to branches to this label.  */
714   append_statement (build_stmt (LABEL_EXPR, m_exit_label));
715   /* Attempt to convert the kernel to a work-group function that
716      executes all work-items of the WG using a loop.  */
717   convert_to_wg_function ();
718 
719   append_return_stmt ();
720 
721   /* Currently assume single alloca frame per WG.  */
722   if (m_has_allocas)
723     create_alloca_frame ();
724 }
725 
726 void
append_return_stmt()727 brig_function::append_return_stmt ()
728 {
729   gcc_assert (m_current_bind_expr != NULL_TREE);
730   tree stmts = BIND_EXPR_BODY (m_current_bind_expr);
731 
732   if (STATEMENT_LIST_TAIL (stmts) == NULL)
733     return; /* Empty function.  */
734 
735   tree last_stmt = tsi_stmt (tsi_last (stmts));
736 
737   if (TREE_CODE (last_stmt) == RETURN_EXPR)
738     return;
739 
740   if (m_ret_value != NULL_TREE)
741     {
742       tree result_assign
743 	= build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value,
744 		  m_ret_temp);
745 
746       tree return_expr
747 	= build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign);
748       append_to_statement_list_force (return_expr, &stmts);
749     }
750   else
751     {
752       tree return_stmt = build_stmt (RETURN_EXPR, NULL);
753       append_to_statement_list_force (return_stmt, &stmts);
754     }
755 }
756 
757 bool
has_function_scope_var(const BrigBase * var) const758 brig_function::has_function_scope_var (const BrigBase* var) const
759 {
760   return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
761 }
762 
763 size_t
group_variable_segment_offset(const std::string & name) const764 brig_function::group_variable_segment_offset (const std::string &name) const
765 {
766   if (m_local_group_variables.has_variable (name))
767     return m_local_group_variables.segment_offset (name);
768 
769   gcc_assert (m_parent->m_module_group_variables.has_variable (name));
770   return m_parent->m_module_group_variables.segment_offset (name);
771 }
772