/* brig-function.cc -- implementation of brig_function class.
2 Copyright (C) 2016-2018 Free Software Foundation, Inc.
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4 for General Processor Tech.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23 #include <iomanip>
24
25 #include "brig-function.h"
26 #include "stringpool.h"
27 #include "tree-iterator.h"
28 #include "toplev.h"
29 #include "gimplify.h"
30 #include "gimple-expr.h"
31 #include "print-tree.h"
32 #include "hsa-brig-format.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-code-entry-handler.h"
36 #include "brig-machine.h"
37 #include "brig-util.h"
38 #include "phsa.h"
39 #include "tree-pretty-print.h"
40 #include "dumpfile.h"
41 #include "profile-count.h"
42 #include "tree-cfg.h"
43 #include "errors.h"
44 #include "function.h"
45 #include "brig-to-generic.h"
46 #include "brig-builtins.h"
47
brig_function(const BrigDirectiveExecutable * exec,brig_to_generic * parent)48 brig_function::brig_function (const BrigDirectiveExecutable *exec,
49 brig_to_generic *parent)
50 : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""),
51 m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE),
52 m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
53 m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
54 m_next_kernarg_offset (0), m_kernarg_max_align (0),
55 m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
56 m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
57 m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
58 m_generating_arg_block (false), m_parent (parent)
59 {
60 memset (m_regs, 0,
61 BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
62 memset (&m_descriptor, 0, sizeof (phsa_descriptor));
63 }
64
~brig_function()65 brig_function::~brig_function ()
66 {
67 for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i)
68 {
69 if (m_regs[i] != NULL)
70 {
71 delete m_regs[i];
72 m_regs[i] = NULL;
73 }
74 }
75 }
76
77 /* Returns a GENERIC label with the given name in the given function.
78 Creates it, if not yet found. */
79
80 tree
label(const std::string & name)81 brig_function::label (const std::string &name)
82 {
83 label_index::const_iterator i = m_label_index.find (name);
84 if (i == m_label_index.end ())
85 {
86 tree name_identifier
87 = get_identifier_with_length (name.c_str (), name.size ());
88
89 tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
90 name_identifier, void_type_node);
91
92 DECL_CONTEXT (label_decl) = m_func_decl;
93 DECL_ARTIFICIAL (label_decl) = 0;
94
95 m_label_index[name] = label_decl;
96 return label_decl;
97 }
98 else
99 return (*i).second;
100 }
101
102 /* Record an argument variable for later use. This includes both local
103 variables inside arg blocks and incoming function arguments. */
104
105 void
add_arg_variable(const BrigDirectiveVariable * brigVar,tree treeDecl)106 brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar,
107 tree treeDecl)
108 {
109 m_arg_variables[brigVar] = treeDecl;
110 }
111
112 tree
arg_variable(const BrigDirectiveVariable * var) const113 brig_function::arg_variable (const BrigDirectiveVariable *var) const
114 {
115 variable_index::const_iterator i = m_arg_variables.find (var);
116 if (i == m_arg_variables.end ())
117 return NULL_TREE;
118 else
119 return (*i).second;
120 }
121
122 /* Appends a new kernel argument descriptor for the current kernel's
123 arg space. */
124
125 void
append_kernel_arg(const BrigDirectiveVariable * var,size_t size,size_t alignment)126 brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size,
127 size_t alignment)
128 {
129 gcc_assert (m_func_decl != NULL_TREE);
130 gcc_assert (m_is_kernel);
131
132 size_t align_padding = m_next_kernarg_offset % alignment == 0 ?
133 0 : (alignment - m_next_kernarg_offset % alignment);
134 m_next_kernarg_offset += align_padding;
135 m_kernarg_offsets[var] = m_next_kernarg_offset;
136 m_next_kernarg_offset += size;
137
138 m_kernarg_max_align
139 = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align;
140 }
141
142 size_t
kernel_arg_offset(const BrigDirectiveVariable * var) const143 brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const
144 {
145 var_offset_table::const_iterator i = m_kernarg_offsets.find (var);
146 gcc_assert (i != m_kernarg_offsets.end ());
147 return (*i).second;
148 }
149
150 /* Add work-item ID variables to the beginning of the kernel function
151 which can be used for address computation as kernel dispatch packet
152 instructions can be expanded to GENERIC nodes referring to them. */
153
154 void
add_id_variables()155 brig_function::add_id_variables ()
156 {
157 tree bind_expr = m_current_bind_expr;
158 tree stmts = BIND_EXPR_BODY (bind_expr);
159
160 /* Initialize the WG limits and local ids. */
161
162 tree_stmt_iterator entry = tsi_start (stmts);
163
164 for (int i = 0; i < 3; ++i)
165 {
166 char dim_char = (char) ((int) 'x' + i);
167
168 /* The local sizes are limited to 16b values, but let's still use 32b
169 to avoid unnecessary casts (the ID functions are 32b). */
170 m_local_id_vars[i]
171 = add_local_variable (std::string ("__local_") + dim_char,
172 uint32_type_node);
173
174 tree workitemid_call
175 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2,
176 uint32_type_node, uint32_type_node,
177 build_int_cst (uint32_type_node, i), ptr_type_node,
178 m_context_arg);
179
180 tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]),
181 m_local_id_vars[i], workitemid_call);
182
183 tsi_link_after (&entry, id_init, TSI_NEW_STMT);
184
185 m_cur_wg_size_vars[i]
186 = add_local_variable (std::string ("__cur_wg_size_") + dim_char,
187 uint32_type_node);
188
189 tree cwgz_call
190 = call_builtin
191 (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE),
192 2, uint32_type_node, uint32_type_node,
193 build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg);
194
195 tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]),
196 m_cur_wg_size_vars[i], cwgz_call);
197
198 tsi_link_after (&entry, limit_init, TSI_NEW_STMT);
199
200 m_wg_id_vars[i]
201 = add_local_variable (std::string ("__workgroupid_") + dim_char,
202 uint32_type_node);
203
204 tree wgid_call
205 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID),
206 2, uint32_type_node, uint32_type_node,
207 build_int_cst (uint32_type_node, i), ptr_type_node,
208 m_context_arg);
209
210 tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]),
211 m_wg_id_vars[i], wgid_call);
212
213 tsi_link_after (&entry, wgid_init, TSI_NEW_STMT);
214
215 m_wg_size_vars[i]
216 = add_local_variable (std::string ("__workgroupsize_") + dim_char,
217 uint32_type_node);
218
219 tree wgsize_call
220 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE),
221 2, uint32_type_node, uint32_type_node,
222 build_int_cst (uint32_type_node, i), ptr_type_node,
223 m_context_arg);
224
225 tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]),
226 m_wg_size_vars[i], wgsize_call);
227
228 tsi_link_after (&entry, wgsize_init, TSI_NEW_STMT);
229
230 m_grid_size_vars[i]
231 = add_local_variable (std::string ("__gridsize_") + dim_char,
232 uint32_type_node);
233
234 tree gridsize_call
235 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2,
236 uint32_type_node, uint32_type_node,
237 build_int_cst (uint32_type_node, i), ptr_type_node,
238 m_context_arg);
239
240 tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]),
241 m_grid_size_vars[i], gridsize_call);
242
243 tsi_link_after (&entry, gridsize_init, TSI_NEW_STMT);
244 }
245
246 m_kernel_entry = entry;
247 }
248
249 /* Creates a new local variable with the given NAME and given GENERIC
250 TYPE. */
251
252 tree
add_local_variable(std::string name,tree type)253 brig_function::add_local_variable (std::string name, tree type)
254 {
255 tree name_identifier
256 = get_identifier_with_length (name.c_str (), name.size ());
257 tree variable
258 = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type);
259
260 DECL_NONLOCAL (variable) = 0;
261 TREE_ADDRESSABLE (variable) = 0;
262 TREE_STATIC (variable) = 0;
263 TREE_USED (variable) = 1;
264 DECL_ARTIFICIAL (variable) = 0;
265
266 tree bind_expr = DECL_SAVED_TREE (m_func_decl);
267
268 DECL_CONTEXT (variable) = m_func_decl;
269
270 DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr);
271 BIND_EXPR_VARS (bind_expr) = variable;
272 return variable;
273 }
274
275 /* Return tree type for an HSA register.
276
277 The tree type can be anything (scalar, vector, int, float, etc.)
278 but its size is guaranteed to match the HSA register size.
279
280 HSA registers are untyped but we select a type based on their use
281 to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
282 to occur when use or def reaches over current BB). */
283
284 tree
get_tree_type_for_hsa_reg(const BrigOperandRegister * reg) const285 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
286 {
287 size_t reg_size = gccbrig_reg_size (reg);
288
289 /* The default type. */
290 tree type = build_nonstandard_integer_type (reg_size, true);
291
292 if (m_parent->m_fn_regs_use_index.count (m_name) == 0)
293 return type;
294
295 const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name];
296 size_t reg_id = gccbrig_hsa_reg_id (*reg);
297 if (index.count (reg_id) == 0)
298 return type;
299
300 const reg_use_info &info = index.find (reg_id)->second;
301 std::vector<std::pair<tree, size_t> >::const_iterator it
302 = info.m_type_refs.begin ();
303 std::vector<std::pair<tree, size_t> >::const_iterator it_end
304 = info.m_type_refs.end ();
305 size_t max_refs_as_type_count = 0;
306 for (; it != it_end; it++)
307 {
308 size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT;
309 if (type_bit_size != reg_size) continue;
310 if (it->second > max_refs_as_type_count)
311 {
312 type = it->first;
313 max_refs_as_type_count = it->second;
314 }
315 }
316
317 return type;
318 }
319
320 /* Returns a DECL_VAR for the given HSAIL operand register.
321 If it has not been created yet for the function being generated,
322 creates it as a type determined by analysis phase. */
323
324 tree
get_m_var_declfor_reg(const BrigOperandRegister * reg)325 brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
326 {
327 size_t offset = gccbrig_hsa_reg_id (*reg);
328
329 reg_decl_index_entry *regEntry = m_regs[offset];
330 if (regEntry == NULL)
331 {
332 size_t reg_size = gccbrig_reg_size (reg);
333 tree type;
334 if (reg_size > 1)
335 type = get_tree_type_for_hsa_reg (reg);
336 else
337 type = boolean_type_node;
338
339 /* Drop the const qualifier so we do not end up with a read only
340 register variable which cannot be written to later. */
341 tree nonconst_type = build_type_variant (type, false, false);
342
343 regEntry = new reg_decl_index_entry;
344
345 regEntry->m_var_decl
346 = add_local_variable (gccbrig_reg_name (reg), nonconst_type);
347 m_regs[offset] = regEntry;
348 }
349 return regEntry->m_var_decl;
350 }
351
352 /* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is
353 a statement after which the iteration variables should be initialized and
354 the loop body starts. BRANCH_AFTER is the statement after which the loop
355 predicate check and the back edge goto will be appended. */
356
357 void
add_wi_loop(int dim,tree_stmt_iterator * header_entry,tree_stmt_iterator * branch_after)358 brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry,
359 tree_stmt_iterator *branch_after)
360 {
361 tree ivar = m_local_id_vars[dim];
362 tree ivar_max = m_cur_wg_size_vars[dim];
363 tree_stmt_iterator entry = *header_entry;
364
365 /* TODO: this is not a parallel loop as we share the "register variables"
366 across work-items. Should create a copy of them per WI instance. That
367 is, declare temporaries for new definitions inside the loop body, not at
368 function scope. */
369
370 tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar,
371 build_zero_cst (TREE_TYPE (ivar)));
372 tsi_link_after (&entry, ivar_init, TSI_NEW_STMT);
373
374 tree loop_body_label
375 = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim));
376 tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label);
377
378 tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT);
379
380 if (m_has_unexpanded_dp_builtins)
381 {
382 tree id_set_builtin
383 = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID);
384 /* Set the local ID to the current wi-loop iteration variable value to
385 ensure the builtins see the correct values. */
386 tree id_set_call
387 = call_builtin (id_set_builtin, 3,
388 void_type_node, uint32_type_node,
389 build_int_cst (uint32_type_node, dim), uint32_type_node,
390 ivar, ptr_type_node, m_context_arg);
391 tsi_link_after (&entry, id_set_call, TSI_NEW_STMT);
392 }
393
394 /* Increment the WI iteration variable. */
395 tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar,
396 build_one_cst (TREE_TYPE (ivar)));
397
398 tsi_link_after (branch_after, incr, TSI_NEW_STMT);
399
400 /* Append the predicate check with the back edge goto. */
401 tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max);
402 tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label);
403 tree if_stmt
404 = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE);
405 tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT);
406 }
407
408 /* Recursively analyzes the function and its callees for barrier usage. */
409
410 void
analyze_calls()411 brig_function::analyze_calls ()
412 {
413 if (m_calls_analyzed)
414 return;
415
416 /* Set this early to not get stuck in case of recursive call graphs.
417 This is safe because if the function calls itself, either the function
418 has barrier calls which implies a call to a function with barrier calls,
419 or it doesn't in which case the result depends on the later called
420 functions. */
421 m_calls_analyzed = true;
422
423 for (size_t i = 0; i < m_called_functions.size (); ++i)
424 {
425 tree f = m_called_functions[i];
426 brig_function *called_f = m_parent->get_finished_function (f);
427 if (called_f == NULL)
428 {
429 /* Unfinished function (only declaration within the set of BRIGs)
430 found. Cannot finish the CG analysis. Have to assume it does have
431 a barrier for safety. */
432 m_has_function_calls_with_barriers = true;
433 m_has_unexpanded_dp_builtins = true;
434 break;
435 }
436 called_f->analyze_calls ();
437 /* We can assume m_has_barriers has been correctly set during the
438 construction of the function decl. No need to reanalyze it. */
439 m_has_function_calls_with_barriers |= called_f->m_has_barriers;
440
441 /* If the function or any of its called functions has dispatch
442 packet builtin calls that require the local id, we need to
443 set the local id to the context in the work item loop before
444 the functions are called. If we analyze the opposite, these
445 function calls can be omitted. */
446 m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins;
447 }
448 }
449
450 /* Tries to convert the current kernel to a work-group function that executes
451 all work-items using loops. Returns true in case the conversion was
452 successful. */
453
454 bool
convert_to_wg_function()455 brig_function::convert_to_wg_function ()
456 {
457 if (!m_calls_analyzed)
458 analyze_calls ();
459
460 if (m_has_barriers || m_has_function_calls_with_barriers)
461 return false;
462
463 /* The most trivial case: No barriers at all in the kernel.
464 We can create one big work-item loop around the whole kernel. */
465 tree bind_expr = m_current_bind_expr;
466 tree stmts = BIND_EXPR_BODY (bind_expr);
467
468 for (int i = 0; i < 3; ++i)
469 {
470 /* The previous loop has added a new label to the end of the function,
471 the next level loop should wrap around it also. */
472 tree_stmt_iterator function_exit = tsi_last (stmts);
473 add_wi_loop (i, &m_kernel_entry, &function_exit);
474 }
475
476 m_is_wg_function = true;
477 return false;
478 }
479
480 /* Emits a kernel description to a special ELF section so it can be
481 utilized by an HSA runtime implementation. The assembly block
482 must be emitted to a statement list of an function, which is given
483 as an argument. Returns the assembly block used to emit the section. */
484
485 tree
emit_metadata(tree stmt_list)486 brig_function::emit_metadata (tree stmt_list)
487 {
488 /* Emit an ELF section via an assembly directive that generates a special
489 ELF section for each kernel that contains raw bytes of a descriptor
490 object. This is pretty disgusting, but life is never perfect ;) */
491
492 /* Use the original kernel name without the '_' prefix in the section name. */
493 std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name;
494
495 std::ostringstream strstr;
496 strstr << std::endl
497 << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name
498 << std::endl
499 << "\t.p2align 1, 1, 1" << std::endl
500 << "\t.byte ";
501
502 for (size_t i = 0; i < sizeof (phsa_descriptor); ++i)
503 {
504 strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex
505 << (unsigned) *((unsigned char *) &m_descriptor + i);
506 if (i + 1 < sizeof (phsa_descriptor))
507 strstr << ", ";
508 }
509
510 strstr << std::endl << ".popsection" << std::endl << std::endl;
511
512 tree metadata_asm
513 = build_stmt (ASM_EXPR,
514 build_string (strstr.str ().size (), strstr.str ().c_str ()),
515 NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE);
516
517 append_to_statement_list_force (metadata_asm, &stmt_list);
518 return metadata_asm;
519 }
520
521 /* Emits the kernel launcher function. Also emits the metadata section
522 creation statements in it.
523
524 The launcher function calls the device-side runtime
525 that runs the kernel for all work-items. In C:
526
527 void KernelName (void* context, void* group_base_addr)
528 {
529 __hsail_launch_kernel (_KernelName, context, group_base_addr);
530 }
531
532 or, in case of a successful conversion to a work-group function:
533
534 void KernelName (void* context, void* group_base_addr)
535 {
536 __hsail_launch_wg_function (_KernelName, context, group_base_addr);
537 }
538
539 The user/host sees this function as the kernel to call from the
540 outside. The actual kernel generated from HSAIL was named _KernelName.
541 */
542
543 tree
emit_launcher_and_metadata()544 brig_function::emit_launcher_and_metadata ()
545 {
546 /* The original kernel name without the '_' prefix. */
547 std::string kern_name = m_name.substr (1);
548
549 tree name_identifier
550 = get_identifier_with_length (kern_name.c_str (), kern_name.size ());
551
552 tree launcher
553 = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
554 build_function_type_list (void_type_node, ptr_type_node,
555 ptr_type_node, NULL_TREE));
556
557 TREE_USED (launcher) = 1;
558 DECL_ARTIFICIAL (launcher) = 1;
559
560 tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
561 get_identifier ("__context"), ptr_type_node);
562
563 DECL_ARGUMENTS (launcher) = context_arg;
564 DECL_ARG_TYPE (context_arg) = ptr_type_node;
565 DECL_CONTEXT (context_arg) = launcher;
566 TREE_USED (context_arg) = 1;
567 DECL_ARTIFICIAL (context_arg) = 1;
568
569 tree group_base_addr_arg
570 = build_decl (UNKNOWN_LOCATION, PARM_DECL,
571 get_identifier ("__group_base_addr"), ptr_type_node);
572
573 chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg);
574 DECL_ARG_TYPE (group_base_addr_arg) = ptr_type_node;
575 DECL_CONTEXT (group_base_addr_arg) = launcher;
576 TREE_USED (group_base_addr_arg) = 1;
577 DECL_ARTIFICIAL (group_base_addr_arg) = 1;
578
579 tree resdecl
580 = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
581
582 DECL_RESULT (launcher) = resdecl;
583 DECL_CONTEXT (resdecl) = launcher;
584
585 DECL_INITIAL (launcher) = make_node (BLOCK);
586 TREE_USED (DECL_INITIAL (launcher)) = 1;
587
588 tree stmt_list = alloc_stmt_list ();
589
590 tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
591
592 TREE_STATIC (launcher) = 0;
593 TREE_PUBLIC (launcher) = 1;
594
595 DECL_SAVED_TREE (launcher) = bind_expr;
596
597 if (DECL_STRUCT_FUNCTION (launcher) == NULL)
598 push_struct_function (launcher);
599 else
600 push_cfun (DECL_STRUCT_FUNCTION (launcher));
601
602 tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl);
603
604 tree phsail_launch_kernel_call;
605
606 /* Compute the local group segment frame start pointer. */
607 tree group_local_offset_temp
608 = create_tmp_var (uint32_type_node, "group_local_offset");
609 tree group_local_offset_arg
610 = build2 (MODIFY_EXPR, uint32_type_node,
611 group_local_offset_temp,
612 build_int_cst (uint32_type_node,
613 m_parent->m_module_group_variables.size()));
614
615 /* Emit a launcher depending whether we converted the kernel function to
616 a work group function or not. */
617 if (m_is_wg_function)
618 phsail_launch_kernel_call
619 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
620 4, void_type_node,
621 ptr_type_node, kernel_func_ptr, ptr_type_node,
622 context_arg, ptr_type_node, group_base_addr_arg,
623 uint32_type_node, group_local_offset_arg);
624 else
625 phsail_launch_kernel_call
626 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
627 4, void_type_node,
628 ptr_type_node, kernel_func_ptr, ptr_type_node,
629 context_arg, ptr_type_node, group_base_addr_arg,
630 uint32_type_node, group_local_offset_arg);
631
632 append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
633
634 emit_metadata (stmt_list);
635
636 return launcher;
637 }
638
639 tree
append_statement(tree stmt)640 brig_function::append_statement (tree stmt)
641 {
642 gcc_assert (m_func_decl != NULL);
643
644 tree bind_expr = m_current_bind_expr;
645 tree stmts = BIND_EXPR_BODY (bind_expr);
646
647 append_to_statement_list_force (stmt, &stmts);
648 return stmt;
649 }
650
651 /* Creates a new "alloca frame" for the current function by
652 injecting an alloca frame push in the beginning of the function
653 and an alloca frame pop before all function exit points. */
654
655 void
create_alloca_frame()656 brig_function::create_alloca_frame ()
657 {
658 tree_stmt_iterator entry;
659
660 /* Adds the alloca push only after the ids have been initialized
661 in case of a kernel function. */
662 if (m_is_kernel)
663 entry = m_kernel_entry;
664 else
665 {
666 tree bind_expr = m_current_bind_expr;
667 tree stmts = BIND_EXPR_BODY (bind_expr);
668 entry = tsi_start (stmts);
669 }
670
671 tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME);
672 tree push_frame_call
673 = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node,
674 m_context_arg);
675
676 tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT);
677
678 tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME);
679
680 do
681 {
682 tree stmt = tsi_stmt (entry);
683 if (TREE_CODE (stmt) == RETURN_EXPR)
684 {
685 tree pop_frame_call
686 = call_builtin (pop_frame_builtin, 1, void_type_node,
687 ptr_type_node, m_context_arg);
688
689 tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT);
690 }
691 tsi_next (&entry);
692 }
693 while (!tsi_end_p (entry));
694 }
695
696 /* Finishes the currently built function. After calling this, no new
697 statements should be appeneded to the function. */
698 void
finish()699 brig_function::finish ()
700 {
701 append_return_stmt ();
702
703 /* Currently assume single alloca frame per WG. */
704 if (m_has_allocas)
705 create_alloca_frame ();
706 }
707
708 void
finish_kernel()709 brig_function::finish_kernel ()
710 {
711 /* Kernel functions should have a single exit point.
712 Let's create one. The return instructions should have
713 been converted to branches to this label. */
714 append_statement (build_stmt (LABEL_EXPR, m_exit_label));
715 /* Attempt to convert the kernel to a work-group function that
716 executes all work-items of the WG using a loop. */
717 convert_to_wg_function ();
718
719 append_return_stmt ();
720
721 /* Currently assume single alloca frame per WG. */
722 if (m_has_allocas)
723 create_alloca_frame ();
724 }
725
726 void
append_return_stmt()727 brig_function::append_return_stmt ()
728 {
729 gcc_assert (m_current_bind_expr != NULL_TREE);
730 tree stmts = BIND_EXPR_BODY (m_current_bind_expr);
731
732 if (STATEMENT_LIST_TAIL (stmts) == NULL)
733 return; /* Empty function. */
734
735 tree last_stmt = tsi_stmt (tsi_last (stmts));
736
737 if (TREE_CODE (last_stmt) == RETURN_EXPR)
738 return;
739
740 if (m_ret_value != NULL_TREE)
741 {
742 tree result_assign
743 = build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value,
744 m_ret_temp);
745
746 tree return_expr
747 = build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign);
748 append_to_statement_list_force (return_expr, &stmts);
749 }
750 else
751 {
752 tree return_stmt = build_stmt (RETURN_EXPR, NULL);
753 append_to_statement_list_force (return_stmt, &stmts);
754 }
755 }
756
757 bool
has_function_scope_var(const BrigBase * var) const758 brig_function::has_function_scope_var (const BrigBase* var) const
759 {
760 return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
761 }
762
763 size_t
group_variable_segment_offset(const std::string & name) const764 brig_function::group_variable_segment_offset (const std::string &name) const
765 {
766 if (m_local_group_variables.has_variable (name))
767 return m_local_group_variables.segment_offset (name);
768
769 gcc_assert (m_parent->m_module_group_variables.has_variable (name));
770 return m_parent->m_module_group_variables.segment_offset (name);
771 }
772