1 /* brig-function.cc -- declaration of brig_function class.
2 Copyright (C) 2016-2020 Free Software Foundation, Inc.
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4 for General Processor Tech.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23 #include <iomanip>
24
25 #include "brig-function.h"
26 #include "stringpool.h"
27 #include "tree-iterator.h"
28 #include "toplev.h"
29 #include "gimplify.h"
30 #include "gimple-expr.h"
31 #include "print-tree.h"
32 #include "hsa-brig-format.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-code-entry-handler.h"
36 #include "brig-machine.h"
37 #include "brig-util.h"
38 #include "phsa.h"
39 #include "tree-pretty-print.h"
40 #include "dumpfile.h"
41 #include "profile-count.h"
42 #include "tree-cfg.h"
43 #include "errors.h"
44 #include "function.h"
45 #include "brig-to-generic.h"
46 #include "brig-builtins.h"
47 #include "options.h"
48 #include "fold-const.h"
49 #include "target.h"
50 #include "builtins.h"
51
/* Definition of the static index that maps a (BRIG opcode, BRIG type) pair
   to a builtin function decl.  It is filled by the brig_function
   constructor the first time it runs with the index still empty.  */
52 brig_function::builtin_map brig_function::s_custom_builtins;
53
brig_function(const BrigDirectiveExecutable * exec,brig_to_generic * parent)54 brig_function::brig_function (const BrigDirectiveExecutable *exec,
55 brig_to_generic *parent)
56 : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""),
57 m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE),
58 m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
59 m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
60 m_next_kernarg_offset (0), m_kernarg_max_align (0),
61 m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
62 m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
63 m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
64 m_generating_arg_block (false), m_parent (parent)
65 {
66 memset (m_regs, 0,
67 BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
68 memset (&m_descriptor, 0, sizeof (phsa_descriptor));
69
70 if (s_custom_builtins.size () > 0) return;
71
72 /* Populate the builtin index. */
73 #undef DEF_HSAIL_ATOMIC_BUILTIN
74 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
75 #undef DEF_HSAIL_INTR_BUILTIN
76 #undef DEF_HSAIL_SAT_BUILTIN
77 #undef DEF_HSAIL_BUILTIN
78 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
79 s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
80 = builtin_decl_explicit (ENUM);
81
82 #include "brig-builtins.def"
83 }
84
~brig_function()85 brig_function::~brig_function ()
86 {
87 for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i)
88 {
89 if (m_regs[i] != NULL)
90 {
91 delete m_regs[i];
92 m_regs[i] = NULL;
93 }
94 }
95 }
96
97 /* Returns a GENERIC label with the given name in the given function.
98 Creates it, if not yet found. */
99
100 tree
label(const std::string & name)101 brig_function::label (const std::string &name)
102 {
103 label_index::const_iterator i = m_label_index.find (name);
104 if (i == m_label_index.end ())
105 {
106 tree name_identifier
107 = get_identifier_with_length (name.c_str (), name.size ());
108
109 tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
110 name_identifier, void_type_node);
111
112 DECL_CONTEXT (label_decl) = m_func_decl;
113 DECL_ARTIFICIAL (label_decl) = 0;
114
115 m_label_index[name] = label_decl;
116 return label_decl;
117 }
118 else
119 return (*i).second;
120 }
121
122 /* Record an argument variable for later use. This includes both local
123 variables inside arg blocks and incoming function arguments. */
124
125 void
add_arg_variable(const BrigDirectiveVariable * brigVar,tree treeDecl)126 brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar,
127 tree treeDecl)
128 {
129 m_arg_variables[brigVar] = treeDecl;
130 }
131
132 tree
arg_variable(const BrigDirectiveVariable * var) const133 brig_function::arg_variable (const BrigDirectiveVariable *var) const
134 {
135 variable_index::const_iterator i = m_arg_variables.find (var);
136 if (i == m_arg_variables.end ())
137 return NULL_TREE;
138 else
139 return (*i).second;
140 }
141
142 /* Appends a new kernel argument descriptor for the current kernel's
143 arg space. */
144
145 void
append_kernel_arg(const BrigDirectiveVariable * var,size_t size,size_t alignment)146 brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size,
147 size_t alignment)
148 {
149 gcc_assert (m_func_decl != NULL_TREE);
150 gcc_assert (m_is_kernel);
151
152 size_t align_padding = m_next_kernarg_offset % alignment == 0 ?
153 0 : (alignment - m_next_kernarg_offset % alignment);
154 m_next_kernarg_offset += align_padding;
155 m_kernarg_offsets[var] = m_next_kernarg_offset;
156 m_next_kernarg_offset += size;
157
158 m_kernarg_max_align
159 = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align;
160 }
161
162 size_t
kernel_arg_offset(const BrigDirectiveVariable * var) const163 brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const
164 {
165 var_offset_table::const_iterator i = m_kernarg_offsets.find (var);
166 gcc_assert (i != m_kernarg_offsets.end ());
167 return (*i).second;
168 }
169
170 /* Add work-item ID variables to the beginning of the kernel function
171 which can be used for address computation as kernel dispatch packet
172 instructions can be expanded to GENERIC nodes referring to them. */
173
174 void
add_id_variables()175 brig_function::add_id_variables ()
176 {
177 tree bind_expr = m_current_bind_expr;
178 tree stmts = BIND_EXPR_BODY (bind_expr);
179
180 /* Initialize the WG limits and local ids. */
181 m_kernel_entry = tsi_start (stmts);
182
183 for (int i = 0; i < 3; ++i)
184 {
185 char dim_char = (char) ((int) 'x' + i);
186
187 /* The local sizes are limited to 16b values, but let's still use 32b
188 to avoid unnecessary casts (the ID functions are 32b). */
189 m_local_id_vars[i]
190 = add_local_variable (std::string ("__local_") + dim_char,
191 long_long_integer_type_node);
192
193 tree workitemid_call
194 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2,
195 uint32_type_node, uint32_type_node,
196 build_int_cst (uint32_type_node, i), ptr_type_node,
197 m_context_arg);
198
199 tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]),
200 m_local_id_vars[i],
201 convert (TREE_TYPE (m_local_id_vars[i]),
202 workitemid_call));
203
204 append_statement (id_init);
205
206 m_cur_wg_size_vars[i]
207 = add_local_variable (std::string ("__cur_wg_size_") + dim_char,
208 long_long_integer_type_node);
209
210 tree cwgz_call;
211 if (flag_assume_phsa)
212 {
213 tree_stl_vec operands
214 = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
215 cwgz_call
216 = expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE,
217 BRIG_TYPE_U32, uint32_type_node,
218 operands);
219 }
220 else
221 cwgz_call = call_builtin
222 (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE),
223 2, uint32_type_node, uint32_type_node,
224 build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg);
225
226 tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]),
227 m_cur_wg_size_vars[i],
228 convert (TREE_TYPE (m_cur_wg_size_vars[i]),
229 cwgz_call));
230
231 append_statement (limit_init);
232
233 m_wg_id_vars[i]
234 = add_local_variable (std::string ("__workgroupid_") + dim_char,
235 uint32_type_node);
236
237 tree wgid_call;
238 if (flag_assume_phsa)
239 {
240 tree_stl_vec operands
241 = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
242 wgid_call
243 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID, BRIG_TYPE_U32,
244 uint32_type_node, operands);
245 }
246 else
247 wgid_call
248 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID),
249 2, uint32_type_node, uint32_type_node,
250 build_int_cst (uint32_type_node, i), ptr_type_node,
251 m_context_arg);
252
253 tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]),
254 m_wg_id_vars[i], wgid_call);
255
256 append_statement (wgid_init);
257
258 m_wg_size_vars[i]
259 = add_local_variable (std::string ("__workgroupsize_") + dim_char,
260 uint32_type_node);
261
262 tree wgsize_call;
263 if (flag_assume_phsa)
264 {
265 tree_stl_vec operands
266 = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
267 wgsize_call
268 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
269 uint32_type_node, operands);
270 }
271 else
272 wgsize_call
273 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE),
274 2, uint32_type_node, uint32_type_node,
275 build_int_cst (uint32_type_node, i), ptr_type_node,
276 m_context_arg);
277
278 tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]),
279 m_wg_size_vars[i], wgsize_call);
280
281 append_statement (wgsize_init);
282
283 m_grid_size_vars[i]
284 = add_local_variable (std::string ("__gridsize_") + dim_char,
285 uint32_type_node);
286
287 tree gridsize_call
288 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2,
289 uint32_type_node, uint32_type_node,
290 build_int_cst (uint32_type_node, i), ptr_type_node,
291 m_context_arg);
292
293 tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]),
294 m_grid_size_vars[i], gridsize_call);
295
296 append_statement (gridsize_init);
297
298 m_abs_id_base_vars[i]
299 = add_local_variable (std::string ("__abs_id_base_") + dim_char,
300 long_long_integer_type_node);
301
302 m_abs_id_vars[i]
303 = add_local_variable (std::string ("__abs_id_") + dim_char,
304 long_long_integer_type_node);
305
306 tree abs_id_base
307 = build2 (MULT_EXPR, long_long_integer_type_node,
308 convert (long_long_integer_type_node, m_wg_id_vars[i]),
309 convert (long_long_integer_type_node, m_wg_size_vars[i]));
310 tree abs_id
311 = build2 (PLUS_EXPR, long_long_integer_type_node, abs_id_base,
312 convert (long_long_integer_type_node, m_local_id_vars[i]));
313
314 tree abs_id_base_init
315 = build2 (MODIFY_EXPR, TREE_TYPE (m_abs_id_base_vars[i]),
316 m_abs_id_base_vars[i], abs_id_base);
317 append_statement (abs_id_base_init);
318
319 tree abs_id_init = build2 (MODIFY_EXPR,
320 TREE_TYPE (m_abs_id_vars[i]),
321 m_abs_id_vars[i], abs_id);
322 append_statement (abs_id_init);
323 }
324 }
325
326 /* Creates a new local variable with the given NAME and given GENERIC
327 TYPE. */
328
329 tree
add_local_variable(std::string name,tree type)330 brig_function::add_local_variable (std::string name, tree type)
331 {
332 tree name_identifier
333 = get_identifier_with_length (name.c_str (), name.size ());
334 tree variable
335 = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type);
336
337 DECL_NONLOCAL (variable) = 0;
338 TREE_ADDRESSABLE (variable) = 0;
339 TREE_STATIC (variable) = 0;
340 TREE_USED (variable) = 1;
341 DECL_ARTIFICIAL (variable) = 0;
342
343 tree bind_expr = DECL_SAVED_TREE (m_func_decl);
344
345 DECL_CONTEXT (variable) = m_func_decl;
346
347 DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr);
348 BIND_EXPR_VARS (bind_expr) = variable;
349 return variable;
350 }
351
352 /* Return tree type for an HSA register.
353
354 The tree type can be anything (scalar, vector, int, float, etc.)
355 but its size is guaranteed to match the HSA register size.
356
357 HSA registers are untyped but we select a type based on their use
358 to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
359 to occur when use or def reaches over current BB). */
360
361 tree
get_tree_type_for_hsa_reg(const BrigOperandRegister * reg) const362 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
363 {
364 size_t reg_size = gccbrig_reg_size (reg);
365
366 /* The default type. */
367 tree type = build_nonstandard_integer_type (reg_size, true);
368
369 if (m_parent->m_fn_regs_use_index.count (m_name) == 0)
370 return type;
371
372 const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name];
373 size_t reg_id = gccbrig_hsa_reg_id (*reg);
374 if (index.count (reg_id) == 0)
375 return type;
376
377 const reg_use_info &info = index.find (reg_id)->second;
378 std::vector<std::pair<tree, size_t> >::const_iterator it
379 = info.m_type_refs.begin ();
380 std::vector<std::pair<tree, size_t> >::const_iterator it_end
381 = info.m_type_refs.end ();
382 size_t max_refs_as_type_count = 0;
383 for (; it != it_end; it++)
384 {
385 size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT;
386 if (type_bit_size != reg_size) continue;
387 if (it->second > max_refs_as_type_count)
388 {
389 type = it->first;
390 max_refs_as_type_count = it->second;
391 }
392 }
393
394 return type;
395 }
396
397 /* Returns a DECL_VAR for the given HSAIL operand register.
398 If it has not been created yet for the function being generated,
399 creates it as a type determined by analysis phase. */
400
401 tree
get_m_var_declfor_reg(const BrigOperandRegister * reg)402 brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
403 {
404 size_t offset = gccbrig_hsa_reg_id (*reg);
405
406 reg_decl_index_entry *regEntry = m_regs[offset];
407 if (regEntry == NULL)
408 {
409 size_t reg_size = gccbrig_reg_size (reg);
410 tree type;
411 if (reg_size > 1)
412 type = get_tree_type_for_hsa_reg (reg);
413 else
414 type = boolean_type_node;
415
416 /* Drop the const qualifier so we do not end up with a read only
417 register variable which cannot be written to later. */
418 tree nonconst_type = build_type_variant (type, false, false);
419
420 regEntry = new reg_decl_index_entry;
421
422 regEntry->m_var_decl
423 = add_local_variable (gccbrig_reg_name (reg), nonconst_type);
424 m_regs[offset] = regEntry;
425 }
426 return regEntry->m_var_decl;
427 }
428
429 /* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is
430 a statement after which the iteration variables should be initialized and
431 the loop body starts. BRANCH_AFTER is the statement after which the loop
432 predicate check and the back edge goto will be appended. */
433
434 void
add_wi_loop(int dim,tree_stmt_iterator * header_entry,tree_stmt_iterator * branch_after)435 brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry,
436 tree_stmt_iterator *branch_after)
437 {
438 tree ivar = m_local_id_vars[dim];
439 tree abs_id_base_var = m_abs_id_base_vars[dim];
440 tree abs_id_var = m_abs_id_vars[dim];
441 tree ivar_max = m_cur_wg_size_vars[dim];
442 tree_stmt_iterator entry = *header_entry;
443
444 /* TODO: this is not a parallel loop as we share the "register variables"
445 across work-items. Should create a copy of them per WI instance. That
446 is, declare temporaries for new definitions inside the loop body, not at
447 function scope. */
448
449 tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar,
450 build_zero_cst (TREE_TYPE (ivar)));
451 tsi_link_after (&entry, ivar_init, TSI_NEW_STMT);
452
453 tree abs_id_var_init = build2 (MODIFY_EXPR, TREE_TYPE (abs_id_var),
454 abs_id_var,
455 convert (TREE_TYPE (abs_id_var),
456 abs_id_base_var));
457 tsi_link_after (&entry, abs_id_var_init, TSI_NEW_STMT);
458
459 tree loop_body_label
460 = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim));
461 tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label);
462
463 tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT);
464
465 if (m_has_unexpanded_dp_builtins)
466 {
467 if (!flag_assume_phsa)
468 {
469 tree id_set_builtin
470 = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID);
471 /* Set the local ID to the current wi-loop iteration variable value
472 to ensure the builtins see the correct values. */
473 tree id_set_call
474 = call_builtin (id_set_builtin, 3,
475 void_type_node, uint32_type_node,
476 build_int_cst (uint32_type_node, dim),
477 uint32_type_node, convert (uint32_type_node, ivar),
478 ptr_type_node, m_context_arg);
479 tsi_link_after (&entry, id_set_call, TSI_NEW_STMT);
480 }
481 else
482 {
483 tree ptr_type = build_pointer_type (uint32_type_node);
484 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
485 build_int_cst (ptr_type, dim * 4));
486 tree assign = build2 (MODIFY_EXPR, uint32_type_node, ctx,
487 convert (uint32_type_node, ivar));
488
489 tsi_link_after (&entry, assign, TSI_NEW_STMT);
490 }
491 }
492
493 /* Increment the WI iteration variable. */
494 tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar,
495 build_one_cst (TREE_TYPE (ivar)));
496
497 tsi_link_after (branch_after, incr, TSI_NEW_STMT);
498
499 /* ...and the abs id variable. */
500 tree abs_id_incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (abs_id_var),
501 abs_id_var,
502 build_one_cst (TREE_TYPE (abs_id_var)));
503
504 tsi_link_after (branch_after, abs_id_incr, TSI_NEW_STMT);
505
506 /* Append the predicate check with the back edge goto. */
507 tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max);
508 tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label);
509 tree if_stmt
510 = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE);
511 tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT);
512 }
513
514 /* Recursively analyzes the function and its callees for barrier usage. */
515
516 void
analyze_calls()517 brig_function::analyze_calls ()
518 {
519 if (m_calls_analyzed)
520 return;
521
522 /* Set this early to not get stuck in case of recursive call graphs.
523 This is safe because if the function calls itself, either the function
524 has barrier calls which implies a call to a function with barrier calls,
525 or it doesn't in which case the result depends on the later called
526 functions. */
527 m_calls_analyzed = true;
528
529 for (size_t i = 0; i < m_called_functions.size (); ++i)
530 {
531 tree f = m_called_functions[i];
532 brig_function *called_f = m_parent->get_finished_function (f);
533 if (called_f == NULL)
534 {
535 /* Unfinished function (only declaration within the set of BRIGs)
536 found. Cannot finish the CG analysis. Have to assume it does have
537 a barrier for safety. */
538 m_has_function_calls_with_barriers = true;
539 m_has_unexpanded_dp_builtins = true;
540 break;
541 }
542 called_f->analyze_calls ();
543 /* We can assume m_has_barriers has been correctly set during the
544 construction of the function decl. No need to reanalyze it. */
545 m_has_function_calls_with_barriers |= called_f->m_has_barriers;
546
547 /* If the function or any of its called functions has dispatch
548 packet builtin calls that require the local id, we need to
549 set the local id to the context in the work item loop before
550 the functions are called. If we analyze the opposite, these
551 function calls can be omitted. */
552 m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins;
553 }
554 }
555
556 /* Tries to convert the current kernel to a work-group function that executes
557 all work-items using loops. Returns true in case the conversion was
558 successful. */
559
560 bool
convert_to_wg_function()561 brig_function::convert_to_wg_function ()
562 {
563 if (!m_calls_analyzed)
564 analyze_calls ();
565
566 if (m_has_barriers || m_has_function_calls_with_barriers)
567 return false;
568
569 /* The most trivial case: No barriers at all in the kernel.
570 We can create one big work-item loop around the whole kernel. */
571 tree bind_expr = m_current_bind_expr;
572 tree stmts = BIND_EXPR_BODY (bind_expr);
573
574 for (int i = 0; i < 3; ++i)
575 {
576 /* The previous loop has added a new label to the end of the function,
577 the next level loop should wrap around it also. */
578 tree_stmt_iterator function_exit = tsi_last (stmts);
579 add_wi_loop (i, &m_kernel_entry, &function_exit);
580 }
581
582 m_is_wg_function = true;
583 return false;
584 }
585
586 /* Emits a kernel description to a special ELF section so it can be
587 utilized by an HSA runtime implementation. The assembly block
588 must be emitted to a statement list of an function, which is given
589 as an argument. Returns the assembly block used to emit the section. */
590
591 tree
emit_metadata(tree stmt_list)592 brig_function::emit_metadata (tree stmt_list)
593 {
594 /* Emit an ELF section via an assembly directive that generates a special
595 ELF section for each kernel that contains raw bytes of a descriptor
596 object. This is pretty disgusting, but life is never perfect ;) */
597
598 /* Use the original kernel name without the '_' prefix in the section name. */
599 std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name;
600
601 std::ostringstream strstr;
602 strstr << std::endl
603 << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name
604 << std::endl
605 << "\t.p2align 1, 1, 1" << std::endl
606 << "\t.byte ";
607
608 for (size_t i = 0; i < sizeof (phsa_descriptor); ++i)
609 {
610 strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex
611 << (unsigned) *((unsigned char *) &m_descriptor + i);
612 if (i + 1 < sizeof (phsa_descriptor))
613 strstr << ", ";
614 }
615
616 strstr << std::endl << ".popsection" << std::endl << std::endl;
617
618 tree metadata_asm
619 = build_stmt (ASM_EXPR,
620 build_string (strstr.str ().size (), strstr.str ().c_str ()),
621 NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE);
622
623 append_to_statement_list_force (metadata_asm, &stmt_list);
624 return metadata_asm;
625 }
626
627 /* Emits the kernel launcher function. Also emits the metadata section
628 creation statements in it.
629
630 The launcher function calls the device-side runtime
631 that runs the kernel for all work-items. In C:
632
633 void KernelName (void* context, void* group_base_addr)
634 {
635 __hsail_launch_kernel (_KernelName, context, group_base_addr);
636 }
637
638 or, in case of a successful conversion to a work-group function:
639
640 void KernelName (void* context, void* group_base_addr)
641 {
642 __hsail_launch_wg_function (_KernelName, context, group_base_addr);
643 }
644
645 The user/host sees this function as the kernel to call from the
646 outside. The actual kernel generated from HSAIL was named _KernelName.
647 */
648
649 tree
emit_launcher_and_metadata()650 brig_function::emit_launcher_and_metadata ()
651 {
652 /* The original kernel name without the '_' prefix. */
653 std::string kern_name = m_name.substr (1);
654
655 tree name_identifier
656 = get_identifier_with_length (kern_name.c_str (), kern_name.size ());
657
658 tree restrict_void_ptr
659 = build_qualified_type (build_pointer_type (void_type_node),
660 TYPE_QUAL_RESTRICT);
661 tree restrict_char_ptr
662 = build_qualified_type (build_pointer_type (char_type_node),
663 TYPE_QUAL_RESTRICT);
664 tree launcher
665 = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
666 build_function_type_list (void_type_node, restrict_void_ptr,
667 restrict_char_ptr, NULL_TREE));
668
669 TREE_USED (launcher) = 1;
670 DECL_ARTIFICIAL (launcher) = 1;
671
672 tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
673 get_identifier ("__context"),
674 restrict_void_ptr);
675
676 DECL_ARGUMENTS (launcher) = context_arg;
677 DECL_ARG_TYPE (context_arg) = restrict_void_ptr;
678 DECL_CONTEXT (context_arg) = launcher;
679 TREE_USED (context_arg) = 1;
680 DECL_ARTIFICIAL (context_arg) = 1;
681
682 tree group_base_addr_arg
683 = build_decl (UNKNOWN_LOCATION, PARM_DECL,
684 get_identifier ("__group_base_addr"), restrict_char_ptr);
685
686 chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg);
687 DECL_ARG_TYPE (group_base_addr_arg) = restrict_char_ptr;
688 DECL_CONTEXT (group_base_addr_arg) = launcher;
689 TREE_USED (group_base_addr_arg) = 1;
690 DECL_ARTIFICIAL (group_base_addr_arg) = 1;
691
692 tree resdecl
693 = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
694
695 DECL_RESULT (launcher) = resdecl;
696 DECL_CONTEXT (resdecl) = launcher;
697
698 DECL_INITIAL (launcher) = make_node (BLOCK);
699 TREE_USED (DECL_INITIAL (launcher)) = 1;
700
701 tree stmt_list = alloc_stmt_list ();
702
703 tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
704
705 TREE_STATIC (launcher) = 1;
706 TREE_PUBLIC (launcher) = 1;
707
708 DECL_SAVED_TREE (launcher) = bind_expr;
709
710 if (DECL_STRUCT_FUNCTION (launcher) == NULL)
711 push_struct_function (launcher);
712 else
713 push_cfun (DECL_STRUCT_FUNCTION (launcher));
714
715 tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl);
716
717 tree phsail_launch_kernel_call;
718
719 /* Compute the local group segment frame start pointer. */
720 tree group_local_offset_temp
721 = create_tmp_var (uint32_type_node, "group_local_offset");
722 tree group_local_offset_arg
723 = build2 (MODIFY_EXPR, uint32_type_node,
724 group_local_offset_temp,
725 build_int_cst (uint32_type_node,
726 m_parent->m_module_group_variables.size()));
727
728 /* Emit a launcher depending whether we converted the kernel function to
729 a work group function or not. */
730 if (m_is_wg_function)
731 phsail_launch_kernel_call
732 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
733 4, void_type_node,
734 ptr_type_node, kernel_func_ptr, restrict_void_ptr,
735 context_arg, restrict_char_ptr, group_base_addr_arg,
736 uint32_type_node, group_local_offset_arg);
737 else
738 phsail_launch_kernel_call
739 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
740 4, void_type_node,
741 ptr_type_node, kernel_func_ptr, restrict_void_ptr,
742 context_arg, restrict_char_ptr, group_base_addr_arg,
743 uint32_type_node, group_local_offset_arg);
744
745 append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
746
747 emit_metadata (stmt_list);
748
749 set_externally_visible (launcher);
750
751 return launcher;
752 }
753
754 tree
append_statement(tree stmt)755 brig_function::append_statement (tree stmt)
756 {
757 gcc_assert (m_func_decl != NULL);
758
759 tree bind_expr = m_current_bind_expr;
760 tree stmts = BIND_EXPR_BODY (bind_expr);
761
762 append_to_statement_list_force (stmt, &stmts);
763 return stmt;
764 }
765
766 /* Creates a new "alloca frame" for the current function by
767 injecting an alloca frame push in the beginning of the function
768 and an alloca frame pop before all function exit points. */
769
770 void
create_alloca_frame()771 brig_function::create_alloca_frame ()
772 {
773 tree_stmt_iterator entry;
774
775 /* Adds the alloca push only after the ids have been initialized
776 in case of a kernel function. */
777 if (m_is_kernel)
778 entry = m_kernel_entry;
779 else
780 {
781 tree bind_expr = m_current_bind_expr;
782 tree stmts = BIND_EXPR_BODY (bind_expr);
783 entry = tsi_start (stmts);
784 }
785
786 tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME);
787 tree push_frame_call
788 = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node,
789 m_context_arg);
790
791 tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT);
792
793 tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME);
794
795 do
796 {
797 tree stmt = tsi_stmt (entry);
798 if (TREE_CODE (stmt) == RETURN_EXPR)
799 {
800 tree pop_frame_call
801 = call_builtin (pop_frame_builtin, 1, void_type_node,
802 ptr_type_node, m_context_arg);
803
804 tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT);
805 }
806 tsi_next (&entry);
807 }
808 while (!tsi_end_p (entry));
809 }
810
811 /* Finishes the currently built function. After calling this, no new
812 statements should be appeneded to the function. */
813 void
finish()814 brig_function::finish ()
815 {
816 append_return_stmt ();
817
818 /* Currently assume single alloca frame per WG. */
819 if (m_has_allocas)
820 create_alloca_frame ();
821 }
822
823 void
finish_kernel()824 brig_function::finish_kernel ()
825 {
826 /* Kernel functions should have a single exit point.
827 Let's create one. The return instructions should have
828 been converted to branches to this label. */
829 append_statement (build_stmt (LABEL_EXPR, m_exit_label));
830 /* Attempt to convert the kernel to a work-group function that
831 executes all work-items of the WG using a loop. */
832 convert_to_wg_function ();
833
834 append_return_stmt ();
835
836 /* Currently assume single alloca frame per WG. */
837 if (m_has_allocas)
838 create_alloca_frame ();
839 }
840
841 void
append_return_stmt()842 brig_function::append_return_stmt ()
843 {
844 gcc_assert (m_current_bind_expr != NULL_TREE);
845 tree stmts = BIND_EXPR_BODY (m_current_bind_expr);
846
847 if (STATEMENT_LIST_TAIL (stmts) == NULL)
848 return; /* Empty function. */
849
850 tree last_stmt = tsi_stmt (tsi_last (stmts));
851
852 if (TREE_CODE (last_stmt) == RETURN_EXPR)
853 return;
854
855 if (m_ret_value != NULL_TREE)
856 {
857 tree result_assign
858 = build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value,
859 m_ret_temp);
860
861 tree return_expr
862 = build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign);
863 append_to_statement_list_force (return_expr, &stmts);
864 }
865 else
866 {
867 tree return_stmt = build_stmt (RETURN_EXPR, NULL);
868 append_to_statement_list_force (return_stmt, &stmts);
869 }
870 }
871
872 bool
has_function_scope_var(const BrigBase * var) const873 brig_function::has_function_scope_var (const BrigBase* var) const
874 {
875 return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
876 }
877
878 size_t
group_variable_segment_offset(const std::string & name) const879 brig_function::group_variable_segment_offset (const std::string &name) const
880 {
881 if (m_local_group_variables.has_variable (name))
882 return m_local_group_variables.segment_offset (name);
883
884 gcc_assert (m_parent->m_module_group_variables.has_variable (name));
885 return m_parent->m_module_group_variables.segment_offset (name);
886 }
887
888 /* Try to expand the given builtin call to reuse a previously generated
889 variable, if possible. If not, just call the given builtin.
890 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
891 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
892 input operands. */
893
894 tree
expand_or_call_builtin(BrigOpcode16_t brig_opcode,BrigType16_t brig_type,tree arith_type,tree_stl_vec & operands)895 brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
896 BrigType16_t brig_type,
897 tree arith_type,
898 tree_stl_vec &operands)
899 {
900 if (needs_workitem_context_data (brig_opcode))
901 m_has_unexpanded_dp_builtins = true;
902
903 if (can_expand_builtin (brig_opcode))
904 return expand_builtin (brig_opcode, operands);
905
906 tree built_in
907 = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
908
909 if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
910 && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
911 && brig_opcode != BRIG_OPCODE_LERP
912 && brig_opcode != BRIG_OPCODE_PACKCVT
913 && brig_opcode != BRIG_OPCODE_SAD
914 && brig_opcode != BRIG_OPCODE_SADHI)
915 {
916 /* Call the scalar built-in for all elements in the vector. */
917 tree_stl_vec operand0_elements;
918 if (operands.size () > 0)
919 unpack (operands[0], operand0_elements);
920
921 tree_stl_vec operand1_elements;
922 if (operands.size () > 1)
923 unpack (operands[1], operand1_elements);
924
925 tree_stl_vec result_elements;
926
927 size_t element_count = gccbrig_type_vector_subparts (arith_type);
928 for (size_t i = 0; i < element_count; ++i)
929 {
930 tree_stl_vec call_operands;
931 if (operand0_elements.size () > 0)
932 call_operands.push_back (operand0_elements.at (i));
933
934 if (operand1_elements.size () > 0)
935 call_operands.push_back (operand1_elements.at (i));
936
937 result_elements.push_back
938 (expand_or_call_builtin (brig_opcode, brig_type,
939 TREE_TYPE (arith_type),
940 call_operands));
941 }
942 return pack (result_elements);
943 }
944
945 tree_stl_vec call_operands;
946 tree_stl_vec operand_types;
947
948 tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
949
950 for (size_t i = 0; i < operands.size (); ++i)
951 {
952 tree operand_type = TREE_VALUE (arg_type_chain);
953 call_operands.push_back (convert (operand_type, operands[i]));
954 operand_types.push_back (operand_type);
955 arg_type_chain = TREE_CHAIN (arg_type_chain);
956 }
957
958 if (needs_workitem_context_data (brig_opcode))
959 {
960 call_operands.push_back (m_context_arg);
961 operand_types.push_back (ptr_type_node);
962 }
963
964 size_t operand_count = call_operands.size ();
965
966 call_operands.resize (4, NULL_TREE);
967 operand_types.resize (4, NULL_TREE);
968 for (size_t i = 0; i < operand_count; ++i)
969 call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
970 call_operands.at (i));
971
972 tree fnptr = build_fold_addr_expr (built_in);
973 return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
974 operand_count, &call_operands[0]);
975 }
976
977 /* Instead of calling a built-in function, use a more efficient mechanism
978 such as reuse a previously returned value known to be still valid, or
979 access the work-item context struct directly. This is beneficial especially
980 for the work-item identification related builtins as not having them as
981 unanalyzable black box calls can lead to more easily vectorizable parallel
982 loops for multi work-item work-groups. BRIG_OPCODE identifies the builtin
983 and OPERANDS store the operands. */
984
985 tree
expand_builtin(BrigOpcode16_t brig_opcode,tree_stl_vec & operands)986 brig_function::expand_builtin (BrigOpcode16_t brig_opcode,
987 tree_stl_vec &operands)
988 {
989 tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
990
991 tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
992
993 tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
994
995 if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
996 {
997 tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
998 id0 = convert (uint64_type_node, id0);
999
1000 tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
1001 id1 = convert (uint64_type_node, id1);
1002
1003 tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
1004 id2 = convert (uint64_type_node, id2);
1005
1006 tree max0 = convert (uint64_type_node, m_grid_size_vars[0]);
1007 tree max1 = convert (uint64_type_node, m_grid_size_vars[1]);
1008
1009 tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
1010 id2_x_max0_x_max1
1011 = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
1012
1013 tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
1014
1015 tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
1016 sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
1017
1018 return add_temp_var ("workitemflatabsid", sum);
1019 }
1020 else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
1021 {
1022 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1023 return m_abs_id_vars[dim];
1024 }
1025 else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
1026 {
1027
1028 tree wg_size_x = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_0);
1029 tree wg_size_y = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_1);
1030 tree z_x_wgsx_wgsy
1031 = build2 (MULT_EXPR, uint32_type_node,
1032 convert (uint32_type_node,
1033 expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_2)),
1034 wg_size_x);
1035 z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
1036 wg_size_y);
1037
1038 tree y_x_wgsx
1039 = build2 (MULT_EXPR, uint32_type_node,
1040 convert (uint32_type_node,
1041 expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_1)),
1042 wg_size_x);
1043
1044 tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
1045 sum = build2 (PLUS_EXPR, uint32_type_node,
1046 convert (uint32_type_node,
1047 expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_0)),
1048 sum);
1049 return add_temp_var ("workitemflatid", sum);
1050 }
1051 else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
1052 {
1053 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1054 if (flag_assume_phsa)
1055 {
1056 tree ptr_type = build_pointer_type (uint32_type_node);
1057 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1058 build_int_cst (ptr_type,
1059 PHSA_CONTEXT_WG_SIZES
1060 + dim * 4));
1061 std::string name ("wgsize_x");
1062 name [name.length() - 1] += dim;
1063 return add_temp_var (name.c_str(), ctx);
1064 }
1065 else if (m_is_kernel)
1066 {
1067 /* For kernels without phsa we generate certain temps before
1068 the WI loop, which means we don't need to rely on LICM to get
1069 them moved out. */
1070 return m_wg_size_vars[dim];
1071 }
1072 else
1073 gcc_unreachable ();
1074 }
1075 else if (brig_opcode == BRIG_OPCODE_WORKITEMID)
1076 {
1077 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1078 if (m_is_kernel)
1079 {
1080 return m_local_id_vars [dim];
1081 }
1082 else if (flag_assume_phsa)
1083 {
1084 tree ptr_type = build_pointer_type (uint32_type_node);
1085 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1086 build_int_cst (ptr_type,
1087 PHSA_CONTEXT_OFFS_WI_IDS
1088 + dim * 4));
1089 std::string name ("wiid_x");
1090 name [name.length() - 1] += dim;
1091 return add_temp_var (name.c_str(), ctx);
1092 }
1093 else
1094 gcc_unreachable ();
1095 }
1096 else if (brig_opcode == BRIG_OPCODE_WORKGROUPID)
1097 {
1098 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1099 if (flag_assume_phsa)
1100 {
1101 tree ptr_type = build_pointer_type (uint32_type_node);
1102 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1103 build_int_cst (ptr_type,
1104 PHSA_CONTEXT_OFFS_WG_IDS
1105 + dim * 4));
1106 std::string name ("wgid_x");
1107 name [name.length() - 1] += dim;
1108 return add_temp_var (name.c_str(), ctx);
1109 } else if (m_is_kernel)
1110 return m_wg_id_vars [dim];
1111 else
1112 gcc_unreachable ();
1113 }
1114 else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
1115 {
1116 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1117 if (flag_assume_phsa)
1118 {
1119 tree ptr_type = build_pointer_type (uint32_type_node);
1120 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
1121 build_int_cst (ptr_type,
1122 PHSA_CONTEXT_CURRENT_WG_SIZES
1123 + dim * 4));
1124 std::string name ("curwgsize_x");
1125 name [name.length() - 1] += dim;
1126 return add_temp_var (name.c_str(), ctx);
1127 } else if (m_is_kernel)
1128 return m_cur_wg_size_vars[dim];
1129 else
1130 gcc_unreachable ();
1131 }
1132 else
1133 gcc_unreachable ();
1134
1135 return NULL_TREE;
1136 }
1137
1138 /* Returns true in case the given opcode that would normally be generated
1139 as a builtin call can be expanded to tree nodes. */
1140
1141 bool
can_expand_builtin(BrigOpcode16_t brig_opcode) const1142 brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode) const
1143 {
1144 switch (brig_opcode)
1145 {
1146 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
1147 case BRIG_OPCODE_WORKITEMFLATID:
1148 case BRIG_OPCODE_WORKITEMID:
1149 case BRIG_OPCODE_WORKGROUPID:
1150 case BRIG_OPCODE_WORKGROUPSIZE:
1151 return m_is_kernel || flag_assume_phsa;
1152 case BRIG_OPCODE_WORKITEMFLATABSID:
1153 case BRIG_OPCODE_WORKITEMABSID:
1154 return m_is_kernel;
1155 default:
1156 return false;
1157 };
1158 }
1159
1160 /* In case the HSA instruction must be implemented using a builtin,
1161 this function is called to get the correct builtin function.
1162 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
1163 brig instruction and BRIG_TYPE the brig instruction's type. */
1164
1165 tree
get_builtin_for_hsa_opcode(tree type,BrigOpcode16_t brig_opcode,BrigType16_t brig_type) const1166 brig_function::get_builtin_for_hsa_opcode
1167 (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
1168 {
1169 tree builtin = NULL_TREE;
1170 tree builtin_type = type;
1171
1172 /* For vector types, first find the scalar version of the builtin. */
1173 if (type != NULL_TREE && VECTOR_TYPE_P (type))
1174 builtin_type = TREE_TYPE (type);
1175 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
1176
1177 /* Some BRIG opcodes can use the same builtins for unsigned and
1178 signed types. Force these cases to unsigned types. */
1179
1180 if (brig_opcode == BRIG_OPCODE_BORROW
1181 || brig_opcode == BRIG_OPCODE_CARRY
1182 || brig_opcode == BRIG_OPCODE_LASTBIT
1183 || brig_opcode == BRIG_OPCODE_BITINSERT)
1184 {
1185 if (brig_type == BRIG_TYPE_S32)
1186 brig_type = BRIG_TYPE_U32;
1187 else if (brig_type == BRIG_TYPE_S64)
1188 brig_type = BRIG_TYPE_U64;
1189 }
1190
1191 switch (brig_opcode)
1192 {
1193 case BRIG_OPCODE_FLOOR:
1194 builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
1195 break;
1196 case BRIG_OPCODE_CEIL:
1197 builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
1198 break;
1199 case BRIG_OPCODE_SQRT:
1200 case BRIG_OPCODE_NSQRT:
1201 builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
1202 break;
1203 case BRIG_OPCODE_RINT:
1204 builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
1205 break;
1206 case BRIG_OPCODE_TRUNC:
1207 builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
1208 break;
1209 case BRIG_OPCODE_COPYSIGN:
1210 builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
1211 break;
1212 case BRIG_OPCODE_NSIN:
1213 builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
1214 break;
1215 case BRIG_OPCODE_NLOG2:
1216 builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
1217 break;
1218 case BRIG_OPCODE_NEXP2:
1219 builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
1220 break;
1221 case BRIG_OPCODE_FMA:
1222 case BRIG_OPCODE_NFMA:
1223 builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
1224 break;
1225 case BRIG_OPCODE_NCOS:
1226 builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
1227 break;
1228 case BRIG_OPCODE_POPCOUNT:
1229 /* Popcount should be typed by its argument type (the return value
1230 is always u32). Let's use a b64 version for also for b32 for now. */
1231 return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
1232 case BRIG_OPCODE_BORROW:
1233 /* Borrow uses the same builtin for unsigned and signed types. */
1234 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
1235 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
1236 else
1237 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
1238 case BRIG_OPCODE_CARRY:
1239 /* Carry also uses the same builtin for unsigned and signed types. */
1240 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
1241 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
1242 else
1243 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
1244 default:
1245
1246 /* Use our builtin index for finding a proper builtin for the BRIG
1247 opcode and BRIG type. This takes care most of the builtin cases,
1248 the special cases are handled in the separate 'case' statements
1249 above. */
1250 builtin_map::const_iterator i
1251 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
1252 if (i != s_custom_builtins.end ())
1253 return (*i).second;
1254
1255 if (brig_inner_type != brig_type)
1256 {
1257 /* Try to find a scalar built-in we could use. */
1258 i = s_custom_builtins.find
1259 (std::make_pair (brig_opcode, brig_inner_type));
1260 if (i != s_custom_builtins.end ())
1261 return (*i).second;
1262 }
1263
1264 /* In case this is an fp16 operation that is promoted to fp32,
1265 try to find a fp32 scalar built-in. */
1266 if (brig_inner_type == BRIG_TYPE_F16)
1267 {
1268 i = s_custom_builtins.find
1269 (std::make_pair (brig_opcode, BRIG_TYPE_F32));
1270 if (i != s_custom_builtins.end ())
1271 return (*i).second;
1272 }
1273 gcc_unreachable ();
1274 }
1275
1276 if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
1277 {
1278 /* Try to find a vectorized version of the built-in.
1279 TODO: properly assert that builtin is a mathfn builtin? */
1280 tree vec_builtin
1281 = targetm.vectorize.builtin_vectorized_function
1282 (builtin_mathfn_code (builtin), type, type);
1283 if (vec_builtin != NULL_TREE)
1284 return vec_builtin;
1285 else
1286 return builtin;
1287 }
1288 if (builtin == NULL_TREE)
1289 gcc_unreachable ();
1290 return builtin;
1291 }
1292
1293 /* Unpacks the elements of the vector in VALUE to scalars (bit field
1294 references) in ELEMENTS. */
1295
1296 void
unpack(tree value,tree_stl_vec & elements)1297 brig_function::unpack (tree value, tree_stl_vec &elements)
1298 {
1299 size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
1300 size_t element_size
1301 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
1302 size_t element_count
1303 = vec_size * BITS_PER_UNIT / element_size;
1304
1305 tree input_element_type = TREE_TYPE (TREE_TYPE (value));
1306
1307 value = add_temp_var ("unpack_input", value);
1308
1309 for (size_t i = 0; i < element_count; ++i)
1310 {
1311 tree element
1312 = build3 (BIT_FIELD_REF, input_element_type, value,
1313 TYPE_SIZE (input_element_type),
1314 bitsize_int(i * element_size));
1315
1316 element = add_temp_var ("scalar", element);
1317 elements.push_back (element);
1318 }
1319 }
1320
1321 /* Pack the elements of the scalars in ELEMENTS to the returned vector. */
1322
1323 tree
pack(tree_stl_vec & elements)1324 brig_function::pack (tree_stl_vec &elements)
1325 {
1326 size_t element_count = elements.size ();
1327
1328 gcc_assert (element_count > 1);
1329
1330 tree output_element_type = TREE_TYPE (elements.at (0));
1331
1332 vec<constructor_elt, va_gc> *constructor_vals = NULL;
1333 for (size_t i = 0; i < element_count; ++i)
1334 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
1335
1336 tree vec_type = build_vector_type (output_element_type, element_count);
1337
1338 /* build_constructor creates a vector type which is not a vector_cst
1339 that requires compile time constant elements. */
1340 tree vec = build_constructor (vec_type, constructor_vals);
1341
1342 /* Add a temp variable for readability. */
1343 tree tmp_var = create_tmp_var (vec_type, "vec_out");
1344 tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1345 append_statement (vec_tmp_assign);
1346 return tmp_var;
1347 }
1348
1349 /* Returns true in case the given opcode needs to know about work-item context
1350 data. In such case the context data is passed as a pointer to a work-item
1351 context object, as the last argument in the builtin call. */
1352
1353 bool
needs_workitem_context_data(BrigOpcode16_t brig_opcode)1354 brig_function::needs_workitem_context_data
1355 (BrigOpcode16_t brig_opcode)
1356 {
1357 switch (brig_opcode)
1358 {
1359 case BRIG_OPCODE_WORKITEMABSID:
1360 case BRIG_OPCODE_WORKITEMFLATABSID:
1361 case BRIG_OPCODE_WORKITEMFLATID:
1362 case BRIG_OPCODE_CURRENTWORKITEMFLATID:
1363 case BRIG_OPCODE_WORKITEMID:
1364 case BRIG_OPCODE_WORKGROUPID:
1365 case BRIG_OPCODE_WORKGROUPSIZE:
1366 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
1367 case BRIG_OPCODE_GRIDGROUPS:
1368 case BRIG_OPCODE_GRIDSIZE:
1369 case BRIG_OPCODE_DIM:
1370 case BRIG_OPCODE_PACKETID:
1371 case BRIG_OPCODE_PACKETCOMPLETIONSIG:
1372 case BRIG_OPCODE_BARRIER:
1373 case BRIG_OPCODE_WAVEBARRIER:
1374 case BRIG_OPCODE_ARRIVEFBAR:
1375 case BRIG_OPCODE_INITFBAR:
1376 case BRIG_OPCODE_JOINFBAR:
1377 case BRIG_OPCODE_LEAVEFBAR:
1378 case BRIG_OPCODE_RELEASEFBAR:
1379 case BRIG_OPCODE_WAITFBAR:
1380 case BRIG_OPCODE_CUID:
1381 case BRIG_OPCODE_MAXCUID:
1382 case BRIG_OPCODE_DEBUGTRAP:
1383 case BRIG_OPCODE_GROUPBASEPTR:
1384 case BRIG_OPCODE_KERNARGBASEPTR:
1385 case BRIG_OPCODE_ALLOCA:
1386 return true;
1387 default:
1388 return false;
1389 };
1390 }
1391
1392 /* Appends and returns a new temp variable and an accompanying assignment
1393 statement that stores the value of the given EXPR and has the given NAME. */
1394
1395 tree
add_temp_var(std::string name,tree expr)1396 brig_function::add_temp_var (std::string name, tree expr)
1397 {
1398 tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
1399 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
1400 append_statement (assign);
1401 return temp_var;
1402 }
1403
1404 /* Returns the integer constant value of the given node.
1405 If it's a cast, looks into the source of the cast. */
1406
1407 HOST_WIDE_INT
int_constant_value(tree node)1408 brig_function::int_constant_value (tree node)
1409 {
1410 tree n = node;
1411 if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
1412 n = TREE_OPERAND (n, 0);
1413 return int_cst_value (n);
1414 }
1415
1416 /* Returns the tree code that should be used to implement the given
1417 HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
1418 (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
1419 returns TREE_LIST (if it can be emulated with a simple chain of tree
1420 nodes) or CALL_EXPR if the opcode should be implemented using a builtin
1421 call. */
1422
1423 tree_code
get_tree_code_for_hsa_opcode(BrigOpcode16_t brig_opcode,BrigType16_t brig_type)1424 brig_function::get_tree_code_for_hsa_opcode
1425 (BrigOpcode16_t brig_opcode, BrigType16_t brig_type)
1426 {
1427 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
1428 switch (brig_opcode)
1429 {
1430 case BRIG_OPCODE_NOP:
1431 return NOP_EXPR;
1432 case BRIG_OPCODE_ADD:
1433 return PLUS_EXPR;
1434 case BRIG_OPCODE_CMOV:
1435 if (brig_inner_type == brig_type)
1436 return COND_EXPR;
1437 else
1438 return VEC_COND_EXPR;
1439 case BRIG_OPCODE_SUB:
1440 return MINUS_EXPR;
1441 case BRIG_OPCODE_MUL:
1442 case BRIG_OPCODE_MUL24:
1443 return MULT_EXPR;
1444 case BRIG_OPCODE_MULHI:
1445 case BRIG_OPCODE_MUL24HI:
1446 return MULT_HIGHPART_EXPR;
1447 case BRIG_OPCODE_DIV:
1448 if (gccbrig_is_float_type (brig_inner_type))
1449 return RDIV_EXPR;
1450 else
1451 return TRUNC_DIV_EXPR;
1452 case BRIG_OPCODE_NEG:
1453 return NEGATE_EXPR;
1454 case BRIG_OPCODE_MIN:
1455 if (gccbrig_is_float_type (brig_inner_type))
1456 return CALL_EXPR;
1457 else
1458 return MIN_EXPR;
1459 case BRIG_OPCODE_MAX:
1460 if (gccbrig_is_float_type (brig_inner_type))
1461 return CALL_EXPR;
1462 else
1463 return MAX_EXPR;
1464 case BRIG_OPCODE_ABS:
1465 return ABS_EXPR;
1466 case BRIG_OPCODE_SHL:
1467 return LSHIFT_EXPR;
1468 case BRIG_OPCODE_SHR:
1469 return RSHIFT_EXPR;
1470 case BRIG_OPCODE_OR:
1471 return BIT_IOR_EXPR;
1472 case BRIG_OPCODE_XOR:
1473 return BIT_XOR_EXPR;
1474 case BRIG_OPCODE_AND:
1475 return BIT_AND_EXPR;
1476 case BRIG_OPCODE_NOT:
1477 return BIT_NOT_EXPR;
1478 case BRIG_OPCODE_RET:
1479 return RETURN_EXPR;
1480 case BRIG_OPCODE_MOV:
1481 case BRIG_OPCODE_LDF:
1482 return MODIFY_EXPR;
1483 case BRIG_OPCODE_LD:
1484 case BRIG_OPCODE_ST:
1485 return MEM_REF;
1486 case BRIG_OPCODE_BR:
1487 return GOTO_EXPR;
1488 case BRIG_OPCODE_REM:
1489 if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
1490 return TRUNC_MOD_EXPR;
1491 else
1492 return CALL_EXPR;
1493 case BRIG_OPCODE_NRCP:
1494 case BRIG_OPCODE_NRSQRT:
1495 /* Implement as 1/f (x). gcc should pattern detect that and
1496 use a native instruction, if available, for it. */
1497 return TREE_LIST;
1498 case BRIG_OPCODE_FMA:
1499 case BRIG_OPCODE_FLOOR:
1500 case BRIG_OPCODE_CEIL:
1501 case BRIG_OPCODE_SQRT:
1502 case BRIG_OPCODE_NSQRT:
1503 case BRIG_OPCODE_RINT:
1504 case BRIG_OPCODE_TRUNC:
1505 case BRIG_OPCODE_POPCOUNT:
1506 case BRIG_OPCODE_COPYSIGN:
1507 case BRIG_OPCODE_NCOS:
1508 case BRIG_OPCODE_NSIN:
1509 case BRIG_OPCODE_NLOG2:
1510 case BRIG_OPCODE_NEXP2:
1511 case BRIG_OPCODE_NFMA:
1512 /* Class has type B1 regardless of the float type, thus
1513 the below builtin map search cannot find it. */
1514 case BRIG_OPCODE_CLASS:
1515 case BRIG_OPCODE_WORKITEMABSID:
1516 return CALL_EXPR;
1517 default:
1518
1519 /* Some BRIG opcodes can use the same builtins for unsigned and
1520 signed types. Force these cases to unsigned types.
1521 */
1522
1523 if (brig_opcode == BRIG_OPCODE_BORROW
1524 || brig_opcode == BRIG_OPCODE_CARRY
1525 || brig_opcode == BRIG_OPCODE_LASTBIT
1526 || brig_opcode == BRIG_OPCODE_BITINSERT)
1527 {
1528 if (brig_type == BRIG_TYPE_S32)
1529 brig_type = BRIG_TYPE_U32;
1530 else if (brig_type == BRIG_TYPE_S64)
1531 brig_type = BRIG_TYPE_U64;
1532 }
1533
1534
1535 builtin_map::const_iterator i
1536 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
1537 if (i != s_custom_builtins.end ())
1538 return CALL_EXPR;
1539 else if (s_custom_builtins.find
1540 (std::make_pair (brig_opcode, brig_inner_type))
1541 != s_custom_builtins.end ())
1542 return CALL_EXPR;
1543 if (brig_inner_type == BRIG_TYPE_F16
1544 && s_custom_builtins.find
1545 (std::make_pair (brig_opcode, BRIG_TYPE_F32))
1546 != s_custom_builtins.end ())
1547 return CALL_EXPR;
1548 break;
1549 }
1550 return TREE_LIST; /* Emulate using a chain of nodes. */
1551 }
1552
1553 /* Inform of an update to the REG_VAR. */
1554
1555 void
add_reg_var_update(tree reg_var,tree var)1556 brig_function::add_reg_var_update (tree reg_var, tree var)
1557 {
1558 if (var == m_abs_id_vars[0] || var == m_abs_id_vars[1]
1559 || var == m_abs_id_vars[2] || var == m_local_id_vars[0]
1560 || var == m_local_id_vars[1] || var == m_local_id_vars[2])
1561 m_id_val_defs [reg_var] = var;
1562 else
1563 {
1564 /* Possible overwrite of an ID value. */
1565
1566 id_val_map::iterator i = m_id_val_defs.find (reg_var);
1567 if (i != m_id_val_defs.end())
1568 m_id_val_defs.erase (i);
1569 }
1570 }
1571
1572 /* If the REG_VAR is known to contain an ID value at this point in
1573 the basic block, return true. */
1574
1575 bool
is_id_val(tree reg_var)1576 brig_function::is_id_val (tree reg_var)
1577 {
1578 id_val_map::iterator i = m_id_val_defs.find (reg_var);
1579 return i != m_id_val_defs.end();
1580 }
1581
1582 /* Return an ID value for the given REG_VAR if its known to contain
1583 one at this point in the BB, NULL_TREE otherwise. */
1584
1585 tree
id_val(tree reg_var)1586 brig_function::id_val (tree reg_var)
1587 {
1588 id_val_map::iterator i = m_id_val_defs.find (reg_var);
1589 if (i != m_id_val_defs.end())
1590 return (*i).second;
1591 else
1592 return NULL_TREE;
1593 }
1594
1595 /* Informs of starting a new basic block. Called when generating
1596 a label, a call, a jump, or a return. */
1597
1598 void
start_new_bb()1599 brig_function::start_new_bb ()
1600 {
1601 m_id_val_defs.clear ();
1602 }
1603