/* brig2tree.cc -- brig to gcc generic/gimple tree conversion
   Copyright (C) 2016-2018 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
21
#include <cassert>
#include <iostream>
#include <iomanip>
#include <sstream>

/* config.h/system.h must come first among the GCC headers; the include
   order below is otherwise preserved as-is, since GCC headers are
   order-sensitive.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "function.h"
#include "brig-to-generic.h"
#include "stringpool.h"
#include "tree-iterator.h"
#include "toplev.h"
#include "gimplify.h"
#include "gimple-expr.h"
#include "print-tree.h"
#include "hsa-brig-format.h"
#include "stor-layout.h"
#include "diagnostic-core.h"
#include "brig-code-entry-handler.h"
#include "brig-machine.h"
#include "brig-util.h"
#include "phsa.h"
#include "tree-pretty-print.h"
#include "dumpfile.h"
#include "profile-count.h"
#include "tree-cfg.h"
#include "errors.h"
#include "fold-const.h"
#include "cgraph.h"
#include "dumpfile.h"
#include "tree-pretty-print.h"
55
56 extern int gccbrig_verbose;
57
58 tree brig_to_generic::s_fp16_type;
59 tree brig_to_generic::s_fp32_type;
60 tree brig_to_generic::s_fp64_type;
61
brig_to_generic()62 brig_to_generic::brig_to_generic ()
63 : m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0),
64 m_brig (NULL), m_next_private_offset (0)
65 {
66 m_globals = NULL_TREE;
67
68 /* Initialize the basic REAL types.
69 This doesn't work straight away because most of the targets
70 do not support fp16 natively. Let's by default convert
71 to fp32 and back before and after each instruction (handle it as
72 a storage format only), and later add an optimization pass
73 that removes the extra converts (in case of multiple fp16 ops
74 in a row). */
75 s_fp16_type = make_node (REAL_TYPE);
76 TYPE_PRECISION (s_fp16_type) = 16;
77 TYPE_SIZE (s_fp16_type) = bitsize_int (16);
78 TYPE_SIZE_UNIT (s_fp16_type) = size_int (2);
79 SET_TYPE_ALIGN (s_fp16_type, 16);
80 layout_type (s_fp16_type);
81
82 s_fp32_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F32);
83 s_fp64_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F64);
84
85 /* TODO: (machine)query the preferred rounding mode that is set by
86 the machine by default. This can be redefined by each BRIG module
87 header. */
88 m_default_float_rounding_mode = BRIG_ROUND_FLOAT_ZERO;
89
90 m_dump_file = dump_begin (TDI_original, &m_dump_flags);
91 }
92
93 class unimplemented_entry_handler : public brig_code_entry_handler
94 {
95 public:
unimplemented_entry_handler(brig_to_generic & parent)96 unimplemented_entry_handler (brig_to_generic &parent)
97 : brig_code_entry_handler (parent)
98 {
99 }
100
101 size_t
operator ()(const BrigBase * base)102 operator () (const BrigBase *base)
103 {
104 gcc_unreachable ();
105 return base->byteCount;
106 }
107 };
108
109 /* Handler for entries that can be (and are) safely skipped for the purposes
110 of GENERIC generation. */
111
112 class skipped_entry_handler : public brig_code_entry_handler
113 {
114 public:
skipped_entry_handler(brig_to_generic & parent)115 skipped_entry_handler (brig_to_generic &parent)
116 : brig_code_entry_handler (parent)
117 {
118 }
119
120 size_t
operator ()(const BrigBase * base)121 operator () (const BrigBase *base)
122 {
123 return base->byteCount;
124 }
125 };
126
127 class brig_reg_use_analyzer : public brig_code_entry_handler
128 {
129 public:
brig_reg_use_analyzer(brig_to_generic & parent)130 brig_reg_use_analyzer (brig_to_generic &parent)
131 : brig_code_entry_handler (parent)
132 {
133 }
134
135 size_t
operator ()(const BrigBase * base)136 operator () (const BrigBase *base)
137 {
138 const BrigInstBase *brig_inst = (const BrigInstBase *) base;
139 analyze_operands (*brig_inst);
140 return base->byteCount;
141 }
142
143 };
144
145 /* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that
146 should handle its data. */
147
148 struct code_entry_handler_info
149 {
150 BrigKind kind;
151 brig_code_entry_handler *handler;
152 };
153
154
155 /* Finds the BRIG file sections in the currently processed file. */
156
157 void
find_brig_sections()158 brig_to_generic::find_brig_sections ()
159 {
160 m_data = m_code = m_operand = NULL;
161 const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig;
162
163 /* Find the positions of the different sections. */
164 for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec)
165 {
166 uint64_t offset
167 = ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec];
168
169 const BrigSectionHeader *section_header
170 = (const BrigSectionHeader *) (m_brig + offset);
171
172 std::string name ((const char *) (§ion_header->name),
173 section_header->nameLength);
174
175 if (sec == BRIG_SECTION_INDEX_DATA && name == "hsa_data")
176 {
177 m_data = (const char *) section_header;
178 m_data_size = section_header->byteCount;
179 }
180 else if (sec == BRIG_SECTION_INDEX_CODE && name == "hsa_code")
181 {
182 m_code = (const char *) section_header;
183 m_code_size = section_header->byteCount;
184 }
185 else if (sec == BRIG_SECTION_INDEX_OPERAND && name == "hsa_operand")
186 {
187 m_operand = (const char *) section_header;
188 m_operand_size = section_header->byteCount;
189 }
190 else
191 {
192 gcc_unreachable ();
193 }
194 }
195
196 if (m_code == NULL)
197 gcc_unreachable ();
198 if (m_data == NULL)
199 gcc_unreachable ();
200 if (m_operand == NULL)
201 gcc_unreachable ();
202
203 }
204
205 /* Does a first pass over the given BRIG to collect data needed for the
206 actual parsing. Currently this includes only collecting the
207 group segment variable usage to support the experimental HSA PRM feature
208 where group variables can be declared also in module and function scope
209 (in addition to kernel scope).
210 */
211
212 void
analyze(const char * brig_blob)213 brig_to_generic::analyze (const char *brig_blob)
214 {
215 const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;
216
217 if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
218 fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
219 "Unrecognized file format.");
220 if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
221 fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
222 "BRIG version not supported. BRIG 1.0 required.");
223
224 m_brig = brig_blob;
225
226 find_brig_sections ();
227
228 brig_directive_variable_handler var_handler (*this);
229 brig_directive_fbarrier_handler fbar_handler (*this);
230 brig_directive_function_handler func_handler (*this);
231 brig_reg_use_analyzer reg_use_analyzer (*this);
232
233 /* Need this for grabbing the module names for mangling the
234 group variable names. */
235 brig_directive_module_handler module_handler (*this);
236 skipped_entry_handler skipped_handler (*this);
237
238 const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
239
240 code_entry_handler_info handlers[]
241 = {{BRIG_KIND_INST_BASIC, ®_use_analyzer},
242 {BRIG_KIND_INST_MOD, ®_use_analyzer},
243 {BRIG_KIND_INST_CMP, ®_use_analyzer},
244 {BRIG_KIND_INST_MEM, ®_use_analyzer},
245 {BRIG_KIND_INST_CVT, ®_use_analyzer},
246 {BRIG_KIND_INST_SEG_CVT, ®_use_analyzer},
247 {BRIG_KIND_INST_SEG, ®_use_analyzer},
248 {BRIG_KIND_INST_ADDR, ®_use_analyzer},
249 {BRIG_KIND_INST_SOURCE_TYPE, ®_use_analyzer},
250 {BRIG_KIND_INST_ATOMIC, ®_use_analyzer},
251 {BRIG_KIND_INST_SIGNAL, ®_use_analyzer},
252 {BRIG_KIND_INST_BR, ®_use_analyzer},
253 {BRIG_KIND_INST_LANE, ®_use_analyzer},
254 {BRIG_KIND_INST_QUEUE, ®_use_analyzer},
255 {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
256 {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
257 {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
258 {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
259 {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}};
260
261 m_analyzing = true;
262 for (size_t b = csection_header->headerByteCount; b < m_code_size;)
263 {
264 const BrigBase *entry = (const BrigBase *) (m_code + b);
265
266 brig_code_entry_handler *handler = &skipped_handler;
267
268 if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
269 {
270 /* The function definition ended. We can just discard the place
271 holder function. */
272 m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
273 delete m_cf;
274 m_cf = NULL;
275 }
276
277 /* Find a handler. */
278 for (size_t i = 0;
279 i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
280 {
281 if (handlers[i].kind == entry->kind)
282 handler = handlers[i].handler;
283 }
284
285 int bytes_processed = (*handler) (entry);
286 if (bytes_processed == 0)
287 fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_CORRUPTED_MODULE
288 "Element with 0 bytes.");
289 b += bytes_processed;
290 }
291
292 if (m_cf != NULL)
293 {
294 m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
295 delete m_cf;
296 m_cf = NULL;
297 }
298
299 m_total_group_segment_usage += m_module_group_variables.size ();
300 m_analyzing = false;
301 }
302
303 /* Parses the given BRIG blob. */
304
305 void
parse(const char * brig_blob)306 brig_to_generic::parse (const char *brig_blob)
307 {
308 m_brig = brig_blob;
309 find_brig_sections ();
310
311 brig_basic_inst_handler inst_handler (*this);
312 brig_branch_inst_handler branch_inst_handler (*this);
313 brig_cvt_inst_handler cvt_inst_handler (*this);
314 brig_seg_inst_handler seg_inst_handler (*this);
315 brig_copy_move_inst_handler copy_move_inst_handler (*this);
316 brig_signal_inst_handler signal_inst_handler (*this);
317 brig_atomic_inst_handler atomic_inst_handler (*this);
318 brig_cmp_inst_handler cmp_inst_handler (*this);
319 brig_mem_inst_handler mem_inst_handler (*this);
320 brig_inst_mod_handler inst_mod_handler (*this);
321 brig_directive_label_handler label_handler (*this);
322 brig_directive_variable_handler var_handler (*this);
323 brig_directive_fbarrier_handler fbar_handler (*this);
324 brig_directive_comment_handler comment_handler (*this);
325 brig_directive_function_handler func_handler (*this);
326 brig_directive_control_handler control_handler (*this);
327 brig_directive_arg_block_handler arg_block_handler (*this);
328 brig_directive_module_handler module_handler (*this);
329 brig_lane_inst_handler lane_inst_handler (*this);
330 brig_queue_inst_handler queue_inst_handler (*this);
331 skipped_entry_handler skipped_handler (*this);
332 unimplemented_entry_handler unimplemented_handler (*this);
333
334 struct code_entry_handler_info
335 {
336 BrigKind kind;
337 brig_code_entry_handler *handler;
338 };
339
340 /* TODO: Convert to a hash table / map. For now, put the more common
341 entries to the top to keep the scan fast on average. */
342 code_entry_handler_info handlers[]
343 = {{BRIG_KIND_INST_BASIC, &inst_handler},
344 {BRIG_KIND_INST_CMP, &cmp_inst_handler},
345 {BRIG_KIND_INST_MEM, &mem_inst_handler},
346 {BRIG_KIND_INST_MOD, &inst_mod_handler},
347 {BRIG_KIND_INST_CVT, &cvt_inst_handler},
348 {BRIG_KIND_INST_SEG_CVT, &seg_inst_handler},
349 {BRIG_KIND_INST_SEG, &seg_inst_handler},
350 {BRIG_KIND_INST_ADDR, ©_move_inst_handler},
351 {BRIG_KIND_INST_SOURCE_TYPE, ©_move_inst_handler},
352 {BRIG_KIND_INST_ATOMIC, &atomic_inst_handler},
353 {BRIG_KIND_INST_SIGNAL, &signal_inst_handler},
354 {BRIG_KIND_INST_BR, &branch_inst_handler},
355 {BRIG_KIND_INST_LANE, &lane_inst_handler},
356 {BRIG_KIND_INST_QUEUE, &queue_inst_handler},
357 /* Assuming fences are not needed. FIXME: call builtins
358 when porting to a platform where they are. */
359 {BRIG_KIND_INST_MEM_FENCE, &skipped_handler},
360 {BRIG_KIND_DIRECTIVE_LABEL, &label_handler},
361 {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
362 {BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, &arg_block_handler},
363 {BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, &arg_block_handler},
364 {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
365 {BRIG_KIND_DIRECTIVE_COMMENT, &comment_handler},
366 {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
367 {BRIG_KIND_DIRECTIVE_SIGNATURE, &func_handler},
368 {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler},
369 {BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION, &func_handler},
370 {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
371 /* Skipping debug locations for now as not needed for conformance. */
372 {BRIG_KIND_DIRECTIVE_LOC, &skipped_handler},
373 /* There are no supported pragmas at this moment. */
374 {BRIG_KIND_DIRECTIVE_PRAGMA, &skipped_handler},
375 {BRIG_KIND_DIRECTIVE_CONTROL, &control_handler},
376 {BRIG_KIND_DIRECTIVE_EXTENSION, &skipped_handler},
377 /* BRIG_KIND_NONE entries are valid anywhere. They can be used
378 for patching BRIGs before finalization. */
379 {BRIG_KIND_NONE, &skipped_handler}};
380
381 const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
382
383 for (size_t b = csection_header->headerByteCount; b < m_code_size;)
384 {
385 const BrigBase *entry = (const BrigBase *) (m_code + b);
386
387 brig_code_entry_handler *handler = &unimplemented_handler;
388
389 if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
390 finish_function (); /* The function definition ended. */
391
392 /* Find a handler. */
393 for (size_t i = 0;
394 i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
395 {
396 if (handlers[i].kind == entry->kind)
397 handler = handlers[i].handler;
398 }
399 b += (*handler) (entry);
400 }
401
402 finish_function ();
403 }
404
405 const BrigData *
get_brig_data_entry(size_t entry_offset) const406 brig_to_generic::get_brig_data_entry (size_t entry_offset) const
407 {
408 return (const BrigData *) (m_data + entry_offset);
409 }
410
411 const BrigBase *
get_brig_operand_entry(size_t entry_offset) const412 brig_to_generic::get_brig_operand_entry (size_t entry_offset) const
413 {
414 return (const BrigBase *) (m_operand + entry_offset);
415 }
416
417 const BrigBase *
get_brig_code_entry(size_t entry_offset) const418 brig_to_generic::get_brig_code_entry (size_t entry_offset) const
419 {
420 return (const BrigBase *) (m_code + entry_offset);
421 }
422
423 void
append_global(tree g)424 brig_to_generic::append_global (tree g)
425 {
426 if (m_globals == NULL_TREE)
427 {
428 m_globals = g;
429 return;
430 }
431 else
432 {
433 tree last = tree_last (m_globals);
434 TREE_CHAIN (last) = g;
435 }
436 }
437
438 tree
global_variable(const std::string & name) const439 brig_to_generic::global_variable (const std::string &name) const
440 {
441 label_index::const_iterator i = m_global_variables.find (name);
442 if (i == m_global_variables.end ())
443 return NULL_TREE;
444 else
445 return (*i).second;
446 }
447
448 /* Returns a function declaration with the given name. Assumes it has been
449 created previously via a DirectiveFunction or similar. */
450
451 tree
function_decl(const std::string & name)452 brig_to_generic::function_decl (const std::string &name)
453 {
454 label_index::const_iterator i = m_function_index.find (name);
455 if (i == m_function_index.end ())
456 return NULL_TREE;
457 return (*i).second;
458 }
459
460 void
add_function_decl(const std::string & name,tree func_decl)461 brig_to_generic::add_function_decl (const std::string &name, tree func_decl)
462 {
463 m_function_index[name] = func_decl;
464 }
465
466 /* Adds a GENERIC global variable VAR_DECL with the given NAME to the
467 current module. If we have generated a host def var ptr (a place holder
468 for variables that are defined by the HSA host code) for this global
469 variable definition (because there was a declaration earlier which looked
470 like it might have been a host defined variable), we now have
471 to assign its address and make it private to allow the references to
472 point to the defined variable instead. */
473
474 void
add_global_variable(const std::string & name,tree var_decl)475 brig_to_generic::add_global_variable (const std::string &name, tree var_decl)
476 {
477 append_global (var_decl);
478 m_global_variables[name] = var_decl;
479
480 std::string host_def_var_name
481 = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name;
482 tree host_def_var = global_variable (host_def_var_name);
483 if (host_def_var == NULL_TREE)
484 return;
485
486 tree ptype = build_pointer_type (TREE_TYPE (var_decl));
487 tree var_addr = build1 (ADDR_EXPR, ptype, var_decl);
488
489 DECL_INITIAL (host_def_var) = var_addr;
490 TREE_PUBLIC (host_def_var) = 0;
491 }
492
493 /* Adds an indirection pointer for a potential host-defined program scope
494 variable declaration. */
495
496 void
add_host_def_var_ptr(const std::string & name,tree var_decl)497 brig_to_generic::add_host_def_var_ptr (const std::string &name, tree var_decl)
498 {
499 std::string var_name = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name;
500
501 tree name_identifier = get_identifier (var_name.c_str ());
502
503 tree ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier,
504 build_pointer_type (TREE_TYPE (var_decl)));
505 DECL_EXTERNAL (ptr_var) = 0;
506 DECL_ARTIFICIAL (ptr_var) = 0;
507
508 TREE_PUBLIC (ptr_var) = 1;
509 TREE_USED (ptr_var) = 1;
510 TREE_ADDRESSABLE (ptr_var) = 1;
511 TREE_STATIC (ptr_var) = 1;
512
513 append_global (ptr_var);
514 m_global_variables[var_name] = ptr_var;
515 }
516
517 /* Produce a "mangled name" for the given brig function or kernel.
518 The mangling is used to make unique global symbol name in case of
519 module scope functions. Program scope functions are not mangled
520 (except for dropping the leading &), which makes the functions
521 directly visible for linking using the original function name. */
522
523 std::string
get_mangled_name(const BrigDirectiveExecutable * func) const524 brig_to_generic::get_mangled_name
525 (const BrigDirectiveExecutable *func) const
526 {
527 /* Strip the leading &. */
528 std::string func_name = get_string (func->name).substr (1);
529 if (func->linkage == BRIG_LINKAGE_MODULE)
530 {
531 /* Mangle the module scope function names with the module name and
532 make them public so they can be queried by the HSA runtime from
533 the produced binary. Assume it's the currently processed function
534 we are always referring to. */
535 func_name = "gccbrig." + m_module_name + "." + func_name;
536 }
537 return func_name;
538 }
539
540 std::string
get_string(size_t entry_offset) const541 brig_to_generic::get_string (size_t entry_offset) const
542 {
543 const BrigData *data_item = get_brig_data_entry (entry_offset);
544 return std::string ((const char *) &data_item->bytes, data_item->byteCount);
545 }
546
547 /* Adapted from c-semantics.c. */
548
549 tree
build_stmt(enum tree_code code,...)550 build_stmt (enum tree_code code, ...)
551 {
552 tree ret;
553 int length, i;
554 va_list p;
555 bool side_effects;
556
557 /* This function cannot be used to construct variably-sized nodes. */
558 gcc_assert (TREE_CODE_CLASS (code) != tcc_vl_exp);
559
560 va_start (p, code);
561
562 ret = make_node (code);
563 TREE_TYPE (ret) = void_type_node;
564 length = TREE_CODE_LENGTH (code);
565
566 /* TREE_SIDE_EFFECTS will already be set for statements with
567 implicit side effects. Here we make sure it is set for other
568 expressions by checking whether the parameters have side
569 effects. */
570
571 side_effects = false;
572 for (i = 0; i < length; i++)
573 {
574 tree t = va_arg (p, tree);
575 if (t && !TYPE_P (t))
576 side_effects |= TREE_SIDE_EFFECTS (t);
577 TREE_OPERAND (ret, i) = t;
578 }
579
580 TREE_SIDE_EFFECTS (ret) |= side_effects;
581
582 va_end (p);
583 return ret;
584 }
585
586 /* BRIG regs are untyped, but GENERIC is not. We need to add implicit casts
587 in case treating the operand with an instruction with a type different
588 than the created reg var type in order to select correct instruction type
589 later on. This function creates the necessary reinterpret type cast from
590 a source variable to the destination type. In case no cast is needed to
591 the same type, SOURCE is returned directly.
592
593 In case of mismatched type sizes, casting:
594 - to narrower type the upper bits are clipped and
595 - to wider type the source value is zero extended. */
596
597 tree
build_resize_convert_view(tree destination_type,tree source)598 build_resize_convert_view (tree destination_type, tree source)
599 {
600
601 gcc_assert (source && destination_type && TREE_TYPE (source) != NULL_TREE
602 && destination_type != NULL_TREE);
603
604 tree source_type = TREE_TYPE (source);
605 if (TREE_CODE (source) == CALL_EXPR)
606 {
607 tree func_decl = TREE_OPERAND (TREE_OPERAND (source, 1), 0);
608 source_type = TREE_TYPE (TREE_TYPE (func_decl));
609 }
610
611 if (destination_type == source_type)
612 return source;
613
614 size_t src_size = int_size_in_bytes (source_type);
615 size_t dst_size = int_size_in_bytes (destination_type);
616 if (src_size == dst_size)
617 return build1 (VIEW_CONVERT_EXPR, destination_type, source);
618 else /* src_size != dst_size */
619 {
620 /* The src_size can be smaller at least with f16 scalars which are
621 stored to 32b register variables. First convert to an equivalent
622 size unsigned type, then extend to an unsigned type of the
623 target width, after which VIEW_CONVERT_EXPR can be used to
624 force to the target type. */
625 tree resized = convert (get_scalar_unsigned_int_type (destination_type),
626 build_reinterpret_to_uint (source));
627 gcc_assert ((size_t)int_size_in_bytes (TREE_TYPE (resized)) == dst_size);
628 return build_resize_convert_view (destination_type, resized);
629 }
630 }
631
632 /* Reinterprets SOURCE as a scalar unsigned int with the size
633 corresponding to the orignal. */
634
build_reinterpret_to_uint(tree source)635 tree build_reinterpret_to_uint (tree source)
636 {
637 tree src_type = TREE_TYPE (source);
638 if (INTEGRAL_TYPE_P (src_type) && TYPE_UNSIGNED (src_type))
639 return source;
640 tree dest_type = get_scalar_unsigned_int_type (src_type);
641 return build1 (VIEW_CONVERT_EXPR, dest_type, source);
642 }
643
644 /* Returns the finished brig_function for the given generic FUNC_DECL,
645 or NULL, if not found. */
646
647 brig_function *
get_finished_function(tree func_decl)648 brig_to_generic::get_finished_function (tree func_decl)
649 {
650 std::string func_name
651 = identifier_to_locale (IDENTIFIER_POINTER (DECL_NAME (func_decl)));
652 std::map<std::string, brig_function *>::iterator i
653 = m_finished_functions.find (func_name);
654 if (i != m_finished_functions.end ())
655 return (*i).second;
656 else
657 return NULL;
658 }
659
660 /* Adds a group variable to a correct book keeping structure depending
661 on its segment. */
662
663 void
add_group_variable(const std::string & name,size_t size,size_t alignment,bool function_scope)664 brig_to_generic::add_group_variable (const std::string &name, size_t size,
665 size_t alignment, bool function_scope)
666 {
667 /* Module and function scope group region variables are an experimental
668 feature. We implement module scope group variables with a separate
669 book keeping inside brig_to_generic which is populated in the 'analyze()'
670 prepass. This is to ensure we know the group segment offsets when
671 processing the functions that might refer to them. */
672 if (!function_scope)
673 {
674 if (!m_module_group_variables.has_variable (name))
675 m_module_group_variables.add (name, size, alignment);
676 return;
677 }
678
679 if (!m_cf->m_local_group_variables.has_variable (name))
680 m_cf->m_local_group_variables.add (name, size, alignment);
681 }
682
683 /* Finalizes the currently handled function. Should be called before
684 setting a new function. */
685
686 void
finish_function()687 brig_to_generic::finish_function ()
688 {
689 if (m_cf == NULL || m_cf->m_func_decl == NULL_TREE)
690 {
691 /* It can be a finished func declaration fingerprint, in that case we
692 don't have m_func_decl. */
693 m_cf = NULL;
694 return;
695 }
696
697 if (!m_cf->m_is_kernel)
698 {
699 tree bind_expr = m_cf->m_current_bind_expr;
700 tree stmts = BIND_EXPR_BODY (bind_expr);
701 m_cf->finish ();
702 m_cf->emit_metadata (stmts);
703 dump_function (m_dump_file, m_cf);
704 gimplify_function_tree (m_cf->m_func_decl);
705 cgraph_node::finalize_function (m_cf->m_func_decl, true);
706 }
707 else
708 /* Emit the kernel only at the very end so we can analyze the total
709 group and private memory usage. */
710 m_kernels.push_back (m_cf);
711
712 pop_cfun ();
713
714 m_finished_functions[m_cf->m_name] = m_cf;
715 m_cf = NULL;
716 }
717
718 /* Initializes a new currently handled function. */
719
720 void
start_function(tree f)721 brig_to_generic::start_function (tree f)
722 {
723 if (DECL_STRUCT_FUNCTION (f) == NULL)
724 push_struct_function (f);
725 else
726 push_cfun (DECL_STRUCT_FUNCTION (f));
727
728 m_cf->m_func_decl = f;
729 }
730
731 /* Appends a new variable to the current kernel's private segment. */
732
733 void
append_private_variable(const std::string & name,size_t size,size_t alignment)734 brig_to_generic::append_private_variable (const std::string &name,
735 size_t size, size_t alignment)
736 {
737 /* We need to take care of two cases of alignment with private
738 variables because of the layout where the same variable for
739 each work-item is laid out in successive addresses.
740
741 1) Ensure the first work-item's variable is in an aligned
742 offset: */
743 size_t align_padding = m_next_private_offset % alignment == 0 ?
744 0 : (alignment - m_next_private_offset % alignment);
745
746 /* 2) Each successive per-work-item copy should be aligned.
747 If the variable has wider alignment than size then we need
748 to add extra padding to ensure it. The padding must be
749 included in the size to allow per-work-item offset computation
750 to find their own aligned copy. */
751
752 size_t per_var_padding = size % alignment == 0 ?
753 0 : (alignment - size % alignment);
754 m_private_data_sizes[name] = size + per_var_padding;
755
756 m_next_private_offset += align_padding;
757 m_private_offsets[name] = m_next_private_offset;
758 m_next_private_offset += size + per_var_padding;
759 }
760
761 size_t
private_variable_segment_offset(const std::string & name) const762 brig_to_generic::private_variable_segment_offset
763 (const std::string &name) const
764 {
765 var_offset_table::const_iterator i = m_private_offsets.find (name);
766 gcc_assert (i != m_private_offsets.end ());
767 return (*i).second;
768 }
769
770 bool
has_private_variable(const std::string & name) const771 brig_to_generic::has_private_variable (const std::string &name) const
772 {
773 std::map<std::string, size_t>::const_iterator i
774 = m_private_data_sizes.find (name);
775 return i != m_private_data_sizes.end ();
776 }
777
778 size_t
private_variable_size(const std::string & name) const779 brig_to_generic::private_variable_size (const std::string &name) const
780 {
781 std::map<std::string, size_t>::const_iterator i
782 = m_private_data_sizes.find (name);
783 gcc_assert (i != m_private_data_sizes.end ());
784 return (*i).second;
785 }
786
787
788 /* The size of private segment required by a single work-item executing
789 the currently processed kernel. */
790
791 size_t
private_segment_size() const792 brig_to_generic::private_segment_size () const
793 {
794 return m_next_private_offset;
795 }
796
797 /* Cached builtins indexed by name. */
798
799 typedef std::map<std::string, tree> builtin_index;
800 builtin_index builtin_cache_;
801
802 /* Build a call to a builtin function. PDECL is the builtin function to
803 call. NARGS is the number of input arguments, RETTYPE the built-in
804 functions return value type, and ... is the list of arguments passed to
805 the call with type first, then the value. */
806
807 tree
call_builtin(tree pdecl,int nargs,tree rettype,...)808 call_builtin (tree pdecl, int nargs, tree rettype, ...)
809 {
810 if (rettype == error_mark_node)
811 return error_mark_node;
812
813 tree *types = new tree[nargs];
814 tree *args = new tree[nargs];
815
816 va_list ap;
817 va_start (ap, rettype);
818 for (int i = 0; i < nargs; ++i)
819 {
820 types[i] = va_arg (ap, tree);
821 tree arg = va_arg (ap, tree);
822 args[i] = build_resize_convert_view (types[i], arg);
823 if (types[i] == error_mark_node || args[i] == error_mark_node)
824 {
825 delete[] types;
826 delete[] args;
827 va_end (ap);
828 return error_mark_node;
829 }
830 }
831 va_end (ap);
832
833 tree fnptr = build_fold_addr_expr (pdecl);
834
835 tree ret = build_call_array (rettype, fnptr, nargs, args);
836
837 delete[] types;
838 delete[] args;
839
840 return ret;
841 }
842
843 /* Generate all global declarations. Should be called after the last
844 BRIG has been fed in. */
845
846 void
write_globals()847 brig_to_generic::write_globals ()
848 {
849 /* Now that the whole BRIG module has been processed, build a launcher
850 and a metadata section for each built kernel. */
851 for (size_t i = 0; i < m_kernels.size (); ++i)
852 {
853 brig_function *f = m_kernels[i];
854
855 /* Finish kernels now that we know the call graphs and their barrier
856 usage. */
857 f->finish_kernel ();
858
859 dump_function (m_dump_file, f);
860 gimplify_function_tree (f->m_func_decl);
861 cgraph_node::finalize_function (f->m_func_decl, true);
862
863 f->m_descriptor.is_kernel = 1;
864 /* TODO: analyze the kernel's actual private and group segment usage
865 using call graph. Now the mem size is overly
866 pessimistic in case of multiple kernels in the same module.
867 */
868 f->m_descriptor.group_segment_size = m_total_group_segment_usage;
869 f->m_descriptor.private_segment_size = private_segment_size ();
870
871 /* The kernarg size is rounded up to a multiple of 16 according to
872 the PRM specs. */
873 f->m_descriptor.kernarg_segment_size = f->m_next_kernarg_offset;
874 if (f->m_descriptor.kernarg_segment_size % 16 > 0)
875 f->m_descriptor.kernarg_segment_size
876 += 16 - f->m_next_kernarg_offset % 16;
877 f->m_descriptor.kernarg_max_align = f->m_kernarg_max_align;
878
879 tree launcher = f->emit_launcher_and_metadata ();
880
881 append_global (launcher);
882
883 gimplify_function_tree (launcher);
884 cgraph_node::finalize_function (launcher, true);
885 pop_cfun ();
886 }
887
888 int no_globals = list_length (m_globals);
889 tree *vec = new tree[no_globals];
890
891 int i = 0;
892 tree global = m_globals;
893 while (global)
894 {
895 vec[i] = global;
896 ++i;
897 global = TREE_CHAIN (global);
898 }
899
900 wrapup_global_declarations (vec, no_globals);
901
902 delete[] vec;
903
904 }
905
906 /* Returns an type with unsigned int elements corresponding to the
907 size and element count of ORIGINAL_TYPE. */
908
909 tree
get_unsigned_int_type(tree original_type)910 get_unsigned_int_type (tree original_type)
911 {
912 if (VECTOR_TYPE_P (original_type))
913 {
914 size_t esize
915 = int_size_in_bytes (TREE_TYPE (original_type)) * BITS_PER_UNIT;
916 poly_uint64 ecount = TYPE_VECTOR_SUBPARTS (original_type);
917 return build_vector_type (build_nonstandard_integer_type (esize, true),
918 ecount);
919 }
920 else
921 return build_nonstandard_integer_type (int_size_in_bytes (original_type)
922 * BITS_PER_UNIT,
923 true);
924 }
925
926 /* Returns a type with unsigned int corresponding to the size
927 ORIGINAL_TYPE. */
928
929 tree
get_scalar_unsigned_int_type(tree original_type)930 get_scalar_unsigned_int_type (tree original_type)
931 {
932 return build_nonstandard_integer_type (int_size_in_bytes (original_type)
933 * BITS_PER_UNIT, true);
934 }
935
936 void
dump_function(FILE * dump_file,brig_function * f)937 dump_function (FILE *dump_file, brig_function *f)
938 {
939 /* Dump the BRIG-specific tree IR. */
940 if (dump_file)
941 {
942 fprintf (dump_file, "\n;; Function %s", f->m_name.c_str ());
943 fprintf (dump_file, "\n;; enabled by -%s\n\n",
944 dump_flag_name (TDI_original));
945 print_generic_decl (dump_file, f->m_func_decl, 0);
946 print_generic_expr (dump_file, f->m_current_bind_expr, 0);
947 fprintf (dump_file, "\n");
948 }
949 }
950
951 /* Records use of the BRIG_REG as a TYPE in the current function. */
952
953 void
add_reg_used_as_type(const BrigOperandRegister & brig_reg,tree type)954 brig_to_generic::add_reg_used_as_type (const BrigOperandRegister &brig_reg,
955 tree type)
956 {
957 gcc_assert (m_cf);
958 reg_use_info &info
959 = m_fn_regs_use_index[m_cf->m_name][gccbrig_hsa_reg_id (brig_reg)];
960
961 if (info.m_type_refs_lookup.count (type))
962 info.m_type_refs[info.m_type_refs_lookup[type]].second++;
963 else
964 {
965 info.m_type_refs.push_back (std::make_pair (type, 1));
966 info.m_type_refs_lookup[type] = info.m_type_refs.size () - 1;
967 }
968 }
969