1 /* brig-code-entry-handler.cc -- a gccbrig base class
2    Copyright (C) 2016-2018 Free Software Foundation, Inc.
3    Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4    for General Processor Tech.
5 
6    This file is part of GCC.
7 
8    GCC is free software; you can redistribute it and/or modify it under
9    the terms of the GNU General Public License as published by the Free
10    Software Foundation; either version 3, or (at your option) any later
11    version.
12 
13    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14    WARRANTY; without even the implied warranty of MERCHANTABILITY or
15    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16    for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with GCC; see the file COPYING3.  If not see
20    <http://www.gnu.org/licenses/>.  */
21 
22 #include "brig-code-entry-handler.h"
23 
24 #include "stringpool.h"
25 #include "tree-iterator.h"
26 #include "toplev.h"
27 #include "diagnostic.h"
28 #include "brig-machine.h"
29 #include "brig-util.h"
30 #include "errors.h"
31 #include "real.h"
32 #include "print-tree.h"
33 #include "tree-pretty-print.h"
34 #include "target.h"
35 #include "langhooks.h"
36 #include "gimple-expr.h"
37 #include "convert.h"
38 #include "brig-util.h"
39 #include "builtins.h"
40 #include "phsa.h"
41 #include "brig-builtins.h"
42 #include "fold-const.h"
43 
44 brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins;
45 
brig_code_entry_handler(brig_to_generic & parent)46 brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent)
47   : brig_entry_handler (parent)
48 {
49   if (s_custom_builtins.size () > 0) return;
50 
51   /* Populate the builtin index.  */
52 #undef DEF_HSAIL_ATOMIC_BUILTIN
53 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
54 #undef DEF_HSAIL_INTR_BUILTIN
55 #undef DEF_HSAIL_SAT_BUILTIN
56 #undef DEF_HSAIL_BUILTIN
57 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
58   s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)]		\
59     = builtin_decl_explicit (ENUM);
60 
61 #include "brig-builtins.def"
62 }
63 
64 /* Build a tree operand which is a reference to a piece of code.  REF is the
65    original reference as a BRIG object.  */
66 
67 tree
build_code_ref(const BrigBase & ref)68 brig_code_entry_handler::build_code_ref (const BrigBase &ref)
69 {
70   if (ref.kind == BRIG_KIND_DIRECTIVE_LABEL)
71     {
72       const BrigDirectiveLabel *brig_label = (const BrigDirectiveLabel *) &ref;
73 
74       const BrigData *label_name
75 	= m_parent.get_brig_data_entry (brig_label->name);
76 
77       std::string label_str ((const char *) (label_name->bytes),
78 			     label_name->byteCount);
79       return m_parent.m_cf->label (label_str);
80     }
81   else if (ref.kind == BRIG_KIND_DIRECTIVE_FUNCTION)
82     {
83       const BrigDirectiveExecutable *func
84        = (const BrigDirectiveExecutable *) &ref;
85       return m_parent.function_decl (m_parent.get_mangled_name (func));
86     }
87   else if (ref.kind == BRIG_KIND_DIRECTIVE_FBARRIER)
88     {
89       const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref;
90 
91       std::string var_name = m_parent.get_mangled_name (fbar);
92       uint64_t offset
93 	= m_parent.m_cf->group_variable_segment_offset (var_name);
94 
95       tree local_offset = build_int_cst (uint32_type_node, offset);
96       if (m_parent.m_cf->m_local_group_variables.has_variable (var_name))
97 	local_offset
98 	  = build2 (PLUS_EXPR, uint64_type_node, local_offset,
99 		    convert (uint64_type_node,
100 			     m_parent.m_cf->m_group_local_offset_arg));
101       return local_offset;
102     }
103   else
104     gcc_unreachable ();
105 }
106 
107 /* Produce a tree operand for the given BRIG_INST and its OPERAND.
108    OPERAND_TYPE should be the operand type in case it should not
109    be dictated by the BrigBase.  IS_INPUT indicates if the operand
110    is an input operand or a result.  */
111 
112 tree
build_tree_operand(const BrigInstBase & brig_inst,const BrigBase & operand,tree operand_type,bool is_input)113 brig_code_entry_handler::build_tree_operand (const BrigInstBase &brig_inst,
114 					     const BrigBase &operand,
115 					     tree operand_type, bool is_input)
116 {
117   switch (operand.kind)
118     {
119     case BRIG_KIND_OPERAND_OPERAND_LIST:
120       {
121 	vec<constructor_elt, va_gc> *constructor_vals = NULL;
122 	const BrigOperandOperandList &oplist
123 	  = (const BrigOperandOperandList &) operand;
124 	const BrigData *data = m_parent.get_brig_data_entry (oplist.elements);
125 	size_t bytes = data->byteCount;
126 	const BrigOperandOffset32_t *operand_ptr
127 	  = (const BrigOperandOffset32_t *) data->bytes;
128 	while (bytes > 0)
129 	  {
130 	    BrigOperandOffset32_t offset = *operand_ptr;
131 	    const BrigBase *operand_element
132 	      = m_parent.get_brig_operand_entry (offset);
133 	    tree element
134 	      = build_tree_operand (brig_inst, *operand_element, operand_type);
135 
136 	    /* In case a vector is used an input, cast the elements to
137 	       correct size here so we don't need a separate unpack/pack for it.
138 	       fp16-fp32 conversion is done in build_operands ().  */
139 	    if (is_input && TREE_TYPE (element) != operand_type)
140 	      element = build_resize_convert_view (operand_type, element);
141 
142 	    CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element);
143 	    ++operand_ptr;
144 	    bytes -= 4;
145 	  }
146 	size_t element_count = data->byteCount / 4;
147 	tree vec_type = build_vector_type (operand_type, element_count);
148 
149 	return build_constructor (vec_type, constructor_vals);
150       }
151     case BRIG_KIND_OPERAND_CODE_LIST:
152       {
153 	/* Build a TREE_VEC of code expressions.  */
154 
155 	const BrigOperandCodeList &oplist
156 	  = (const BrigOperandCodeList &) operand;
157 	const BrigData *data = m_parent.get_brig_data_entry (oplist.elements);
158 	size_t bytes = data->byteCount;
159 	const BrigOperandOffset32_t *operand_ptr
160 	  = (const BrigOperandOffset32_t *) data->bytes;
161 
162 	size_t case_index = 0;
163 	size_t element_count = data->byteCount / 4;
164 
165 	/* Create a TREE_VEC out of the labels in the list.  */
166 	tree vec = make_tree_vec (element_count);
167 
168 	while (bytes > 0)
169 	  {
170 	    BrigOperandOffset32_t offset = *operand_ptr;
171 	    const BrigBase *ref = m_parent.get_brig_code_entry (offset);
172 	    tree element = build_code_ref (*ref);
173 
174 	    gcc_assert (case_index < element_count);
175 	    TREE_VEC_ELT (vec, case_index) = element;
176 	    case_index++;
177 
178 	    ++operand_ptr;
179 	    bytes -= 4;
180 	  }
181 	return vec;
182       }
183     case BRIG_KIND_OPERAND_REGISTER:
184       {
185 	const BrigOperandRegister *brig_reg
186 	  = (const BrigOperandRegister *) &operand;
187 	return m_parent.m_cf->get_m_var_declfor_reg (brig_reg);
188       }
189     case BRIG_KIND_OPERAND_CONSTANT_BYTES:
190       {
191 	const BrigOperandConstantBytes *brigConst
192 	  = (const BrigOperandConstantBytes *) &operand;
193 	/* The constants can be of different type than the instruction
194 	   and are implicitly casted to the input operand.  */
195 	return get_tree_cst_for_hsa_operand (brigConst, NULL_TREE);
196       }
197     case BRIG_KIND_OPERAND_WAVESIZE:
198       {
199 	if (!INTEGRAL_TYPE_P (operand_type))
200 	  {
201 	    gcc_unreachable ();
202 	    return NULL_TREE;
203 	  }
204 	return build_int_cstu (operand_type, gccbrig_get_target_wavesize ());
205       }
206     case BRIG_KIND_OPERAND_CODE_REF:
207       {
208 	const BrigOperandCodeRef *brig_code_ref
209 	  = (const BrigOperandCodeRef *) &operand;
210 
211 	const BrigBase *ref = m_parent.get_brig_code_entry (brig_code_ref->ref);
212 
213 	return build_code_ref (*ref);
214       }
215     case BRIG_KIND_OPERAND_ADDRESS:
216       {
217 	return build_address_operand (brig_inst,
218 				      (const BrigOperandAddress &) operand);
219       }
220     default:
221       gcc_unreachable ();
222     }
223 }
224 
225 /* Build a tree node representing an address reference from a BRIG_INST and its
226    ADDR_OPERAND.  */
227 
228 tree
build_address_operand(const BrigInstBase & brig_inst,const BrigOperandAddress & addr_operand)229 brig_code_entry_handler::build_address_operand
230   (const BrigInstBase &brig_inst, const BrigOperandAddress &addr_operand)
231 {
232   tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
233 
234   BrigSegment8_t segment = BRIG_SEGMENT_GLOBAL;
235   if (brig_inst.opcode == BRIG_OPCODE_LDA)
236     segment = ((const BrigInstAddr &) brig_inst).segment;
237   else if (brig_inst.base.kind == BRIG_KIND_INST_MEM)
238     segment = ((const BrigInstMem &) brig_inst).segment;
239   else if (brig_inst.base.kind == BRIG_KIND_INST_ATOMIC)
240     segment = ((const BrigInstAtomic &) brig_inst).segment;
241 
242   tree var_offset = NULL_TREE;
243   tree const_offset = NULL_TREE;
244   tree symbol_base = NULL_TREE;
245 
246   if (addr_operand.symbol != 0)
247     {
248       const BrigDirectiveVariable *arg_symbol
249 	= (const BrigDirectiveVariable *) m_parent.get_brig_code_entry
250 	(addr_operand.symbol);
251 
252       std::string var_name = m_parent.get_mangled_name (arg_symbol);
253 
254       if (segment == BRIG_SEGMENT_KERNARG)
255 	{
256 	  /* Find the offset to the kernarg buffer for the given
257 	     kernel argument variable.  */
258 	  tree func = m_parent.m_cf->m_func_decl;
259 	  /* __args is the first parameter in kernel functions.  */
260 	  symbol_base = DECL_ARGUMENTS (func);
261 	  uint64_t offset = m_parent.m_cf->kernel_arg_offset (arg_symbol);
262 	  if (offset > 0)
263 	    const_offset = build_int_cst (size_type_node, offset);
264 	}
265       else if (segment == BRIG_SEGMENT_GROUP)
266 	{
267 	  uint64_t offset
268 	    = m_parent.m_cf->group_variable_segment_offset (var_name);
269 	  const_offset = build_int_cst (size_type_node, offset);
270 
271 	  /* If it's a local group variable reference, substract the local
272 	     group segment offset to get the group base ptr offset.  */
273 	  if (m_parent.m_cf->m_local_group_variables.has_variable (var_name))
274 	    const_offset
275 	      = build2 (PLUS_EXPR, uint64_type_node, const_offset,
276 			convert (uint64_type_node,
277 				 m_parent.m_cf->m_group_local_offset_arg));
278 
279 	}
280       else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL)
281 	{
282 	  uint32_t offset = m_parent.private_variable_segment_offset (var_name);
283 
284 	  /* Compute the offset to the work item's copy:
285 
286 	     single-wi-offset * local_size + wiflatid * varsize
287 
288 	     This way the work items have the same variable in
289 	     successive elements to each other in the segment,
290 	     helping to achieve autovectorization of loads/stores
291 	     with stride 1.  */
292 
293 	  tree_stl_vec uint32_0
294 	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
295 
296 	  tree_stl_vec uint32_1
297 	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
298 
299 	  tree_stl_vec uint32_2
300 	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
301 
302 	  tree local_size
303 	    = build2 (MULT_EXPR, uint32_type_node,
304 		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
305 					      BRIG_TYPE_U32,
306 					      uint32_type_node, uint32_0),
307 		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
308 					      BRIG_TYPE_U32,
309 					      uint32_type_node, uint32_1));
310 
311 	  local_size
312 	    = build2 (MULT_EXPR, uint32_type_node,
313 		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
314 					      BRIG_TYPE_U32,
315 					      uint32_type_node, uint32_2),
316 		      local_size);
317 
318 	  tree var_region
319 	    = build2 (MULT_EXPR, uint32_type_node,
320 		      build_int_cst (uint32_type_node, offset), local_size);
321 
322 	  tree_stl_vec operands;
323 	  tree pos
324 	    = build2 (MULT_EXPR, uint32_type_node,
325 		      build_int_cst (uint32_type_node,
326 				     m_parent.private_variable_size (var_name)),
327 		      expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID,
328 					      BRIG_TYPE_U32,
329 					      uint32_type_node, operands));
330 
331 	  tree var_offset
332 	    = build2 (PLUS_EXPR, uint32_type_node, var_region, pos);
333 
334 	  /* In case of LDA this is returned directly as an integer value.
335 	     For other mem-related instructions, we will convert this segment
336 	     offset to a flat address by adding it as an offset to a (private
337 	     or group) base pointer later on.  Same applies to group_var_offset.  */
338 	  symbol_base
339 	    = add_temp_var ("priv_var_offset",
340 			    convert (size_type_node, var_offset));
341 	}
342       else if (segment == BRIG_SEGMENT_ARG)
343 	{
344 	  tree arg_var_decl;
345 	  if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol)
346 	    arg_var_decl = m_parent.m_cf->m_ret_temp;
347 	  else
348 	    arg_var_decl = m_parent.m_cf->arg_variable (arg_symbol);
349 
350 	  gcc_assert (arg_var_decl != NULL_TREE);
351 
352 	  tree ptype = build_pointer_type (instr_type);
353 
354 	  if (arg_symbol->type & BRIG_TYPE_ARRAY)
355 	    {
356 
357 	      /* Two different type of array references in case of arguments
358 		 depending where they are referred at.  In the caller (argument
359 		 segment), the reference is to an array object and
360 		 in the callee, the array object has been passed as a pointer
361 		 to the array object.  */
362 
363 	      if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl)))
364 		symbol_base = build_resize_convert_view (ptype, arg_var_decl);
365 	      else
366 		{
367 		  /* In case we are referring to an array (the argument in
368 		     call site), use its element zero as the base address.  */
369 		  tree element_zero
370 		    = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (arg_var_decl)),
371 			      arg_var_decl, integer_zero_node, NULL_TREE,
372 			      NULL_TREE);
373 		  symbol_base = build1 (ADDR_EXPR, ptype, element_zero);
374 		}
375 	    }
376 	  else
377 	    symbol_base = build1 (ADDR_EXPR, ptype, arg_var_decl);
378 	}
379       else
380 	{
381 	  tree global_var_decl = m_parent.global_variable (var_name);
382 
383 	  /* In case the global variable hasn't been defined (yet),
384 	     use the host def indirection ptr variable.  */
385 	  if (global_var_decl == NULL_TREE)
386 	    {
387 	      std::string host_ptr_name
388 		= std::string (PHSA_HOST_DEF_PTR_PREFIX) + var_name;
389 	      tree host_defined_ptr = m_parent.global_variable (host_ptr_name);
390 	      gcc_assert (host_defined_ptr != NULL_TREE);
391 	      symbol_base = host_defined_ptr;
392 	    }
393 	  else
394 	    {
395 	      gcc_assert (global_var_decl != NULL_TREE);
396 
397 	      tree ptype = build_pointer_type (instr_type);
398 	      symbol_base = build1 (ADDR_EXPR, ptype, global_var_decl);
399 	    }
400 	}
401     }
402 
403   if (brig_inst.opcode != BRIG_OPCODE_LDA)
404     {
405       /* In case of lda_* we want to return the segment address because it's
406 	 used as a value, perhaps in address computation and later converted
407 	 explicitly to a flat address.
408 
409 	 In case of other instructions with memory operands we produce the flat
410 	 address directly here (assuming the target does not have a separate
411 	 address space for group/private segments for now).  */
412       if (segment == BRIG_SEGMENT_GROUP)
413 	symbol_base = m_parent.m_cf->m_group_base_arg;
414       else if (segment == BRIG_SEGMENT_PRIVATE
415 	       || segment == BRIG_SEGMENT_SPILL)
416 	{
417 	  if (symbol_base != NULL_TREE)
418 	    symbol_base = build2 (POINTER_PLUS_EXPR, ptr_type_node,
419 				  m_parent.m_cf->m_private_base_arg,
420 				  symbol_base);
421 	  else
422 	    symbol_base = m_parent.m_cf->m_private_base_arg;
423 	}
424     }
425 
426   if (addr_operand.reg != 0)
427     {
428       const BrigOperandRegister *mem_base_reg
429 	= (const BrigOperandRegister *) m_parent.get_brig_operand_entry
430 	(addr_operand.reg);
431       tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg);
432       tree as_uint = build_reinterpret_to_uint (base_reg_var);
433       var_offset = convert_to_pointer (ptr_type_node, as_uint);
434 
435       gcc_assert (var_offset != NULL_TREE);
436     }
437   /* The pointer type we use to access the memory.  Should be of the
438      width of the load/store instruction, not the target/data
439      register.  */
440   tree ptype = build_pointer_type (instr_type);
441 
442   gcc_assert (ptype != NULL_TREE);
443 
444   tree addr = NULL_TREE;
445   if (symbol_base != NULL_TREE && var_offset != NULL_TREE)
446     /* The most complex addressing mode: symbol + reg [+ const offset].  */
447     addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
448 		   convert (ptr_type_node, symbol_base),
449 		   convert (size_type_node, var_offset));
450   else if (var_offset != NULL)
451     addr = var_offset;
452   else if (symbol_base != NULL)
453     addr = symbol_base;
454 
455   if (const_offset != NULL_TREE)
456     {
457       if (addr == NULL_TREE)
458 	/* At least direct module-scope global group symbol access with LDA
459 	   has only the const_offset.  Group base ptr is not added as LDA should
460 	   return the segment address, not the flattened one.  */
461 	addr = const_offset;
462       else
463 	addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
464 		       addr, convert (size_type_node, const_offset));
465     }
466 
467   /* We might have two const offsets in case of group or private arrays
468      which have the first offset to the incoming group/private pointer
469      arg, and the second one an offset to it. It's also legal to have
470      a reference with a zero constant offset but no symbol.  I've seen
471      codes that reference kernarg segment like this.  Thus, if at this
472      point there is no address expression at all we assume it's an
473      access to offset 0. */
474   uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset);
475   if (offs > 0 || addr == NULL_TREE)
476     {
477       /* In large mode, the offset is treated as 32bits unless it's
478 	 global, readonly or kernarg address space.
479 	 See:
480 	 http://www.hsafoundation.com/html_spec111/HSA_Library.htm
481 	 #PRM/Topics/02_ProgModel/small_and_large_machine_models.htm
482 	 #table_machine_model_data_sizes */
483 
484       int is64b_offset = segment == BRIG_SEGMENT_GLOBAL
485 	|| segment == BRIG_SEGMENT_READONLY
486 	|| segment == BRIG_SEGMENT_KERNARG;
487 
488       /* The original offset is signed and should be sign
489 	 extended for the pointer arithmetics.  */
490       tree const_offset_2 = is64b_offset
491         ? build_int_cst (size_type_node, offs)
492         : convert (long_integer_type_node,
493                    build_int_cst (integer_type_node, offs));
494 
495       if (addr == NULL_TREE)
496 	addr = const_offset_2;
497       else
498 	addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
499 		       /* Addr can be a constant offset in case this is
500 			  a private array access.  */
501 		       convert (ptr_type_node, addr),
502 		       convert (size_type_node, const_offset_2));
503     }
504 
505   gcc_assert (addr != NULL_TREE);
506   return convert_to_pointer (ptype, addr);
507 }
508 
509 /* Builds a tree operand with the given OPERAND_INDEX for the given
510    BRIG_INST with the desired tree OPERAND_TYPE.  OPERAND_TYPE can
511    be NULL in case the type is forced by the BRIG_INST type.  */
512 
513 tree
build_tree_operand_from_brig(const BrigInstBase * brig_inst,tree operand_type,size_t operand_index)514 brig_code_entry_handler::build_tree_operand_from_brig
515   (const BrigInstBase *brig_inst, tree operand_type, size_t operand_index)
516 {
517   const BrigData *operand_entries
518     = m_parent.get_brig_data_entry (brig_inst->operands);
519 
520   uint32_t operand_offset
521     = ((const uint32_t *) &operand_entries->bytes)[operand_index];
522   const BrigBase *operand_data
523     = m_parent.get_brig_operand_entry (operand_offset);
524 
525   bool inputp = !gccbrig_hsa_opcode_op_output_p (brig_inst->opcode,
526 						 operand_index);
527   return build_tree_operand (*brig_inst, *operand_data, operand_type, inputp);
528 }
529 
530 /* Builds a single (scalar) constant initialized element of type
531    ELEMENT_TYPE from the buffer pointed to by NEXT_DATA.  */
532 
533 tree
build_tree_cst_element(BrigType16_t element_type,const unsigned char * next_data) const534 brig_code_entry_handler::build_tree_cst_element
535   (BrigType16_t element_type, const unsigned char *next_data) const
536 {
537 
538   tree tree_element_type = gccbrig_tree_type_for_hsa_type (element_type);
539 
540   tree cst;
541   switch (element_type)
542     {
543     case BRIG_TYPE_F16:
544       {
545 	HOST_WIDE_INT low = *(const uint16_t *) next_data;
546 	cst = build_int_cst (uint16_type_node, low);
547 	break;
548       }
549     case BRIG_TYPE_F32:
550       {
551 	REAL_VALUE_TYPE val;
552 	ieee_single_format.decode (&ieee_single_format, &val,
553 				   (const long *) next_data);
554 	cst = build_real (tree_element_type, val);
555 	break;
556       }
557     case BRIG_TYPE_F64:
558       {
559 	long data[2];
560 	data[0] = *(const uint32_t *) next_data;
561 	data[1] = *(const uint32_t *) (next_data + 4);
562 	REAL_VALUE_TYPE val;
563 	ieee_double_format.decode (&ieee_double_format, &val, data);
564 	cst = build_real (tree_element_type, val);
565 	break;
566       }
567     case BRIG_TYPE_S8:
568     case BRIG_TYPE_S16:
569     case BRIG_TYPE_S32:
570     case BRIG_TYPE_S64:
571       {
572 	HOST_WIDE_INT low = *(const int64_t *) next_data;
573 	cst = build_int_cst (tree_element_type, low);
574 	break;
575       }
576     case BRIG_TYPE_U8:
577     case BRIG_TYPE_U16:
578     case BRIG_TYPE_U32:
579     case BRIG_TYPE_U64:
580       {
581 	unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data;
582 	cst = build_int_cstu (tree_element_type, low);
583 	break;
584       }
585     case BRIG_TYPE_SIG64:
586       {
587 	unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data;
588 	cst = build_int_cstu (uint64_type_node, low);
589 	break;
590       }
591     case BRIG_TYPE_SIG32:
592       {
593 	unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data;
594 	cst = build_int_cstu (uint32_type_node, low);
595 	break;
596       }
597     default:
598       gcc_unreachable ();
599       return NULL_TREE;
600     }
601   return cst;
602 }
603 
604 /* Produce a tree constant type for the given BRIG constant (BRIG_CONST).
605    TYPE should be the forced instruction type, otherwise the type is
606    dictated by the BRIG_CONST.  */
607 
608 tree
get_tree_cst_for_hsa_operand(const BrigOperandConstantBytes * brig_const,tree type) const609 brig_code_entry_handler::get_tree_cst_for_hsa_operand
610   (const BrigOperandConstantBytes *brig_const, tree type) const
611 {
612   const BrigData *data = m_parent.get_brig_data_entry (brig_const->bytes);
613 
614   tree cst = NULL_TREE;
615 
616   if (type == NULL_TREE)
617     type = gccbrig_tree_type_for_hsa_type (brig_const->type);
618 
619   /* The type of a single (scalar) element inside an array,
620      vector or an array of vectors.  */
621   BrigType16_t scalar_element_type
622     = brig_const->type & BRIG_TYPE_BASE_MASK;
623   tree tree_element_type = type;
624 
625   vec<constructor_elt, va_gc> *constructor_vals = NULL;
626 
627   if (TREE_CODE (type) == ARRAY_TYPE)
628     tree_element_type = TREE_TYPE (type);
629 
630   size_t bytes_left = data->byteCount;
631   const unsigned char *next_data = data->bytes;
632   size_t scalar_element_size
633     = gccbrig_hsa_type_bit_size (scalar_element_type) / BITS_PER_UNIT;
634 
635   while (bytes_left > 0)
636     {
637       if (VECTOR_TYPE_P (tree_element_type))
638 	{
639 	  /* In case of vector type elements (or sole vectors),
640 	     create a vector ctor.  */
641 	  size_t element_count
642 	    = gccbrig_type_vector_subparts (tree_element_type);
643 	  if (bytes_left < scalar_element_size * element_count)
644 	    fatal_error (UNKNOWN_LOCATION,
645 			 "Not enough bytes left for the initializer "
646 			 "(%lu need %lu).", (unsigned long) bytes_left,
647 			 (unsigned long) (scalar_element_size
648 					  * element_count));
649 
650 	  vec<constructor_elt, va_gc> *vec_els = NULL;
651 	  for (size_t i = 0; i < element_count; ++i)
652 	    {
653 	      tree element
654 		= build_tree_cst_element (scalar_element_type, next_data);
655 	      CONSTRUCTOR_APPEND_ELT (vec_els, NULL_TREE, element);
656 	      bytes_left -= scalar_element_size;
657 	      next_data += scalar_element_size;
658 	    }
659 	  cst = build_vector_from_ctor (tree_element_type, vec_els);
660 	}
661       else
662 	{
663 	  if (bytes_left < scalar_element_size)
664 	    fatal_error (UNKNOWN_LOCATION,
665 			 "Not enough bytes left for the initializer "
666 			 "(%lu need %lu).", (unsigned long) bytes_left,
667 			 (unsigned long) scalar_element_size);
668 	  cst = build_tree_cst_element (scalar_element_type, next_data);
669 	  bytes_left -= scalar_element_size;
670 	  next_data += scalar_element_size;
671 	}
672       CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
673     }
674 
675   if (TREE_CODE (type) == ARRAY_TYPE)
676     return build_constructor (type, constructor_vals);
677   else
678     return cst;
679 }
680 
681 /* Return the matching tree instruction arithmetics type for the
682    given BRIG_TYPE.  The aritmethics type is the one with which
683    computation is done (in contrast to the storage type).  F16
684    arithmetics type is emulated using F32 for now.  */
685 
686 tree
get_tree_expr_type_for_hsa_type(BrigType16_t brig_type) const687 brig_code_entry_handler::get_tree_expr_type_for_hsa_type
688   (BrigType16_t brig_type) const
689 {
690   BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
691   if (brig_inner_type == BRIG_TYPE_F16)
692     {
693       if (brig_inner_type == brig_type)
694 	return m_parent.s_fp32_type;
695       size_t element_count = gccbrig_hsa_type_bit_size (brig_type) / 16;
696       return build_vector_type (m_parent.s_fp32_type, element_count);
697     }
698   else
699     return gccbrig_tree_type_for_hsa_type (brig_type);
700 }
701 
702 /* In case the HSA instruction must be implemented using a builtin,
703    this function is called to get the correct builtin function.
704    TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
705    brig instruction and BRIG_TYPE the brig instruction's type.  */
706 
707 tree
get_builtin_for_hsa_opcode(tree type,BrigOpcode16_t brig_opcode,BrigType16_t brig_type) const708 brig_code_entry_handler::get_builtin_for_hsa_opcode
709   (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
710 {
711   tree builtin = NULL_TREE;
712   tree builtin_type = type;
713 
714   /* For vector types, first find the scalar version of the builtin.  */
715   if (type != NULL_TREE && VECTOR_TYPE_P (type))
716     builtin_type = TREE_TYPE (type);
717   BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
718 
719   /* Some BRIG opcodes can use the same builtins for unsigned and
720      signed types.  Force these cases to unsigned types.  */
721 
722   if (brig_opcode == BRIG_OPCODE_BORROW
723       || brig_opcode == BRIG_OPCODE_CARRY
724       || brig_opcode == BRIG_OPCODE_LASTBIT
725       || brig_opcode == BRIG_OPCODE_BITINSERT)
726     {
727       if (brig_type == BRIG_TYPE_S32)
728 	brig_type = BRIG_TYPE_U32;
729       else if (brig_type == BRIG_TYPE_S64)
730 	brig_type = BRIG_TYPE_U64;
731     }
732 
733   switch (brig_opcode)
734     {
735     case BRIG_OPCODE_FLOOR:
736       builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
737       break;
738     case BRIG_OPCODE_CEIL:
739       builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
740       break;
741     case BRIG_OPCODE_SQRT:
742     case BRIG_OPCODE_NSQRT:
743       builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
744       break;
745     case BRIG_OPCODE_RINT:
746       builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
747       break;
748     case BRIG_OPCODE_TRUNC:
749       builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
750       break;
751     case BRIG_OPCODE_COPYSIGN:
752       builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
753       break;
754     case BRIG_OPCODE_NSIN:
755       builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
756       break;
757     case BRIG_OPCODE_NLOG2:
758       builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
759       break;
760     case BRIG_OPCODE_NEXP2:
761       builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
762       break;
763     case BRIG_OPCODE_NFMA:
764       builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
765       break;
766     case BRIG_OPCODE_NCOS:
767       builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
768       break;
769     case BRIG_OPCODE_POPCOUNT:
770       /* Popcount should be typed by its argument type (the return value
771 	 is always u32).  Let's use a b64 version for also for b32 for now.  */
772       return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
773     case BRIG_OPCODE_BORROW:
774       /* Borrow uses the same builtin for unsigned and signed types.  */
775       if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
776 	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
777       else
778 	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
779     case BRIG_OPCODE_CARRY:
780       /* Carry also uses the same builtin for unsigned and signed types.  */
781       if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
782 	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
783       else
784 	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
785     default:
786 
787       /* Use our builtin index for finding a proper builtin for the BRIG
788 	 opcode and BRIG type.  This takes care most of the builtin cases,
789 	 the special cases are handled in the separate 'case' statements
790 	 above.  */
791       builtin_map::const_iterator i
792 	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
793       if (i != s_custom_builtins.end ())
794 	return (*i).second;
795 
796       if (brig_inner_type != brig_type)
797 	{
798 	  /* Try to find a scalar built-in we could use.  */
799 	  i = s_custom_builtins.find
800 	    (std::make_pair (brig_opcode, brig_inner_type));
801 	  if (i != s_custom_builtins.end ())
802 	    return (*i).second;
803 	}
804 
805       /* In case this is an fp16 operation that is promoted to fp32,
806 	 try to find a fp32 scalar built-in.  */
807       if (brig_inner_type == BRIG_TYPE_F16)
808 	{
809 	  i = s_custom_builtins.find
810 	    (std::make_pair (brig_opcode, BRIG_TYPE_F32));
811 	  if (i != s_custom_builtins.end ())
812 	    return (*i).second;
813 	}
814       gcc_unreachable ();
815     }
816 
817   if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
818     {
819       /* Try to find a vectorized version of the built-in.
820 	 TODO: properly assert that builtin is a mathfn builtin? */
821       tree vec_builtin
822 	= targetm.vectorize.builtin_vectorized_function
823 	(builtin_mathfn_code (builtin), type, type);
824       if (vec_builtin != NULL_TREE)
825 	return vec_builtin;
826       else
827 	return builtin;
828     }
829   if (builtin == NULL_TREE)
830     gcc_unreachable ();
831   return builtin;
832 }
833 
834 /* Return the correct GENERIC type for storing comparison results
835    of operand with the type given in SOURCE_TYPE.  */
836 
837 tree
get_comparison_result_type(tree source_type)838 brig_code_entry_handler::get_comparison_result_type (tree source_type)
839 {
840   if (VECTOR_TYPE_P (source_type))
841     {
842       size_t element_size = int_size_in_bytes (TREE_TYPE (source_type));
843       return build_vector_type
844 	(build_nonstandard_boolean_type (element_size * BITS_PER_UNIT),
845 	 gccbrig_type_vector_subparts (source_type));
846     }
847   else
848     return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1);
849 }
850 
851 /* Returns true in case the given opcode needs to know about work-item context
852    data.  In such case the context data is passed as a pointer to a work-item
853    context object, as the last argument in the builtin call.  */
854 
855 bool
needs_workitem_context_data(BrigOpcode16_t brig_opcode) const856 brig_code_entry_handler::needs_workitem_context_data
857   (BrigOpcode16_t brig_opcode) const
858 {
859   switch (brig_opcode)
860     {
861     case BRIG_OPCODE_WORKITEMABSID:
862     case BRIG_OPCODE_WORKITEMFLATABSID:
863     case BRIG_OPCODE_WORKITEMFLATID:
864     case BRIG_OPCODE_CURRENTWORKITEMFLATID:
865     case BRIG_OPCODE_WORKITEMID:
866     case BRIG_OPCODE_WORKGROUPID:
867     case BRIG_OPCODE_WORKGROUPSIZE:
868     case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
869     case BRIG_OPCODE_GRIDGROUPS:
870     case BRIG_OPCODE_GRIDSIZE:
871     case BRIG_OPCODE_DIM:
872     case BRIG_OPCODE_PACKETID:
873     case BRIG_OPCODE_PACKETCOMPLETIONSIG:
874     case BRIG_OPCODE_BARRIER:
875     case BRIG_OPCODE_WAVEBARRIER:
876     case BRIG_OPCODE_ARRIVEFBAR:
877     case BRIG_OPCODE_INITFBAR:
878     case BRIG_OPCODE_JOINFBAR:
879     case BRIG_OPCODE_LEAVEFBAR:
880     case BRIG_OPCODE_RELEASEFBAR:
881     case BRIG_OPCODE_WAITFBAR:
882     case BRIG_OPCODE_CUID:
883     case BRIG_OPCODE_MAXCUID:
884     case BRIG_OPCODE_DEBUGTRAP:
885     case BRIG_OPCODE_GROUPBASEPTR:
886     case BRIG_OPCODE_KERNARGBASEPTR:
887     case BRIG_OPCODE_ALLOCA:
888       return true;
889     default:
890       return false;
891     };
892 }
893 
894 /* Returns true in case the given opcode that would normally be generated
895    as a builtin call can be expanded to tree nodes.  */
896 
897 bool
can_expand_builtin(BrigOpcode16_t brig_opcode) const898 brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const
899 {
900   switch (brig_opcode)
901     {
902     case BRIG_OPCODE_WORKITEMFLATABSID:
903     case BRIG_OPCODE_WORKITEMFLATID:
904     case BRIG_OPCODE_WORKITEMABSID:
905     case BRIG_OPCODE_WORKGROUPSIZE:
906     case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
907       /* TODO: expand more builtins.  */
908       return true;
909     default:
910       return false;
911     };
912 }
913 
914 /* Try to expand the given builtin call to reuse a previously generated
915    variable, if possible.  If not, just call the given builtin.
916    BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
917    ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
918    input operands.  */
919 
920 tree
expand_or_call_builtin(BrigOpcode16_t brig_opcode,BrigType16_t brig_type,tree arith_type,tree_stl_vec & operands)921 brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
922 						 BrigType16_t brig_type,
923 						 tree arith_type,
924 						 tree_stl_vec &operands)
925 {
926   if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode))
927     return expand_builtin (brig_opcode, operands);
928 
929   tree built_in
930     = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
931 
932   if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
933       && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
934       && brig_opcode != BRIG_OPCODE_LERP
935       && brig_opcode != BRIG_OPCODE_PACKCVT
936       && brig_opcode != BRIG_OPCODE_SAD
937       && brig_opcode != BRIG_OPCODE_SADHI)
938     {
939       /* Call the scalar built-in for all elements in the vector.  */
940       tree_stl_vec operand0_elements;
941       if (operands.size () > 0)
942 	unpack (operands[0], operand0_elements);
943 
944       tree_stl_vec operand1_elements;
945       if (operands.size () > 1)
946 	unpack (operands[1], operand1_elements);
947 
948       tree_stl_vec result_elements;
949 
950       size_t element_count = gccbrig_type_vector_subparts (arith_type);
951       for (size_t i = 0; i < element_count; ++i)
952 	{
953 	  tree_stl_vec call_operands;
954 	  if (operand0_elements.size () > 0)
955 	    call_operands.push_back (operand0_elements.at (i));
956 
957 	  if (operand1_elements.size () > 0)
958 	    call_operands.push_back (operand1_elements.at (i));
959 
960 	  result_elements.push_back
961 	    (expand_or_call_builtin (brig_opcode, brig_type,
962 				     TREE_TYPE (arith_type),
963 				     call_operands));
964 	}
965       return pack (result_elements);
966     }
967 
968   tree_stl_vec call_operands;
969   tree_stl_vec operand_types;
970 
971   tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
972 
973   for (size_t i = 0; i < operands.size (); ++i)
974     {
975       tree operand_type = TREE_VALUE (arg_type_chain);
976       call_operands.push_back (convert (operand_type, operands[i]));
977       operand_types.push_back (operand_type);
978       arg_type_chain = TREE_CHAIN (arg_type_chain);
979     }
980 
981   if (needs_workitem_context_data (brig_opcode))
982     {
983       call_operands.push_back (m_parent.m_cf->m_context_arg);
984       operand_types.push_back (ptr_type_node);
985       m_parent.m_cf->m_has_unexpanded_dp_builtins = true;
986     }
987 
988   size_t operand_count = call_operands.size ();
989 
990   call_operands.resize (4, NULL_TREE);
991   operand_types.resize (4, NULL_TREE);
992   for (size_t i = 0; i < operand_count; ++i)
993     call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
994 						      call_operands.at (i));
995 
996   tree fnptr = build_fold_addr_expr (built_in);
997   return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
998 			   operand_count, &call_operands[0]);
999 }
1000 
1001 /* Instead of calling a built-in, reuse a previously returned value known to
1002    be still valid.  This is beneficial especially for the work-item
1003    identification related builtins as not having them as calls can lead to
1004    more easily vectorizable parallel loops for multi work-item work-groups.
1005    BRIG_OPCODE identifies the builtin and OPERANDS store the operands.  */
1006 
1007 tree
expand_builtin(BrigOpcode16_t brig_opcode,tree_stl_vec & operands)1008 brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode,
1009 					 tree_stl_vec &operands)
1010 {
1011   tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
1012 
1013   tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
1014 
1015   tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
1016 
1017   if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
1018     {
1019       tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
1020       id0 = convert (uint64_type_node, id0);
1021 
1022       tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
1023       id1 = convert (uint64_type_node, id1);
1024 
1025       tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
1026       id2 = convert (uint64_type_node, id2);
1027 
1028       tree max0 = convert (uint64_type_node,
1029 			   m_parent.m_cf->m_grid_size_vars[0]);
1030       tree max1 = convert (uint64_type_node,
1031 			   m_parent.m_cf->m_grid_size_vars[1]);
1032 
1033       tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
1034       id2_x_max0_x_max1
1035 	= build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
1036 
1037       tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
1038 
1039       tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
1040       sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
1041 
1042       return add_temp_var ("workitemflatabsid", sum);
1043     }
1044   else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
1045     {
1046       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1047 
1048       tree local_id_var = m_parent.m_cf->m_local_id_vars[dim];
1049       tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim];
1050       tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim];
1051       tree grid_size_var = m_parent.m_cf->m_grid_size_vars[dim];
1052 
1053       tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node,
1054 				     convert (uint32_type_node, wg_id_var),
1055 				     convert (uint32_type_node, wg_size_var));
1056       tree sum
1057 	= build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var);
1058 
1059       /* We need a modulo here because of work-groups which have dimensions
1060 	 larger than the grid size :( TO CHECK: is this really allowed in the
1061 	 specs?  */
1062       tree modulo
1063 	= build2 (TRUNC_MOD_EXPR, uint32_type_node, sum, grid_size_var);
1064 
1065       return add_temp_var (std::string ("workitemabsid_")
1066 			     + (char) ((int) 'x' + dim),
1067 			   modulo);
1068     }
1069   else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
1070     {
1071       tree z_x_wgsx_wgsy
1072 	= build2 (MULT_EXPR, uint32_type_node,
1073 		  m_parent.m_cf->m_local_id_vars[2],
1074 		  m_parent.m_cf->m_wg_size_vars[0]);
1075       z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
1076 			      m_parent.m_cf->m_wg_size_vars[1]);
1077 
1078       tree y_x_wgsx
1079 	= build2 (MULT_EXPR, uint32_type_node,
1080 		  m_parent.m_cf->m_local_id_vars[1],
1081 		  m_parent.m_cf->m_wg_size_vars[0]);
1082 
1083       tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
1084       sum = build2 (PLUS_EXPR, uint32_type_node,
1085 		    m_parent.m_cf->m_local_id_vars[0],
1086 		    sum);
1087       return add_temp_var ("workitemflatid", sum);
1088     }
1089   else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
1090     {
1091       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1092       return m_parent.m_cf->m_wg_size_vars[dim];
1093     }
1094   else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
1095     {
1096       HOST_WIDE_INT dim = int_constant_value (operands[0]);
1097       return m_parent.m_cf->m_cur_wg_size_vars[dim];
1098     }
1099   else
1100     gcc_unreachable ();
1101 
1102   return NULL_TREE;
1103 }
1104 
1105 /* Appends and returns a new temp variable and an accompanying assignment
1106    statement that stores the value of the given EXPR and has the given NAME.  */
1107 
1108 tree
add_temp_var(std::string name,tree expr)1109 brig_code_entry_handler::add_temp_var (std::string name, tree expr)
1110 {
1111   tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
1112   tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
1113   m_parent.m_cf->append_statement (assign);
1114   return temp_var;
1115 }
1116 
1117 /* Creates a FP32 to FP16 conversion call, assuming the source and destination
1118    are FP32 type variables.  */
1119 
1120 tree
build_f2h_conversion(tree source)1121 brig_code_entry_handler::build_f2h_conversion (tree source)
1122 {
1123   return float_to_half () (*this, source);
1124 }
1125 
1126 /* Creates a FP16 to FP32 conversion call, assuming the source and destination
1127    are FP32 type variables.  */
1128 
1129 tree
build_h2f_conversion(tree source)1130 brig_code_entry_handler::build_h2f_conversion (tree source)
1131 {
1132   return half_to_float () (*this, source);
1133 }
1134 
1135 /* Builds and "normalizes" the dest and source operands for the instruction
1136    execution; converts the input operands to the expected instruction type,
1137    performs half to float conversions, constant to correct type variable,
1138    and flush to zero (if applicable).  */
1139 
1140 tree_stl_vec
build_operands(const BrigInstBase & brig_inst)1141 brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst)
1142 {
1143   return build_or_analyze_operands (brig_inst, false);
1144 }
1145 
1146 void
analyze_operands(const BrigInstBase & brig_inst)1147 brig_code_entry_handler::analyze_operands (const BrigInstBase &brig_inst)
1148 {
1149   build_or_analyze_operands (brig_inst, true);
1150 }
1151 
/* Implements both the build_operands () and analyze_operands () call
   so changes go in tandem.  Performs build_operands () when ANALYZE
   is false.  Otherwise, only analyze operands and return empty
   list.

   BRIG_INST is the instruction whose operand list is walked.  For each
   operand the expected GENERIC type is resolved from the instruction
   kind, its type/sourceType fields and a number of opcode specific
   special cases.

   When building, every operand is created with build_tree_operand () and
   pushed to the returned list in operand order.  Input operands are in
   addition converted to the resolved type (with a half to float
   conversion for fp16 arithmetic) and flushed to zero when the
   instruction has the FTZ modifier set.

   If analyzing record each HSA register operand with the
   corresponding resolved operand tree type to
   brig_to_generic::m_fn_regs_use_index (done by calling
   m_parent.add_reg_used_as_type ()).  */

tree_stl_vec
brig_code_entry_handler::
build_or_analyze_operands (const BrigInstBase &brig_inst, bool analyze)
{
  /* Flush to zero.  Only the MOD and CMP instruction kinds are checked
     for the modifier.  */
  bool ftz = false;
  const BrigBase *base = &brig_inst.base;

  if (base->kind == BRIG_KIND_INST_MOD)
    {
      const BrigInstMod *mod = (const BrigInstMod *) base;
      ftz = mod->modifier & BRIG_ALU_FTZ;
    }
  else if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp = (const BrigInstCmp *) base;
      ftz = cmp->modifier & BRIG_ALU_FTZ;
    }

  bool is_vec_instr = hsa_type_packed_p (brig_inst.type);

  /* Number of elements in the instruction type: the total bit size
     divided by the bit size of the base (element) type for packed types,
     1 otherwise.  */
  size_t element_count;
  if (is_vec_instr)
    {
      BrigType16_t brig_element_type = brig_inst.type & BRIG_TYPE_BASE_MASK;
      element_count = gccbrig_hsa_type_bit_size (brig_inst.type)
		      / gccbrig_hsa_type_bit_size (brig_element_type);
    }
  else
    element_count = 1;

  /* Set when the inputs are f16 values that get converted to float before
     the operation.  */
  bool is_fp16_arith = false;

  /* Resolve the type expected for input operands (SRC_TYPE) and for output
     operands (DEST_TYPE).  Instruction kinds carrying a separate
     sourceType field use it for the inputs; the rest use the instruction
     type for both.  */
  tree src_type;
  tree dest_type;
  if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp_inst = (const BrigInstCmp *) base;
      src_type = gccbrig_tree_type_for_hsa_type (cmp_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      is_fp16_arith
	= (cmp_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16;
    }
  else if (base->kind == BRIG_KIND_INST_SOURCE_TYPE)
    {
      const BrigInstSourceType *src_type_inst
	= (const BrigInstSourceType *) base;
      src_type = gccbrig_tree_type_for_hsa_type (src_type_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      is_fp16_arith
	= (src_type_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
	&& !gccbrig_is_bit_operation (brig_inst.opcode);
    }
  else if (base->kind == BRIG_KIND_INST_SEG_CVT)
    {
      const BrigInstSegCvt *seg_cvt_inst = (const BrigInstSegCvt *) base;
      src_type = gccbrig_tree_type_for_hsa_type (seg_cvt_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
    }
  else if (base->kind == BRIG_KIND_INST_MEM)
    {
      src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      dest_type = src_type;
      /* With mem instructions we don't want to cast the fp16
	 back and forth between fp32, because the load/stores
	 are not specific to the data type.  */
      is_fp16_arith = false;
    }
  else if (base->kind == BRIG_KIND_INST_CVT)
    {
      const BrigInstCvt *cvt_inst = (const BrigInstCvt *) base;

      src_type = gccbrig_tree_type_for_hsa_type (cvt_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
    }
  else
    {
      /* The fbarrier instructions listed here take uint32 operands
	 regardless of the instruction type.  */
      switch (brig_inst.opcode)
	{
	case BRIG_OPCODE_INITFBAR:
	case BRIG_OPCODE_JOINFBAR:
	case BRIG_OPCODE_WAITFBAR:
	case BRIG_OPCODE_ARRIVEFBAR:
	case BRIG_OPCODE_LEAVEFBAR:
	case BRIG_OPCODE_RELEASEFBAR:
	  src_type = uint32_type_node;
	  break;
	default:
	  src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
	  break;
	}
      dest_type = src_type;
      is_fp16_arith
	= !gccbrig_is_bit_operation (brig_inst.opcode)
	&& (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16;
    }

  /* Halfs are a tricky special case: their "storage format" is u16, but
     scalars are stored in 32b regs while packed f16 are... well packed.  */
  tree half_storage_type = element_count > 1
			     ? gccbrig_tree_type_for_hsa_type (brig_inst.type)
			     : uint32_type_node;

  /* The operand list data entry is read as an array of 32-bit offsets, one
     per operand; hence the division of the byte count by 4.  */
  const BrigData *operand_entries
    = m_parent.get_brig_data_entry (brig_inst.operands);
  std::vector<tree> operands;
  for (size_t i = 0; i < operand_entries->byteCount / 4; ++i)
    {
      uint32_t operand_offset = ((const uint32_t *) &operand_entries->bytes)[i];
      const BrigBase *operand_data
	= m_parent.get_brig_operand_entry (operand_offset);

      const bool is_output
	= gccbrig_hsa_opcode_op_output_p (brig_inst.opcode, i);

      tree operand_type = is_output ? dest_type : src_type;

      /* Per-operand copy of the fp16 flag; a special case below can
	 disable the conversion for a single operand.  */
      bool half_to_float = is_fp16_arith;

      /* Special cases for operand types.  */
      if ((brig_inst.opcode == BRIG_OPCODE_SHL
	   || brig_inst.opcode == BRIG_OPCODE_SHR)
	  && i == 2)
	  /* The shift amount is always a scalar.  */
	operand_type
	  = VECTOR_TYPE_P (src_type) ? TREE_TYPE (src_type) : src_type;
      else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
	{
	  if (i == 3)
	    /* HSAIL shuffle inputs the MASK vector as tightly packed bits
	       while GENERIC VEC_PERM_EXPR expects the mask elements to be
	       of the same size as the elements in the input vectors.  Let's
	       cast to a scalar type here and convert to the VEC_PERM_EXPR
	       format in instruction handling.  There are no arbitrary bit
	       width int types in GENERIC so we cannot use the original
	       vector type.  */
	    operand_type = uint32_type_node;
	  else
	    /* Always treat the element as unsigned ints to avoid
	       sign extensions/negative offsets with masks, which
	       are expected to be of the same element type as the
	       data in VEC_PERM_EXPR.  With shuffles the data type
	       should not matter as it's a "raw operation".  */
	    operand_type = get_unsigned_int_type (operand_type);
	}
      else if (brig_inst.opcode == BRIG_OPCODE_PACK)
	{
	  /* Operand 1 is typed as the unsigned variant of the destination
	     type, operand 2 as the unsigned variant of its element type,
	     and operand 3 as uint32.  */
	  if (i == 1)
	    operand_type = get_unsigned_int_type (dest_type);
	  else if (i == 2)
	    operand_type = get_unsigned_int_type (TREE_TYPE (dest_type));
	  else if (i == 3)
	    operand_type = uint32_type_node;
	}
      else if (brig_inst.opcode == BRIG_OPCODE_UNPACK && i == 2)
	operand_type = uint32_type_node;
      else if (brig_inst.opcode == BRIG_OPCODE_SAD && i == 3)
	operand_type = uint32_type_node;
      else if (brig_inst.opcode == BRIG_OPCODE_CLASS && i == 2)
	{
	  /* This operand is a plain uint32 even for fp16 instructions, so
	     it must not go through the half to float conversion.  */
	  operand_type = uint32_type_node;
	  half_to_float = false;
	}
      else if (brig_inst.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE && i == 4)
	{
	  operand_type = uint32_type_node;
	}
      else if (half_to_float)
	/* Treat the operands as the storage type at this point.  */
	operand_type = half_storage_type;

      if (analyze)
	{
	  /* Analysis only: record the type register operands are used as
	     and move on without building anything.  */
	  if (operand_data->kind == BRIG_KIND_OPERAND_REGISTER)
	    {
	      const BrigOperandRegister &brig_reg
		= (const BrigOperandRegister &) *operand_data;
	      m_parent.add_reg_used_as_type (brig_reg, operand_type);
	    }
	  continue;
	}

      tree operand = build_tree_operand (brig_inst, *operand_data, operand_type,
					 !is_output);
      gcc_assert (operand);

      /* Cast/convert the inputs to correct types as expected by the GENERIC
	 opcode instruction.  */
      if (!is_output)
	{
	  if (half_to_float)
	    operand = build_h2f_conversion
	      (build_resize_convert_view (half_storage_type, operand));
	  else if (TREE_CODE (operand) != LABEL_DECL
		   && TREE_CODE (operand) != TREE_VEC
		   && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS
		   && operand_data->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
	    {
	      operand = build_resize_convert_view (operand_type, operand);
	    }
	  else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
	    /* Force the operand type to be treated as the raw type.  */
	    operand = build_resize_convert_view (operand_type, operand);

	  if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1)
	    {
	      /* gcc expects the lower bit to be 1 (or all ones in case of
		 vectors) while CMOV assumes false iff 0.  Convert the input
		 here to what gcc likes by generating
		 'operand = operand != 0'.  */
	      tree cmp_res_type = get_comparison_result_type (operand_type);
	      operand = build2 (NE_EXPR, cmp_res_type, operand,
				build_zero_cst (TREE_TYPE (operand)));
	    }

	  /* Applied last, i.e. on the already converted input.  */
	  if (ftz)
	    operand = flush_to_zero (is_fp16_arith) (*this, operand);
	}
      operands.push_back (operand);
    }
  return operands;
}
1383 
/* Build the GENERIC for assigning the result of an instruction to the result
   "register" (variable).  BRIG_INST is the original brig instruction,
   OUTPUT the result variable/register, INST_EXPR the one producing the
   result.  Required bitcasts and fp32 to fp16 conversions are added as
   well.

   The generated statements are appended to the function being built.
   Returns the MODIFY_EXPR storing to OUTPUT.  When OUTPUT is a vector
   typed CONSTRUCTOR, one assignment is generated per constructor element
   and the last one is returned (NULL_TREE if the constructor has no
   elements).  */

tree
brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
						  tree output, tree inst_expr)
{
  /* The result/input type might be different from the output register
     variable type (can be any type; see get_m_var_declfor_reg @
     brig-function.cc).  */
  tree output_type = TREE_TYPE (output);
  tree input_type = TREE_TYPE (inst_expr);
  /* The result needs an fp32 to fp16 conversion when the instruction type
     is f16, except for memory instructions and bit operations.  */
  bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
		 && brig_inst.base.kind != BRIG_KIND_INST_MEM
		 && !gccbrig_is_bit_operation (brig_inst.opcode);

  /* Flush to zero.  Only the MOD and CMP instruction kinds are checked
     for the modifier.  */
  bool ftz = false;
  const BrigBase *base = &brig_inst.base;

  if (base->kind == BRIG_KIND_INST_MOD)
    {
      const BrigInstMod *mod = (const BrigInstMod *) base;
      ftz = mod->modifier & BRIG_ALU_FTZ;
    }
  else if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp = (const BrigInstCmp *) base;
      ftz = cmp->modifier & BRIG_ALU_FTZ;
    }

  if (TREE_CODE (inst_expr) == CALL_EXPR)
    {
      /* For calls, take the result type from the return type of the called
	 function.  This reads the decl from under the call's function
	 operand, so it expects the callee to be given as the address of a
	 function decl.  */
      tree func_decl = TREE_OPERAND (TREE_OPERAND (inst_expr, 1), 0);
      input_type = TREE_TYPE (TREE_TYPE (func_decl));
    }

  if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr))
	      || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16))
    {
      /* Ensure we don't duplicate the arithmetics to the arguments of the bit
	 field reference operators.  */
      inst_expr = add_temp_var ("before_ftz", inst_expr);
      inst_expr = flush_to_zero (is_fp16) (*this, inst_expr);
    }

  if (is_fp16)
    {
      /* Store the result to a temporary, convert it to f16 and view the
	 converted value as the output variable type.  */
      inst_expr = add_temp_var ("before_f2h", inst_expr);
      tree f2h_output = build_f2h_conversion (inst_expr);
      tree conv = build_resize_convert_view (output_type, f2h_output);
      tree assign = build2 (MODIFY_EXPR, output_type, output, conv);
      m_parent.m_cf->append_statement (assign);
      return assign;
    }
  else if (VECTOR_TYPE_P (output_type) && TREE_CODE (output) == CONSTRUCTOR)
    {
      /* Expand/unpack the input value to the given vector elements.  */
      size_t i;
      tree input = inst_expr;
      tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      tree element;
      tree last_assign = NULL_TREE;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (output), i, element)
	{
	  /* Element I lives at bit offset I * element size in the input.  */
	  tree element_ref
	    = build3 (BIT_FIELD_REF, element_type, input,
		      TYPE_SIZE (element_type),
		      bitsize_int (i * int_size_in_bytes (element_type)
				   *  BITS_PER_UNIT));

	  /* Recurse to assign the extracted element to the corresponding
	     constructor element.  */
	  last_assign
	    = build_output_assignment (brig_inst, element, element_ref);
	}
      return last_assign;
    }
  else
    {
      /* All we do here is to bitcast the result and store it to the
	 'register' (variable).  Mainly need to take care of differing
	 bitwidths.  */
      size_t src_width = int_size_in_bytes (input_type);
      size_t dst_width = int_size_in_bytes (output_type);
      tree input = inst_expr;
      /* Integer results are extended to the target register width, using
	 the same sign as the inst_expr.  */
      if (INTEGRAL_TYPE_P (TREE_TYPE (input)) && src_width != dst_width)
	{
	  bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (input));
	  tree resized_type
	    = build_nonstandard_integer_type (dst_width * BITS_PER_UNIT,
					      unsigned_p);
	  input = convert_to_integer (resized_type, input);
	}
      input = build_resize_convert_view (output_type, input);
      tree assign = build2 (MODIFY_EXPR, output_type, output, input);
      m_parent.m_cf->append_statement (assign);
      return assign;
    }
  /* Not reached: every branch above returns.  */
  return NULL_TREE;
}
1488 
1489 /* Appends a GENERIC statement (STMT) to the currently constructed function.  */
1490 
1491 void
append_statement(tree stmt)1492 brig_code_entry_handler::append_statement (tree stmt)
1493 {
1494   m_parent.m_cf->append_statement (stmt);
1495 }
1496 
1497 /* Unpacks the elements of the vector in VALUE to scalars (bit field
1498    references) in ELEMENTS.  */
1499 
1500 void
unpack(tree value,tree_stl_vec & elements)1501 brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements)
1502 {
1503   size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
1504   size_t element_size
1505     = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
1506   size_t element_count
1507     = vec_size * BITS_PER_UNIT / element_size;
1508 
1509   tree input_element_type = TREE_TYPE (TREE_TYPE (value));
1510 
1511   value = add_temp_var ("unpack_input", value);
1512 
1513   for (size_t i = 0; i < element_count; ++i)
1514     {
1515       tree element
1516 	= build3 (BIT_FIELD_REF, input_element_type, value,
1517 		  TYPE_SIZE (input_element_type),
1518 		  bitsize_int(i * element_size));
1519 
1520       element = add_temp_var ("scalar", element);
1521       elements.push_back (element);
1522     }
1523 }
1524 
1525 /* Pack the elements of the scalars in ELEMENTS to the returned vector.  */
1526 
1527 tree
pack(tree_stl_vec & elements)1528 brig_code_entry_handler::pack (tree_stl_vec &elements)
1529 {
1530   size_t element_count = elements.size ();
1531 
1532   gcc_assert (element_count > 1);
1533 
1534   tree output_element_type = TREE_TYPE (elements.at (0));
1535 
1536   vec<constructor_elt, va_gc> *constructor_vals = NULL;
1537   for (size_t i = 0; i < element_count; ++i)
1538     CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
1539 
1540   tree vec_type = build_vector_type (output_element_type, element_count);
1541 
1542   /* build_constructor creates a vector type which is not a vector_cst
1543      that requires compile time constant elements.  */
1544   tree vec = build_constructor (vec_type, constructor_vals);
1545 
1546   /* Add a temp variable for readability.  */
1547   tree tmp_var = create_tmp_var (vec_type, "vec_out");
1548   tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1549   m_parent.m_cf->append_statement (vec_tmp_assign);
1550   return tmp_var;
1551 }
1552 
1553 /* Visits the element(s) in the OPERAND, calling HANDLER to each of them.  */
1554 
1555 tree
operator ()(brig_code_entry_handler & handler,tree operand)1556 tree_element_unary_visitor::operator () (brig_code_entry_handler &handler,
1557 					tree operand)
1558 {
1559   if (VECTOR_TYPE_P (TREE_TYPE (operand)))
1560     {
1561       size_t vec_size = int_size_in_bytes (TREE_TYPE (operand));
1562       size_t element_size = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand)));
1563       size_t element_count = vec_size / element_size;
1564 
1565       tree input_element_type = TREE_TYPE (TREE_TYPE (operand));
1566       tree output_element_type = NULL_TREE;
1567 
1568       vec<constructor_elt, va_gc> *constructor_vals = NULL;
1569       for (size_t i = 0; i < element_count; ++i)
1570 	{
1571 	  tree element = build3 (BIT_FIELD_REF, input_element_type, operand,
1572 				 TYPE_SIZE (input_element_type),
1573 				 bitsize_int (i * element_size
1574 					      * BITS_PER_UNIT));
1575 
1576 	  tree output = visit_element (handler, element);
1577 	  output_element_type = TREE_TYPE (output);
1578 
1579 	  CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output);
1580 	}
1581 
1582       tree vec_type = build_vector_type (output_element_type, element_count);
1583 
1584       /* build_constructor creates a vector type which is not a vector_cst
1585 	 that requires compile time constant elements.  */
1586       tree vec = build_constructor (vec_type, constructor_vals);
1587 
1588       /* Add a temp variable for readability.  */
1589       tree tmp_var = create_tmp_var (vec_type, "vec_out");
1590       tree vec_tmp_assign
1591 	= build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1592       handler.append_statement (vec_tmp_assign);
1593       return tmp_var;
1594     }
1595   else
1596     return visit_element (handler, operand);
1597 }
1598 
1599 /* Visits the element pair(s) in the OPERAND0 and OPERAND1, calling HANDLER
1600    to each of them.  */
1601 
1602 tree
operator ()(brig_code_entry_handler & handler,tree operand0,tree operand1)1603 tree_element_binary_visitor::operator () (brig_code_entry_handler &handler,
1604 					 tree operand0, tree operand1)
1605 {
1606   if (VECTOR_TYPE_P (TREE_TYPE (operand0)))
1607     {
1608       gcc_assert (VECTOR_TYPE_P (TREE_TYPE (operand1)));
1609       size_t vec_size = int_size_in_bytes (TREE_TYPE (operand0));
1610       size_t element_size
1611 	= int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand0)));
1612       size_t element_count = vec_size / element_size;
1613 
1614       tree input_element_type = TREE_TYPE (TREE_TYPE (operand0));
1615       tree output_element_type = NULL_TREE;
1616 
1617       vec<constructor_elt, va_gc> *constructor_vals = NULL;
1618       for (size_t i = 0; i < element_count; ++i)
1619 	{
1620 
1621 	  tree element0 = build3 (BIT_FIELD_REF, input_element_type, operand0,
1622 				  TYPE_SIZE (input_element_type),
1623 				  bitsize_int (i * element_size
1624 					       * BITS_PER_UNIT));
1625 
1626 	  tree element1 = build3 (BIT_FIELD_REF, input_element_type, operand1,
1627 				  TYPE_SIZE (input_element_type),
1628 				  bitsize_int (i * element_size
1629 					       * BITS_PER_UNIT));
1630 
1631 	  tree output = visit_element (handler, element0, element1);
1632 	  output_element_type = TREE_TYPE (output);
1633 
1634 	  CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output);
1635 	}
1636 
1637       tree vec_type = build_vector_type (output_element_type, element_count);
1638 
1639       /* build_constructor creates a vector type which is not a vector_cst
1640 	 that requires compile time constant elements.  */
1641       tree vec = build_constructor (vec_type, constructor_vals);
1642 
1643       /* Add a temp variable for readability.  */
1644       tree tmp_var = create_tmp_var (vec_type, "vec_out");
1645       tree vec_tmp_assign
1646 	= build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1647       handler.append_statement (vec_tmp_assign);
1648       return tmp_var;
1649     }
1650   else
1651     return visit_element (handler, operand0, operand1);
1652 }
1653 
1654 /* Generates GENERIC code that flushes the visited element to zero.  */
1655 
1656 tree
visit_element(brig_code_entry_handler &,tree operand)1657 flush_to_zero::visit_element (brig_code_entry_handler &, tree operand)
1658 {
1659   size_t size = int_size_in_bytes (TREE_TYPE (operand));
1660   if (size == 4)
1661     {
1662       tree built_in
1663 	= (m_fp16) ? builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32_F16) :
1664 	builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32);
1665 
1666       return call_builtin (built_in, 1, float_type_node, float_type_node,
1667 			   operand);
1668     }
1669   else if (size == 8)
1670     {
1671       return call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F64), 1,
1672 			   double_type_node, double_type_node, operand);
1673     }
1674   else
1675     gcc_unreachable ();
1676   return NULL_TREE;
1677 }
1678 
1679 /* Generates GENERIC code that converts a single precision float to half
1680    precision float.  */
1681 
1682 tree
visit_element(brig_code_entry_handler & caller,tree operand)1683 float_to_half::visit_element (brig_code_entry_handler &caller, tree operand)
1684 {
1685   tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16);
1686 
1687   tree casted_operand = build_resize_convert_view (uint32_type_node, operand);
1688 
1689   tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node,
1690 			    casted_operand);
1691   tree output
1692     = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out");
1693   tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, call);
1694   caller.append_statement (assign);
1695   return output;
1696 }
1697 
1698 /* Generates GENERIC code that converts a half precision float to single
1699    precision float.  */
1700 
1701 tree
visit_element(brig_code_entry_handler & caller,tree operand)1702 half_to_float::visit_element (brig_code_entry_handler &caller, tree operand)
1703 {
1704   tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F16_TO_F32);
1705   tree truncated_source = convert_to_integer (uint16_type_node, operand);
1706 
1707   tree call
1708     = call_builtin (built_in, 1, uint32_type_node, uint16_type_node,
1709 		    truncated_source);
1710 
1711   tree const_fp32_type
1712     = build_type_variant (brig_to_generic::s_fp32_type, 1, 0);
1713 
1714   tree output = create_tmp_var (const_fp32_type, "fp32out");
1715   tree casted_result
1716     = build_resize_convert_view (brig_to_generic::s_fp32_type, call);
1717 
1718   tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result);
1719 
1720   caller.append_statement (assign);
1721 
1722   return output;
1723 }
1724 
1725 /* Treats the INPUT as SRC_TYPE and sign or zero extends it to DEST_TYPE.  */
1726 
1727 tree
extend_int(tree input,tree dest_type,tree src_type)1728 brig_code_entry_handler::extend_int (tree input, tree dest_type, tree src_type)
1729 {
1730   /* Extend integer conversions according to the destination's
1731      ext mode.  First we need to clip the input register to
1732      the possible smaller integer size to ensure the correct sign
1733      bit is extended.  */
1734   tree clipped_input = convert_to_integer (src_type, input);
1735   tree conversion_result;
1736 
1737   if (TYPE_UNSIGNED (src_type))
1738     conversion_result
1739       = convert_to_integer (unsigned_type_for (dest_type), clipped_input);
1740   else
1741     conversion_result
1742       = convert_to_integer (signed_type_for (dest_type), clipped_input);
1743 
1744   /* Treat the result as unsigned so we do not sign extend to the
1745      register width.  For some reason this GENERIC sequence sign
1746      extends to the s register:
1747 
1748      D.1541 = (signed char) s1;
1749      D.1542 = (signed short) D.1541;
1750      s0 = (unsigned int) D.1542
1751   */
1752 
1753   /* The converted result is then extended to the target register
1754      width, using the same sign as the destination.  */
1755   return convert_to_integer (dest_type, conversion_result);
1756 }
1757 
1758 /* Returns the integer constant value of the given node.
1759    If it's a cast, looks into the source of the cast.  */
1760 HOST_WIDE_INT
int_constant_value(tree node)1761 brig_code_entry_handler::int_constant_value (tree node)
1762 {
1763   tree n = node;
1764   if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
1765     n = TREE_OPERAND (n, 0);
1766   return int_cst_value (n);
1767 }
1768 
1769