xref: /dragonfly/contrib/gcc-8.0/gcc/hsa-gen.c (revision 38fd1498)
1*38fd1498Szrj /* A pass for lowering gimple to HSAIL
2*38fd1498Szrj    Copyright (C) 2013-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Martin Jambor <mjambor@suse.cz> and
4*38fd1498Szrj    Martin Liska <mliska@suse.cz>.
5*38fd1498Szrj 
6*38fd1498Szrj This file is part of GCC.
7*38fd1498Szrj 
8*38fd1498Szrj GCC is free software; you can redistribute it and/or modify
9*38fd1498Szrj it under the terms of the GNU General Public License as published by
10*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
11*38fd1498Szrj any later version.
12*38fd1498Szrj 
13*38fd1498Szrj GCC is distributed in the hope that it will be useful,
14*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
15*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16*38fd1498Szrj GNU General Public License for more details.
17*38fd1498Szrj 
18*38fd1498Szrj You should have received a copy of the GNU General Public License
19*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
20*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
21*38fd1498Szrj 
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "coretypes.h"
25*38fd1498Szrj #include "memmodel.h"
26*38fd1498Szrj #include "tm.h"
27*38fd1498Szrj #include "is-a.h"
28*38fd1498Szrj #include "hash-table.h"
29*38fd1498Szrj #include "vec.h"
30*38fd1498Szrj #include "tree.h"
31*38fd1498Szrj #include "tree-pass.h"
32*38fd1498Szrj #include "function.h"
33*38fd1498Szrj #include "basic-block.h"
34*38fd1498Szrj #include "cfg.h"
35*38fd1498Szrj #include "fold-const.h"
36*38fd1498Szrj #include "gimple.h"
37*38fd1498Szrj #include "gimple-iterator.h"
38*38fd1498Szrj #include "bitmap.h"
39*38fd1498Szrj #include "dumpfile.h"
40*38fd1498Szrj #include "gimple-pretty-print.h"
41*38fd1498Szrj #include "diagnostic-core.h"
42*38fd1498Szrj #include "gimple-ssa.h"
43*38fd1498Szrj #include "tree-phinodes.h"
44*38fd1498Szrj #include "stringpool.h"
45*38fd1498Szrj #include "tree-vrp.h"
46*38fd1498Szrj #include "tree-ssanames.h"
47*38fd1498Szrj #include "tree-dfa.h"
48*38fd1498Szrj #include "ssa-iterators.h"
49*38fd1498Szrj #include "cgraph.h"
50*38fd1498Szrj #include "print-tree.h"
51*38fd1498Szrj #include "symbol-summary.h"
52*38fd1498Szrj #include "hsa-common.h"
53*38fd1498Szrj #include "cfghooks.h"
54*38fd1498Szrj #include "tree-cfg.h"
55*38fd1498Szrj #include "cfgloop.h"
56*38fd1498Szrj #include "cfganal.h"
57*38fd1498Szrj #include "builtins.h"
58*38fd1498Szrj #include "params.h"
59*38fd1498Szrj #include "gomp-constants.h"
60*38fd1498Szrj #include "internal-fn.h"
61*38fd1498Szrj #include "builtins.h"
62*38fd1498Szrj #include "stor-layout.h"
63*38fd1498Szrj #include "stringpool.h"
64*38fd1498Szrj #include "attribs.h"
65*38fd1498Szrj 
/* Record a failure of the current HSA function by calling hsa_fail_cfun ()
   and issue the HSA_SORRY_MSG warning (controlled by -Whsa) at the location
   of hsa_cfun->m_decl.  If that warning was emitted, follow it with a note
   at LOC built from the format string FMT and the remaining arguments.
   At least one argument after FMT must be given; use HSA_SORRY_AT when
   there is none.  */

#define HSA_SORRY_ATV(loc, fmt, ...) \
  do \
    { \
      hsa_fail_cfun (); \
      if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \
                      HSA_SORRY_MSG)) \
        inform (loc, fmt, __VA_ARGS__); \
    } \
  while (false)
77*38fd1498Szrj 
/* Variant of HSA_SORRY_ATV for a note text TEXT that takes no format
   arguments: call hsa_fail_cfun (), issue the HSA_SORRY_MSG warning at the
   location of hsa_cfun->m_decl and, if it was emitted, attach the note
   TEXT at LOC.  */

#define HSA_SORRY_AT(loc, text) \
  do \
    { \
      hsa_fail_cfun (); \
      if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \
                      HSA_SORRY_MSG)) \
        inform (loc, text); \
    } \
  while (false)
89*38fd1498Szrj 
/* Default number of threads used by kernel dispatch.
   NOTE(review): no use of this macro is visible in this part of the file;
   confirm where the value is consumed before changing it.  */

#define HSA_DEFAULT_NUM_THREADS 64
93*38fd1498Szrj 
/* Following structures are defined in the final version
   of HSA specification.  */

/* HSA queue packet is shadow structure, originally provided by AMD.
   NOTE(review): as a shadow of a runtime structure, the order and width of
   the fields presumably have to match the packet layout of the HSA
   run-time exactly, so fields must not be reordered, resized or removed --
   confirm against the HSA runtime specification.  */

struct hsa_queue_packet
{
  uint16_t header;
  uint16_t setup;
  uint16_t workgroup_size_x;
  uint16_t workgroup_size_y;
  uint16_t workgroup_size_z;
  uint16_t reserved0;
  uint32_t grid_size_x;
  uint32_t grid_size_y;
  uint32_t grid_size_z;
  uint32_t private_segment_size;
  uint32_t group_segment_size;
  uint64_t kernel_object;
  /* NOTE(review): a host pointer, so its size follows the host ABI rather
     than a fixed width like the neighbouring fields.  */
  void *kernarg_address;
  uint64_t reserved2;
  uint64_t completion_signal;
};
117*38fd1498Szrj 
/* HSA queue is shadow structure, originally provided by AMD.
   NOTE(review): as with hsa_queue_packet, the field order and widths
   presumably mirror the run-time's queue descriptor and must be kept in
   sync with it -- confirm against the HSA runtime specification.  */

struct hsa_queue
{
  int type;
  uint32_t features;
  /* NOTE(review): host pointer; its size follows the host ABI.  */
  void *base_address;
  uint64_t doorbell_signal;
  uint32_t size;
  uint32_t reserved1;
  uint64_t id;
};
130*38fd1498Szrj 
/* Obstack that is initialized in hsa_init_data_for_cfun and freed as
   a whole in hsa_deinit_data_for_cfun.  */
static struct obstack hsa_obstack;

/* List of pointers to all instructions that come from an object allocator.
   hsa_deinit_data_for_cfun passes every element to hsa_destroy_insn and
   then releases the vector.  */
static vec <hsa_insn_basic *> hsa_instructions;

/* List of pointers to all operands that come from an object allocator.
   hsa_deinit_data_for_cfun passes every element to hsa_destroy_operand and
   then releases the vector.  */
static vec <hsa_op_base *> hsa_operands;
138*38fd1498Szrj 
hsa_symbol()139*38fd1498Szrj hsa_symbol::hsa_symbol ()
140*38fd1498Szrj   : m_decl (NULL_TREE), m_name (NULL), m_name_number (0),
141*38fd1498Szrj     m_directive_offset (0), m_type (BRIG_TYPE_NONE),
142*38fd1498Szrj     m_segment (BRIG_SEGMENT_NONE), m_linkage (BRIG_LINKAGE_NONE), m_dim (0),
143*38fd1498Szrj     m_cst_value (NULL), m_global_scope_p (false), m_seen_error (false),
144*38fd1498Szrj     m_allocation (BRIG_ALLOCATION_AUTOMATIC), m_emitted_to_brig (false)
145*38fd1498Szrj {
146*38fd1498Szrj }
147*38fd1498Szrj 
148*38fd1498Szrj 
hsa_symbol(BrigType16_t type,BrigSegment8_t segment,BrigLinkage8_t linkage,bool global_scope_p,BrigAllocation allocation,BrigAlignment8_t align)149*38fd1498Szrj hsa_symbol::hsa_symbol (BrigType16_t type, BrigSegment8_t segment,
150*38fd1498Szrj 			BrigLinkage8_t linkage, bool global_scope_p,
151*38fd1498Szrj 			BrigAllocation allocation, BrigAlignment8_t align)
152*38fd1498Szrj   : m_decl (NULL_TREE), m_name (NULL), m_name_number (0),
153*38fd1498Szrj     m_directive_offset (0), m_type (type), m_segment (segment),
154*38fd1498Szrj     m_linkage (linkage), m_dim (0), m_cst_value (NULL),
155*38fd1498Szrj     m_global_scope_p (global_scope_p), m_seen_error (false),
156*38fd1498Szrj     m_allocation (allocation), m_emitted_to_brig (false), m_align (align)
157*38fd1498Szrj {
158*38fd1498Szrj }
159*38fd1498Szrj 
160*38fd1498Szrj unsigned HOST_WIDE_INT
total_byte_size()161*38fd1498Szrj hsa_symbol::total_byte_size ()
162*38fd1498Szrj {
163*38fd1498Szrj   unsigned HOST_WIDE_INT s
164*38fd1498Szrj     = hsa_type_bit_size (~BRIG_TYPE_ARRAY_MASK & m_type);
165*38fd1498Szrj   gcc_assert (s % BITS_PER_UNIT == 0);
166*38fd1498Szrj   s /= BITS_PER_UNIT;
167*38fd1498Szrj 
168*38fd1498Szrj   if (m_dim)
169*38fd1498Szrj     s *= m_dim;
170*38fd1498Szrj 
171*38fd1498Szrj   return s;
172*38fd1498Szrj }
173*38fd1498Szrj 
/* Forward declaration, needed because hsa_symbol::fillup_for_decl calls
   the function, passing it a tree type, a pointer that receives
   a dimension and a MIN32INT flag.  */

static BrigType16_t
hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p,
			bool min32int);
179*38fd1498Szrj 
180*38fd1498Szrj void
fillup_for_decl(tree decl)181*38fd1498Szrj hsa_symbol::fillup_for_decl (tree decl)
182*38fd1498Szrj {
183*38fd1498Szrj   m_decl = decl;
184*38fd1498Szrj   m_type = hsa_type_for_tree_type (TREE_TYPE (decl), &m_dim, false);
185*38fd1498Szrj   if (hsa_seen_error ())
186*38fd1498Szrj     {
187*38fd1498Szrj       m_seen_error = true;
188*38fd1498Szrj       return;
189*38fd1498Szrj     }
190*38fd1498Szrj 
191*38fd1498Szrj   m_align = MAX (m_align, hsa_natural_alignment (m_type));
192*38fd1498Szrj }
193*38fd1498Szrj 
194*38fd1498Szrj /* Constructor of class representing global HSA function/kernel information and
195*38fd1498Szrj    state.  FNDECL is function declaration, KERNEL_P is true if the function
196*38fd1498Szrj    is going to become a HSA kernel.  If the function has body, SSA_NAMES_COUNT
197*38fd1498Szrj    should be set to number of SSA names used in the function.
198*38fd1498Szrj    MODIFIED_CFG is set to true in case we modified control-flow graph
199*38fd1498Szrj    of the function.  */
200*38fd1498Szrj 
hsa_function_representation(tree fdecl,bool kernel_p,unsigned ssa_names_count,bool modified_cfg)201*38fd1498Szrj hsa_function_representation::hsa_function_representation
202*38fd1498Szrj   (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool modified_cfg)
203*38fd1498Szrj   : m_name (NULL),
204*38fd1498Szrj     m_reg_count (0), m_input_args (vNULL),
205*38fd1498Szrj     m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL),
206*38fd1498Szrj     m_private_variables (vNULL), m_called_functions (vNULL),
207*38fd1498Szrj     m_called_internal_fns (vNULL), m_hbb_count (0),
208*38fd1498Szrj     m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false),
209*38fd1498Szrj     m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL),
210*38fd1498Szrj     m_kernel_dispatch_count (0), m_maximum_omp_data_size (0),
211*38fd1498Szrj     m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (),
212*38fd1498Szrj     m_modified_cfg (modified_cfg)
213*38fd1498Szrj {
214*38fd1498Szrj   int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;
215*38fd1498Szrj   m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len);
216*38fd1498Szrj   m_ssa_map.safe_grow_cleared (ssa_names_count);
217*38fd1498Szrj }
218*38fd1498Szrj 
219*38fd1498Szrj /* Constructor of class representing HSA function information that
220*38fd1498Szrj    is derived for an internal function.  */
hsa_function_representation(hsa_internal_fn * fn)221*38fd1498Szrj hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn)
222*38fd1498Szrj   : m_reg_count (0), m_input_args (vNULL),
223*38fd1498Szrj     m_output_arg (NULL), m_local_symbols (NULL),
224*38fd1498Szrj     m_spill_symbols (vNULL), m_global_symbols (vNULL),
225*38fd1498Szrj     m_private_variables (vNULL), m_called_functions (vNULL),
226*38fd1498Szrj     m_called_internal_fns (vNULL), m_hbb_count (0),
227*38fd1498Szrj     m_in_ssa (true), m_kern_p (false), m_declaration_p (true), m_decl (NULL),
228*38fd1498Szrj     m_internal_fn (fn), m_shadow_reg (NULL), m_kernel_dispatch_count (0),
229*38fd1498Szrj     m_maximum_omp_data_size (0), m_seen_error (false), m_temp_symbol_count (0),
230*38fd1498Szrj     m_ssa_map () {}
231*38fd1498Szrj 
232*38fd1498Szrj /* Destructor of class holding function/kernel-wide information and state.  */
233*38fd1498Szrj 
~hsa_function_representation()234*38fd1498Szrj hsa_function_representation::~hsa_function_representation ()
235*38fd1498Szrj {
236*38fd1498Szrj   /* Kernel names are deallocated at the end of BRIG output when deallocating
237*38fd1498Szrj      hsa_decl_kernel_mapping.  */
238*38fd1498Szrj   if (!m_kern_p || m_seen_error)
239*38fd1498Szrj     free (m_name);
240*38fd1498Szrj 
241*38fd1498Szrj   for (unsigned i = 0; i < m_input_args.length (); i++)
242*38fd1498Szrj     delete m_input_args[i];
243*38fd1498Szrj   m_input_args.release ();
244*38fd1498Szrj 
245*38fd1498Szrj   delete m_output_arg;
246*38fd1498Szrj   delete m_local_symbols;
247*38fd1498Szrj 
248*38fd1498Szrj   for (unsigned i = 0; i < m_spill_symbols.length (); i++)
249*38fd1498Szrj     delete m_spill_symbols[i];
250*38fd1498Szrj   m_spill_symbols.release ();
251*38fd1498Szrj 
252*38fd1498Szrj   hsa_symbol *sym;
253*38fd1498Szrj   for (unsigned i = 0; i < m_global_symbols.iterate (i, &sym); i++)
254*38fd1498Szrj     if (sym->m_linkage != BRIG_ALLOCATION_PROGRAM)
255*38fd1498Szrj       delete sym;
256*38fd1498Szrj   m_global_symbols.release ();
257*38fd1498Szrj 
258*38fd1498Szrj   for (unsigned i = 0; i < m_private_variables.length (); i++)
259*38fd1498Szrj     delete m_private_variables[i];
260*38fd1498Szrj   m_private_variables.release ();
261*38fd1498Szrj   m_called_functions.release ();
262*38fd1498Szrj   m_ssa_map.release ();
263*38fd1498Szrj 
264*38fd1498Szrj   for (unsigned i = 0; i < m_called_internal_fns.length (); i++)
265*38fd1498Szrj     delete m_called_internal_fns[i];
266*38fd1498Szrj }
267*38fd1498Szrj 
268*38fd1498Szrj hsa_op_reg *
get_shadow_reg()269*38fd1498Szrj hsa_function_representation::get_shadow_reg ()
270*38fd1498Szrj {
271*38fd1498Szrj   /* If we compile a function with kernel dispatch and does not set
272*38fd1498Szrj      an optimization level, the function won't be inlined and
273*38fd1498Szrj      we return NULL.  */
274*38fd1498Szrj   if (!m_kern_p)
275*38fd1498Szrj     return NULL;
276*38fd1498Szrj 
277*38fd1498Szrj   if (m_shadow_reg)
278*38fd1498Szrj     return m_shadow_reg;
279*38fd1498Szrj 
280*38fd1498Szrj   /* Append the shadow argument.  */
281*38fd1498Szrj   hsa_symbol *shadow = new hsa_symbol (BRIG_TYPE_U64, BRIG_SEGMENT_KERNARG,
282*38fd1498Szrj 				       BRIG_LINKAGE_FUNCTION);
283*38fd1498Szrj   m_input_args.safe_push (shadow);
284*38fd1498Szrj   shadow->m_name = "hsa_runtime_shadow";
285*38fd1498Szrj 
286*38fd1498Szrj   hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_U64);
287*38fd1498Szrj   hsa_op_address *addr = new hsa_op_address (shadow);
288*38fd1498Szrj 
289*38fd1498Szrj   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64, r, addr);
290*38fd1498Szrj   hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->append_insn (mem);
291*38fd1498Szrj   m_shadow_reg = r;
292*38fd1498Szrj 
293*38fd1498Szrj   return r;
294*38fd1498Szrj }
295*38fd1498Szrj 
has_shadow_reg_p()296*38fd1498Szrj bool hsa_function_representation::has_shadow_reg_p ()
297*38fd1498Szrj {
298*38fd1498Szrj   return m_shadow_reg != NULL;
299*38fd1498Szrj }
300*38fd1498Szrj 
301*38fd1498Szrj void
init_extra_bbs()302*38fd1498Szrj hsa_function_representation::init_extra_bbs ()
303*38fd1498Szrj {
304*38fd1498Szrj   hsa_init_new_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
305*38fd1498Szrj   hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
306*38fd1498Szrj }
307*38fd1498Szrj 
308*38fd1498Szrj void
update_dominance()309*38fd1498Szrj hsa_function_representation::update_dominance ()
310*38fd1498Szrj {
311*38fd1498Szrj   if (m_modified_cfg)
312*38fd1498Szrj     {
313*38fd1498Szrj       free_dominance_info (CDI_DOMINATORS);
314*38fd1498Szrj       calculate_dominance_info (CDI_DOMINATORS);
315*38fd1498Szrj     }
316*38fd1498Szrj }
317*38fd1498Szrj 
318*38fd1498Szrj hsa_symbol *
create_hsa_temporary(BrigType16_t type)319*38fd1498Szrj hsa_function_representation::create_hsa_temporary (BrigType16_t type)
320*38fd1498Szrj {
321*38fd1498Szrj   hsa_symbol *s = new hsa_symbol (type, BRIG_SEGMENT_PRIVATE,
322*38fd1498Szrj 				  BRIG_LINKAGE_FUNCTION);
323*38fd1498Szrj   s->m_name_number = m_temp_symbol_count++;
324*38fd1498Szrj 
325*38fd1498Szrj   hsa_cfun->m_private_variables.safe_push (s);
326*38fd1498Szrj   return s;
327*38fd1498Szrj }
328*38fd1498Szrj 
329*38fd1498Szrj BrigLinkage8_t
get_linkage()330*38fd1498Szrj hsa_function_representation::get_linkage ()
331*38fd1498Szrj {
332*38fd1498Szrj   if (m_internal_fn)
333*38fd1498Szrj     return BRIG_LINKAGE_PROGRAM;
334*38fd1498Szrj 
335*38fd1498Szrj   return m_kern_p || TREE_PUBLIC (m_decl) ?
336*38fd1498Szrj     BRIG_LINKAGE_PROGRAM : BRIG_LINKAGE_MODULE;
337*38fd1498Szrj }
338*38fd1498Szrj 
/* Hash map of simple OMP builtins, keyed by the name of the builtin.  It
   is created on demand by hsa_init_simple_builtins and deleted (and reset
   to NULL) by hsa_deinit_data_for_cfun.  */
static hash_map <nofree_string_hash, omp_simple_builtin> *omp_simple_builtins
  = NULL;
342*38fd1498Szrj 
/* Warning messages for OMP builtins.  They are used as the message
   argument of the entries built in hsa_init_simple_builtins.  */

#define HSA_WARN_LOCK_ROUTINE "support for HSA does not implement OpenMP " \
  "lock routines"
#define HSA_WARN_TIMING_ROUTINE "support for HSA does not implement OpenMP " \
  "timing routines"
#define HSA_WARN_MEMORY_ROUTINE "OpenMP device memory library routines have " \
  "undefined semantics within target regions, support for HSA ignores them"
#define HSA_WARN_AFFINITY "Support for HSA does not implement OpenMP " \
  "affinity features"
353*38fd1498Szrj 
354*38fd1498Szrj /* Initialize hash map with simple OMP builtins.  */
355*38fd1498Szrj 
356*38fd1498Szrj static void
hsa_init_simple_builtins()357*38fd1498Szrj hsa_init_simple_builtins ()
358*38fd1498Szrj {
359*38fd1498Szrj   if (omp_simple_builtins != NULL)
360*38fd1498Szrj     return;
361*38fd1498Szrj 
362*38fd1498Szrj   omp_simple_builtins
363*38fd1498Szrj     = new hash_map <nofree_string_hash, omp_simple_builtin> ();
364*38fd1498Szrj 
365*38fd1498Szrj   omp_simple_builtin omp_builtins[] =
366*38fd1498Szrj     {
367*38fd1498Szrj       omp_simple_builtin ("omp_get_initial_device", NULL, false,
368*38fd1498Szrj 			  new hsa_op_immed (GOMP_DEVICE_HOST,
369*38fd1498Szrj 					    (BrigType16_t) BRIG_TYPE_S32)),
370*38fd1498Szrj       omp_simple_builtin ("omp_is_initial_device", NULL, false,
371*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
372*38fd1498Szrj       omp_simple_builtin ("omp_get_dynamic", NULL, false,
373*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
374*38fd1498Szrj       omp_simple_builtin ("omp_set_dynamic", NULL, false, NULL),
375*38fd1498Szrj       omp_simple_builtin ("omp_init_lock", HSA_WARN_LOCK_ROUTINE, true),
376*38fd1498Szrj       omp_simple_builtin ("omp_init_lock_with_hint", HSA_WARN_LOCK_ROUTINE,
377*38fd1498Szrj 			  true),
378*38fd1498Szrj       omp_simple_builtin ("omp_init_nest_lock_with_hint", HSA_WARN_LOCK_ROUTINE,
379*38fd1498Szrj 			  true),
380*38fd1498Szrj       omp_simple_builtin ("omp_destroy_lock", HSA_WARN_LOCK_ROUTINE, true),
381*38fd1498Szrj       omp_simple_builtin ("omp_set_lock", HSA_WARN_LOCK_ROUTINE, true),
382*38fd1498Szrj       omp_simple_builtin ("omp_unset_lock", HSA_WARN_LOCK_ROUTINE, true),
383*38fd1498Szrj       omp_simple_builtin ("omp_test_lock", HSA_WARN_LOCK_ROUTINE, true),
384*38fd1498Szrj       omp_simple_builtin ("omp_get_wtime", HSA_WARN_TIMING_ROUTINE, true),
385*38fd1498Szrj       omp_simple_builtin ("omp_get_wtick", HSA_WARN_TIMING_ROUTINE, true),
386*38fd1498Szrj       omp_simple_builtin ("omp_target_alloc", HSA_WARN_MEMORY_ROUTINE, false,
387*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_U64)),
388*38fd1498Szrj       omp_simple_builtin ("omp_target_free", HSA_WARN_MEMORY_ROUTINE, false),
389*38fd1498Szrj       omp_simple_builtin ("omp_target_is_present", HSA_WARN_MEMORY_ROUTINE,
390*38fd1498Szrj 			  false,
391*38fd1498Szrj 			  new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
392*38fd1498Szrj       omp_simple_builtin ("omp_target_memcpy", HSA_WARN_MEMORY_ROUTINE, false,
393*38fd1498Szrj 			  new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
394*38fd1498Szrj       omp_simple_builtin ("omp_target_memcpy_rect", HSA_WARN_MEMORY_ROUTINE,
395*38fd1498Szrj 			  false,
396*38fd1498Szrj 			  new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
397*38fd1498Szrj       omp_simple_builtin ("omp_target_associate_ptr", HSA_WARN_MEMORY_ROUTINE,
398*38fd1498Szrj 			  false,
399*38fd1498Szrj 			  new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
400*38fd1498Szrj       omp_simple_builtin ("omp_target_disassociate_ptr",
401*38fd1498Szrj 			  HSA_WARN_MEMORY_ROUTINE,
402*38fd1498Szrj 			  false,
403*38fd1498Szrj 			  new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
404*38fd1498Szrj       omp_simple_builtin ("omp_set_max_active_levels",
405*38fd1498Szrj 			  "Support for HSA only allows only one active level, "
406*38fd1498Szrj 			  "call to omp_set_max_active_levels will be ignored "
407*38fd1498Szrj 			  "in the generated HSAIL",
408*38fd1498Szrj 			  false, NULL),
409*38fd1498Szrj       omp_simple_builtin ("omp_get_max_active_levels", NULL, false,
410*38fd1498Szrj 			  new hsa_op_immed (1, (BrigType16_t) BRIG_TYPE_S32)),
411*38fd1498Szrj       omp_simple_builtin ("omp_in_final", NULL, false,
412*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
413*38fd1498Szrj       omp_simple_builtin ("omp_get_proc_bind", HSA_WARN_AFFINITY, false,
414*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
415*38fd1498Szrj       omp_simple_builtin ("omp_get_num_places", HSA_WARN_AFFINITY, false,
416*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
417*38fd1498Szrj       omp_simple_builtin ("omp_get_place_num_procs", HSA_WARN_AFFINITY, false,
418*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
419*38fd1498Szrj       omp_simple_builtin ("omp_get_place_proc_ids", HSA_WARN_AFFINITY, false,
420*38fd1498Szrj 			  NULL),
421*38fd1498Szrj       omp_simple_builtin ("omp_get_place_num", HSA_WARN_AFFINITY, false,
422*38fd1498Szrj 			  new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
423*38fd1498Szrj       omp_simple_builtin ("omp_get_partition_num_places", HSA_WARN_AFFINITY,
424*38fd1498Szrj 			  false,
425*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
426*38fd1498Szrj       omp_simple_builtin ("omp_get_partition_place_nums", HSA_WARN_AFFINITY,
427*38fd1498Szrj 			  false, NULL),
428*38fd1498Szrj       omp_simple_builtin ("omp_set_default_device",
429*38fd1498Szrj 			  "omp_set_default_device has undefined semantics "
430*38fd1498Szrj 			  "within target regions, support for HSA ignores it",
431*38fd1498Szrj 			  false, NULL),
432*38fd1498Szrj       omp_simple_builtin ("omp_get_default_device",
433*38fd1498Szrj 			  "omp_get_default_device has undefined semantics "
434*38fd1498Szrj 			  "within target regions, support for HSA ignores it",
435*38fd1498Szrj 			  false,
436*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
437*38fd1498Szrj       omp_simple_builtin ("omp_get_num_devices",
438*38fd1498Szrj 			  "omp_get_num_devices has undefined semantics "
439*38fd1498Szrj 			  "within target regions, support for HSA ignores it",
440*38fd1498Szrj 			  false,
441*38fd1498Szrj 			  new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
442*38fd1498Szrj       omp_simple_builtin ("omp_get_num_procs", NULL, true, NULL),
443*38fd1498Szrj       omp_simple_builtin ("omp_get_cancellation", NULL, true, NULL),
444*38fd1498Szrj       omp_simple_builtin ("omp_set_nested", NULL, true, NULL),
445*38fd1498Szrj       omp_simple_builtin ("omp_get_nested", NULL, true, NULL),
446*38fd1498Szrj       omp_simple_builtin ("omp_set_schedule", NULL, true, NULL),
447*38fd1498Szrj       omp_simple_builtin ("omp_get_schedule", NULL, true, NULL),
448*38fd1498Szrj       omp_simple_builtin ("omp_get_thread_limit", NULL, true, NULL),
449*38fd1498Szrj       omp_simple_builtin ("omp_get_team_size", NULL, true, NULL),
450*38fd1498Szrj       omp_simple_builtin ("omp_get_ancestor_thread_num", NULL, true, NULL),
451*38fd1498Szrj       omp_simple_builtin ("omp_get_max_task_priority", NULL, true, NULL)
452*38fd1498Szrj     };
453*38fd1498Szrj 
454*38fd1498Szrj   unsigned count = sizeof (omp_builtins) / sizeof (omp_simple_builtin);
455*38fd1498Szrj 
456*38fd1498Szrj   for (unsigned i = 0; i < count; i++)
457*38fd1498Szrj     omp_simple_builtins->put (omp_builtins[i].m_name, omp_builtins[i]);
458*38fd1498Szrj }
459*38fd1498Szrj 
460*38fd1498Szrj /* Allocate HSA structures that we need only while generating with this.  */
461*38fd1498Szrj 
462*38fd1498Szrj static void
hsa_init_data_for_cfun()463*38fd1498Szrj hsa_init_data_for_cfun ()
464*38fd1498Szrj {
465*38fd1498Szrj   hsa_init_compilation_unit_data ();
466*38fd1498Szrj   gcc_obstack_init (&hsa_obstack);
467*38fd1498Szrj }
468*38fd1498Szrj 
469*38fd1498Szrj /* Deinitialize HSA subsystem and free all allocated memory.  */
470*38fd1498Szrj 
471*38fd1498Szrj static void
hsa_deinit_data_for_cfun(void)472*38fd1498Szrj hsa_deinit_data_for_cfun (void)
473*38fd1498Szrj {
474*38fd1498Szrj   basic_block bb;
475*38fd1498Szrj 
476*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
477*38fd1498Szrj     if (bb->aux)
478*38fd1498Szrj       {
479*38fd1498Szrj 	hsa_bb *hbb = hsa_bb_for_bb (bb);
480*38fd1498Szrj 	hbb->~hsa_bb ();
481*38fd1498Szrj 	bb->aux = NULL;
482*38fd1498Szrj       }
483*38fd1498Szrj 
484*38fd1498Szrj   for (unsigned int i = 0; i < hsa_operands.length (); i++)
485*38fd1498Szrj     hsa_destroy_operand (hsa_operands[i]);
486*38fd1498Szrj 
487*38fd1498Szrj   hsa_operands.release ();
488*38fd1498Szrj 
489*38fd1498Szrj   for (unsigned i = 0; i < hsa_instructions.length (); i++)
490*38fd1498Szrj     hsa_destroy_insn (hsa_instructions[i]);
491*38fd1498Szrj 
492*38fd1498Szrj   hsa_instructions.release ();
493*38fd1498Szrj 
494*38fd1498Szrj   if (omp_simple_builtins != NULL)
495*38fd1498Szrj     {
496*38fd1498Szrj       delete omp_simple_builtins;
497*38fd1498Szrj       omp_simple_builtins = NULL;
498*38fd1498Szrj     }
499*38fd1498Szrj 
500*38fd1498Szrj   obstack_free (&hsa_obstack, NULL);
501*38fd1498Szrj   delete hsa_cfun;
502*38fd1498Szrj }
503*38fd1498Szrj 
504*38fd1498Szrj /* Return the type which holds addresses in the given SEGMENT.  */
505*38fd1498Szrj 
506*38fd1498Szrj static BrigType16_t
hsa_get_segment_addr_type(BrigSegment8_t segment)507*38fd1498Szrj hsa_get_segment_addr_type (BrigSegment8_t segment)
508*38fd1498Szrj {
509*38fd1498Szrj   switch (segment)
510*38fd1498Szrj     {
511*38fd1498Szrj     case BRIG_SEGMENT_NONE:
512*38fd1498Szrj       gcc_unreachable ();
513*38fd1498Szrj 
514*38fd1498Szrj     case BRIG_SEGMENT_FLAT:
515*38fd1498Szrj     case BRIG_SEGMENT_GLOBAL:
516*38fd1498Szrj     case BRIG_SEGMENT_READONLY:
517*38fd1498Szrj     case BRIG_SEGMENT_KERNARG:
518*38fd1498Szrj       return hsa_machine_large_p () ? BRIG_TYPE_U64 : BRIG_TYPE_U32;
519*38fd1498Szrj 
520*38fd1498Szrj     case BRIG_SEGMENT_GROUP:
521*38fd1498Szrj     case BRIG_SEGMENT_PRIVATE:
522*38fd1498Szrj     case BRIG_SEGMENT_SPILL:
523*38fd1498Szrj     case BRIG_SEGMENT_ARG:
524*38fd1498Szrj       return BRIG_TYPE_U32;
525*38fd1498Szrj     }
526*38fd1498Szrj   gcc_unreachable ();
527*38fd1498Szrj }
528*38fd1498Szrj 
529*38fd1498Szrj /* Return integer brig type according to provided SIZE in bytes.  If SIGN
530*38fd1498Szrj    is set to true, return signed integer type.  */
531*38fd1498Szrj 
532*38fd1498Szrj static BrigType16_t
get_integer_type_by_bytes(unsigned size,bool sign)533*38fd1498Szrj get_integer_type_by_bytes (unsigned size, bool sign)
534*38fd1498Szrj {
535*38fd1498Szrj   if (sign)
536*38fd1498Szrj     switch (size)
537*38fd1498Szrj       {
538*38fd1498Szrj       case 1:
539*38fd1498Szrj 	return BRIG_TYPE_S8;
540*38fd1498Szrj       case 2:
541*38fd1498Szrj 	return BRIG_TYPE_S16;
542*38fd1498Szrj       case 4:
543*38fd1498Szrj 	return BRIG_TYPE_S32;
544*38fd1498Szrj       case 8:
545*38fd1498Szrj 	return BRIG_TYPE_S64;
546*38fd1498Szrj       default:
547*38fd1498Szrj 	break;
548*38fd1498Szrj       }
549*38fd1498Szrj   else
550*38fd1498Szrj     switch (size)
551*38fd1498Szrj       {
552*38fd1498Szrj       case 1:
553*38fd1498Szrj 	return BRIG_TYPE_U8;
554*38fd1498Szrj       case 2:
555*38fd1498Szrj 	return BRIG_TYPE_U16;
556*38fd1498Szrj       case 4:
557*38fd1498Szrj 	return BRIG_TYPE_U32;
558*38fd1498Szrj       case 8:
559*38fd1498Szrj 	return BRIG_TYPE_U64;
560*38fd1498Szrj       default:
561*38fd1498Szrj 	break;
562*38fd1498Szrj       }
563*38fd1498Szrj 
564*38fd1498Szrj   return 0;
565*38fd1498Szrj }
566*38fd1498Szrj 
567*38fd1498Szrj /* If T points to an integral type smaller than 32 bits, change it to a 32bit
568*38fd1498Szrj    equivalent and return the result.  Otherwise just return the result.   */
569*38fd1498Szrj 
570*38fd1498Szrj static BrigType16_t
hsa_extend_inttype_to_32bit(BrigType16_t t)571*38fd1498Szrj hsa_extend_inttype_to_32bit (BrigType16_t t)
572*38fd1498Szrj {
573*38fd1498Szrj   if (t == BRIG_TYPE_U8 || t == BRIG_TYPE_U16)
574*38fd1498Szrj     return BRIG_TYPE_U32;
575*38fd1498Szrj   else if (t == BRIG_TYPE_S8 || t == BRIG_TYPE_S16)
576*38fd1498Szrj     return BRIG_TYPE_S32;
577*38fd1498Szrj   return t;
578*38fd1498Szrj }
579*38fd1498Szrj 
580*38fd1498Szrj /* Return HSA type for tree TYPE, which has to fit into BrigType16_t.  Pointers
581*38fd1498Szrj    are assumed to use flat addressing.  If min32int is true, always expand
582*38fd1498Szrj    integer types to one that has at least 32 bits.  */
583*38fd1498Szrj 
584*38fd1498Szrj static BrigType16_t
hsa_type_for_scalar_tree_type(const_tree type,bool min32int)585*38fd1498Szrj hsa_type_for_scalar_tree_type (const_tree type, bool min32int)
586*38fd1498Szrj {
587*38fd1498Szrj   HOST_WIDE_INT bsize;
588*38fd1498Szrj   const_tree base;
589*38fd1498Szrj   BrigType16_t res = BRIG_TYPE_NONE;
590*38fd1498Szrj 
591*38fd1498Szrj   gcc_checking_assert (TYPE_P (type));
592*38fd1498Szrj   gcc_checking_assert (!AGGREGATE_TYPE_P (type));
593*38fd1498Szrj   if (POINTER_TYPE_P (type))
594*38fd1498Szrj     return hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
595*38fd1498Szrj 
596*38fd1498Szrj   if (TREE_CODE (type) == VECTOR_TYPE)
597*38fd1498Szrj     base = TREE_TYPE (type);
598*38fd1498Szrj   else if (TREE_CODE (type) == COMPLEX_TYPE)
599*38fd1498Szrj     {
600*38fd1498Szrj       base = TREE_TYPE (type);
601*38fd1498Szrj       min32int = true;
602*38fd1498Szrj     }
603*38fd1498Szrj   else
604*38fd1498Szrj     base = type;
605*38fd1498Szrj 
606*38fd1498Szrj   if (!tree_fits_uhwi_p (TYPE_SIZE (base)))
607*38fd1498Szrj     {
608*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (type),
609*38fd1498Szrj 		     "support for HSA does not implement huge or "
610*38fd1498Szrj 		     "variable-sized type %qT", type);
611*38fd1498Szrj       return res;
612*38fd1498Szrj     }
613*38fd1498Szrj 
614*38fd1498Szrj   bsize = tree_to_uhwi (TYPE_SIZE (base));
615*38fd1498Szrj   unsigned byte_size = bsize / BITS_PER_UNIT;
616*38fd1498Szrj   if (INTEGRAL_TYPE_P (base))
617*38fd1498Szrj     res = get_integer_type_by_bytes (byte_size, !TYPE_UNSIGNED (base));
618*38fd1498Szrj   else if (SCALAR_FLOAT_TYPE_P (base))
619*38fd1498Szrj     {
620*38fd1498Szrj       switch (bsize)
621*38fd1498Szrj 	{
622*38fd1498Szrj 	case 16:
623*38fd1498Szrj 	  res = BRIG_TYPE_F16;
624*38fd1498Szrj 	  break;
625*38fd1498Szrj 	case 32:
626*38fd1498Szrj 	  res = BRIG_TYPE_F32;
627*38fd1498Szrj 	  break;
628*38fd1498Szrj 	case 64:
629*38fd1498Szrj 	  res = BRIG_TYPE_F64;
630*38fd1498Szrj 	  break;
631*38fd1498Szrj 	default:
632*38fd1498Szrj 	  break;
633*38fd1498Szrj 	}
634*38fd1498Szrj     }
635*38fd1498Szrj 
636*38fd1498Szrj   if (res == BRIG_TYPE_NONE)
637*38fd1498Szrj     {
638*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (type),
639*38fd1498Szrj 		     "support for HSA does not implement type %qT", type);
640*38fd1498Szrj       return res;
641*38fd1498Szrj     }
642*38fd1498Szrj 
643*38fd1498Szrj   if (TREE_CODE (type) == VECTOR_TYPE)
644*38fd1498Szrj     {
645*38fd1498Szrj       HOST_WIDE_INT tsize = tree_to_uhwi (TYPE_SIZE (type));
646*38fd1498Szrj 
647*38fd1498Szrj       if (bsize == tsize)
648*38fd1498Szrj 	{
649*38fd1498Szrj 	  HSA_SORRY_ATV (EXPR_LOCATION (type),
650*38fd1498Szrj 			 "support for HSA does not implement a vector type "
651*38fd1498Szrj 			 "where a type and unit size are equal: %qT", type);
652*38fd1498Szrj 	  return res;
653*38fd1498Szrj 	}
654*38fd1498Szrj 
655*38fd1498Szrj       switch (tsize)
656*38fd1498Szrj 	{
657*38fd1498Szrj 	case 32:
658*38fd1498Szrj 	  res |= BRIG_TYPE_PACK_32;
659*38fd1498Szrj 	  break;
660*38fd1498Szrj 	case 64:
661*38fd1498Szrj 	  res |= BRIG_TYPE_PACK_64;
662*38fd1498Szrj 	  break;
663*38fd1498Szrj 	case 128:
664*38fd1498Szrj 	  res |= BRIG_TYPE_PACK_128;
665*38fd1498Szrj 	  break;
666*38fd1498Szrj 	default:
667*38fd1498Szrj 	  HSA_SORRY_ATV (EXPR_LOCATION (type),
668*38fd1498Szrj 			 "support for HSA does not implement type %qT", type);
669*38fd1498Szrj 	}
670*38fd1498Szrj     }
671*38fd1498Szrj 
672*38fd1498Szrj   if (min32int)
673*38fd1498Szrj     /* Registers/immediate operands can only be 32bit or more except for
674*38fd1498Szrj        f16.  */
675*38fd1498Szrj     res = hsa_extend_inttype_to_32bit (res);
676*38fd1498Szrj 
677*38fd1498Szrj   if (TREE_CODE (type) == COMPLEX_TYPE)
678*38fd1498Szrj     {
679*38fd1498Szrj       unsigned bsize = 2 * hsa_type_bit_size (res);
680*38fd1498Szrj       res = hsa_bittype_for_bitsize (bsize);
681*38fd1498Szrj     }
682*38fd1498Szrj 
683*38fd1498Szrj   return res;
684*38fd1498Szrj }
685*38fd1498Szrj 
686*38fd1498Szrj /* Returns the BRIG type we need to load/store entities of TYPE.  */
687*38fd1498Szrj 
688*38fd1498Szrj static BrigType16_t
mem_type_for_type(BrigType16_t type)689*38fd1498Szrj mem_type_for_type (BrigType16_t type)
690*38fd1498Szrj {
691*38fd1498Szrj   /* HSA has non-intuitive constraints on load/store types.  If it's
692*38fd1498Szrj      a bit-type it _must_ be B128, if it's not a bit-type it must be
693*38fd1498Szrj      64bit max.  So for loading entities of 128 bits (e.g. vectors)
694*38fd1498Szrj      we have to use B128, while for loading the rest we have to use the
695*38fd1498Szrj      input type (??? or maybe also flattened to a equally sized non-vector
696*38fd1498Szrj      unsigned type?).  */
697*38fd1498Szrj   if ((type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_128)
698*38fd1498Szrj     return BRIG_TYPE_B128;
699*38fd1498Szrj   else if (hsa_btype_p (type) || hsa_type_packed_p (type))
700*38fd1498Szrj     {
701*38fd1498Szrj       unsigned bitsize = hsa_type_bit_size (type);
702*38fd1498Szrj       if (bitsize < 128)
703*38fd1498Szrj 	return hsa_uint_for_bitsize (bitsize);
704*38fd1498Szrj       else
705*38fd1498Szrj 	return hsa_bittype_for_bitsize (bitsize);
706*38fd1498Szrj     }
707*38fd1498Szrj   return type;
708*38fd1498Szrj }
709*38fd1498Szrj 
710*38fd1498Szrj /* Return HSA type for tree TYPE.  If it cannot fit into BrigType16_t, some
711*38fd1498Szrj    kind of array will be generated, setting DIM appropriately.  Otherwise, it
712*38fd1498Szrj    will be set to zero.  */
713*38fd1498Szrj 
714*38fd1498Szrj static BrigType16_t
715*38fd1498Szrj hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p = NULL,
716*38fd1498Szrj 			bool min32int = false)
717*38fd1498Szrj {
718*38fd1498Szrj   gcc_checking_assert (TYPE_P (type));
719*38fd1498Szrj   if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
720*38fd1498Szrj     {
721*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (type), "support for HSA does not "
722*38fd1498Szrj 		     "implement huge or variable-sized type %qT", type);
723*38fd1498Szrj       return BRIG_TYPE_NONE;
724*38fd1498Szrj     }
725*38fd1498Szrj 
726*38fd1498Szrj   if (RECORD_OR_UNION_TYPE_P (type))
727*38fd1498Szrj     {
728*38fd1498Szrj       if (dim_p)
729*38fd1498Szrj 	*dim_p = tree_to_uhwi (TYPE_SIZE_UNIT (type));
730*38fd1498Szrj       return BRIG_TYPE_U8 | BRIG_TYPE_ARRAY;
731*38fd1498Szrj     }
732*38fd1498Szrj 
733*38fd1498Szrj   if (TREE_CODE (type) == ARRAY_TYPE)
734*38fd1498Szrj     {
735*38fd1498Szrj       /* We try to be nice and use the real base-type when this is an array of
736*38fd1498Szrj 	 scalars and only resort to an array of bytes if the type is more
737*38fd1498Szrj 	 complex.  */
738*38fd1498Szrj 
739*38fd1498Szrj       unsigned HOST_WIDE_INT dim = 1;
740*38fd1498Szrj 
741*38fd1498Szrj       while (TREE_CODE (type) == ARRAY_TYPE)
742*38fd1498Szrj 	{
743*38fd1498Szrj 	  tree domain = TYPE_DOMAIN (type);
744*38fd1498Szrj 	  if (!TYPE_MIN_VALUE (domain)
745*38fd1498Szrj 	      || !TYPE_MAX_VALUE (domain)
746*38fd1498Szrj 	      || !tree_fits_shwi_p (TYPE_MIN_VALUE (domain))
747*38fd1498Szrj 	      || !tree_fits_shwi_p (TYPE_MAX_VALUE (domain)))
748*38fd1498Szrj 	    {
749*38fd1498Szrj 	      HSA_SORRY_ATV (EXPR_LOCATION (type),
750*38fd1498Szrj 			     "support for HSA does not implement array "
751*38fd1498Szrj 			     "%qT with unknown bounds", type);
752*38fd1498Szrj 	      return BRIG_TYPE_NONE;
753*38fd1498Szrj 	    }
754*38fd1498Szrj 	  HOST_WIDE_INT min = tree_to_shwi (TYPE_MIN_VALUE (domain));
755*38fd1498Szrj 	  HOST_WIDE_INT max = tree_to_shwi (TYPE_MAX_VALUE (domain));
756*38fd1498Szrj 	  dim = dim * (unsigned HOST_WIDE_INT) (max - min + 1);
757*38fd1498Szrj 	  type = TREE_TYPE (type);
758*38fd1498Szrj 	}
759*38fd1498Szrj 
760*38fd1498Szrj       BrigType16_t res;
761*38fd1498Szrj       if (RECORD_OR_UNION_TYPE_P (type))
762*38fd1498Szrj 	{
763*38fd1498Szrj 	  dim = dim * tree_to_uhwi (TYPE_SIZE_UNIT (type));
764*38fd1498Szrj 	  res = BRIG_TYPE_U8;
765*38fd1498Szrj 	}
766*38fd1498Szrj       else
767*38fd1498Szrj 	res = hsa_type_for_scalar_tree_type (type, false);
768*38fd1498Szrj 
769*38fd1498Szrj       if (dim_p)
770*38fd1498Szrj 	*dim_p = dim;
771*38fd1498Szrj       return res | BRIG_TYPE_ARRAY;
772*38fd1498Szrj     }
773*38fd1498Szrj 
774*38fd1498Szrj   /* Scalar case: */
775*38fd1498Szrj   if (dim_p)
776*38fd1498Szrj     *dim_p = 0;
777*38fd1498Szrj 
778*38fd1498Szrj   return hsa_type_for_scalar_tree_type (type, min32int);
779*38fd1498Szrj }
780*38fd1498Szrj 
781*38fd1498Szrj /* Returns true if converting from STYPE into DTYPE needs the _CVT
782*38fd1498Szrj    opcode.  If false a normal _MOV is enough.  */
783*38fd1498Szrj 
784*38fd1498Szrj static bool
hsa_needs_cvt(BrigType16_t dtype,BrigType16_t stype)785*38fd1498Szrj hsa_needs_cvt (BrigType16_t dtype, BrigType16_t stype)
786*38fd1498Szrj {
787*38fd1498Szrj   if (hsa_btype_p (dtype))
788*38fd1498Szrj     return false;
789*38fd1498Szrj 
790*38fd1498Szrj   /* float <-> int conversions are real converts.  */
791*38fd1498Szrj   if (hsa_type_float_p (dtype) != hsa_type_float_p (stype))
792*38fd1498Szrj     return true;
793*38fd1498Szrj   /* When both types have different size, then we need CVT as well.  */
794*38fd1498Szrj   if (hsa_type_bit_size (dtype) != hsa_type_bit_size (stype))
795*38fd1498Szrj     return true;
796*38fd1498Szrj   return false;
797*38fd1498Szrj }
798*38fd1498Szrj 
799*38fd1498Szrj /* Return declaration name if it exists or create one from UID if it does not.
800*38fd1498Szrj    If DECL is a local variable, make UID part of its name.  */
801*38fd1498Szrj 
802*38fd1498Szrj const char *
hsa_get_declaration_name(tree decl)803*38fd1498Szrj hsa_get_declaration_name (tree decl)
804*38fd1498Szrj {
805*38fd1498Szrj   if (!DECL_NAME (decl))
806*38fd1498Szrj     {
807*38fd1498Szrj       char buf[64];
808*38fd1498Szrj       snprintf (buf, 64, "__hsa_anon_%u", DECL_UID (decl));
809*38fd1498Szrj       size_t len = strlen (buf);
810*38fd1498Szrj       char *copy = (char *) obstack_alloc (&hsa_obstack, len + 1);
811*38fd1498Szrj       memcpy (copy, buf, len + 1);
812*38fd1498Szrj       return copy;
813*38fd1498Szrj     }
814*38fd1498Szrj 
815*38fd1498Szrj   tree name_tree;
816*38fd1498Szrj   if (TREE_CODE (decl) == FUNCTION_DECL
817*38fd1498Szrj       || (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)))
818*38fd1498Szrj     name_tree = DECL_ASSEMBLER_NAME (decl);
819*38fd1498Szrj   else
820*38fd1498Szrj     name_tree = DECL_NAME (decl);
821*38fd1498Szrj 
822*38fd1498Szrj   const char *name = IDENTIFIER_POINTER (name_tree);
823*38fd1498Szrj   /* User-defined assembly names have prepended asterisk symbol.  */
824*38fd1498Szrj   if (name[0] == '*')
825*38fd1498Szrj     name++;
826*38fd1498Szrj 
827*38fd1498Szrj   if ((TREE_CODE (decl) == VAR_DECL)
828*38fd1498Szrj       && decl_function_context (decl))
829*38fd1498Szrj     {
830*38fd1498Szrj       size_t len = strlen (name);
831*38fd1498Szrj       char *buf = (char *) alloca (len + 32);
832*38fd1498Szrj       snprintf (buf, len + 32, "%s_%u", name, DECL_UID (decl));
833*38fd1498Szrj       len = strlen (buf);
834*38fd1498Szrj       char *copy = (char *) obstack_alloc (&hsa_obstack, len + 1);
835*38fd1498Szrj       memcpy (copy, buf, len + 1);
836*38fd1498Szrj       return copy;
837*38fd1498Szrj     }
838*38fd1498Szrj   else
839*38fd1498Szrj     return name;
840*38fd1498Szrj }
841*38fd1498Szrj 
842*38fd1498Szrj /* Lookup or create the associated hsa_symbol structure with a given VAR_DECL
843*38fd1498Szrj    or lookup the hsa_structure corresponding to a PARM_DECL.  */
844*38fd1498Szrj 
845*38fd1498Szrj static hsa_symbol *
get_symbol_for_decl(tree decl)846*38fd1498Szrj get_symbol_for_decl (tree decl)
847*38fd1498Szrj {
848*38fd1498Szrj   hsa_symbol **slot;
849*38fd1498Szrj   hsa_symbol dummy (BRIG_TYPE_NONE, BRIG_SEGMENT_NONE, BRIG_LINKAGE_NONE);
850*38fd1498Szrj 
851*38fd1498Szrj   gcc_assert (TREE_CODE (decl) == PARM_DECL
852*38fd1498Szrj 	      || TREE_CODE (decl) == RESULT_DECL
853*38fd1498Szrj 	      || TREE_CODE (decl) == VAR_DECL
854*38fd1498Szrj 	      || TREE_CODE (decl) == CONST_DECL);
855*38fd1498Szrj 
856*38fd1498Szrj   dummy.m_decl = decl;
857*38fd1498Szrj 
858*38fd1498Szrj   bool is_in_global_vars = ((TREE_CODE (decl) == VAR_DECL)
859*38fd1498Szrj 			    && !decl_function_context (decl));
860*38fd1498Szrj 
861*38fd1498Szrj   if (is_in_global_vars)
862*38fd1498Szrj     slot = hsa_global_variable_symbols->find_slot (&dummy, INSERT);
863*38fd1498Szrj   else
864*38fd1498Szrj     slot = hsa_cfun->m_local_symbols->find_slot (&dummy, INSERT);
865*38fd1498Szrj 
866*38fd1498Szrj   gcc_checking_assert (slot);
867*38fd1498Szrj   if (*slot)
868*38fd1498Szrj     {
869*38fd1498Szrj       hsa_symbol *sym = (*slot);
870*38fd1498Szrj 
871*38fd1498Szrj       /* If the symbol is problematic, mark current function also as
872*38fd1498Szrj 	 problematic.  */
873*38fd1498Szrj       if (sym->m_seen_error)
874*38fd1498Szrj 	hsa_fail_cfun ();
875*38fd1498Szrj 
876*38fd1498Szrj       /* PR hsa/70234: If a global variable was marked to be emitted,
877*38fd1498Szrj 	 but HSAIL generation of a function using the variable fails,
878*38fd1498Szrj 	 we should retry to emit the variable in context of a different
879*38fd1498Szrj 	 function.
880*38fd1498Szrj 
881*38fd1498Szrj 	 Iterate elements whether a symbol is already in m_global_symbols
882*38fd1498Szrj 	 of not.  */
883*38fd1498Szrj         if (is_in_global_vars && !sym->m_emitted_to_brig)
884*38fd1498Szrj 	  {
885*38fd1498Szrj 	    for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++)
886*38fd1498Szrj 	      if (hsa_cfun->m_global_symbols[i] == sym)
887*38fd1498Szrj 		return *slot;
888*38fd1498Szrj 	    hsa_cfun->m_global_symbols.safe_push (sym);
889*38fd1498Szrj 	  }
890*38fd1498Szrj 
891*38fd1498Szrj       return *slot;
892*38fd1498Szrj     }
893*38fd1498Szrj   else
894*38fd1498Szrj     {
895*38fd1498Szrj       hsa_symbol *sym;
896*38fd1498Szrj       /* PARM_DECLs and RESULT_DECL should be already in m_local_symbols.  */
897*38fd1498Szrj       gcc_assert (TREE_CODE (decl) == VAR_DECL
898*38fd1498Szrj 		  || TREE_CODE (decl) == CONST_DECL);
899*38fd1498Szrj       BrigAlignment8_t align = hsa_object_alignment (decl);
900*38fd1498Szrj 
901*38fd1498Szrj       if (is_in_global_vars)
902*38fd1498Szrj 	{
903*38fd1498Szrj 	  gcc_checking_assert (TREE_CODE (decl) != CONST_DECL);
904*38fd1498Szrj 	  sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_GLOBAL,
905*38fd1498Szrj 				BRIG_LINKAGE_PROGRAM, true,
906*38fd1498Szrj 				BRIG_ALLOCATION_PROGRAM, align);
907*38fd1498Szrj 	  hsa_cfun->m_global_symbols.safe_push (sym);
908*38fd1498Szrj 	  sym->fillup_for_decl (decl);
909*38fd1498Szrj 	  if (sym->m_align > align)
910*38fd1498Szrj 	    {
911*38fd1498Szrj 	      sym->m_seen_error = true;
912*38fd1498Szrj 	      HSA_SORRY_ATV (EXPR_LOCATION (decl),
913*38fd1498Szrj 			     "HSA specification requires that %E is at least "
914*38fd1498Szrj 			     "naturally aligned", decl);
915*38fd1498Szrj 	    }
916*38fd1498Szrj 	}
917*38fd1498Szrj       else
918*38fd1498Szrj 	{
919*38fd1498Szrj 	  /* As generation of efficient memory copy instructions relies
920*38fd1498Szrj 	     on alignment greater or equal to 8 bytes,
921*38fd1498Szrj 	     we need to increase alignment of all aggregate types.. */
922*38fd1498Szrj 	  if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
923*38fd1498Szrj 	    align = MAX ((BrigAlignment8_t) BRIG_ALIGNMENT_8, align);
924*38fd1498Szrj 
925*38fd1498Szrj 	  BrigAllocation allocation = BRIG_ALLOCATION_AUTOMATIC;
926*38fd1498Szrj 	  BrigSegment8_t segment;
927*38fd1498Szrj 	  if (TREE_CODE (decl) == CONST_DECL)
928*38fd1498Szrj 	    {
929*38fd1498Szrj 	      segment = BRIG_SEGMENT_READONLY;
930*38fd1498Szrj 	      allocation = BRIG_ALLOCATION_AGENT;
931*38fd1498Szrj 	    }
932*38fd1498Szrj 	  else if (lookup_attribute ("hsa_group_segment",
933*38fd1498Szrj 				     DECL_ATTRIBUTES (decl)))
934*38fd1498Szrj 	    segment = BRIG_SEGMENT_GROUP;
935*38fd1498Szrj 	  else if (TREE_STATIC (decl))
936*38fd1498Szrj 	    {
937*38fd1498Szrj 	      segment = BRIG_SEGMENT_GLOBAL;
938*38fd1498Szrj 	      allocation = BRIG_ALLOCATION_PROGRAM;
939*38fd1498Szrj 	    }
940*38fd1498Szrj 	  else if (lookup_attribute ("hsa_global_segment",
941*38fd1498Szrj 				     DECL_ATTRIBUTES (decl)))
942*38fd1498Szrj 	    segment = BRIG_SEGMENT_GLOBAL;
943*38fd1498Szrj 	  else
944*38fd1498Szrj 	    segment = BRIG_SEGMENT_PRIVATE;
945*38fd1498Szrj 
946*38fd1498Szrj 	  sym = new hsa_symbol (BRIG_TYPE_NONE, segment, BRIG_LINKAGE_FUNCTION,
947*38fd1498Szrj 				false, allocation, align);
948*38fd1498Szrj 	  sym->fillup_for_decl (decl);
949*38fd1498Szrj 	  hsa_cfun->m_private_variables.safe_push (sym);
950*38fd1498Szrj 	}
951*38fd1498Szrj 
952*38fd1498Szrj       sym->m_name = hsa_get_declaration_name (decl);
953*38fd1498Szrj       *slot = sym;
954*38fd1498Szrj       return sym;
955*38fd1498Szrj     }
956*38fd1498Szrj }
957*38fd1498Szrj 
958*38fd1498Szrj /* For a given HSA function declaration, return a host
959*38fd1498Szrj    function declaration.  */
960*38fd1498Szrj 
961*38fd1498Szrj tree
hsa_get_host_function(tree decl)962*38fd1498Szrj hsa_get_host_function (tree decl)
963*38fd1498Szrj {
964*38fd1498Szrj   hsa_function_summary *s
965*38fd1498Szrj     = hsa_summaries->get (cgraph_node::get_create (decl));
966*38fd1498Szrj   gcc_assert (s->m_kind != HSA_NONE);
967*38fd1498Szrj   gcc_assert (s->m_gpu_implementation_p);
968*38fd1498Szrj 
969*38fd1498Szrj   return s->m_bound_function ? s->m_bound_function->decl : NULL;
970*38fd1498Szrj }
971*38fd1498Szrj 
972*38fd1498Szrj /* Return true if function DECL has a host equivalent function.  */
973*38fd1498Szrj 
974*38fd1498Szrj static char *
get_brig_function_name(tree decl)975*38fd1498Szrj get_brig_function_name (tree decl)
976*38fd1498Szrj {
977*38fd1498Szrj   tree d = decl;
978*38fd1498Szrj 
979*38fd1498Szrj   hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (d));
980*38fd1498Szrj   if (s->m_kind != HSA_NONE
981*38fd1498Szrj       && s->m_gpu_implementation_p
982*38fd1498Szrj       && s->m_bound_function)
983*38fd1498Szrj     d = s->m_bound_function->decl;
984*38fd1498Szrj 
985*38fd1498Szrj   /* IPA split can create a function that has no host equivalent.  */
986*38fd1498Szrj   if (d == NULL)
987*38fd1498Szrj     d = decl;
988*38fd1498Szrj 
989*38fd1498Szrj   char *name = xstrdup (hsa_get_declaration_name (d));
990*38fd1498Szrj   hsa_sanitize_name (name);
991*38fd1498Szrj 
992*38fd1498Szrj   return name;
993*38fd1498Szrj }
994*38fd1498Szrj 
995*38fd1498Szrj /* Create a spill symbol of type TYPE.  */
996*38fd1498Szrj 
997*38fd1498Szrj hsa_symbol *
hsa_get_spill_symbol(BrigType16_t type)998*38fd1498Szrj hsa_get_spill_symbol (BrigType16_t type)
999*38fd1498Szrj {
1000*38fd1498Szrj   hsa_symbol *sym = new hsa_symbol (type, BRIG_SEGMENT_SPILL,
1001*38fd1498Szrj 				    BRIG_LINKAGE_FUNCTION);
1002*38fd1498Szrj   hsa_cfun->m_spill_symbols.safe_push (sym);
1003*38fd1498Szrj   return sym;
1004*38fd1498Szrj }
1005*38fd1498Szrj 
1006*38fd1498Szrj /* Create a symbol for a read-only string constant.  */
1007*38fd1498Szrj hsa_symbol *
hsa_get_string_cst_symbol(tree string_cst)1008*38fd1498Szrj hsa_get_string_cst_symbol (tree string_cst)
1009*38fd1498Szrj {
1010*38fd1498Szrj   gcc_checking_assert (TREE_CODE (string_cst) == STRING_CST);
1011*38fd1498Szrj 
1012*38fd1498Szrj   hsa_symbol **slot = hsa_cfun->m_string_constants_map.get (string_cst);
1013*38fd1498Szrj   if (slot)
1014*38fd1498Szrj     return *slot;
1015*38fd1498Szrj 
1016*38fd1498Szrj   hsa_op_immed *cst = new hsa_op_immed (string_cst);
1017*38fd1498Szrj   hsa_symbol *sym = new hsa_symbol (cst->m_type, BRIG_SEGMENT_GLOBAL,
1018*38fd1498Szrj 				    BRIG_LINKAGE_MODULE, true,
1019*38fd1498Szrj 				    BRIG_ALLOCATION_AGENT);
1020*38fd1498Szrj   sym->m_cst_value = cst;
1021*38fd1498Szrj   sym->m_dim = TREE_STRING_LENGTH (string_cst);
1022*38fd1498Szrj   sym->m_name_number = hsa_cfun->m_global_symbols.length ();
1023*38fd1498Szrj 
1024*38fd1498Szrj   hsa_cfun->m_global_symbols.safe_push (sym);
1025*38fd1498Szrj   hsa_cfun->m_string_constants_map.put (string_cst, sym);
1026*38fd1498Szrj   return sym;
1027*38fd1498Szrj }
1028*38fd1498Szrj 
1029*38fd1498Szrj /* Make the type of a MOV instruction larger if mandated by HSAIL rules.  */
1030*38fd1498Szrj 
1031*38fd1498Szrj static void
hsa_fixup_mov_insn_type(hsa_insn_basic * insn)1032*38fd1498Szrj hsa_fixup_mov_insn_type (hsa_insn_basic *insn)
1033*38fd1498Szrj {
1034*38fd1498Szrj   insn->m_type = hsa_extend_inttype_to_32bit (insn->m_type);
1035*38fd1498Szrj   if (insn->m_type == BRIG_TYPE_B8 || insn->m_type == BRIG_TYPE_B16)
1036*38fd1498Szrj     insn->m_type = BRIG_TYPE_B32;
1037*38fd1498Szrj }
1038*38fd1498Szrj 
1039*38fd1498Szrj /* Constructor of the ancestor of all operands.  K is BRIG kind that identified
1040*38fd1498Szrj    what the operator is.  */
1041*38fd1498Szrj 
hsa_op_base(BrigKind16_t k)1042*38fd1498Szrj hsa_op_base::hsa_op_base (BrigKind16_t k)
1043*38fd1498Szrj   : m_next (NULL), m_brig_op_offset (0), m_kind (k)
1044*38fd1498Szrj {
1045*38fd1498Szrj   hsa_operands.safe_push (this);
1046*38fd1498Szrj }
1047*38fd1498Szrj 
1048*38fd1498Szrj /* Constructor of ancestor of all operands which have a type.  K is BRIG kind
1049*38fd1498Szrj    that identified what the operator is.  T is the type of the operator.  */
1050*38fd1498Szrj 
hsa_op_with_type(BrigKind16_t k,BrigType16_t t)1051*38fd1498Szrj hsa_op_with_type::hsa_op_with_type (BrigKind16_t k, BrigType16_t t)
1052*38fd1498Szrj   : hsa_op_base (k), m_type (t)
1053*38fd1498Szrj {
1054*38fd1498Szrj }
1055*38fd1498Szrj 
1056*38fd1498Szrj hsa_op_with_type *
get_in_type(BrigType16_t dtype,hsa_bb * hbb)1057*38fd1498Szrj hsa_op_with_type::get_in_type (BrigType16_t dtype, hsa_bb *hbb)
1058*38fd1498Szrj {
1059*38fd1498Szrj   if (m_type == dtype)
1060*38fd1498Szrj     return this;
1061*38fd1498Szrj 
1062*38fd1498Szrj   hsa_op_reg *dest;
1063*38fd1498Szrj 
1064*38fd1498Szrj   if (hsa_needs_cvt (dtype, m_type))
1065*38fd1498Szrj     {
1066*38fd1498Szrj       dest = new hsa_op_reg (dtype);
1067*38fd1498Szrj       hbb->append_insn (new hsa_insn_cvt (dest, this));
1068*38fd1498Szrj     }
1069*38fd1498Szrj   else if (is_a <hsa_op_reg *> (this))
1070*38fd1498Szrj     {
1071*38fd1498Szrj       /* In the end, HSA registers do not really have types, only sizes, so if
1072*38fd1498Szrj 	 the sizes match, we can use the register directly.  */
1073*38fd1498Szrj       gcc_checking_assert (hsa_type_bit_size (dtype)
1074*38fd1498Szrj 			   == hsa_type_bit_size (m_type));
1075*38fd1498Szrj       return this;
1076*38fd1498Szrj     }
1077*38fd1498Szrj   else
1078*38fd1498Szrj     {
1079*38fd1498Szrj       dest = new hsa_op_reg (m_type);
1080*38fd1498Szrj 
1081*38fd1498Szrj       hsa_insn_basic *mov = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
1082*38fd1498Szrj 						dest->m_type, dest, this);
1083*38fd1498Szrj       hsa_fixup_mov_insn_type (mov);
1084*38fd1498Szrj       hbb->append_insn (mov);
1085*38fd1498Szrj       /* We cannot simply for instance: 'mov_u32 $_3, 48 (s32)' because
1086*38fd1498Szrj 	 type of the operand must be same as type of the instruction.  */
1087*38fd1498Szrj       dest->m_type = dtype;
1088*38fd1498Szrj     }
1089*38fd1498Szrj 
1090*38fd1498Szrj   return dest;
1091*38fd1498Szrj }
1092*38fd1498Szrj 
1093*38fd1498Szrj /* If this operand has integer type smaller than 32 bits, extend it to 32 bits,
1094*38fd1498Szrj    adding instructions to HBB if needed.  */
1095*38fd1498Szrj 
1096*38fd1498Szrj hsa_op_with_type *
extend_int_to_32bit(hsa_bb * hbb)1097*38fd1498Szrj hsa_op_with_type::extend_int_to_32bit (hsa_bb *hbb)
1098*38fd1498Szrj {
1099*38fd1498Szrj   if (m_type == BRIG_TYPE_U8 || m_type == BRIG_TYPE_U16)
1100*38fd1498Szrj     return get_in_type (BRIG_TYPE_U32, hbb);
1101*38fd1498Szrj   else if (m_type == BRIG_TYPE_S8 || m_type == BRIG_TYPE_S16)
1102*38fd1498Szrj     return get_in_type (BRIG_TYPE_S32, hbb);
1103*38fd1498Szrj   else
1104*38fd1498Szrj     return this;
1105*38fd1498Szrj }
1106*38fd1498Szrj 
1107*38fd1498Szrj /* Constructor of class representing HSA immediate values.  TREE_VAL is the
1108*38fd1498Szrj    tree representation of the immediate value.  If min32int is true,
1109*38fd1498Szrj    always expand integer types to one that has at least 32 bits.  */
1110*38fd1498Szrj 
hsa_op_immed(tree tree_val,bool min32int)1111*38fd1498Szrj hsa_op_immed::hsa_op_immed (tree tree_val, bool min32int)
1112*38fd1498Szrj   : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES,
1113*38fd1498Szrj 		      hsa_type_for_tree_type (TREE_TYPE (tree_val), NULL,
1114*38fd1498Szrj 					      min32int))
1115*38fd1498Szrj {
1116*38fd1498Szrj   if (hsa_seen_error ())
1117*38fd1498Szrj     return;
1118*38fd1498Szrj 
1119*38fd1498Szrj   gcc_checking_assert ((is_gimple_min_invariant (tree_val)
1120*38fd1498Szrj 		       && (!POINTER_TYPE_P (TREE_TYPE (tree_val))
1121*38fd1498Szrj 			   || TREE_CODE (tree_val) == INTEGER_CST))
1122*38fd1498Szrj 		       || TREE_CODE (tree_val) == CONSTRUCTOR);
1123*38fd1498Szrj   m_tree_value = tree_val;
1124*38fd1498Szrj 
1125*38fd1498Szrj   /* Verify that all elements of a constructor are constants.  */
1126*38fd1498Szrj   if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
1127*38fd1498Szrj     for (unsigned i = 0; i < CONSTRUCTOR_NELTS (m_tree_value); i++)
1128*38fd1498Szrj       {
1129*38fd1498Szrj 	tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1130*38fd1498Szrj 	if (!CONSTANT_CLASS_P (v))
1131*38fd1498Szrj 	  {
1132*38fd1498Szrj 	    HSA_SORRY_AT (EXPR_LOCATION (tree_val),
1133*38fd1498Szrj 			  "HSA ctor should have only constants");
1134*38fd1498Szrj 	    return;
1135*38fd1498Szrj 	  }
1136*38fd1498Szrj       }
1137*38fd1498Szrj }
1138*38fd1498Szrj 
1139*38fd1498Szrj /* Constructor of class representing HSA immediate values.  INTEGER_VALUE is the
1140*38fd1498Szrj    integer representation of the immediate value.  TYPE is BRIG type.  */
1141*38fd1498Szrj 
hsa_op_immed(HOST_WIDE_INT integer_value,BrigType16_t type)1142*38fd1498Szrj hsa_op_immed::hsa_op_immed (HOST_WIDE_INT integer_value, BrigType16_t type)
1143*38fd1498Szrj   : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES, type),
1144*38fd1498Szrj     m_tree_value (NULL)
1145*38fd1498Szrj {
1146*38fd1498Szrj   gcc_assert (hsa_type_integer_p (type));
1147*38fd1498Szrj   m_int_value = integer_value;
1148*38fd1498Szrj }
1149*38fd1498Szrj 
hsa_op_immed()1150*38fd1498Szrj hsa_op_immed::hsa_op_immed ()
1151*38fd1498Szrj   : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE)
1152*38fd1498Szrj {
1153*38fd1498Szrj }
1154*38fd1498Szrj 
1155*38fd1498Szrj /* New operator to allocate immediate operands from obstack.  */
1156*38fd1498Szrj 
1157*38fd1498Szrj void *
new(size_t size)1158*38fd1498Szrj hsa_op_immed::operator new (size_t size)
1159*38fd1498Szrj {
1160*38fd1498Szrj   return obstack_alloc (&hsa_obstack, size);
1161*38fd1498Szrj }
1162*38fd1498Szrj 
1163*38fd1498Szrj /* Destructor.  */
1164*38fd1498Szrj 
~hsa_op_immed()1165*38fd1498Szrj hsa_op_immed::~hsa_op_immed ()
1166*38fd1498Szrj {
1167*38fd1498Szrj }
1168*38fd1498Szrj 
1169*38fd1498Szrj /* Change type of the immediate value to T.  */
1170*38fd1498Szrj 
1171*38fd1498Szrj void
set_type(BrigType16_t t)1172*38fd1498Szrj hsa_op_immed::set_type (BrigType16_t t)
1173*38fd1498Szrj {
1174*38fd1498Szrj   m_type = t;
1175*38fd1498Szrj }
1176*38fd1498Szrj 
1177*38fd1498Szrj /* Constructor of class representing HSA registers and pseudo-registers.  T is
1178*38fd1498Szrj    the BRIG type of the new register.  */
1179*38fd1498Szrj 
hsa_op_reg(BrigType16_t t)1180*38fd1498Szrj hsa_op_reg::hsa_op_reg (BrigType16_t t)
1181*38fd1498Szrj   : hsa_op_with_type (BRIG_KIND_OPERAND_REGISTER, t), m_gimple_ssa (NULL_TREE),
1182*38fd1498Szrj     m_def_insn (NULL), m_spill_sym (NULL), m_order (hsa_cfun->m_reg_count++),
1183*38fd1498Szrj     m_lr_begin (0), m_lr_end (0), m_reg_class (0), m_hard_num (0)
1184*38fd1498Szrj {
1185*38fd1498Szrj }
1186*38fd1498Szrj 
1187*38fd1498Szrj /* New operator to allocate a register from obstack.  */
1188*38fd1498Szrj 
1189*38fd1498Szrj void *
new(size_t size)1190*38fd1498Szrj hsa_op_reg::operator new (size_t size)
1191*38fd1498Szrj {
1192*38fd1498Szrj   return obstack_alloc (&hsa_obstack, size);
1193*38fd1498Szrj }
1194*38fd1498Szrj 
1195*38fd1498Szrj /* Verify register operand.  */
1196*38fd1498Szrj 
1197*38fd1498Szrj void
verify_ssa()1198*38fd1498Szrj hsa_op_reg::verify_ssa ()
1199*38fd1498Szrj {
1200*38fd1498Szrj   /* Verify that each HSA register has a definition assigned.
1201*38fd1498Szrj      Exceptions are VAR_DECL and PARM_DECL that are a default
1202*38fd1498Szrj      definition.  */
1203*38fd1498Szrj   gcc_checking_assert (m_def_insn
1204*38fd1498Szrj 		       || (m_gimple_ssa != NULL
1205*38fd1498Szrj 			   && (!SSA_NAME_VAR (m_gimple_ssa)
1206*38fd1498Szrj 			       || (TREE_CODE (SSA_NAME_VAR (m_gimple_ssa))
1207*38fd1498Szrj 				   != PARM_DECL))
1208*38fd1498Szrj 			   && SSA_NAME_IS_DEFAULT_DEF (m_gimple_ssa)));
1209*38fd1498Szrj 
1210*38fd1498Szrj   /* Verify that every use of the register is really present
1211*38fd1498Szrj      in an instruction.  */
1212*38fd1498Szrj   for (unsigned i = 0; i < m_uses.length (); i++)
1213*38fd1498Szrj     {
1214*38fd1498Szrj       hsa_insn_basic *use = m_uses[i];
1215*38fd1498Szrj 
1216*38fd1498Szrj       bool is_visited = false;
1217*38fd1498Szrj       for (unsigned j = 0; j < use->operand_count (); j++)
1218*38fd1498Szrj 	{
1219*38fd1498Szrj 	  hsa_op_base *u = use->get_op (j);
1220*38fd1498Szrj 	  hsa_op_address *addr; addr = dyn_cast <hsa_op_address *> (u);
1221*38fd1498Szrj 	  if (addr && addr->m_reg)
1222*38fd1498Szrj 	    u = addr->m_reg;
1223*38fd1498Szrj 
1224*38fd1498Szrj 	  if (u == this)
1225*38fd1498Szrj 	    {
1226*38fd1498Szrj 	      bool r = !addr && use->op_output_p (j);
1227*38fd1498Szrj 
1228*38fd1498Szrj 	      if (r)
1229*38fd1498Szrj 		{
1230*38fd1498Szrj 		  error ("HSA SSA name defined by instruction that is supposed "
1231*38fd1498Szrj 			 "to be using it");
1232*38fd1498Szrj 		  debug_hsa_operand (this);
1233*38fd1498Szrj 		  debug_hsa_insn (use);
1234*38fd1498Szrj 		  internal_error ("HSA SSA verification failed");
1235*38fd1498Szrj 		}
1236*38fd1498Szrj 
1237*38fd1498Szrj 	      is_visited = true;
1238*38fd1498Szrj 	    }
1239*38fd1498Szrj 	}
1240*38fd1498Szrj 
1241*38fd1498Szrj       if (!is_visited)
1242*38fd1498Szrj 	{
1243*38fd1498Szrj 	  error ("HSA SSA name not among operands of instruction that is "
1244*38fd1498Szrj 		 "supposed to use it");
1245*38fd1498Szrj 	  debug_hsa_operand (this);
1246*38fd1498Szrj 	  debug_hsa_insn (use);
1247*38fd1498Szrj 	  internal_error ("HSA SSA verification failed");
1248*38fd1498Szrj 	}
1249*38fd1498Szrj     }
1250*38fd1498Szrj }
1251*38fd1498Szrj 
hsa_op_address(hsa_symbol * sym,hsa_op_reg * r,HOST_WIDE_INT offset)1252*38fd1498Szrj hsa_op_address::hsa_op_address (hsa_symbol *sym, hsa_op_reg *r,
1253*38fd1498Szrj 				HOST_WIDE_INT offset)
1254*38fd1498Szrj   : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (r),
1255*38fd1498Szrj     m_imm_offset (offset)
1256*38fd1498Szrj {
1257*38fd1498Szrj }
1258*38fd1498Szrj 
hsa_op_address(hsa_symbol * sym,HOST_WIDE_INT offset)1259*38fd1498Szrj hsa_op_address::hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset)
1260*38fd1498Szrj   : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (NULL),
1261*38fd1498Szrj     m_imm_offset (offset)
1262*38fd1498Szrj {
1263*38fd1498Szrj }
1264*38fd1498Szrj 
hsa_op_address(hsa_op_reg * r,HOST_WIDE_INT offset)1265*38fd1498Szrj hsa_op_address::hsa_op_address (hsa_op_reg *r, HOST_WIDE_INT offset)
1266*38fd1498Szrj   : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (NULL), m_reg (r),
1267*38fd1498Szrj     m_imm_offset (offset)
1268*38fd1498Szrj {
1269*38fd1498Szrj }
1270*38fd1498Szrj 
1271*38fd1498Szrj /* New operator to allocate address operands from obstack.  */
1272*38fd1498Szrj 
1273*38fd1498Szrj void *
new(size_t size)1274*38fd1498Szrj hsa_op_address::operator new (size_t size)
1275*38fd1498Szrj {
1276*38fd1498Szrj   return obstack_alloc (&hsa_obstack, size);
1277*38fd1498Szrj }
1278*38fd1498Szrj 
1279*38fd1498Szrj /* Constructor of an operand referring to HSAIL code.  */
1280*38fd1498Szrj 
hsa_op_code_ref()1281*38fd1498Szrj hsa_op_code_ref::hsa_op_code_ref () : hsa_op_base (BRIG_KIND_OPERAND_CODE_REF),
1282*38fd1498Szrj   m_directive_offset (0)
1283*38fd1498Szrj {
1284*38fd1498Szrj }
1285*38fd1498Szrj 
1286*38fd1498Szrj /* Constructor of an operand representing a code list.  Set it up so that it
1287*38fd1498Szrj    can contain ELEMENTS number of elements.  */
1288*38fd1498Szrj 
hsa_op_code_list(unsigned elements)1289*38fd1498Szrj hsa_op_code_list::hsa_op_code_list (unsigned elements)
1290*38fd1498Szrj   : hsa_op_base (BRIG_KIND_OPERAND_CODE_LIST)
1291*38fd1498Szrj {
1292*38fd1498Szrj   m_offsets.create (1);
1293*38fd1498Szrj   m_offsets.safe_grow_cleared (elements);
1294*38fd1498Szrj }
1295*38fd1498Szrj 
1296*38fd1498Szrj /* New operator to allocate code list operands from obstack.  */
1297*38fd1498Szrj 
1298*38fd1498Szrj void *
new(size_t size)1299*38fd1498Szrj hsa_op_code_list::operator new (size_t size)
1300*38fd1498Szrj {
1301*38fd1498Szrj   return obstack_alloc (&hsa_obstack, size);
1302*38fd1498Szrj }
1303*38fd1498Szrj 
1304*38fd1498Szrj /* Constructor of an operand representing an operand list.
1305*38fd1498Szrj    Set it up so that it can contain ELEMENTS number of elements.  */
1306*38fd1498Szrj 
hsa_op_operand_list(unsigned elements)1307*38fd1498Szrj hsa_op_operand_list::hsa_op_operand_list (unsigned elements)
1308*38fd1498Szrj   : hsa_op_base (BRIG_KIND_OPERAND_OPERAND_LIST)
1309*38fd1498Szrj {
1310*38fd1498Szrj   m_offsets.create (elements);
1311*38fd1498Szrj   m_offsets.safe_grow (elements);
1312*38fd1498Szrj }
1313*38fd1498Szrj 
1314*38fd1498Szrj /* New operator to allocate operand list operands from obstack.  */
1315*38fd1498Szrj 
1316*38fd1498Szrj void *
new(size_t size)1317*38fd1498Szrj hsa_op_operand_list::operator new (size_t size)
1318*38fd1498Szrj {
1319*38fd1498Szrj   return obstack_alloc (&hsa_obstack, size);
1320*38fd1498Szrj }
1321*38fd1498Szrj 
~hsa_op_operand_list()1322*38fd1498Szrj hsa_op_operand_list::~hsa_op_operand_list ()
1323*38fd1498Szrj {
1324*38fd1498Szrj   m_offsets.release ();
1325*38fd1498Szrj }
1326*38fd1498Szrj 
1327*38fd1498Szrj 
1328*38fd1498Szrj hsa_op_reg *
reg_for_gimple_ssa(tree ssa)1329*38fd1498Szrj hsa_function_representation::reg_for_gimple_ssa (tree ssa)
1330*38fd1498Szrj {
1331*38fd1498Szrj   hsa_op_reg *hreg;
1332*38fd1498Szrj 
1333*38fd1498Szrj   gcc_checking_assert (TREE_CODE (ssa) == SSA_NAME);
1334*38fd1498Szrj   if (m_ssa_map[SSA_NAME_VERSION (ssa)])
1335*38fd1498Szrj     return m_ssa_map[SSA_NAME_VERSION (ssa)];
1336*38fd1498Szrj 
1337*38fd1498Szrj   hreg = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (ssa),
1338*38fd1498Szrj 							false));
1339*38fd1498Szrj   hreg->m_gimple_ssa = ssa;
1340*38fd1498Szrj   m_ssa_map[SSA_NAME_VERSION (ssa)] = hreg;
1341*38fd1498Szrj 
1342*38fd1498Szrj   return hreg;
1343*38fd1498Szrj }
1344*38fd1498Szrj 
1345*38fd1498Szrj void
set_definition(hsa_insn_basic * insn)1346*38fd1498Szrj hsa_op_reg::set_definition (hsa_insn_basic *insn)
1347*38fd1498Szrj {
1348*38fd1498Szrj   if (hsa_cfun->m_in_ssa)
1349*38fd1498Szrj     {
1350*38fd1498Szrj       gcc_checking_assert (!m_def_insn);
1351*38fd1498Szrj       m_def_insn = insn;
1352*38fd1498Szrj     }
1353*38fd1498Szrj   else
1354*38fd1498Szrj     m_def_insn = NULL;
1355*38fd1498Szrj }
1356*38fd1498Szrj 
1357*38fd1498Szrj /* Constructor of the class which is the bases of all instructions and directly
1358*38fd1498Szrj    represents the most basic ones.  NOPS is the number of operands that the
1359*38fd1498Szrj    operand vector will contain (and which will be cleared).  OP is the opcode
1360*38fd1498Szrj    of the instruction.  This constructor does not set type.  */
1361*38fd1498Szrj 
hsa_insn_basic(unsigned nops,int opc)1362*38fd1498Szrj hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc)
1363*38fd1498Szrj   : m_prev (NULL),
1364*38fd1498Szrj     m_next (NULL), m_bb (NULL), m_opcode (opc), m_number (0),
1365*38fd1498Szrj     m_type (BRIG_TYPE_NONE), m_brig_offset (0)
1366*38fd1498Szrj {
1367*38fd1498Szrj   if (nops > 0)
1368*38fd1498Szrj     m_operands.safe_grow_cleared (nops);
1369*38fd1498Szrj 
1370*38fd1498Szrj   hsa_instructions.safe_push (this);
1371*38fd1498Szrj }
1372*38fd1498Szrj 
1373*38fd1498Szrj /* Make OP the operand number INDEX of operands of this instruction.  If OP is a
1374*38fd1498Szrj    register or an address containing a register, then either set the definition
1375*38fd1498Szrj    of the register to this instruction if it an output operand or add this
1376*38fd1498Szrj    instruction to the uses if it is an input one.  */
1377*38fd1498Szrj 
1378*38fd1498Szrj void
set_op(int index,hsa_op_base * op)1379*38fd1498Szrj hsa_insn_basic::set_op (int index, hsa_op_base *op)
1380*38fd1498Szrj {
1381*38fd1498Szrj   /* Each address operand is always use.  */
1382*38fd1498Szrj   hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
1383*38fd1498Szrj   if (addr && addr->m_reg)
1384*38fd1498Szrj     addr->m_reg->m_uses.safe_push (this);
1385*38fd1498Szrj   else
1386*38fd1498Szrj     {
1387*38fd1498Szrj       hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
1388*38fd1498Szrj       if (reg)
1389*38fd1498Szrj 	{
1390*38fd1498Szrj 	  if (op_output_p (index))
1391*38fd1498Szrj 	    reg->set_definition (this);
1392*38fd1498Szrj 	  else
1393*38fd1498Szrj 	    reg->m_uses.safe_push (this);
1394*38fd1498Szrj 	}
1395*38fd1498Szrj     }
1396*38fd1498Szrj 
1397*38fd1498Szrj   m_operands[index] = op;
1398*38fd1498Szrj }
1399*38fd1498Szrj 
1400*38fd1498Szrj /* Get INDEX-th operand of the instruction.  */
1401*38fd1498Szrj 
1402*38fd1498Szrj hsa_op_base *
get_op(int index)1403*38fd1498Szrj hsa_insn_basic::get_op (int index)
1404*38fd1498Szrj {
1405*38fd1498Szrj   return m_operands[index];
1406*38fd1498Szrj }
1407*38fd1498Szrj 
1408*38fd1498Szrj /* Get address of INDEX-th operand of the instruction.  */
1409*38fd1498Szrj 
1410*38fd1498Szrj hsa_op_base **
get_op_addr(int index)1411*38fd1498Szrj hsa_insn_basic::get_op_addr (int index)
1412*38fd1498Szrj {
1413*38fd1498Szrj   return &m_operands[index];
1414*38fd1498Szrj }
1415*38fd1498Szrj 
1416*38fd1498Szrj /* Get number of operands of the instruction.  */
1417*38fd1498Szrj unsigned int
operand_count()1418*38fd1498Szrj hsa_insn_basic::operand_count ()
1419*38fd1498Szrj {
1420*38fd1498Szrj   return m_operands.length ();
1421*38fd1498Szrj }
1422*38fd1498Szrj 
1423*38fd1498Szrj /* Constructor of the class which is the bases of all instructions and directly
1424*38fd1498Szrj    represents the most basic ones.  NOPS is the number of operands that the
1425*38fd1498Szrj    operand vector will contain (and which will be cleared).  OPC is the opcode
1426*38fd1498Szrj    of the instruction, T is the type of the instruction.  */
1427*38fd1498Szrj 
hsa_insn_basic(unsigned nops,int opc,BrigType16_t t,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2,hsa_op_base * arg3)1428*38fd1498Szrj hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc, BrigType16_t t,
1429*38fd1498Szrj 				hsa_op_base *arg0, hsa_op_base *arg1,
1430*38fd1498Szrj 				hsa_op_base *arg2, hsa_op_base *arg3)
1431*38fd1498Szrj  : m_prev (NULL), m_next (NULL), m_bb (NULL), m_opcode (opc),m_number (0),
1432*38fd1498Szrj    m_type (t),  m_brig_offset (0)
1433*38fd1498Szrj {
1434*38fd1498Szrj   if (nops > 0)
1435*38fd1498Szrj     m_operands.safe_grow_cleared (nops);
1436*38fd1498Szrj 
1437*38fd1498Szrj   if (arg0 != NULL)
1438*38fd1498Szrj     {
1439*38fd1498Szrj       gcc_checking_assert (nops >= 1);
1440*38fd1498Szrj       set_op (0, arg0);
1441*38fd1498Szrj     }
1442*38fd1498Szrj 
1443*38fd1498Szrj   if (arg1 != NULL)
1444*38fd1498Szrj     {
1445*38fd1498Szrj       gcc_checking_assert (nops >= 2);
1446*38fd1498Szrj       set_op (1, arg1);
1447*38fd1498Szrj     }
1448*38fd1498Szrj 
1449*38fd1498Szrj   if (arg2 != NULL)
1450*38fd1498Szrj     {
1451*38fd1498Szrj       gcc_checking_assert (nops >= 3);
1452*38fd1498Szrj       set_op (2, arg2);
1453*38fd1498Szrj     }
1454*38fd1498Szrj 
1455*38fd1498Szrj   if (arg3 != NULL)
1456*38fd1498Szrj     {
1457*38fd1498Szrj       gcc_checking_assert (nops >= 4);
1458*38fd1498Szrj       set_op (3, arg3);
1459*38fd1498Szrj     }
1460*38fd1498Szrj 
1461*38fd1498Szrj   hsa_instructions.safe_push (this);
1462*38fd1498Szrj }
1463*38fd1498Szrj 
1464*38fd1498Szrj /* New operator to allocate basic instruction from obstack.  */
1465*38fd1498Szrj 
1466*38fd1498Szrj void *
new(size_t size)1467*38fd1498Szrj hsa_insn_basic::operator new (size_t size)
1468*38fd1498Szrj {
1469*38fd1498Szrj   return obstack_alloc (&hsa_obstack, size);
1470*38fd1498Szrj }
1471*38fd1498Szrj 
1472*38fd1498Szrj /* Verify the instruction.  */
1473*38fd1498Szrj 
1474*38fd1498Szrj void
verify()1475*38fd1498Szrj hsa_insn_basic::verify ()
1476*38fd1498Szrj {
1477*38fd1498Szrj   hsa_op_address *addr;
1478*38fd1498Szrj   hsa_op_reg *reg;
1479*38fd1498Szrj 
1480*38fd1498Szrj   /* Iterate all register operands and verify that the instruction
1481*38fd1498Szrj      is set in uses of the register.  */
1482*38fd1498Szrj   for (unsigned i = 0; i < operand_count (); i++)
1483*38fd1498Szrj     {
1484*38fd1498Szrj       hsa_op_base *use = get_op (i);
1485*38fd1498Szrj 
1486*38fd1498Szrj       if ((addr = dyn_cast <hsa_op_address *> (use)) && addr->m_reg)
1487*38fd1498Szrj 	{
1488*38fd1498Szrj 	  gcc_assert (addr->m_reg->m_def_insn != this);
1489*38fd1498Szrj 	  use = addr->m_reg;
1490*38fd1498Szrj 	}
1491*38fd1498Szrj 
1492*38fd1498Szrj       if ((reg = dyn_cast <hsa_op_reg *> (use)) && !op_output_p (i))
1493*38fd1498Szrj 	{
1494*38fd1498Szrj 	  unsigned j;
1495*38fd1498Szrj 	  for (j = 0; j < reg->m_uses.length (); j++)
1496*38fd1498Szrj 	    {
1497*38fd1498Szrj 	      if (reg->m_uses[j] == this)
1498*38fd1498Szrj 		break;
1499*38fd1498Szrj 	    }
1500*38fd1498Szrj 
1501*38fd1498Szrj 	  if (j == reg->m_uses.length ())
1502*38fd1498Szrj 	    {
1503*38fd1498Szrj 	      error ("HSA instruction uses a register but is not among "
1504*38fd1498Szrj 		     "recorded register uses");
1505*38fd1498Szrj 	      debug_hsa_operand (reg);
1506*38fd1498Szrj 	      debug_hsa_insn (this);
1507*38fd1498Szrj 	      internal_error ("HSA instruction verification failed");
1508*38fd1498Szrj 	    }
1509*38fd1498Szrj 	}
1510*38fd1498Szrj     }
1511*38fd1498Szrj }
1512*38fd1498Szrj 
1513*38fd1498Szrj /* Constructor of an instruction representing a PHI node.  NOPS is the number
1514*38fd1498Szrj    of operands (equal to the number of predecessors).  */
1515*38fd1498Szrj 
hsa_insn_phi(unsigned nops,hsa_op_reg * dst)1516*38fd1498Szrj hsa_insn_phi::hsa_insn_phi (unsigned nops, hsa_op_reg *dst)
1517*38fd1498Szrj   : hsa_insn_basic (nops, HSA_OPCODE_PHI), m_dest (dst)
1518*38fd1498Szrj {
1519*38fd1498Szrj   dst->set_definition (this);
1520*38fd1498Szrj }
1521*38fd1498Szrj 
1522*38fd1498Szrj /* Constructor of class representing instructions for control flow and
1523*38fd1498Szrj    sychronization,   */
1524*38fd1498Szrj 
hsa_insn_br(unsigned nops,int opc,BrigType16_t t,BrigWidth8_t width,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2,hsa_op_base * arg3)1525*38fd1498Szrj hsa_insn_br::hsa_insn_br (unsigned nops, int opc, BrigType16_t t,
1526*38fd1498Szrj 			  BrigWidth8_t width, hsa_op_base *arg0,
1527*38fd1498Szrj 			  hsa_op_base *arg1, hsa_op_base *arg2,
1528*38fd1498Szrj 			  hsa_op_base *arg3)
1529*38fd1498Szrj   : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
1530*38fd1498Szrj     m_width (width)
1531*38fd1498Szrj {
1532*38fd1498Szrj }
1533*38fd1498Szrj 
1534*38fd1498Szrj /* Constructor of class representing instruction for conditional jump, CTRL is
1535*38fd1498Szrj    the control register determining whether the jump will be carried out, the
1536*38fd1498Szrj    new instruction is automatically added to its uses list.  */
1537*38fd1498Szrj 
hsa_insn_cbr(hsa_op_reg * ctrl)1538*38fd1498Szrj hsa_insn_cbr::hsa_insn_cbr (hsa_op_reg *ctrl)
1539*38fd1498Szrj   : hsa_insn_br (1, BRIG_OPCODE_CBR, BRIG_TYPE_B1, BRIG_WIDTH_1, ctrl)
1540*38fd1498Szrj {
1541*38fd1498Szrj }
1542*38fd1498Szrj 
1543*38fd1498Szrj /* Constructor of class representing instruction for switch jump, CTRL is
1544*38fd1498Szrj    the index register.  */
1545*38fd1498Szrj 
hsa_insn_sbr(hsa_op_reg * index,unsigned jump_count)1546*38fd1498Szrj hsa_insn_sbr::hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count)
1547*38fd1498Szrj   : hsa_insn_basic (1, BRIG_OPCODE_SBR, BRIG_TYPE_B1, index),
1548*38fd1498Szrj     m_width (BRIG_WIDTH_1), m_jump_table (vNULL),
1549*38fd1498Szrj     m_label_code_list (new hsa_op_code_list (jump_count))
1550*38fd1498Szrj {
1551*38fd1498Szrj }
1552*38fd1498Szrj 
1553*38fd1498Szrj /* Replace all occurrences of OLD_BB with NEW_BB in the statements
1554*38fd1498Szrj    jump table.  */
1555*38fd1498Szrj 
1556*38fd1498Szrj void
replace_all_labels(basic_block old_bb,basic_block new_bb)1557*38fd1498Szrj hsa_insn_sbr::replace_all_labels (basic_block old_bb, basic_block new_bb)
1558*38fd1498Szrj {
1559*38fd1498Szrj   for (unsigned i = 0; i < m_jump_table.length (); i++)
1560*38fd1498Szrj     if (m_jump_table[i] == old_bb)
1561*38fd1498Szrj       m_jump_table[i] = new_bb;
1562*38fd1498Szrj }
1563*38fd1498Szrj 
~hsa_insn_sbr()1564*38fd1498Szrj hsa_insn_sbr::~hsa_insn_sbr ()
1565*38fd1498Szrj {
1566*38fd1498Szrj   m_jump_table.release ();
1567*38fd1498Szrj }
1568*38fd1498Szrj 
1569*38fd1498Szrj /* Constructor of comparison instruction.  CMP is the comparison operation and T
1570*38fd1498Szrj    is the result type.  */
1571*38fd1498Szrj 
hsa_insn_cmp(BrigCompareOperation8_t cmp,BrigType16_t t,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2)1572*38fd1498Szrj hsa_insn_cmp::hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t,
1573*38fd1498Szrj 			    hsa_op_base *arg0, hsa_op_base *arg1,
1574*38fd1498Szrj 			    hsa_op_base *arg2)
1575*38fd1498Szrj   : hsa_insn_basic (3 , BRIG_OPCODE_CMP, t, arg0, arg1, arg2), m_compare (cmp)
1576*38fd1498Szrj {
1577*38fd1498Szrj }
1578*38fd1498Szrj 
1579*38fd1498Szrj /* Constructor of classes representing memory accesses.  OPC is the opcode (must
1580*38fd1498Szrj    be BRIG_OPCODE_ST or BRIG_OPCODE_LD) and T is the type.  The instruction
1581*38fd1498Szrj    operands are provided as ARG0 and ARG1.  */
1582*38fd1498Szrj 
hsa_insn_mem(int opc,BrigType16_t t,hsa_op_base * arg0,hsa_op_base * arg1)1583*38fd1498Szrj hsa_insn_mem::hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0,
1584*38fd1498Szrj 			    hsa_op_base *arg1)
1585*38fd1498Szrj   : hsa_insn_basic (2, opc, t, arg0, arg1),
1586*38fd1498Szrj     m_align (hsa_natural_alignment (t)), m_equiv_class (0)
1587*38fd1498Szrj {
1588*38fd1498Szrj   gcc_checking_assert (opc == BRIG_OPCODE_LD || opc == BRIG_OPCODE_ST);
1589*38fd1498Szrj }
1590*38fd1498Szrj 
1591*38fd1498Szrj /* Constructor for descendants allowing different opcodes and number of
1592*38fd1498Szrj    operands, it passes its arguments directly to hsa_insn_basic
1593*38fd1498Szrj    constructor.  The instruction operands are provided as ARG[0-3].  */
1594*38fd1498Szrj 
1595*38fd1498Szrj 
hsa_insn_mem(unsigned nops,int opc,BrigType16_t t,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2,hsa_op_base * arg3)1596*38fd1498Szrj hsa_insn_mem::hsa_insn_mem (unsigned nops, int opc, BrigType16_t t,
1597*38fd1498Szrj 			    hsa_op_base *arg0, hsa_op_base *arg1,
1598*38fd1498Szrj 			    hsa_op_base *arg2, hsa_op_base *arg3)
1599*38fd1498Szrj   : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
1600*38fd1498Szrj     m_align (hsa_natural_alignment (t)), m_equiv_class (0)
1601*38fd1498Szrj {
1602*38fd1498Szrj }
1603*38fd1498Szrj 
1604*38fd1498Szrj /* Constructor of class representing atomic instructions.  OPC is the principal
1605*38fd1498Szrj    opcode, AOP is the specific atomic operation opcode.  T is the type of the
1606*38fd1498Szrj    instruction.  The instruction operands are provided as ARG[0-3].  */
1607*38fd1498Szrj 
hsa_insn_atomic(int nops,int opc,enum BrigAtomicOperation aop,BrigType16_t t,BrigMemoryOrder memorder,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2,hsa_op_base * arg3)1608*38fd1498Szrj hsa_insn_atomic::hsa_insn_atomic (int nops, int opc,
1609*38fd1498Szrj 				  enum BrigAtomicOperation aop,
1610*38fd1498Szrj 				  BrigType16_t t, BrigMemoryOrder memorder,
1611*38fd1498Szrj 				  hsa_op_base *arg0,
1612*38fd1498Szrj 				  hsa_op_base *arg1, hsa_op_base *arg2,
1613*38fd1498Szrj 				  hsa_op_base *arg3)
1614*38fd1498Szrj   : hsa_insn_mem (nops, opc, t, arg0, arg1, arg2, arg3), m_atomicop (aop),
1615*38fd1498Szrj     m_memoryorder (memorder),
1616*38fd1498Szrj     m_memoryscope (BRIG_MEMORY_SCOPE_SYSTEM)
1617*38fd1498Szrj {
1618*38fd1498Szrj   gcc_checking_assert (opc == BRIG_OPCODE_ATOMICNORET ||
1619*38fd1498Szrj 		       opc == BRIG_OPCODE_ATOMIC ||
1620*38fd1498Szrj 		       opc == BRIG_OPCODE_SIGNAL ||
1621*38fd1498Szrj 		       opc == BRIG_OPCODE_SIGNALNORET);
1622*38fd1498Szrj }
1623*38fd1498Szrj 
1624*38fd1498Szrj /* Constructor of class representing signal instructions.  OPC is the prinicpal
1625*38fd1498Szrj    opcode, SOP is the specific signal operation opcode.  T is the type of the
1626*38fd1498Szrj    instruction.  The instruction operands are provided as ARG[0-3].  */
1627*38fd1498Szrj 
hsa_insn_signal(int nops,int opc,enum BrigAtomicOperation sop,BrigType16_t t,BrigMemoryOrder memorder,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2,hsa_op_base * arg3)1628*38fd1498Szrj hsa_insn_signal::hsa_insn_signal (int nops, int opc,
1629*38fd1498Szrj 				  enum BrigAtomicOperation sop,
1630*38fd1498Szrj 				  BrigType16_t t, BrigMemoryOrder memorder,
1631*38fd1498Szrj 				  hsa_op_base *arg0, hsa_op_base *arg1,
1632*38fd1498Szrj 				  hsa_op_base *arg2, hsa_op_base *arg3)
1633*38fd1498Szrj   : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
1634*38fd1498Szrj     m_memory_order (memorder), m_signalop (sop)
1635*38fd1498Szrj {
1636*38fd1498Szrj }
1637*38fd1498Szrj 
1638*38fd1498Szrj /* Constructor of class representing segment conversion instructions.  OPC is
1639*38fd1498Szrj    the opcode which must be either BRIG_OPCODE_STOF or BRIG_OPCODE_FTOS.  DEST
1640*38fd1498Szrj    and SRCT are destination and source types respectively, SEG is the segment
1641*38fd1498Szrj    we are converting to or from.  The instruction operands are
1642*38fd1498Szrj    provided as ARG0 and ARG1.  */
1643*38fd1498Szrj 
hsa_insn_seg(int opc,BrigType16_t dest,BrigType16_t srct,BrigSegment8_t seg,hsa_op_base * arg0,hsa_op_base * arg1)1644*38fd1498Szrj hsa_insn_seg::hsa_insn_seg (int opc, BrigType16_t dest, BrigType16_t srct,
1645*38fd1498Szrj 			    BrigSegment8_t seg, hsa_op_base *arg0,
1646*38fd1498Szrj 			    hsa_op_base *arg1)
1647*38fd1498Szrj   : hsa_insn_basic (2, opc, dest, arg0, arg1), m_src_type (srct),
1648*38fd1498Szrj     m_segment (seg)
1649*38fd1498Szrj {
1650*38fd1498Szrj   gcc_checking_assert (opc == BRIG_OPCODE_STOF || opc == BRIG_OPCODE_FTOS);
1651*38fd1498Szrj }
1652*38fd1498Szrj 
1653*38fd1498Szrj /* Constructor of class representing a call instruction.  CALLEE is the tree
1654*38fd1498Szrj    representation of the function being called.  */
1655*38fd1498Szrj 
hsa_insn_call(tree callee)1656*38fd1498Szrj hsa_insn_call::hsa_insn_call (tree callee)
1657*38fd1498Szrj   : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (callee),
1658*38fd1498Szrj     m_output_arg (NULL), m_args_code_list (NULL), m_result_code_list (NULL)
1659*38fd1498Szrj {
1660*38fd1498Szrj }
1661*38fd1498Szrj 
hsa_insn_call(hsa_internal_fn * fn)1662*38fd1498Szrj hsa_insn_call::hsa_insn_call (hsa_internal_fn *fn)
1663*38fd1498Szrj   : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (NULL),
1664*38fd1498Szrj     m_called_internal_fn (fn), m_output_arg (NULL), m_args_code_list (NULL),
1665*38fd1498Szrj     m_result_code_list (NULL)
1666*38fd1498Szrj {
1667*38fd1498Szrj }
1668*38fd1498Szrj 
~hsa_insn_call()1669*38fd1498Szrj hsa_insn_call::~hsa_insn_call ()
1670*38fd1498Szrj {
1671*38fd1498Szrj   for (unsigned i = 0; i < m_input_args.length (); i++)
1672*38fd1498Szrj     delete m_input_args[i];
1673*38fd1498Szrj 
1674*38fd1498Szrj   delete m_output_arg;
1675*38fd1498Szrj 
1676*38fd1498Szrj   m_input_args.release ();
1677*38fd1498Szrj   m_input_arg_insns.release ();
1678*38fd1498Szrj }
1679*38fd1498Szrj 
1680*38fd1498Szrj /* Constructor of class representing the argument block required to invoke
1681*38fd1498Szrj    a call in HSAIL.  */
hsa_insn_arg_block(BrigKind brig_kind,hsa_insn_call * call)1682*38fd1498Szrj hsa_insn_arg_block::hsa_insn_arg_block (BrigKind brig_kind,
1683*38fd1498Szrj 					hsa_insn_call * call)
1684*38fd1498Szrj   : hsa_insn_basic (0, HSA_OPCODE_ARG_BLOCK), m_kind (brig_kind),
1685*38fd1498Szrj     m_call_insn (call)
1686*38fd1498Szrj {
1687*38fd1498Szrj }
1688*38fd1498Szrj 
hsa_insn_comment(const char * s)1689*38fd1498Szrj hsa_insn_comment::hsa_insn_comment (const char *s)
1690*38fd1498Szrj   : hsa_insn_basic (0, BRIG_KIND_DIRECTIVE_COMMENT)
1691*38fd1498Szrj {
1692*38fd1498Szrj   unsigned l = strlen (s);
1693*38fd1498Szrj 
1694*38fd1498Szrj   /* Append '// ' to the string.  */
1695*38fd1498Szrj   char *buf = XNEWVEC (char, l + 4);
1696*38fd1498Szrj   sprintf (buf, "// %s", s);
1697*38fd1498Szrj   m_comment = buf;
1698*38fd1498Szrj }
1699*38fd1498Szrj 
~hsa_insn_comment()1700*38fd1498Szrj hsa_insn_comment::~hsa_insn_comment ()
1701*38fd1498Szrj {
1702*38fd1498Szrj   gcc_checking_assert (m_comment);
1703*38fd1498Szrj   free (m_comment);
1704*38fd1498Szrj   m_comment = NULL;
1705*38fd1498Szrj }
1706*38fd1498Szrj 
1707*38fd1498Szrj /* Constructor of class representing the queue instruction in HSAIL.  */
1708*38fd1498Szrj 
hsa_insn_queue(int nops,int opcode,BrigSegment segment,BrigMemoryOrder memory_order,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2,hsa_op_base * arg3)1709*38fd1498Szrj hsa_insn_queue::hsa_insn_queue (int nops, int opcode, BrigSegment segment,
1710*38fd1498Szrj 				BrigMemoryOrder memory_order,
1711*38fd1498Szrj 				hsa_op_base *arg0, hsa_op_base *arg1,
1712*38fd1498Szrj 				hsa_op_base *arg2, hsa_op_base *arg3)
1713*38fd1498Szrj   : hsa_insn_basic (nops, opcode, BRIG_TYPE_U64, arg0, arg1, arg2, arg3),
1714*38fd1498Szrj     m_segment (segment), m_memory_order (memory_order)
1715*38fd1498Szrj {
1716*38fd1498Szrj }
1717*38fd1498Szrj 
1718*38fd1498Szrj /* Constructor of class representing the source type instruction in HSAIL.  */
1719*38fd1498Szrj 
1720*38fd1498Szrj hsa_insn_srctype::hsa_insn_srctype (int nops, BrigOpcode opcode,
1721*38fd1498Szrj 				    BrigType16_t destt, BrigType16_t srct,
1722*38fd1498Szrj 				    hsa_op_base *arg0, hsa_op_base *arg1,
1723*38fd1498Szrj 				    hsa_op_base *arg2 = NULL)
hsa_insn_basic(nops,opcode,destt,arg0,arg1,arg2)1724*38fd1498Szrj   : hsa_insn_basic (nops, opcode, destt, arg0, arg1, arg2),
1725*38fd1498Szrj     m_source_type (srct)
1726*38fd1498Szrj {}
1727*38fd1498Szrj 
1728*38fd1498Szrj /* Constructor of class representing the packed instruction in HSAIL.  */
1729*38fd1498Szrj 
hsa_insn_packed(int nops,BrigOpcode opcode,BrigType16_t destt,BrigType16_t srct,hsa_op_base * arg0,hsa_op_base * arg1,hsa_op_base * arg2)1730*38fd1498Szrj hsa_insn_packed::hsa_insn_packed (int nops, BrigOpcode opcode,
1731*38fd1498Szrj 				  BrigType16_t destt, BrigType16_t srct,
1732*38fd1498Szrj 				  hsa_op_base *arg0, hsa_op_base *arg1,
1733*38fd1498Szrj 				  hsa_op_base *arg2)
1734*38fd1498Szrj   : hsa_insn_srctype (nops, opcode, destt, srct, arg0, arg1, arg2)
1735*38fd1498Szrj {
1736*38fd1498Szrj   m_operand_list = new hsa_op_operand_list (nops - 1);
1737*38fd1498Szrj }
1738*38fd1498Szrj 
1739*38fd1498Szrj /* Constructor of class representing the convert instruction in HSAIL.  */
1740*38fd1498Szrj 
hsa_insn_cvt(hsa_op_with_type * dest,hsa_op_with_type * src)1741*38fd1498Szrj hsa_insn_cvt::hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src)
1742*38fd1498Szrj   : hsa_insn_basic (2, BRIG_OPCODE_CVT, dest->m_type, dest, src)
1743*38fd1498Szrj {
1744*38fd1498Szrj }
1745*38fd1498Szrj 
1746*38fd1498Szrj /* Constructor of class representing the alloca in HSAIL.  */
1747*38fd1498Szrj 
hsa_insn_alloca(hsa_op_with_type * dest,hsa_op_with_type * size,unsigned alignment)1748*38fd1498Szrj hsa_insn_alloca::hsa_insn_alloca (hsa_op_with_type *dest,
1749*38fd1498Szrj 				  hsa_op_with_type *size, unsigned alignment)
1750*38fd1498Szrj   : hsa_insn_basic (2, BRIG_OPCODE_ALLOCA, dest->m_type, dest, size),
1751*38fd1498Szrj     m_align (BRIG_ALIGNMENT_8)
1752*38fd1498Szrj {
1753*38fd1498Szrj   gcc_assert (dest->m_type == BRIG_TYPE_U32);
1754*38fd1498Szrj   if (alignment)
1755*38fd1498Szrj     m_align = hsa_alignment_encoding (alignment);
1756*38fd1498Szrj }
1757*38fd1498Szrj 
1758*38fd1498Szrj /* Append an instruction INSN into the basic block.  */
1759*38fd1498Szrj 
1760*38fd1498Szrj void
append_insn(hsa_insn_basic * insn)1761*38fd1498Szrj hsa_bb::append_insn (hsa_insn_basic *insn)
1762*38fd1498Szrj {
1763*38fd1498Szrj   gcc_assert (insn->m_opcode != 0 || insn->operand_count () == 0);
1764*38fd1498Szrj   gcc_assert (!insn->m_bb);
1765*38fd1498Szrj 
1766*38fd1498Szrj   insn->m_bb = m_bb;
1767*38fd1498Szrj   insn->m_prev = m_last_insn;
1768*38fd1498Szrj   insn->m_next = NULL;
1769*38fd1498Szrj   if (m_last_insn)
1770*38fd1498Szrj     m_last_insn->m_next = insn;
1771*38fd1498Szrj   m_last_insn = insn;
1772*38fd1498Szrj   if (!m_first_insn)
1773*38fd1498Szrj     m_first_insn = insn;
1774*38fd1498Szrj }
1775*38fd1498Szrj 
1776*38fd1498Szrj void
append_phi(hsa_insn_phi * hphi)1777*38fd1498Szrj hsa_bb::append_phi (hsa_insn_phi *hphi)
1778*38fd1498Szrj {
1779*38fd1498Szrj   hphi->m_bb = m_bb;
1780*38fd1498Szrj 
1781*38fd1498Szrj   hphi->m_prev = m_last_phi;
1782*38fd1498Szrj   hphi->m_next = NULL;
1783*38fd1498Szrj   if (m_last_phi)
1784*38fd1498Szrj     m_last_phi->m_next = hphi;
1785*38fd1498Szrj   m_last_phi = hphi;
1786*38fd1498Szrj   if (!m_first_phi)
1787*38fd1498Szrj     m_first_phi = hphi;
1788*38fd1498Szrj }
1789*38fd1498Szrj 
1790*38fd1498Szrj /* Insert HSA instruction NEW_INSN immediately before an existing instruction
1791*38fd1498Szrj    OLD_INSN.  */
1792*38fd1498Szrj 
1793*38fd1498Szrj static void
hsa_insert_insn_before(hsa_insn_basic * new_insn,hsa_insn_basic * old_insn)1794*38fd1498Szrj hsa_insert_insn_before (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn)
1795*38fd1498Szrj {
1796*38fd1498Szrj   hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb);
1797*38fd1498Szrj 
1798*38fd1498Szrj   if (hbb->m_first_insn == old_insn)
1799*38fd1498Szrj     hbb->m_first_insn = new_insn;
1800*38fd1498Szrj   new_insn->m_prev = old_insn->m_prev;
1801*38fd1498Szrj   new_insn->m_next = old_insn;
1802*38fd1498Szrj   if (old_insn->m_prev)
1803*38fd1498Szrj     old_insn->m_prev->m_next = new_insn;
1804*38fd1498Szrj   old_insn->m_prev = new_insn;
1805*38fd1498Szrj }
1806*38fd1498Szrj 
1807*38fd1498Szrj /* Append HSA instruction NEW_INSN immediately after an existing instruction
1808*38fd1498Szrj    OLD_INSN.  */
1809*38fd1498Szrj 
1810*38fd1498Szrj static void
hsa_append_insn_after(hsa_insn_basic * new_insn,hsa_insn_basic * old_insn)1811*38fd1498Szrj hsa_append_insn_after (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn)
1812*38fd1498Szrj {
1813*38fd1498Szrj   hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb);
1814*38fd1498Szrj 
1815*38fd1498Szrj   if (hbb->m_last_insn == old_insn)
1816*38fd1498Szrj     hbb->m_last_insn = new_insn;
1817*38fd1498Szrj   new_insn->m_prev = old_insn;
1818*38fd1498Szrj   new_insn->m_next = old_insn->m_next;
1819*38fd1498Szrj   if (old_insn->m_next)
1820*38fd1498Szrj     old_insn->m_next->m_prev = new_insn;
1821*38fd1498Szrj   old_insn->m_next = new_insn;
1822*38fd1498Szrj }
1823*38fd1498Szrj 
1824*38fd1498Szrj /* Return a register containing the calculated value of EXP which must be an
1825*38fd1498Szrj    expression consisting of PLUS_EXPRs, MULT_EXPRs, NOP_EXPRs, SSA_NAMEs and
1826*38fd1498Szrj    integer constants as returned by get_inner_reference.
1827*38fd1498Szrj    Newly generated HSA instructions will be appended to HBB.
1828*38fd1498Szrj    Perform all calculations in ADDRTYPE.  */
1829*38fd1498Szrj 
1830*38fd1498Szrj static hsa_op_with_type *
gen_address_calculation(tree exp,hsa_bb * hbb,BrigType16_t addrtype)1831*38fd1498Szrj gen_address_calculation (tree exp, hsa_bb *hbb, BrigType16_t addrtype)
1832*38fd1498Szrj {
1833*38fd1498Szrj   int opcode;
1834*38fd1498Szrj 
1835*38fd1498Szrj   if (TREE_CODE (exp) == NOP_EXPR)
1836*38fd1498Szrj     exp = TREE_OPERAND (exp, 0);
1837*38fd1498Szrj 
1838*38fd1498Szrj   switch (TREE_CODE (exp))
1839*38fd1498Szrj     {
1840*38fd1498Szrj     case SSA_NAME:
1841*38fd1498Szrj       return hsa_cfun->reg_for_gimple_ssa (exp)->get_in_type (addrtype, hbb);
1842*38fd1498Szrj 
1843*38fd1498Szrj     case INTEGER_CST:
1844*38fd1498Szrj       {
1845*38fd1498Szrj 	hsa_op_immed *imm = new hsa_op_immed (exp);
1846*38fd1498Szrj        if (addrtype != imm->m_type)
1847*38fd1498Szrj 	 imm->m_type = addrtype;
1848*38fd1498Szrj        return imm;
1849*38fd1498Szrj       }
1850*38fd1498Szrj 
1851*38fd1498Szrj     case PLUS_EXPR:
1852*38fd1498Szrj       opcode = BRIG_OPCODE_ADD;
1853*38fd1498Szrj       break;
1854*38fd1498Szrj 
1855*38fd1498Szrj     case MULT_EXPR:
1856*38fd1498Szrj       opcode = BRIG_OPCODE_MUL;
1857*38fd1498Szrj       break;
1858*38fd1498Szrj 
1859*38fd1498Szrj     default:
1860*38fd1498Szrj       gcc_unreachable ();
1861*38fd1498Szrj     }
1862*38fd1498Szrj 
1863*38fd1498Szrj   hsa_op_reg *res = new hsa_op_reg (addrtype);
1864*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, addrtype);
1865*38fd1498Szrj   insn->set_op (0, res);
1866*38fd1498Szrj 
1867*38fd1498Szrj   hsa_op_with_type *op1 = gen_address_calculation (TREE_OPERAND (exp, 0), hbb,
1868*38fd1498Szrj 						   addrtype);
1869*38fd1498Szrj   hsa_op_with_type *op2 = gen_address_calculation (TREE_OPERAND (exp, 1), hbb,
1870*38fd1498Szrj 						   addrtype);
1871*38fd1498Szrj   insn->set_op (1, op1);
1872*38fd1498Szrj   insn->set_op (2, op2);
1873*38fd1498Szrj 
1874*38fd1498Szrj   hbb->append_insn (insn);
1875*38fd1498Szrj   return res;
1876*38fd1498Szrj }
1877*38fd1498Szrj 
1878*38fd1498Szrj /* If R1 is NULL, just return R2, otherwise append an instruction adding them
1879*38fd1498Szrj    to HBB and return the register holding the result.  */
1880*38fd1498Szrj 
1881*38fd1498Szrj static hsa_op_reg *
add_addr_regs_if_needed(hsa_op_reg * r1,hsa_op_reg * r2,hsa_bb * hbb)1882*38fd1498Szrj add_addr_regs_if_needed (hsa_op_reg *r1, hsa_op_reg *r2, hsa_bb *hbb)
1883*38fd1498Szrj {
1884*38fd1498Szrj   gcc_checking_assert (r2);
1885*38fd1498Szrj   if (!r1)
1886*38fd1498Szrj     return r2;
1887*38fd1498Szrj 
1888*38fd1498Szrj   hsa_op_reg *res = new hsa_op_reg (r1->m_type);
1889*38fd1498Szrj   gcc_assert (!hsa_needs_cvt (r1->m_type, r2->m_type));
1890*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_ADD, res->m_type);
1891*38fd1498Szrj   insn->set_op (0, res);
1892*38fd1498Szrj   insn->set_op (1, r1);
1893*38fd1498Szrj   insn->set_op (2, r2);
1894*38fd1498Szrj   hbb->append_insn (insn);
1895*38fd1498Szrj   return res;
1896*38fd1498Szrj }
1897*38fd1498Szrj 
1898*38fd1498Szrj /* Helper of gen_hsa_addr.  Update *SYMBOL, *ADDRTYPE, *REG and *OFFSET to
1899*38fd1498Szrj    reflect BASE which is the first operand of a MEM_REF or a TARGET_MEM_REF.  */
1900*38fd1498Szrj 
1901*38fd1498Szrj static void
process_mem_base(tree base,hsa_symbol ** symbol,BrigType16_t * addrtype,hsa_op_reg ** reg,offset_int * offset,hsa_bb * hbb)1902*38fd1498Szrj process_mem_base (tree base, hsa_symbol **symbol, BrigType16_t *addrtype,
1903*38fd1498Szrj 		  hsa_op_reg **reg, offset_int *offset, hsa_bb *hbb)
1904*38fd1498Szrj {
1905*38fd1498Szrj   if (TREE_CODE (base) == SSA_NAME)
1906*38fd1498Szrj     {
1907*38fd1498Szrj       gcc_assert (!*reg);
1908*38fd1498Szrj       hsa_op_with_type *ssa
1909*38fd1498Szrj 	= hsa_cfun->reg_for_gimple_ssa (base)->get_in_type (*addrtype, hbb);
1910*38fd1498Szrj       *reg = dyn_cast <hsa_op_reg *> (ssa);
1911*38fd1498Szrj     }
1912*38fd1498Szrj   else if (TREE_CODE (base) == ADDR_EXPR)
1913*38fd1498Szrj     {
1914*38fd1498Szrj       tree decl = TREE_OPERAND (base, 0);
1915*38fd1498Szrj 
1916*38fd1498Szrj       if (!DECL_P (decl) || TREE_CODE (decl) == FUNCTION_DECL)
1917*38fd1498Szrj 	{
1918*38fd1498Szrj 	  HSA_SORRY_AT (EXPR_LOCATION (base),
1919*38fd1498Szrj 			"support for HSA does not implement a memory reference "
1920*38fd1498Szrj 			"to a non-declaration type");
1921*38fd1498Szrj 	  return;
1922*38fd1498Szrj 	}
1923*38fd1498Szrj 
1924*38fd1498Szrj       gcc_assert (!*symbol);
1925*38fd1498Szrj 
1926*38fd1498Szrj       *symbol = get_symbol_for_decl (decl);
1927*38fd1498Szrj       *addrtype = hsa_get_segment_addr_type ((*symbol)->m_segment);
1928*38fd1498Szrj     }
1929*38fd1498Szrj   else if (TREE_CODE (base) == INTEGER_CST)
1930*38fd1498Szrj     *offset += wi::to_offset (base);
1931*38fd1498Szrj   else
1932*38fd1498Szrj     gcc_unreachable ();
1933*38fd1498Szrj }
1934*38fd1498Szrj 
1935*38fd1498Szrj /* Forward declaration of a function.  */
1936*38fd1498Szrj 
1937*38fd1498Szrj static void
1938*38fd1498Szrj gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb);
1939*38fd1498Szrj 
1940*38fd1498Szrj /* Generate HSA address operand for a given tree memory reference REF.  If
1941*38fd1498Szrj    instructions need to be created to calculate the address, they will be added
1942*38fd1498Szrj    to the end of HBB.  If a caller provides OUTPUT_BITSIZE and OUTPUT_BITPOS,
1943*38fd1498Szrj    the function assumes that the caller will handle possible
1944*38fd1498Szrj    bit-field references.  Otherwise if we reference a bit-field, sorry message
1945*38fd1498Szrj    is displayed.  */
1946*38fd1498Szrj 
1947*38fd1498Szrj static hsa_op_address *
1948*38fd1498Szrj gen_hsa_addr (tree ref, hsa_bb *hbb, HOST_WIDE_INT *output_bitsize = NULL,
1949*38fd1498Szrj 	      HOST_WIDE_INT *output_bitpos = NULL)
1950*38fd1498Szrj {
1951*38fd1498Szrj   hsa_symbol *symbol = NULL;
1952*38fd1498Szrj   hsa_op_reg *reg = NULL;
1953*38fd1498Szrj   offset_int offset = 0;
1954*38fd1498Szrj   tree origref = ref;
1955*38fd1498Szrj   tree varoffset = NULL_TREE;
1956*38fd1498Szrj   BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
1957*38fd1498Szrj   HOST_WIDE_INT bitsize = 0, bitpos = 0;
1958*38fd1498Szrj   BrigType16_t flat_addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
1959*38fd1498Szrj 
1960*38fd1498Szrj   if (TREE_CODE (ref) == STRING_CST)
1961*38fd1498Szrj     {
1962*38fd1498Szrj       symbol = hsa_get_string_cst_symbol (ref);
1963*38fd1498Szrj       goto out;
1964*38fd1498Szrj     }
1965*38fd1498Szrj   else if (TREE_CODE (ref) == BIT_FIELD_REF
1966*38fd1498Szrj 	   && (!multiple_p (bit_field_size (ref), BITS_PER_UNIT)
1967*38fd1498Szrj 	       || !multiple_p (bit_field_offset (ref), BITS_PER_UNIT)))
1968*38fd1498Szrj     {
1969*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (origref),
1970*38fd1498Szrj 		     "support for HSA does not implement "
1971*38fd1498Szrj 		     "bit field references such as %E", ref);
1972*38fd1498Szrj       goto out;
1973*38fd1498Szrj     }
1974*38fd1498Szrj 
1975*38fd1498Szrj   if (handled_component_p (ref))
1976*38fd1498Szrj     {
1977*38fd1498Szrj       machine_mode mode;
1978*38fd1498Szrj       int unsignedp, volatilep, preversep;
1979*38fd1498Szrj       poly_int64 pbitsize, pbitpos;
1980*38fd1498Szrj       tree new_ref;
1981*38fd1498Szrj 
1982*38fd1498Szrj       new_ref = get_inner_reference (ref, &pbitsize, &pbitpos, &varoffset,
1983*38fd1498Szrj 				     &mode, &unsignedp, &preversep,
1984*38fd1498Szrj 				     &volatilep);
1985*38fd1498Szrj       /* When this isn't true, the switch below will report an
1986*38fd1498Szrj 	 appropriate error.  */
1987*38fd1498Szrj       if (pbitsize.is_constant () && pbitpos.is_constant ())
1988*38fd1498Szrj 	{
1989*38fd1498Szrj 	  bitsize = pbitsize.to_constant ();
1990*38fd1498Szrj 	  bitpos = pbitpos.to_constant ();
1991*38fd1498Szrj 	  ref = new_ref;
1992*38fd1498Szrj 	  offset = bitpos;
1993*38fd1498Szrj 	  offset = wi::rshift (offset, LOG2_BITS_PER_UNIT, SIGNED);
1994*38fd1498Szrj 	}
1995*38fd1498Szrj     }
1996*38fd1498Szrj 
1997*38fd1498Szrj   switch (TREE_CODE (ref))
1998*38fd1498Szrj     {
1999*38fd1498Szrj     case ADDR_EXPR:
2000*38fd1498Szrj       {
2001*38fd1498Szrj 	addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE);
2002*38fd1498Szrj 	symbol = hsa_cfun->create_hsa_temporary (flat_addrtype);
2003*38fd1498Szrj 	hsa_op_reg *r = new hsa_op_reg (flat_addrtype);
2004*38fd1498Szrj 	gen_hsa_addr_insns (ref, r, hbb);
2005*38fd1498Szrj 	hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type,
2006*38fd1498Szrj 					    r, new hsa_op_address (symbol)));
2007*38fd1498Szrj 
2008*38fd1498Szrj 	break;
2009*38fd1498Szrj       }
2010*38fd1498Szrj     case SSA_NAME:
2011*38fd1498Szrj       {
2012*38fd1498Szrj 	addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE);
2013*38fd1498Szrj 	hsa_op_with_type *r = hsa_cfun->reg_for_gimple_ssa (ref);
2014*38fd1498Szrj 	if (r->m_type == BRIG_TYPE_B1)
2015*38fd1498Szrj 	  r = r->get_in_type (BRIG_TYPE_U32, hbb);
2016*38fd1498Szrj 	symbol = hsa_cfun->create_hsa_temporary (r->m_type);
2017*38fd1498Szrj 
2018*38fd1498Szrj 	hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type,
2019*38fd1498Szrj 					    r, new hsa_op_address (symbol)));
2020*38fd1498Szrj 
2021*38fd1498Szrj 	break;
2022*38fd1498Szrj       }
2023*38fd1498Szrj     case PARM_DECL:
2024*38fd1498Szrj     case VAR_DECL:
2025*38fd1498Szrj     case RESULT_DECL:
2026*38fd1498Szrj     case CONST_DECL:
2027*38fd1498Szrj       gcc_assert (!symbol);
2028*38fd1498Szrj       symbol = get_symbol_for_decl (ref);
2029*38fd1498Szrj       addrtype = hsa_get_segment_addr_type (symbol->m_segment);
2030*38fd1498Szrj       break;
2031*38fd1498Szrj 
2032*38fd1498Szrj     case MEM_REF:
2033*38fd1498Szrj       process_mem_base (TREE_OPERAND (ref, 0), &symbol, &addrtype, &reg,
2034*38fd1498Szrj 			&offset, hbb);
2035*38fd1498Szrj 
2036*38fd1498Szrj       if (!integer_zerop (TREE_OPERAND (ref, 1)))
2037*38fd1498Szrj 	offset += wi::to_offset (TREE_OPERAND (ref, 1));
2038*38fd1498Szrj       break;
2039*38fd1498Szrj 
2040*38fd1498Szrj     case TARGET_MEM_REF:
2041*38fd1498Szrj       process_mem_base (TMR_BASE (ref), &symbol, &addrtype, &reg, &offset, hbb);
2042*38fd1498Szrj       if (TMR_INDEX (ref))
2043*38fd1498Szrj 	{
2044*38fd1498Szrj 	  hsa_op_reg *disp1;
2045*38fd1498Szrj 	  hsa_op_base *idx = hsa_cfun->reg_for_gimple_ssa
2046*38fd1498Szrj 	    (TMR_INDEX (ref))->get_in_type (addrtype, hbb);
2047*38fd1498Szrj 	  if (TMR_STEP (ref) && !integer_onep (TMR_STEP (ref)))
2048*38fd1498Szrj 	    {
2049*38fd1498Szrj 	      disp1 = new hsa_op_reg (addrtype);
2050*38fd1498Szrj 	      hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_MUL,
2051*38fd1498Szrj 							 addrtype);
2052*38fd1498Szrj 
2053*38fd1498Szrj 	      /* As step must respect addrtype, we overwrite the type
2054*38fd1498Szrj 		 of an immediate value.  */
2055*38fd1498Szrj 	      hsa_op_immed *step = new hsa_op_immed (TMR_STEP (ref));
2056*38fd1498Szrj 	      step->m_type = addrtype;
2057*38fd1498Szrj 
2058*38fd1498Szrj 	      insn->set_op (0, disp1);
2059*38fd1498Szrj 	      insn->set_op (1, idx);
2060*38fd1498Szrj 	      insn->set_op (2, step);
2061*38fd1498Szrj 	      hbb->append_insn (insn);
2062*38fd1498Szrj 	    }
2063*38fd1498Szrj 	  else
2064*38fd1498Szrj 	    disp1 = as_a <hsa_op_reg *> (idx);
2065*38fd1498Szrj 	  reg = add_addr_regs_if_needed (reg, disp1, hbb);
2066*38fd1498Szrj 	}
2067*38fd1498Szrj       if (TMR_INDEX2 (ref))
2068*38fd1498Szrj 	{
2069*38fd1498Szrj 	  if (TREE_CODE (TMR_INDEX2 (ref)) == SSA_NAME)
2070*38fd1498Szrj 	    {
2071*38fd1498Szrj 	      hsa_op_base *disp2 = hsa_cfun->reg_for_gimple_ssa
2072*38fd1498Szrj 		(TMR_INDEX2 (ref))->get_in_type (addrtype, hbb);
2073*38fd1498Szrj 	      reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (disp2),
2074*38fd1498Szrj 					     hbb);
2075*38fd1498Szrj 	    }
2076*38fd1498Szrj 	  else if (TREE_CODE (TMR_INDEX2 (ref)) == INTEGER_CST)
2077*38fd1498Szrj 	    offset += wi::to_offset (TMR_INDEX2 (ref));
2078*38fd1498Szrj 	  else
2079*38fd1498Szrj 	    gcc_unreachable ();
2080*38fd1498Szrj 	}
2081*38fd1498Szrj       offset += wi::to_offset (TMR_OFFSET (ref));
2082*38fd1498Szrj       break;
2083*38fd1498Szrj     case FUNCTION_DECL:
2084*38fd1498Szrj       HSA_SORRY_AT (EXPR_LOCATION (origref),
2085*38fd1498Szrj 		    "support for HSA does not implement function pointers");
2086*38fd1498Szrj       goto out;
2087*38fd1498Szrj     default:
2088*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does "
2089*38fd1498Szrj 		     "not implement memory access to %E", origref);
2090*38fd1498Szrj       goto out;
2091*38fd1498Szrj     }
2092*38fd1498Szrj 
2093*38fd1498Szrj   if (varoffset)
2094*38fd1498Szrj     {
2095*38fd1498Szrj       if (TREE_CODE (varoffset) == INTEGER_CST)
2096*38fd1498Szrj 	offset += wi::to_offset (varoffset);
2097*38fd1498Szrj       else
2098*38fd1498Szrj 	{
2099*38fd1498Szrj 	  hsa_op_base *off_op = gen_address_calculation (varoffset, hbb,
2100*38fd1498Szrj 							 addrtype);
2101*38fd1498Szrj 	  reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (off_op),
2102*38fd1498Szrj 					 hbb);
2103*38fd1498Szrj 	}
2104*38fd1498Szrj     }
2105*38fd1498Szrj 
2106*38fd1498Szrj   gcc_checking_assert ((symbol
2107*38fd1498Szrj 			&& addrtype
2108*38fd1498Szrj 			== hsa_get_segment_addr_type (symbol->m_segment))
2109*38fd1498Szrj 		       || (!symbol
2110*38fd1498Szrj 			   && addrtype
2111*38fd1498Szrj 			   == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)));
2112*38fd1498Szrj out:
2113*38fd1498Szrj   HOST_WIDE_INT hwi_offset = offset.to_shwi ();
2114*38fd1498Szrj 
2115*38fd1498Szrj   /* Calculate remaining bitsize offset (if presented).  */
2116*38fd1498Szrj   bitpos %= BITS_PER_UNIT;
2117*38fd1498Szrj   /* If bitsize is a power of two that is greater or equal to BITS_PER_UNIT, it
2118*38fd1498Szrj      is not a reason to think this is a bit-field access.  */
2119*38fd1498Szrj   if (bitpos == 0
2120*38fd1498Szrj       && (bitsize >= BITS_PER_UNIT)
2121*38fd1498Szrj       && !(bitsize & (bitsize - 1)))
2122*38fd1498Szrj     bitsize = 0;
2123*38fd1498Szrj 
2124*38fd1498Szrj   if ((bitpos || bitsize) && (output_bitpos == NULL || output_bitsize == NULL))
2125*38fd1498Szrj     HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does not "
2126*38fd1498Szrj 		   "implement unhandled bit field reference such as %E", ref);
2127*38fd1498Szrj 
2128*38fd1498Szrj   if (output_bitsize != NULL && output_bitpos != NULL)
2129*38fd1498Szrj     {
2130*38fd1498Szrj       *output_bitsize = bitsize;
2131*38fd1498Szrj       *output_bitpos = bitpos;
2132*38fd1498Szrj     }
2133*38fd1498Szrj 
2134*38fd1498Szrj   return new hsa_op_address (symbol, reg, hwi_offset);
2135*38fd1498Szrj }
2136*38fd1498Szrj 
2137*38fd1498Szrj /* Generate HSA address operand for a given tree memory reference REF.  If
2138*38fd1498Szrj    instructions need to be created to calculate the address, they will be added
2139*38fd1498Szrj    to the end of HBB.  OUTPUT_ALIGN is alignment of the created address.  */
2140*38fd1498Szrj 
2141*38fd1498Szrj static hsa_op_address *
gen_hsa_addr_with_align(tree ref,hsa_bb * hbb,BrigAlignment8_t * output_align)2142*38fd1498Szrj gen_hsa_addr_with_align (tree ref, hsa_bb *hbb, BrigAlignment8_t *output_align)
2143*38fd1498Szrj {
2144*38fd1498Szrj   hsa_op_address *addr = gen_hsa_addr (ref, hbb);
2145*38fd1498Szrj   if (addr->m_reg || !addr->m_symbol)
2146*38fd1498Szrj     *output_align = hsa_object_alignment (ref);
2147*38fd1498Szrj   else
2148*38fd1498Szrj     {
2149*38fd1498Szrj       /* If the address consists only of a symbol and an offset, we
2150*38fd1498Szrj          compute the alignment ourselves to take into account any alignment
2151*38fd1498Szrj          promotions we might have done for the HSA symbol representation.  */
2152*38fd1498Szrj       unsigned align = hsa_byte_alignment (addr->m_symbol->m_align);
2153*38fd1498Szrj       unsigned misalign = addr->m_imm_offset & (align - 1);
2154*38fd1498Szrj       if (misalign)
2155*38fd1498Szrj         align = least_bit_hwi (misalign);
2156*38fd1498Szrj       *output_align = hsa_alignment_encoding (BITS_PER_UNIT * align);
2157*38fd1498Szrj     }
2158*38fd1498Szrj   return addr;
2159*38fd1498Szrj }
2160*38fd1498Szrj 
2161*38fd1498Szrj /* Generate HSA address for a function call argument of given TYPE.
2162*38fd1498Szrj    INDEX is used to generate corresponding name of the arguments.
2163*38fd1498Szrj    Special value -1 represents fact that result value is created.  */
2164*38fd1498Szrj 
2165*38fd1498Szrj static hsa_op_address *
gen_hsa_addr_for_arg(tree tree_type,int index)2166*38fd1498Szrj gen_hsa_addr_for_arg (tree tree_type, int index)
2167*38fd1498Szrj {
2168*38fd1498Szrj   hsa_symbol *sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
2169*38fd1498Szrj 				    BRIG_LINKAGE_ARG);
2170*38fd1498Szrj   sym->m_type = hsa_type_for_tree_type (tree_type, &sym->m_dim);
2171*38fd1498Szrj 
2172*38fd1498Szrj   if (index == -1) /* Function result.  */
2173*38fd1498Szrj     sym->m_name = "res";
2174*38fd1498Szrj   else /* Function call arguments.  */
2175*38fd1498Szrj     {
2176*38fd1498Szrj       sym->m_name = NULL;
2177*38fd1498Szrj       sym->m_name_number = index;
2178*38fd1498Szrj     }
2179*38fd1498Szrj 
2180*38fd1498Szrj   return new hsa_op_address (sym);
2181*38fd1498Szrj }
2182*38fd1498Szrj 
2183*38fd1498Szrj /* Generate HSA instructions that process all necessary conversions
2184*38fd1498Szrj    of an ADDR to flat addressing and place the result into DEST.
2185*38fd1498Szrj    Instructions are appended to HBB.  */
2186*38fd1498Szrj 
2187*38fd1498Szrj static void
convert_addr_to_flat_segment(hsa_op_address * addr,hsa_op_reg * dest,hsa_bb * hbb)2188*38fd1498Szrj convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest,
2189*38fd1498Szrj 			      hsa_bb *hbb)
2190*38fd1498Szrj {
2191*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
2192*38fd1498Szrj   insn->set_op (1, addr);
2193*38fd1498Szrj   if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
2194*38fd1498Szrj     {
2195*38fd1498Szrj       /* LDA produces segment-relative address, we need to convert
2196*38fd1498Szrj 	 it to the flat one.  */
2197*38fd1498Szrj       hsa_op_reg *tmp;
2198*38fd1498Szrj       tmp = new hsa_op_reg (hsa_get_segment_addr_type
2199*38fd1498Szrj 			    (addr->m_symbol->m_segment));
2200*38fd1498Szrj       hsa_insn_seg *seg;
2201*38fd1498Szrj       seg = new hsa_insn_seg (BRIG_OPCODE_STOF,
2202*38fd1498Szrj 			      hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT),
2203*38fd1498Szrj 			      tmp->m_type, addr->m_symbol->m_segment, dest,
2204*38fd1498Szrj 			      tmp);
2205*38fd1498Szrj 
2206*38fd1498Szrj       insn->set_op (0, tmp);
2207*38fd1498Szrj       insn->m_type = tmp->m_type;
2208*38fd1498Szrj       hbb->append_insn (insn);
2209*38fd1498Szrj       hbb->append_insn (seg);
2210*38fd1498Szrj     }
2211*38fd1498Szrj   else
2212*38fd1498Szrj     {
2213*38fd1498Szrj       insn->set_op (0, dest);
2214*38fd1498Szrj       insn->m_type = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
2215*38fd1498Szrj       hbb->append_insn (insn);
2216*38fd1498Szrj     }
2217*38fd1498Szrj }
2218*38fd1498Szrj 
2219*38fd1498Szrj /* Generate HSA instructions that calculate address of VAL including all
2220*38fd1498Szrj    necessary conversions to flat addressing and place the result into DEST.
2221*38fd1498Szrj    Instructions are appended to HBB.  */
2222*38fd1498Szrj 
2223*38fd1498Szrj static void
gen_hsa_addr_insns(tree val,hsa_op_reg * dest,hsa_bb * hbb)2224*38fd1498Szrj gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
2225*38fd1498Szrj {
2226*38fd1498Szrj   /* Handle cases like tmp = NULL, where we just emit a move instruction
2227*38fd1498Szrj      to a register.  */
2228*38fd1498Szrj   if (TREE_CODE (val) == INTEGER_CST)
2229*38fd1498Szrj     {
2230*38fd1498Szrj       hsa_op_immed *c = new hsa_op_immed (val);
2231*38fd1498Szrj       hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
2232*38fd1498Szrj 						 dest->m_type, dest, c);
2233*38fd1498Szrj       hbb->append_insn (insn);
2234*38fd1498Szrj       return;
2235*38fd1498Szrj     }
2236*38fd1498Szrj 
2237*38fd1498Szrj   hsa_op_address *addr;
2238*38fd1498Szrj 
2239*38fd1498Szrj   gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
2240*38fd1498Szrj   if (TREE_CODE (val) == ADDR_EXPR)
2241*38fd1498Szrj     val = TREE_OPERAND (val, 0);
2242*38fd1498Szrj   addr = gen_hsa_addr (val, hbb);
2243*38fd1498Szrj 
2244*38fd1498Szrj   if (TREE_CODE (val) == CONST_DECL
2245*38fd1498Szrj       && is_gimple_reg_type (TREE_TYPE (val)))
2246*38fd1498Szrj     {
2247*38fd1498Szrj       gcc_assert (addr->m_symbol
2248*38fd1498Szrj 		  && addr->m_symbol->m_segment == BRIG_SEGMENT_READONLY);
2249*38fd1498Szrj       /* CONST_DECLs are in readonly segment which however does not have
2250*38fd1498Szrj 	 addresses convertible to flat segments.  So copy it to a private one
2251*38fd1498Szrj 	 and take address of that.  */
2252*38fd1498Szrj       BrigType16_t csttype
2253*38fd1498Szrj 	= mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (val),
2254*38fd1498Szrj 							    false));
2255*38fd1498Szrj       hsa_op_reg *r = new hsa_op_reg (csttype);
2256*38fd1498Szrj       hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, csttype, r,
2257*38fd1498Szrj 					  new hsa_op_address (addr->m_symbol)));
2258*38fd1498Szrj       hsa_symbol *copysym = hsa_cfun->create_hsa_temporary (csttype);
2259*38fd1498Szrj       hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, csttype, r,
2260*38fd1498Szrj 					  new hsa_op_address (copysym)));
2261*38fd1498Szrj       addr->m_symbol = copysym;
2262*38fd1498Szrj     }
2263*38fd1498Szrj   else if (addr->m_symbol && addr->m_symbol->m_segment == BRIG_SEGMENT_READONLY)
2264*38fd1498Szrj     {
2265*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (val), "support for HSA does "
2266*38fd1498Szrj 		     "not implement taking addresses of complex "
2267*38fd1498Szrj 		     "CONST_DECLs such as %E", val);
2268*38fd1498Szrj       return;
2269*38fd1498Szrj     }
2270*38fd1498Szrj 
2271*38fd1498Szrj 
2272*38fd1498Szrj   convert_addr_to_flat_segment (addr, dest, hbb);
2273*38fd1498Szrj }
2274*38fd1498Szrj 
2275*38fd1498Szrj /* Return an HSA register or HSA immediate value operand corresponding to
2276*38fd1498Szrj    gimple operand OP.  */
2277*38fd1498Szrj 
2278*38fd1498Szrj static hsa_op_with_type *
hsa_reg_or_immed_for_gimple_op(tree op,hsa_bb * hbb)2279*38fd1498Szrj hsa_reg_or_immed_for_gimple_op (tree op, hsa_bb *hbb)
2280*38fd1498Szrj {
2281*38fd1498Szrj   hsa_op_reg *tmp;
2282*38fd1498Szrj 
2283*38fd1498Szrj   if (TREE_CODE (op) == SSA_NAME)
2284*38fd1498Szrj     tmp = hsa_cfun->reg_for_gimple_ssa (op);
2285*38fd1498Szrj   else if (!POINTER_TYPE_P (TREE_TYPE (op)))
2286*38fd1498Szrj     return new hsa_op_immed (op);
2287*38fd1498Szrj   else
2288*38fd1498Szrj     {
2289*38fd1498Szrj       tmp = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
2290*38fd1498Szrj       gen_hsa_addr_insns (op, tmp, hbb);
2291*38fd1498Szrj     }
2292*38fd1498Szrj   return tmp;
2293*38fd1498Szrj }
2294*38fd1498Szrj 
2295*38fd1498Szrj /* Create a simple movement instruction with register destination DEST and
2296*38fd1498Szrj    register or immediate source SRC and append it to the end of HBB.  */
2297*38fd1498Szrj 
2298*38fd1498Szrj void
hsa_build_append_simple_mov(hsa_op_reg * dest,hsa_op_base * src,hsa_bb * hbb)2299*38fd1498Szrj hsa_build_append_simple_mov (hsa_op_reg *dest, hsa_op_base *src, hsa_bb *hbb)
2300*38fd1498Szrj {
2301*38fd1498Szrj   /* Moves of packed data between registers need to adhere to the same type
2302*38fd1498Szrj      rules like when dealing with memory.  */
2303*38fd1498Szrj   BrigType16_t tp = mem_type_for_type (dest->m_type);
2304*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, tp, dest, src);
2305*38fd1498Szrj   hsa_fixup_mov_insn_type (insn);
2306*38fd1498Szrj   unsigned dest_size = hsa_type_bit_size (dest->m_type);
2307*38fd1498Szrj   if (hsa_op_reg *sreg = dyn_cast <hsa_op_reg *> (src))
2308*38fd1498Szrj     gcc_assert (dest_size == hsa_type_bit_size (sreg->m_type));
2309*38fd1498Szrj   else
2310*38fd1498Szrj     {
2311*38fd1498Szrj       unsigned imm_size
2312*38fd1498Szrj 	=  hsa_type_bit_size (as_a <hsa_op_immed *> (src)->m_type);
2313*38fd1498Szrj       gcc_assert ((dest_size == imm_size)
2314*38fd1498Szrj 		  /* Eventually < 32bit registers will be promoted to 32bit. */
2315*38fd1498Szrj 		  || (dest_size < 32 && imm_size == 32));
2316*38fd1498Szrj     }
2317*38fd1498Szrj   hbb->append_insn (insn);
2318*38fd1498Szrj }
2319*38fd1498Szrj 
2320*38fd1498Szrj /* Generate HSAIL instructions loading a bit field into register DEST.
2321*38fd1498Szrj    VALUE_REG is a register of a SSA name that is used in the bit field
2322*38fd1498Szrj    reference.  To identify a bit field BITPOS is offset to the loaded memory
2323*38fd1498Szrj    and BITSIZE is number of bits of the bit field.
2324*38fd1498Szrj    Add instructions to HBB.  */
2325*38fd1498Szrj 
2326*38fd1498Szrj static void
gen_hsa_insns_for_bitfield(hsa_op_reg * dest,hsa_op_reg * value_reg,HOST_WIDE_INT bitsize,HOST_WIDE_INT bitpos,hsa_bb * hbb)2327*38fd1498Szrj gen_hsa_insns_for_bitfield (hsa_op_reg *dest, hsa_op_reg *value_reg,
2328*38fd1498Szrj 			    HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
2329*38fd1498Szrj 			    hsa_bb *hbb)
2330*38fd1498Szrj {
2331*38fd1498Szrj   unsigned type_bitsize
2332*38fd1498Szrj     = hsa_type_bit_size (hsa_extend_inttype_to_32bit (dest->m_type));
2333*38fd1498Szrj   unsigned left_shift = type_bitsize - (bitsize + bitpos);
2334*38fd1498Szrj   unsigned right_shift = left_shift + bitpos;
2335*38fd1498Szrj 
2336*38fd1498Szrj   if (left_shift)
2337*38fd1498Szrj     {
2338*38fd1498Szrj       hsa_op_reg *value_reg_2
2339*38fd1498Szrj 	= new hsa_op_reg (hsa_extend_inttype_to_32bit (dest->m_type));
2340*38fd1498Szrj       hsa_op_immed *c = new hsa_op_immed (left_shift, BRIG_TYPE_U32);
2341*38fd1498Szrj 
2342*38fd1498Szrj       hsa_insn_basic *lshift
2343*38fd1498Szrj 	= new hsa_insn_basic (3, BRIG_OPCODE_SHL, value_reg_2->m_type,
2344*38fd1498Szrj 			      value_reg_2, value_reg, c);
2345*38fd1498Szrj 
2346*38fd1498Szrj       hbb->append_insn (lshift);
2347*38fd1498Szrj 
2348*38fd1498Szrj       value_reg = value_reg_2;
2349*38fd1498Szrj     }
2350*38fd1498Szrj 
2351*38fd1498Szrj   if (right_shift)
2352*38fd1498Szrj     {
2353*38fd1498Szrj       hsa_op_reg *value_reg_2
2354*38fd1498Szrj 	= new hsa_op_reg (hsa_extend_inttype_to_32bit (dest->m_type));
2355*38fd1498Szrj       hsa_op_immed *c = new hsa_op_immed (right_shift, BRIG_TYPE_U32);
2356*38fd1498Szrj 
2357*38fd1498Szrj       hsa_insn_basic *rshift
2358*38fd1498Szrj 	= new hsa_insn_basic (3, BRIG_OPCODE_SHR, value_reg_2->m_type,
2359*38fd1498Szrj 			      value_reg_2, value_reg, c);
2360*38fd1498Szrj 
2361*38fd1498Szrj       hbb->append_insn (rshift);
2362*38fd1498Szrj 
2363*38fd1498Szrj       value_reg = value_reg_2;
2364*38fd1498Szrj     }
2365*38fd1498Szrj 
2366*38fd1498Szrj     hsa_insn_basic *assignment
2367*38fd1498Szrj       = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, NULL, value_reg);
2368*38fd1498Szrj     hsa_fixup_mov_insn_type (assignment);
2369*38fd1498Szrj     hbb->append_insn (assignment);
2370*38fd1498Szrj     assignment->set_output_in_type (dest, 0, hbb);
2371*38fd1498Szrj }
2372*38fd1498Szrj 
2373*38fd1498Szrj 
2374*38fd1498Szrj /* Generate HSAIL instructions loading a bit field into register DEST.  ADDR is
2375*38fd1498Szrj    prepared memory address which is used to load the bit field.  To identify a
2376*38fd1498Szrj    bit field BITPOS is offset to the loaded memory and BITSIZE is number of
2377*38fd1498Szrj    bits of the bit field.  Add instructions to HBB.  Load must be performed in
2378*38fd1498Szrj    alignment ALIGN.  */
2379*38fd1498Szrj 
2380*38fd1498Szrj static void
gen_hsa_insns_for_bitfield_load(hsa_op_reg * dest,hsa_op_address * addr,HOST_WIDE_INT bitsize,HOST_WIDE_INT bitpos,hsa_bb * hbb,BrigAlignment8_t align)2381*38fd1498Szrj gen_hsa_insns_for_bitfield_load (hsa_op_reg *dest, hsa_op_address *addr,
2382*38fd1498Szrj 				 HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
2383*38fd1498Szrj 				 hsa_bb *hbb, BrigAlignment8_t align)
2384*38fd1498Szrj {
2385*38fd1498Szrj   hsa_op_reg *value_reg = new hsa_op_reg (dest->m_type);
2386*38fd1498Szrj   hsa_insn_mem *mem
2387*38fd1498Szrj   = new hsa_insn_mem (BRIG_OPCODE_LD,
2388*38fd1498Szrj 		      hsa_extend_inttype_to_32bit (dest->m_type),
2389*38fd1498Szrj 		      value_reg, addr);
2390*38fd1498Szrj   mem->set_align (align);
2391*38fd1498Szrj   hbb->append_insn (mem);
2392*38fd1498Szrj   gen_hsa_insns_for_bitfield (dest, value_reg, bitsize, bitpos, hbb);
2393*38fd1498Szrj }
2394*38fd1498Szrj 
2395*38fd1498Szrj /* Return the alignment of base memory accesses we issue to perform bit-field
2396*38fd1498Szrj    memory access REF.  */
2397*38fd1498Szrj 
2398*38fd1498Szrj static BrigAlignment8_t
hsa_bitmemref_alignment(tree ref)2399*38fd1498Szrj hsa_bitmemref_alignment (tree ref)
2400*38fd1498Szrj {
2401*38fd1498Szrj   unsigned HOST_WIDE_INT bit_offset = 0;
2402*38fd1498Szrj 
2403*38fd1498Szrj   while (true)
2404*38fd1498Szrj     {
2405*38fd1498Szrj       if (TREE_CODE (ref) == BIT_FIELD_REF)
2406*38fd1498Szrj 	{
2407*38fd1498Szrj 	  if (!tree_fits_uhwi_p (TREE_OPERAND (ref, 2)))
2408*38fd1498Szrj 	    return BRIG_ALIGNMENT_1;
2409*38fd1498Szrj 	  bit_offset += tree_to_uhwi (TREE_OPERAND (ref, 2));
2410*38fd1498Szrj 	}
2411*38fd1498Szrj       else if (TREE_CODE (ref) == COMPONENT_REF
2412*38fd1498Szrj 	       && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
2413*38fd1498Szrj 	bit_offset += int_bit_position (TREE_OPERAND (ref, 1));
2414*38fd1498Szrj       else
2415*38fd1498Szrj 	break;
2416*38fd1498Szrj       ref = TREE_OPERAND (ref, 0);
2417*38fd1498Szrj     }
2418*38fd1498Szrj 
2419*38fd1498Szrj   unsigned HOST_WIDE_INT bits = bit_offset % BITS_PER_UNIT;
2420*38fd1498Szrj   unsigned HOST_WIDE_INT byte_bits = bit_offset - bits;
2421*38fd1498Szrj   BrigAlignment8_t base = hsa_object_alignment (ref);
2422*38fd1498Szrj   if (byte_bits == 0)
2423*38fd1498Szrj     return base;
2424*38fd1498Szrj   return MIN (base, hsa_alignment_encoding (least_bit_hwi (byte_bits)));
2425*38fd1498Szrj }
2426*38fd1498Szrj 
2427*38fd1498Szrj /* Generate HSAIL instructions loading something into register DEST.  RHS is
2428*38fd1498Szrj    tree representation of the loaded data, which are loaded as type TYPE.  Add
2429*38fd1498Szrj    instructions to HBB.  */
2430*38fd1498Szrj 
2431*38fd1498Szrj static void
gen_hsa_insns_for_load(hsa_op_reg * dest,tree rhs,tree type,hsa_bb * hbb)2432*38fd1498Szrj gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb)
2433*38fd1498Szrj {
2434*38fd1498Szrj   /* The destination SSA name will give us the type.  */
2435*38fd1498Szrj   if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR)
2436*38fd1498Szrj     rhs = TREE_OPERAND (rhs, 0);
2437*38fd1498Szrj 
2438*38fd1498Szrj   if (TREE_CODE (rhs) == SSA_NAME)
2439*38fd1498Szrj     {
2440*38fd1498Szrj       hsa_op_reg *src = hsa_cfun->reg_for_gimple_ssa (rhs);
2441*38fd1498Szrj       hsa_build_append_simple_mov (dest, src, hbb);
2442*38fd1498Szrj     }
2443*38fd1498Szrj   else if (is_gimple_min_invariant (rhs)
2444*38fd1498Szrj 	   || TREE_CODE (rhs) == ADDR_EXPR)
2445*38fd1498Szrj     {
2446*38fd1498Szrj       if (POINTER_TYPE_P (TREE_TYPE (rhs)))
2447*38fd1498Szrj 	{
2448*38fd1498Szrj 	  if (dest->m_type != hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT))
2449*38fd1498Szrj 	    {
2450*38fd1498Szrj 	      HSA_SORRY_ATV (EXPR_LOCATION (rhs),
2451*38fd1498Szrj 			     "support for HSA does not implement conversion "
2452*38fd1498Szrj 			     "of %E to the requested non-pointer type.", rhs);
2453*38fd1498Szrj 	      return;
2454*38fd1498Szrj 	    }
2455*38fd1498Szrj 
2456*38fd1498Szrj 	  gen_hsa_addr_insns (rhs, dest, hbb);
2457*38fd1498Szrj 	}
2458*38fd1498Szrj       else if (TREE_CODE (rhs) == COMPLEX_CST)
2459*38fd1498Szrj 	{
2460*38fd1498Szrj 	  hsa_op_immed *real_part = new hsa_op_immed (TREE_REALPART (rhs));
2461*38fd1498Szrj 	  hsa_op_immed *imag_part = new hsa_op_immed (TREE_IMAGPART (rhs));
2462*38fd1498Szrj 
2463*38fd1498Szrj 	  hsa_op_reg *real_part_reg
2464*38fd1498Szrj 	    = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type),
2465*38fd1498Szrj 							     true));
2466*38fd1498Szrj 	  hsa_op_reg *imag_part_reg
2467*38fd1498Szrj 	    = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type),
2468*38fd1498Szrj 							     true));
2469*38fd1498Szrj 
2470*38fd1498Szrj 	  hsa_build_append_simple_mov (real_part_reg, real_part, hbb);
2471*38fd1498Szrj 	  hsa_build_append_simple_mov (imag_part_reg, imag_part, hbb);
2472*38fd1498Szrj 
2473*38fd1498Szrj 	  BrigType16_t src_type = hsa_bittype_for_type (real_part_reg->m_type);
2474*38fd1498Szrj 
2475*38fd1498Szrj 	  hsa_insn_packed *insn
2476*38fd1498Szrj 	    = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type,
2477*38fd1498Szrj 				   src_type, dest, real_part_reg,
2478*38fd1498Szrj 				   imag_part_reg);
2479*38fd1498Szrj 	  hbb->append_insn (insn);
2480*38fd1498Szrj 	}
2481*38fd1498Szrj       else
2482*38fd1498Szrj 	{
2483*38fd1498Szrj 	  hsa_op_immed *imm = new hsa_op_immed (rhs);
2484*38fd1498Szrj 	  hsa_build_append_simple_mov (dest, imm, hbb);
2485*38fd1498Szrj 	}
2486*38fd1498Szrj     }
2487*38fd1498Szrj   else if (TREE_CODE (rhs) == REALPART_EXPR || TREE_CODE (rhs) == IMAGPART_EXPR)
2488*38fd1498Szrj     {
2489*38fd1498Szrj       tree pack_type = TREE_TYPE (TREE_OPERAND (rhs, 0));
2490*38fd1498Szrj 
2491*38fd1498Szrj       hsa_op_reg *packed_reg
2492*38fd1498Szrj 	= new hsa_op_reg (hsa_type_for_scalar_tree_type (pack_type, true));
2493*38fd1498Szrj 
2494*38fd1498Szrj       tree complex_rhs = TREE_OPERAND (rhs, 0);
2495*38fd1498Szrj       gen_hsa_insns_for_load (packed_reg, complex_rhs, TREE_TYPE (complex_rhs),
2496*38fd1498Szrj 			      hbb);
2497*38fd1498Szrj 
2498*38fd1498Szrj       hsa_op_reg *real_reg
2499*38fd1498Szrj 	= new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true));
2500*38fd1498Szrj 
2501*38fd1498Szrj       hsa_op_reg *imag_reg
2502*38fd1498Szrj 	= new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true));
2503*38fd1498Szrj 
2504*38fd1498Szrj       BrigKind16_t brig_type = packed_reg->m_type;
2505*38fd1498Szrj       hsa_insn_packed *packed
2506*38fd1498Szrj 	= new hsa_insn_packed (3, BRIG_OPCODE_EXPAND,
2507*38fd1498Szrj 			       hsa_bittype_for_type (real_reg->m_type),
2508*38fd1498Szrj 	 brig_type, real_reg, imag_reg, packed_reg);
2509*38fd1498Szrj 
2510*38fd1498Szrj       hbb->append_insn (packed);
2511*38fd1498Szrj 
2512*38fd1498Szrj       hsa_op_reg *source = TREE_CODE (rhs) == REALPART_EXPR ?
2513*38fd1498Szrj 	real_reg : imag_reg;
2514*38fd1498Szrj 
2515*38fd1498Szrj       hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
2516*38fd1498Szrj 						 dest->m_type, NULL, source);
2517*38fd1498Szrj       hsa_fixup_mov_insn_type (insn);
2518*38fd1498Szrj       hbb->append_insn (insn);
2519*38fd1498Szrj       insn->set_output_in_type (dest, 0, hbb);
2520*38fd1498Szrj     }
2521*38fd1498Szrj   else if (TREE_CODE (rhs) == BIT_FIELD_REF
2522*38fd1498Szrj 	   && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME)
2523*38fd1498Szrj     {
2524*38fd1498Szrj       tree ssa_name = TREE_OPERAND (rhs, 0);
2525*38fd1498Szrj       HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs, 1));
2526*38fd1498Szrj       HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs, 2));
2527*38fd1498Szrj 
2528*38fd1498Szrj       hsa_op_reg *imm_value = hsa_cfun->reg_for_gimple_ssa (ssa_name);
2529*38fd1498Szrj       gen_hsa_insns_for_bitfield (dest, imm_value, bitsize, bitpos, hbb);
2530*38fd1498Szrj     }
2531*38fd1498Szrj   else if (DECL_P (rhs) || TREE_CODE (rhs) == MEM_REF
2532*38fd1498Szrj 	   || TREE_CODE (rhs) == TARGET_MEM_REF
2533*38fd1498Szrj 	   || handled_component_p (rhs))
2534*38fd1498Szrj     {
2535*38fd1498Szrj       HOST_WIDE_INT bitsize, bitpos;
2536*38fd1498Szrj 
2537*38fd1498Szrj       /* Load from memory.  */
2538*38fd1498Szrj       hsa_op_address *addr;
2539*38fd1498Szrj       addr = gen_hsa_addr (rhs, hbb, &bitsize, &bitpos);
2540*38fd1498Szrj 
2541*38fd1498Szrj       /* Handle load of a bit field.  */
2542*38fd1498Szrj       if (bitsize > 64)
2543*38fd1498Szrj 	{
2544*38fd1498Szrj 	  HSA_SORRY_AT (EXPR_LOCATION (rhs),
2545*38fd1498Szrj 			"support for HSA does not implement load from a bit "
2546*38fd1498Szrj 			"field bigger than 64 bits");
2547*38fd1498Szrj 	  return;
2548*38fd1498Szrj 	}
2549*38fd1498Szrj 
2550*38fd1498Szrj       if (bitsize || bitpos)
2551*38fd1498Szrj 	gen_hsa_insns_for_bitfield_load (dest, addr, bitsize, bitpos, hbb,
2552*38fd1498Szrj 					 hsa_bitmemref_alignment (rhs));
2553*38fd1498Szrj       else
2554*38fd1498Szrj 	{
2555*38fd1498Szrj 	  BrigType16_t mtype;
2556*38fd1498Szrj 	  /* Not dest->m_type, that's possibly extended.  */
2557*38fd1498Szrj 	  mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (type,
2558*38fd1498Szrj 								    false));
2559*38fd1498Szrj 	  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dest,
2560*38fd1498Szrj 						addr);
2561*38fd1498Szrj 	  mem->set_align (hsa_object_alignment (rhs));
2562*38fd1498Szrj 	  hbb->append_insn (mem);
2563*38fd1498Szrj 	}
2564*38fd1498Szrj     }
2565*38fd1498Szrj   else
2566*38fd1498Szrj     HSA_SORRY_ATV (EXPR_LOCATION (rhs),
2567*38fd1498Szrj 		   "support for HSA does not implement loading "
2568*38fd1498Szrj 		   "of expression %E",
2569*38fd1498Szrj 		   rhs);
2570*38fd1498Szrj }
2571*38fd1498Szrj 
2572*38fd1498Szrj /* Return number of bits necessary for representation of a bit field,
2573*38fd1498Szrj    starting at BITPOS with size of BITSIZE.  */
2574*38fd1498Szrj 
2575*38fd1498Szrj static unsigned
get_bitfield_size(unsigned bitpos,unsigned bitsize)2576*38fd1498Szrj get_bitfield_size (unsigned bitpos, unsigned bitsize)
2577*38fd1498Szrj {
2578*38fd1498Szrj   unsigned s = bitpos + bitsize;
2579*38fd1498Szrj   unsigned sizes[] = {8, 16, 32, 64};
2580*38fd1498Szrj 
2581*38fd1498Szrj   for (unsigned i = 0; i < 4; i++)
2582*38fd1498Szrj     if (s <= sizes[i])
2583*38fd1498Szrj       return sizes[i];
2584*38fd1498Szrj 
2585*38fd1498Szrj   gcc_unreachable ();
2586*38fd1498Szrj   return 0;
2587*38fd1498Szrj }
2588*38fd1498Szrj 
2589*38fd1498Szrj /* Generate HSAIL instructions storing into memory.  LHS is the destination of
2590*38fd1498Szrj    the store, SRC is the source operand.  Add instructions to HBB.  */
2591*38fd1498Szrj 
2592*38fd1498Szrj static void
gen_hsa_insns_for_store(tree lhs,hsa_op_base * src,hsa_bb * hbb)2593*38fd1498Szrj gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb)
2594*38fd1498Szrj {
2595*38fd1498Szrj   HOST_WIDE_INT bitsize = 0, bitpos = 0;
2596*38fd1498Szrj   BrigAlignment8_t req_align;
2597*38fd1498Szrj   BrigType16_t mtype;
2598*38fd1498Szrj   mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
2599*38fd1498Szrj 							    false));
2600*38fd1498Szrj   hsa_op_address *addr;
2601*38fd1498Szrj   addr = gen_hsa_addr (lhs, hbb, &bitsize, &bitpos);
2602*38fd1498Szrj 
2603*38fd1498Szrj   /* Handle store to a bit field.  */
2604*38fd1498Szrj   if (bitsize > 64)
2605*38fd1498Szrj     {
2606*38fd1498Szrj       HSA_SORRY_AT (EXPR_LOCATION (lhs),
2607*38fd1498Szrj 		    "support for HSA does not implement store to a bit field "
2608*38fd1498Szrj 		    "bigger than 64 bits");
2609*38fd1498Szrj       return;
2610*38fd1498Szrj     }
2611*38fd1498Szrj 
2612*38fd1498Szrj   unsigned type_bitsize = get_bitfield_size (bitpos, bitsize);
2613*38fd1498Szrj 
2614*38fd1498Szrj   /* HSAIL does not support MOV insn with 16-bits integers.  */
2615*38fd1498Szrj   if (type_bitsize < 32)
2616*38fd1498Szrj     type_bitsize = 32;
2617*38fd1498Szrj 
2618*38fd1498Szrj   if (bitpos || (bitsize && type_bitsize != bitsize))
2619*38fd1498Szrj     {
2620*38fd1498Szrj       unsigned HOST_WIDE_INT mask = 0;
2621*38fd1498Szrj       BrigType16_t mem_type
2622*38fd1498Szrj 	= get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT,
2623*38fd1498Szrj 				     !TYPE_UNSIGNED (TREE_TYPE (lhs)));
2624*38fd1498Szrj 
2625*38fd1498Szrj       for (unsigned i = 0; i < type_bitsize; i++)
2626*38fd1498Szrj 	if (i < bitpos || i >= bitpos + bitsize)
2627*38fd1498Szrj 	  mask |= ((unsigned HOST_WIDE_INT)1 << i);
2628*38fd1498Szrj 
2629*38fd1498Szrj       hsa_op_reg *value_reg = new hsa_op_reg (mem_type);
2630*38fd1498Szrj 
2631*38fd1498Szrj       req_align = hsa_bitmemref_alignment (lhs);
2632*38fd1498Szrj       /* Load value from memory.  */
2633*38fd1498Szrj       hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mem_type,
2634*38fd1498Szrj 					    value_reg, addr);
2635*38fd1498Szrj       mem->set_align (req_align);
2636*38fd1498Szrj       hbb->append_insn (mem);
2637*38fd1498Szrj 
2638*38fd1498Szrj       /* AND the loaded value with prepared mask.  */
2639*38fd1498Szrj       hsa_op_reg *cleared_reg = new hsa_op_reg (mem_type);
2640*38fd1498Szrj 
2641*38fd1498Szrj       BrigType16_t t
2642*38fd1498Szrj 	= get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT, false);
2643*38fd1498Szrj       hsa_op_immed *c = new hsa_op_immed (mask, t);
2644*38fd1498Szrj 
2645*38fd1498Szrj       hsa_insn_basic *clearing
2646*38fd1498Szrj 	= new hsa_insn_basic (3, BRIG_OPCODE_AND, mem_type, cleared_reg,
2647*38fd1498Szrj 			      value_reg, c);
2648*38fd1498Szrj       hbb->append_insn (clearing);
2649*38fd1498Szrj 
2650*38fd1498Szrj       /* Shift to left a value that is going to be stored.  */
2651*38fd1498Szrj       hsa_op_reg *new_value_reg = new hsa_op_reg (mem_type);
2652*38fd1498Szrj 
2653*38fd1498Szrj       hsa_insn_basic *basic = new hsa_insn_basic (2, BRIG_OPCODE_MOV, mem_type,
2654*38fd1498Szrj 						  new_value_reg, src);
2655*38fd1498Szrj       hsa_fixup_mov_insn_type (basic);
2656*38fd1498Szrj       hbb->append_insn (basic);
2657*38fd1498Szrj 
2658*38fd1498Szrj       if (bitpos)
2659*38fd1498Szrj 	{
2660*38fd1498Szrj 	  hsa_op_reg *shifted_value_reg = new hsa_op_reg (mem_type);
2661*38fd1498Szrj 	  c = new hsa_op_immed (bitpos, BRIG_TYPE_U32);
2662*38fd1498Szrj 
2663*38fd1498Szrj 	  hsa_insn_basic *basic
2664*38fd1498Szrj 	    = new hsa_insn_basic (3, BRIG_OPCODE_SHL, mem_type,
2665*38fd1498Szrj 				  shifted_value_reg, new_value_reg, c);
2666*38fd1498Szrj 	  hbb->append_insn (basic);
2667*38fd1498Szrj 
2668*38fd1498Szrj 	  new_value_reg = shifted_value_reg;
2669*38fd1498Szrj 	}
2670*38fd1498Szrj 
2671*38fd1498Szrj       /* OR the prepared value with prepared chunk loaded from memory.  */
2672*38fd1498Szrj       hsa_op_reg *prepared_reg= new hsa_op_reg (mem_type);
2673*38fd1498Szrj       basic = new hsa_insn_basic (3, BRIG_OPCODE_OR, mem_type, prepared_reg,
2674*38fd1498Szrj 				  new_value_reg, cleared_reg);
2675*38fd1498Szrj       hbb->append_insn (basic);
2676*38fd1498Szrj 
2677*38fd1498Szrj       src = prepared_reg;
2678*38fd1498Szrj       mtype = mem_type;
2679*38fd1498Szrj     }
2680*38fd1498Szrj   else
2681*38fd1498Szrj     req_align = hsa_object_alignment (lhs);
2682*38fd1498Szrj 
2683*38fd1498Szrj   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, addr);
2684*38fd1498Szrj   mem->set_align (req_align);
2685*38fd1498Szrj 
2686*38fd1498Szrj   /* The HSAIL verifier has another constraint: if the source is an immediate
2687*38fd1498Szrj      then it must match the destination type.  If it's a register the low bits
2688*38fd1498Szrj      will be used for sub-word stores.  We're always allocating new operands so
2689*38fd1498Szrj      we can modify the above in place.  */
2690*38fd1498Szrj   if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src))
2691*38fd1498Szrj     {
2692*38fd1498Szrj       if (!hsa_type_packed_p (imm->m_type))
2693*38fd1498Szrj 	imm->m_type = mem->m_type;
2694*38fd1498Szrj       else
2695*38fd1498Szrj 	{
2696*38fd1498Szrj 	  /* ...and all vector immediates apparently need to be vectors of
2697*38fd1498Szrj 	     unsigned bytes.  */
2698*38fd1498Szrj 	  unsigned bs = hsa_type_bit_size (imm->m_type);
2699*38fd1498Szrj 	  gcc_assert (bs == hsa_type_bit_size (mem->m_type));
2700*38fd1498Szrj 	  switch (bs)
2701*38fd1498Szrj 	    {
2702*38fd1498Szrj 	    case 32:
2703*38fd1498Szrj 	      imm->m_type = BRIG_TYPE_U8X4;
2704*38fd1498Szrj 	      break;
2705*38fd1498Szrj 	    case 64:
2706*38fd1498Szrj 	      imm->m_type = BRIG_TYPE_U8X8;
2707*38fd1498Szrj 	      break;
2708*38fd1498Szrj 	    case 128:
2709*38fd1498Szrj 	      imm->m_type = BRIG_TYPE_U8X16;
2710*38fd1498Szrj 	      break;
2711*38fd1498Szrj 	    default:
2712*38fd1498Szrj 	      gcc_unreachable ();
2713*38fd1498Szrj 	    }
2714*38fd1498Szrj 	}
2715*38fd1498Szrj     }
2716*38fd1498Szrj 
2717*38fd1498Szrj   hbb->append_insn (mem);
2718*38fd1498Szrj }
2719*38fd1498Szrj 
2720*38fd1498Szrj /* Generate memory copy instructions that are going to be used
2721*38fd1498Szrj    for copying a SRC memory to TARGET memory,
2722*38fd1498Szrj    represented by pointer in a register.  MIN_ALIGN is minimal alignment
2723*38fd1498Szrj    of provided HSA addresses.  */
2724*38fd1498Szrj 
2725*38fd1498Szrj static void
gen_hsa_memory_copy(hsa_bb * hbb,hsa_op_address * target,hsa_op_address * src,unsigned size,BrigAlignment8_t min_align)2726*38fd1498Szrj gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src,
2727*38fd1498Szrj 		     unsigned size, BrigAlignment8_t min_align)
2728*38fd1498Szrj {
2729*38fd1498Szrj   hsa_op_address *addr;
2730*38fd1498Szrj   hsa_insn_mem *mem;
2731*38fd1498Szrj 
2732*38fd1498Szrj   unsigned offset = 0;
2733*38fd1498Szrj   unsigned min_byte_align = hsa_byte_alignment (min_align);
2734*38fd1498Szrj 
2735*38fd1498Szrj   while (size)
2736*38fd1498Szrj     {
2737*38fd1498Szrj       unsigned s;
2738*38fd1498Szrj       if (size >= 8)
2739*38fd1498Szrj 	s = 8;
2740*38fd1498Szrj       else if (size >= 4)
2741*38fd1498Szrj 	s = 4;
2742*38fd1498Szrj       else if (size >= 2)
2743*38fd1498Szrj 	s = 2;
2744*38fd1498Szrj       else
2745*38fd1498Szrj 	s = 1;
2746*38fd1498Szrj 
2747*38fd1498Szrj       if (s > min_byte_align)
2748*38fd1498Szrj 	s = min_byte_align;
2749*38fd1498Szrj 
2750*38fd1498Szrj       BrigType16_t t = get_integer_type_by_bytes (s, false);
2751*38fd1498Szrj 
2752*38fd1498Szrj       hsa_op_reg *tmp = new hsa_op_reg (t);
2753*38fd1498Szrj       addr = new hsa_op_address (src->m_symbol, src->m_reg,
2754*38fd1498Szrj 				 src->m_imm_offset + offset);
2755*38fd1498Szrj       mem = new hsa_insn_mem (BRIG_OPCODE_LD, t, tmp, addr);
2756*38fd1498Szrj       hbb->append_insn (mem);
2757*38fd1498Szrj 
2758*38fd1498Szrj       addr = new hsa_op_address (target->m_symbol, target->m_reg,
2759*38fd1498Szrj 				 target->m_imm_offset + offset);
2760*38fd1498Szrj       mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, tmp, addr);
2761*38fd1498Szrj       hbb->append_insn (mem);
2762*38fd1498Szrj       offset += s;
2763*38fd1498Szrj       size -= s;
2764*38fd1498Szrj     }
2765*38fd1498Szrj }
2766*38fd1498Szrj 
2767*38fd1498Szrj /* Create a memset mask that is created by copying a CONSTANT byte value
2768*38fd1498Szrj    to an integer of BYTE_SIZE bytes.  */
2769*38fd1498Szrj 
2770*38fd1498Szrj static unsigned HOST_WIDE_INT
build_memset_value(unsigned HOST_WIDE_INT constant,unsigned byte_size)2771*38fd1498Szrj build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size)
2772*38fd1498Szrj {
2773*38fd1498Szrj   if (constant == 0)
2774*38fd1498Szrj     return 0;
2775*38fd1498Szrj 
2776*38fd1498Szrj   HOST_WIDE_INT v = constant;
2777*38fd1498Szrj 
2778*38fd1498Szrj   for (unsigned i = 1; i < byte_size; i++)
2779*38fd1498Szrj     v |= constant << (8 * i);
2780*38fd1498Szrj 
2781*38fd1498Szrj   return v;
2782*38fd1498Szrj }
2783*38fd1498Szrj 
2784*38fd1498Szrj /* Generate memory set instructions that are going to be used
2785*38fd1498Szrj    for setting a CONSTANT byte value to TARGET memory of SIZE bytes.
2786*38fd1498Szrj    MIN_ALIGN is minimal alignment of provided HSA addresses.  */
2787*38fd1498Szrj 
2788*38fd1498Szrj static void
gen_hsa_memory_set(hsa_bb * hbb,hsa_op_address * target,unsigned HOST_WIDE_INT constant,unsigned size,BrigAlignment8_t min_align)2789*38fd1498Szrj gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
2790*38fd1498Szrj 		    unsigned HOST_WIDE_INT constant,
2791*38fd1498Szrj 		    unsigned size, BrigAlignment8_t min_align)
2792*38fd1498Szrj {
2793*38fd1498Szrj   hsa_op_address *addr;
2794*38fd1498Szrj   hsa_insn_mem *mem;
2795*38fd1498Szrj 
2796*38fd1498Szrj   unsigned offset = 0;
2797*38fd1498Szrj   unsigned min_byte_align = hsa_byte_alignment (min_align);
2798*38fd1498Szrj 
2799*38fd1498Szrj   while (size)
2800*38fd1498Szrj     {
2801*38fd1498Szrj       unsigned s;
2802*38fd1498Szrj       if (size >= 8)
2803*38fd1498Szrj 	s = 8;
2804*38fd1498Szrj       else if (size >= 4)
2805*38fd1498Szrj 	s = 4;
2806*38fd1498Szrj       else if (size >= 2)
2807*38fd1498Szrj 	s = 2;
2808*38fd1498Szrj       else
2809*38fd1498Szrj 	s = 1;
2810*38fd1498Szrj 
2811*38fd1498Szrj       if (s > min_byte_align)
2812*38fd1498Szrj 	s = min_byte_align;
2813*38fd1498Szrj 
2814*38fd1498Szrj       addr = new hsa_op_address (target->m_symbol, target->m_reg,
2815*38fd1498Szrj 				 target->m_imm_offset + offset);
2816*38fd1498Szrj 
2817*38fd1498Szrj       BrigType16_t t = get_integer_type_by_bytes (s, false);
2818*38fd1498Szrj       HOST_WIDE_INT c = build_memset_value (constant, s);
2819*38fd1498Szrj 
2820*38fd1498Szrj       mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (c, t),
2821*38fd1498Szrj 			      addr);
2822*38fd1498Szrj       hbb->append_insn (mem);
2823*38fd1498Szrj       offset += s;
2824*38fd1498Szrj       size -= s;
2825*38fd1498Szrj     }
2826*38fd1498Szrj }
2827*38fd1498Szrj 
2828*38fd1498Szrj /* Generate HSAIL instructions for a single assignment
2829*38fd1498Szrj    of an empty constructor to an ADDR_LHS.  Constructor is passed as a
2830*38fd1498Szrj    tree RHS and all instructions are appended to HBB.  ALIGN is
2831*38fd1498Szrj    alignment of the address.  */
2832*38fd1498Szrj 
2833*38fd1498Szrj void
gen_hsa_ctor_assignment(hsa_op_address * addr_lhs,tree rhs,hsa_bb * hbb,BrigAlignment8_t align)2834*38fd1498Szrj gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb,
2835*38fd1498Szrj 			 BrigAlignment8_t align)
2836*38fd1498Szrj {
2837*38fd1498Szrj   if (CONSTRUCTOR_NELTS (rhs))
2838*38fd1498Szrj     {
2839*38fd1498Szrj       HSA_SORRY_AT (EXPR_LOCATION (rhs),
2840*38fd1498Szrj 		    "support for HSA does not implement load from constructor");
2841*38fd1498Szrj       return;
2842*38fd1498Szrj     }
2843*38fd1498Szrj 
2844*38fd1498Szrj   unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
2845*38fd1498Szrj   gen_hsa_memory_set (hbb, addr_lhs, 0, size, align);
2846*38fd1498Szrj }
2847*38fd1498Szrj 
2848*38fd1498Szrj /* Generate HSA instructions for a single assignment of RHS to LHS.
2849*38fd1498Szrj    HBB is the basic block they will be appended to.  */
2850*38fd1498Szrj 
2851*38fd1498Szrj static void
gen_hsa_insns_for_single_assignment(tree lhs,tree rhs,hsa_bb * hbb)2852*38fd1498Szrj gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb)
2853*38fd1498Szrj {
2854*38fd1498Szrj   if (TREE_CODE (lhs) == SSA_NAME)
2855*38fd1498Szrj     {
2856*38fd1498Szrj       hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
2857*38fd1498Szrj       if (hsa_seen_error ())
2858*38fd1498Szrj 	return;
2859*38fd1498Szrj 
2860*38fd1498Szrj       gen_hsa_insns_for_load (dest, rhs, TREE_TYPE (lhs), hbb);
2861*38fd1498Szrj     }
2862*38fd1498Szrj   else if (TREE_CODE (rhs) == SSA_NAME
2863*38fd1498Szrj 	   || (is_gimple_min_invariant (rhs) && TREE_CODE (rhs) != STRING_CST))
2864*38fd1498Szrj     {
2865*38fd1498Szrj       /* Store to memory.  */
2866*38fd1498Szrj       hsa_op_base *src = hsa_reg_or_immed_for_gimple_op (rhs, hbb);
2867*38fd1498Szrj       if (hsa_seen_error ())
2868*38fd1498Szrj 	return;
2869*38fd1498Szrj 
2870*38fd1498Szrj       gen_hsa_insns_for_store (lhs, src, hbb);
2871*38fd1498Szrj     }
2872*38fd1498Szrj   else
2873*38fd1498Szrj     {
2874*38fd1498Szrj       BrigAlignment8_t lhs_align;
2875*38fd1498Szrj       hsa_op_address *addr_lhs = gen_hsa_addr_with_align (lhs, hbb,
2876*38fd1498Szrj 							  &lhs_align);
2877*38fd1498Szrj 
2878*38fd1498Szrj       if (TREE_CODE (rhs) == CONSTRUCTOR)
2879*38fd1498Szrj 	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align);
2880*38fd1498Szrj       else
2881*38fd1498Szrj 	{
2882*38fd1498Szrj 	  BrigAlignment8_t rhs_align;
2883*38fd1498Szrj 	  hsa_op_address *addr_rhs = gen_hsa_addr_with_align (rhs, hbb,
2884*38fd1498Szrj 							      &rhs_align);
2885*38fd1498Szrj 
2886*38fd1498Szrj 	  unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
2887*38fd1498Szrj 	  gen_hsa_memory_copy (hbb, addr_lhs, addr_rhs, size,
2888*38fd1498Szrj 			       MIN (lhs_align, rhs_align));
2889*38fd1498Szrj 	}
2890*38fd1498Szrj     }
2891*38fd1498Szrj }
2892*38fd1498Szrj 
2893*38fd1498Szrj /* Prepend before INSN a load from spill symbol of SPILL_REG.  Return the
2894*38fd1498Szrj    register into which we loaded.  If this required another register to convert
2895*38fd1498Szrj    from a B1 type, return it in *PTMP2, otherwise store NULL into it.  We
2896*38fd1498Szrj    assume we are out of SSA so the returned register does not have its
2897*38fd1498Szrj    definition set.  */
2898*38fd1498Szrj 
2899*38fd1498Szrj hsa_op_reg *
hsa_spill_in(hsa_insn_basic * insn,hsa_op_reg * spill_reg,hsa_op_reg ** ptmp2)2900*38fd1498Szrj hsa_spill_in (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2)
2901*38fd1498Szrj {
2902*38fd1498Szrj   hsa_symbol *spill_sym = spill_reg->m_spill_sym;
2903*38fd1498Szrj   hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type);
2904*38fd1498Szrj   hsa_op_address *addr = new hsa_op_address (spill_sym);
2905*38fd1498Szrj 
2906*38fd1498Szrj   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, spill_sym->m_type,
2907*38fd1498Szrj 					reg, addr);
2908*38fd1498Szrj   hsa_insert_insn_before (mem, insn);
2909*38fd1498Szrj 
2910*38fd1498Szrj   *ptmp2 = NULL;
2911*38fd1498Szrj   if (spill_reg->m_type == BRIG_TYPE_B1)
2912*38fd1498Szrj     {
2913*38fd1498Szrj       hsa_insn_basic *cvtinsn;
2914*38fd1498Szrj       *ptmp2 = reg;
2915*38fd1498Szrj       reg = new hsa_op_reg (spill_reg->m_type);
2916*38fd1498Szrj 
2917*38fd1498Szrj       cvtinsn = new hsa_insn_cvt (reg, *ptmp2);
2918*38fd1498Szrj       hsa_insert_insn_before (cvtinsn, insn);
2919*38fd1498Szrj     }
2920*38fd1498Szrj   return reg;
2921*38fd1498Szrj }
2922*38fd1498Szrj 
2923*38fd1498Szrj /* Append after INSN a store to spill symbol of SPILL_REG.  Return the register
2924*38fd1498Szrj    from which we stored.  If this required another register to convert to a B1
2925*38fd1498Szrj    type, return it in *PTMP2, otherwise store NULL into it.  We assume we are
2926*38fd1498Szrj    out of SSA so the returned register does not have its use updated.  */
2927*38fd1498Szrj 
2928*38fd1498Szrj hsa_op_reg *
hsa_spill_out(hsa_insn_basic * insn,hsa_op_reg * spill_reg,hsa_op_reg ** ptmp2)2929*38fd1498Szrj hsa_spill_out (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2)
2930*38fd1498Szrj {
2931*38fd1498Szrj   hsa_symbol *spill_sym = spill_reg->m_spill_sym;
2932*38fd1498Szrj   hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type);
2933*38fd1498Szrj   hsa_op_address *addr = new hsa_op_address (spill_sym);
2934*38fd1498Szrj   hsa_op_reg *returnreg;
2935*38fd1498Szrj 
2936*38fd1498Szrj   *ptmp2 = NULL;
2937*38fd1498Szrj   returnreg = reg;
2938*38fd1498Szrj   if (spill_reg->m_type == BRIG_TYPE_B1)
2939*38fd1498Szrj     {
2940*38fd1498Szrj       hsa_insn_basic *cvtinsn;
2941*38fd1498Szrj       *ptmp2 = new hsa_op_reg (spill_sym->m_type);
2942*38fd1498Szrj       reg->m_type = spill_reg->m_type;
2943*38fd1498Szrj 
2944*38fd1498Szrj       cvtinsn = new hsa_insn_cvt (*ptmp2, returnreg);
2945*38fd1498Szrj       hsa_append_insn_after (cvtinsn, insn);
2946*38fd1498Szrj       insn = cvtinsn;
2947*38fd1498Szrj       reg = *ptmp2;
2948*38fd1498Szrj     }
2949*38fd1498Szrj 
2950*38fd1498Szrj   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, spill_sym->m_type, reg,
2951*38fd1498Szrj 					addr);
2952*38fd1498Szrj   hsa_append_insn_after (mem, insn);
2953*38fd1498Szrj   return returnreg;
2954*38fd1498Szrj }
2955*38fd1498Szrj 
2956*38fd1498Szrj /* Generate a comparison instruction that will compare LHS and RHS with
2957*38fd1498Szrj    comparison specified by CODE and put result into register DEST.  DEST has to
2958*38fd1498Szrj    have its type set already but must not have its definition set yet.
2959*38fd1498Szrj    Generated instructions will be added to HBB.  */
2960*38fd1498Szrj 
2961*38fd1498Szrj static void
gen_hsa_cmp_insn_from_gimple(enum tree_code code,tree lhs,tree rhs,hsa_op_reg * dest,hsa_bb * hbb)2962*38fd1498Szrj gen_hsa_cmp_insn_from_gimple (enum tree_code code, tree lhs, tree rhs,
2963*38fd1498Szrj 			      hsa_op_reg *dest, hsa_bb *hbb)
2964*38fd1498Szrj {
2965*38fd1498Szrj   BrigCompareOperation8_t compare;
2966*38fd1498Szrj 
2967*38fd1498Szrj   switch (code)
2968*38fd1498Szrj     {
2969*38fd1498Szrj     case LT_EXPR:
2970*38fd1498Szrj       compare = BRIG_COMPARE_LT;
2971*38fd1498Szrj       break;
2972*38fd1498Szrj     case LE_EXPR:
2973*38fd1498Szrj       compare = BRIG_COMPARE_LE;
2974*38fd1498Szrj       break;
2975*38fd1498Szrj     case GT_EXPR:
2976*38fd1498Szrj       compare = BRIG_COMPARE_GT;
2977*38fd1498Szrj       break;
2978*38fd1498Szrj     case GE_EXPR:
2979*38fd1498Szrj       compare = BRIG_COMPARE_GE;
2980*38fd1498Szrj       break;
2981*38fd1498Szrj     case EQ_EXPR:
2982*38fd1498Szrj       compare = BRIG_COMPARE_EQ;
2983*38fd1498Szrj       break;
2984*38fd1498Szrj     case NE_EXPR:
2985*38fd1498Szrj       compare = BRIG_COMPARE_NE;
2986*38fd1498Szrj       break;
2987*38fd1498Szrj     case UNORDERED_EXPR:
2988*38fd1498Szrj       compare = BRIG_COMPARE_NAN;
2989*38fd1498Szrj       break;
2990*38fd1498Szrj     case ORDERED_EXPR:
2991*38fd1498Szrj       compare = BRIG_COMPARE_NUM;
2992*38fd1498Szrj       break;
2993*38fd1498Szrj     case UNLT_EXPR:
2994*38fd1498Szrj       compare = BRIG_COMPARE_LTU;
2995*38fd1498Szrj       break;
2996*38fd1498Szrj     case UNLE_EXPR:
2997*38fd1498Szrj       compare = BRIG_COMPARE_LEU;
2998*38fd1498Szrj       break;
2999*38fd1498Szrj     case UNGT_EXPR:
3000*38fd1498Szrj       compare = BRIG_COMPARE_GTU;
3001*38fd1498Szrj       break;
3002*38fd1498Szrj     case UNGE_EXPR:
3003*38fd1498Szrj       compare = BRIG_COMPARE_GEU;
3004*38fd1498Szrj       break;
3005*38fd1498Szrj     case UNEQ_EXPR:
3006*38fd1498Szrj       compare = BRIG_COMPARE_EQU;
3007*38fd1498Szrj       break;
3008*38fd1498Szrj     case LTGT_EXPR:
3009*38fd1498Szrj       compare = BRIG_COMPARE_NEU;
3010*38fd1498Szrj       break;
3011*38fd1498Szrj 
3012*38fd1498Szrj     default:
3013*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (lhs),
3014*38fd1498Szrj 		     "support for HSA does not implement comparison tree "
3015*38fd1498Szrj 		     "code %s\n", get_tree_code_name (code));
3016*38fd1498Szrj       return;
3017*38fd1498Szrj     }
3018*38fd1498Szrj 
3019*38fd1498Szrj   /* CMP instruction returns e.g. 0xffffffff (for a 32-bit with integer)
3020*38fd1498Szrj      as a result of comparison.  */
3021*38fd1498Szrj 
3022*38fd1498Szrj   BrigType16_t dest_type = hsa_type_integer_p (dest->m_type)
3023*38fd1498Szrj     ? (BrigType16_t) BRIG_TYPE_B1 : dest->m_type;
3024*38fd1498Szrj 
3025*38fd1498Szrj   hsa_insn_cmp *cmp = new hsa_insn_cmp (compare, dest_type);
3026*38fd1498Szrj   hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (lhs, hbb);
3027*38fd1498Szrj   cmp->set_op (1, op1->extend_int_to_32bit (hbb));
3028*38fd1498Szrj   hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs, hbb);
3029*38fd1498Szrj   cmp->set_op (2, op2->extend_int_to_32bit (hbb));
3030*38fd1498Szrj 
3031*38fd1498Szrj   hbb->append_insn (cmp);
3032*38fd1498Szrj   cmp->set_output_in_type (dest, 0, hbb);
3033*38fd1498Szrj }
3034*38fd1498Szrj 
3035*38fd1498Szrj /* Generate an unary instruction with OPCODE and append it to a basic block
3036*38fd1498Szrj    HBB.  The instruction uses DEST as a destination and OP1
3037*38fd1498Szrj    as a single operand.  */
3038*38fd1498Szrj 
3039*38fd1498Szrj static void
gen_hsa_unary_operation(BrigOpcode opcode,hsa_op_reg * dest,hsa_op_with_type * op1,hsa_bb * hbb)3040*38fd1498Szrj gen_hsa_unary_operation (BrigOpcode opcode, hsa_op_reg *dest,
3041*38fd1498Szrj 			 hsa_op_with_type *op1, hsa_bb *hbb)
3042*38fd1498Szrj {
3043*38fd1498Szrj   gcc_checking_assert (dest);
3044*38fd1498Szrj   hsa_insn_basic *insn;
3045*38fd1498Szrj 
3046*38fd1498Szrj   if (opcode == BRIG_OPCODE_MOV && hsa_needs_cvt (dest->m_type, op1->m_type))
3047*38fd1498Szrj     {
3048*38fd1498Szrj       insn = new hsa_insn_cvt (dest, op1);
3049*38fd1498Szrj       hbb->append_insn (insn);
3050*38fd1498Szrj       return;
3051*38fd1498Szrj     }
3052*38fd1498Szrj 
3053*38fd1498Szrj   op1 = op1->extend_int_to_32bit (hbb);
3054*38fd1498Szrj   if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT)
3055*38fd1498Szrj     {
3056*38fd1498Szrj       BrigType16_t srctype = hsa_type_integer_p (op1->m_type) ? op1->m_type
3057*38fd1498Szrj 	: hsa_unsigned_type_for_type (op1->m_type);
3058*38fd1498Szrj       insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, srctype, NULL,
3059*38fd1498Szrj 				   op1);
3060*38fd1498Szrj     }
3061*38fd1498Szrj   else
3062*38fd1498Szrj     {
3063*38fd1498Szrj       BrigType16_t optype = hsa_extend_inttype_to_32bit (dest->m_type);
3064*38fd1498Szrj       insn = new hsa_insn_basic (2, opcode, optype, NULL, op1);
3065*38fd1498Szrj 
3066*38fd1498Szrj       if (opcode == BRIG_OPCODE_MOV)
3067*38fd1498Szrj 	hsa_fixup_mov_insn_type (insn);
3068*38fd1498Szrj       else if (opcode == BRIG_OPCODE_ABS || opcode == BRIG_OPCODE_NEG)
3069*38fd1498Szrj 	{
3070*38fd1498Szrj 	  /* ABS and NEG only exist in _s form :-/  */
3071*38fd1498Szrj 	  if (insn->m_type == BRIG_TYPE_U32)
3072*38fd1498Szrj 	    insn->m_type = BRIG_TYPE_S32;
3073*38fd1498Szrj 	  else if (insn->m_type == BRIG_TYPE_U64)
3074*38fd1498Szrj 	    insn->m_type = BRIG_TYPE_S64;
3075*38fd1498Szrj 	}
3076*38fd1498Szrj     }
3077*38fd1498Szrj 
3078*38fd1498Szrj   hbb->append_insn (insn);
3079*38fd1498Szrj   insn->set_output_in_type (dest, 0, hbb);
3080*38fd1498Szrj }
3081*38fd1498Szrj 
3082*38fd1498Szrj /* Generate a binary instruction with OPCODE and append it to a basic block
3083*38fd1498Szrj    HBB.  The instruction uses DEST as a destination and operands OP1
3084*38fd1498Szrj    and OP2.  */
3085*38fd1498Szrj 
3086*38fd1498Szrj static void
gen_hsa_binary_operation(int opcode,hsa_op_reg * dest,hsa_op_with_type * op1,hsa_op_with_type * op2,hsa_bb * hbb)3087*38fd1498Szrj gen_hsa_binary_operation (int opcode, hsa_op_reg *dest,
3088*38fd1498Szrj 			  hsa_op_with_type *op1, hsa_op_with_type *op2,
3089*38fd1498Szrj 			  hsa_bb *hbb)
3090*38fd1498Szrj {
3091*38fd1498Szrj   gcc_checking_assert (dest);
3092*38fd1498Szrj 
3093*38fd1498Szrj   BrigType16_t optype = hsa_extend_inttype_to_32bit (dest->m_type);
3094*38fd1498Szrj   op1 = op1->extend_int_to_32bit (hbb);
3095*38fd1498Szrj   op2 = op2->extend_int_to_32bit (hbb);
3096*38fd1498Szrj 
3097*38fd1498Szrj   if ((opcode == BRIG_OPCODE_SHL || opcode == BRIG_OPCODE_SHR)
3098*38fd1498Szrj       && is_a <hsa_op_immed *> (op2))
3099*38fd1498Szrj     {
3100*38fd1498Szrj       hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2);
3101*38fd1498Szrj       i->set_type (BRIG_TYPE_U32);
3102*38fd1498Szrj     }
3103*38fd1498Szrj   if ((opcode == BRIG_OPCODE_OR
3104*38fd1498Szrj        || opcode == BRIG_OPCODE_XOR
3105*38fd1498Szrj        || opcode == BRIG_OPCODE_AND)
3106*38fd1498Szrj       && is_a <hsa_op_immed *> (op2))
3107*38fd1498Szrj     {
3108*38fd1498Szrj       hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2);
3109*38fd1498Szrj       i->set_type (hsa_unsigned_type_for_type (i->m_type));
3110*38fd1498Szrj     }
3111*38fd1498Szrj 
3112*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, optype, NULL,
3113*38fd1498Szrj 					     op1, op2);
3114*38fd1498Szrj   hbb->append_insn (insn);
3115*38fd1498Szrj   insn->set_output_in_type (dest, 0, hbb);
3116*38fd1498Szrj }
3117*38fd1498Szrj 
3118*38fd1498Szrj /* Generate HSA instructions for a single assignment.  HBB is the basic block
3119*38fd1498Szrj    they will be appended to.  */
3120*38fd1498Szrj 
3121*38fd1498Szrj static void
gen_hsa_insns_for_operation_assignment(gimple * assign,hsa_bb * hbb)3122*38fd1498Szrj gen_hsa_insns_for_operation_assignment (gimple *assign, hsa_bb *hbb)
3123*38fd1498Szrj {
3124*38fd1498Szrj   tree_code code = gimple_assign_rhs_code (assign);
3125*38fd1498Szrj   gimple_rhs_class rhs_class = get_gimple_rhs_class (gimple_expr_code (assign));
3126*38fd1498Szrj 
3127*38fd1498Szrj   tree lhs = gimple_assign_lhs (assign);
3128*38fd1498Szrj   tree rhs1 = gimple_assign_rhs1 (assign);
3129*38fd1498Szrj   tree rhs2 = gimple_assign_rhs2 (assign);
3130*38fd1498Szrj   tree rhs3 = gimple_assign_rhs3 (assign);
3131*38fd1498Szrj 
3132*38fd1498Szrj   BrigOpcode opcode;
3133*38fd1498Szrj 
3134*38fd1498Szrj   switch (code)
3135*38fd1498Szrj     {
3136*38fd1498Szrj     CASE_CONVERT:
3137*38fd1498Szrj     case FLOAT_EXPR:
3138*38fd1498Szrj       /* The opcode is changed to BRIG_OPCODE_CVT if BRIG types
3139*38fd1498Szrj 	 needs a conversion.  */
3140*38fd1498Szrj       opcode = BRIG_OPCODE_MOV;
3141*38fd1498Szrj       break;
3142*38fd1498Szrj 
3143*38fd1498Szrj     case PLUS_EXPR:
3144*38fd1498Szrj     case POINTER_PLUS_EXPR:
3145*38fd1498Szrj       opcode = BRIG_OPCODE_ADD;
3146*38fd1498Szrj       break;
3147*38fd1498Szrj     case MINUS_EXPR:
3148*38fd1498Szrj       opcode = BRIG_OPCODE_SUB;
3149*38fd1498Szrj       break;
3150*38fd1498Szrj     case MULT_EXPR:
3151*38fd1498Szrj       opcode = BRIG_OPCODE_MUL;
3152*38fd1498Szrj       break;
3153*38fd1498Szrj     case MULT_HIGHPART_EXPR:
3154*38fd1498Szrj       opcode = BRIG_OPCODE_MULHI;
3155*38fd1498Szrj       break;
3156*38fd1498Szrj     case RDIV_EXPR:
3157*38fd1498Szrj     case TRUNC_DIV_EXPR:
3158*38fd1498Szrj     case EXACT_DIV_EXPR:
3159*38fd1498Szrj       opcode = BRIG_OPCODE_DIV;
3160*38fd1498Szrj       break;
3161*38fd1498Szrj     case CEIL_DIV_EXPR:
3162*38fd1498Szrj     case FLOOR_DIV_EXPR:
3163*38fd1498Szrj     case ROUND_DIV_EXPR:
3164*38fd1498Szrj       HSA_SORRY_AT (gimple_location (assign),
3165*38fd1498Szrj 		    "support for HSA does not implement CEIL_DIV_EXPR, "
3166*38fd1498Szrj 		    "FLOOR_DIV_EXPR or ROUND_DIV_EXPR");
3167*38fd1498Szrj       return;
3168*38fd1498Szrj     case TRUNC_MOD_EXPR:
3169*38fd1498Szrj       opcode = BRIG_OPCODE_REM;
3170*38fd1498Szrj       break;
3171*38fd1498Szrj     case CEIL_MOD_EXPR:
3172*38fd1498Szrj     case FLOOR_MOD_EXPR:
3173*38fd1498Szrj     case ROUND_MOD_EXPR:
3174*38fd1498Szrj       HSA_SORRY_AT (gimple_location (assign),
3175*38fd1498Szrj 		    "support for HSA does not implement CEIL_MOD_EXPR, "
3176*38fd1498Szrj 		    "FLOOR_MOD_EXPR or ROUND_MOD_EXPR");
3177*38fd1498Szrj       return;
3178*38fd1498Szrj     case NEGATE_EXPR:
3179*38fd1498Szrj       opcode = BRIG_OPCODE_NEG;
3180*38fd1498Szrj       break;
3181*38fd1498Szrj     case FMA_EXPR:
3182*38fd1498Szrj       /* There is a native HSA instruction for scalar FMAs but not for vector
3183*38fd1498Szrj 	 ones.  */
3184*38fd1498Szrj       if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE)
3185*38fd1498Szrj 	{
3186*38fd1498Szrj 	  hsa_op_reg *dest
3187*38fd1498Szrj 	    = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
3188*38fd1498Szrj 	  hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
3189*38fd1498Szrj 	  hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
3190*38fd1498Szrj 	  hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
3191*38fd1498Szrj 	  hsa_op_reg *tmp = new hsa_op_reg (dest->m_type);
3192*38fd1498Szrj 	  gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp, op1, op2, hbb);
3193*38fd1498Szrj 	  gen_hsa_binary_operation (BRIG_OPCODE_ADD, dest, tmp, op3, hbb);
3194*38fd1498Szrj 	  return;
3195*38fd1498Szrj 	}
3196*38fd1498Szrj       opcode = BRIG_OPCODE_MAD;
3197*38fd1498Szrj       break;
3198*38fd1498Szrj     case MIN_EXPR:
3199*38fd1498Szrj       opcode = BRIG_OPCODE_MIN;
3200*38fd1498Szrj       break;
3201*38fd1498Szrj     case MAX_EXPR:
3202*38fd1498Szrj       opcode = BRIG_OPCODE_MAX;
3203*38fd1498Szrj       break;
3204*38fd1498Szrj     case ABS_EXPR:
3205*38fd1498Szrj       opcode = BRIG_OPCODE_ABS;
3206*38fd1498Szrj       break;
3207*38fd1498Szrj     case LSHIFT_EXPR:
3208*38fd1498Szrj       opcode = BRIG_OPCODE_SHL;
3209*38fd1498Szrj       break;
3210*38fd1498Szrj     case RSHIFT_EXPR:
3211*38fd1498Szrj       opcode = BRIG_OPCODE_SHR;
3212*38fd1498Szrj       break;
3213*38fd1498Szrj     case LROTATE_EXPR:
3214*38fd1498Szrj     case RROTATE_EXPR:
3215*38fd1498Szrj       {
3216*38fd1498Szrj 	hsa_insn_basic *insn = NULL;
3217*38fd1498Szrj 	int code1 = code == LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR;
3218*38fd1498Szrj 	int code2 = code != LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR;
3219*38fd1498Szrj 	BrigType16_t btype = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
3220*38fd1498Szrj 							    true);
3221*38fd1498Szrj 
3222*38fd1498Szrj 	hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
3223*38fd1498Szrj 	hsa_op_reg *op1 = new hsa_op_reg (btype);
3224*38fd1498Szrj 	hsa_op_reg *op2 = new hsa_op_reg (btype);
3225*38fd1498Szrj 	hsa_op_with_type *shift1 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
3226*38fd1498Szrj 
3227*38fd1498Szrj 	tree type = TREE_TYPE (rhs2);
3228*38fd1498Szrj 	unsigned HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (TYPE_SIZE (type));
3229*38fd1498Szrj 
3230*38fd1498Szrj 	hsa_op_with_type *shift2 = NULL;
3231*38fd1498Szrj 	if (TREE_CODE (rhs2) == INTEGER_CST)
3232*38fd1498Szrj 	  shift2 = new hsa_op_immed (bitsize - tree_to_uhwi (rhs2),
3233*38fd1498Szrj 				     BRIG_TYPE_U32);
3234*38fd1498Szrj 	else if (TREE_CODE (rhs2) == SSA_NAME)
3235*38fd1498Szrj 	  {
3236*38fd1498Szrj 	    hsa_op_reg *s = hsa_cfun->reg_for_gimple_ssa (rhs2);
3237*38fd1498Szrj 	    s = as_a <hsa_op_reg *> (s->extend_int_to_32bit (hbb));
3238*38fd1498Szrj 	    hsa_op_reg *d = new hsa_op_reg (s->m_type);
3239*38fd1498Szrj 	    hsa_op_immed *size_imm = new hsa_op_immed (bitsize, BRIG_TYPE_U32);
3240*38fd1498Szrj 
3241*38fd1498Szrj 	    insn = new hsa_insn_basic (3, BRIG_OPCODE_SUB, d->m_type,
3242*38fd1498Szrj 				       d, s, size_imm);
3243*38fd1498Szrj 	    hbb->append_insn (insn);
3244*38fd1498Szrj 
3245*38fd1498Szrj 	    shift2 = d;
3246*38fd1498Szrj 	  }
3247*38fd1498Szrj 	else
3248*38fd1498Szrj 	  gcc_unreachable ();
3249*38fd1498Szrj 
3250*38fd1498Szrj 	hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
3251*38fd1498Szrj 	gen_hsa_binary_operation (code1, op1, src, shift1, hbb);
3252*38fd1498Szrj 	gen_hsa_binary_operation (code2, op2, src, shift2, hbb);
3253*38fd1498Szrj 	gen_hsa_binary_operation (BRIG_OPCODE_OR, dest, op1, op2, hbb);
3254*38fd1498Szrj 
3255*38fd1498Szrj 	return;
3256*38fd1498Szrj       }
3257*38fd1498Szrj     case BIT_IOR_EXPR:
3258*38fd1498Szrj       opcode = BRIG_OPCODE_OR;
3259*38fd1498Szrj       break;
3260*38fd1498Szrj     case BIT_XOR_EXPR:
3261*38fd1498Szrj       opcode = BRIG_OPCODE_XOR;
3262*38fd1498Szrj       break;
3263*38fd1498Szrj     case BIT_AND_EXPR:
3264*38fd1498Szrj       opcode = BRIG_OPCODE_AND;
3265*38fd1498Szrj       break;
3266*38fd1498Szrj     case BIT_NOT_EXPR:
3267*38fd1498Szrj       opcode = BRIG_OPCODE_NOT;
3268*38fd1498Szrj       break;
3269*38fd1498Szrj     case FIX_TRUNC_EXPR:
3270*38fd1498Szrj       {
3271*38fd1498Szrj 	hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
3272*38fd1498Szrj 	hsa_op_with_type *v = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
3273*38fd1498Szrj 
3274*38fd1498Szrj 	if (hsa_needs_cvt (dest->m_type, v->m_type))
3275*38fd1498Szrj 	  {
3276*38fd1498Szrj 	    hsa_op_reg *tmp = new hsa_op_reg (v->m_type);
3277*38fd1498Szrj 
3278*38fd1498Szrj 	    hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC,
3279*38fd1498Szrj 						       tmp->m_type, tmp, v);
3280*38fd1498Szrj 	    hbb->append_insn (insn);
3281*38fd1498Szrj 
3282*38fd1498Szrj 	    hsa_insn_basic *cvtinsn = new hsa_insn_cvt (dest, tmp);
3283*38fd1498Szrj 	    hbb->append_insn (cvtinsn);
3284*38fd1498Szrj 	  }
3285*38fd1498Szrj 	else
3286*38fd1498Szrj 	  {
3287*38fd1498Szrj 	    hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC,
3288*38fd1498Szrj 						       dest->m_type, dest, v);
3289*38fd1498Szrj 	    hbb->append_insn (insn);
3290*38fd1498Szrj 	  }
3291*38fd1498Szrj 
3292*38fd1498Szrj 	return;
3293*38fd1498Szrj       }
3294*38fd1498Szrj       opcode = BRIG_OPCODE_TRUNC;
3295*38fd1498Szrj       break;
3296*38fd1498Szrj 
3297*38fd1498Szrj     case LT_EXPR:
3298*38fd1498Szrj     case LE_EXPR:
3299*38fd1498Szrj     case GT_EXPR:
3300*38fd1498Szrj     case GE_EXPR:
3301*38fd1498Szrj     case EQ_EXPR:
3302*38fd1498Szrj     case NE_EXPR:
3303*38fd1498Szrj     case UNORDERED_EXPR:
3304*38fd1498Szrj     case ORDERED_EXPR:
3305*38fd1498Szrj     case UNLT_EXPR:
3306*38fd1498Szrj     case UNLE_EXPR:
3307*38fd1498Szrj     case UNGT_EXPR:
3308*38fd1498Szrj     case UNGE_EXPR:
3309*38fd1498Szrj     case UNEQ_EXPR:
3310*38fd1498Szrj     case LTGT_EXPR:
3311*38fd1498Szrj       {
3312*38fd1498Szrj 	hsa_op_reg *dest
3313*38fd1498Szrj 	  = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
3314*38fd1498Szrj 
3315*38fd1498Szrj 	gen_hsa_cmp_insn_from_gimple (code, rhs1, rhs2, dest, hbb);
3316*38fd1498Szrj 	return;
3317*38fd1498Szrj       }
3318*38fd1498Szrj     case COND_EXPR:
3319*38fd1498Szrj       {
3320*38fd1498Szrj 	hsa_op_reg *dest
3321*38fd1498Szrj 	  = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
3322*38fd1498Szrj 	hsa_op_with_type *ctrl = NULL;
3323*38fd1498Szrj 	tree cond = rhs1;
3324*38fd1498Szrj 
3325*38fd1498Szrj 	if (CONSTANT_CLASS_P (cond) || TREE_CODE (cond) == SSA_NAME)
3326*38fd1498Szrj 	  ctrl = hsa_reg_or_immed_for_gimple_op (cond, hbb);
3327*38fd1498Szrj 	else
3328*38fd1498Szrj 	  {
3329*38fd1498Szrj 	    hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1);
3330*38fd1498Szrj 
3331*38fd1498Szrj 	    gen_hsa_cmp_insn_from_gimple (TREE_CODE (cond),
3332*38fd1498Szrj 				  TREE_OPERAND (cond, 0),
3333*38fd1498Szrj 				  TREE_OPERAND (cond, 1),
3334*38fd1498Szrj 				  r, hbb);
3335*38fd1498Szrj 
3336*38fd1498Szrj 	    ctrl = r;
3337*38fd1498Szrj 	  }
3338*38fd1498Szrj 
3339*38fd1498Szrj 	hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
3340*38fd1498Szrj 	hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
3341*38fd1498Szrj 	op2 = op2->extend_int_to_32bit (hbb);
3342*38fd1498Szrj 	op3 = op3->extend_int_to_32bit (hbb);
3343*38fd1498Szrj 
3344*38fd1498Szrj 	BrigType16_t type = hsa_extend_inttype_to_32bit (dest->m_type);
3345*38fd1498Szrj 	BrigType16_t utype = hsa_unsigned_type_for_type (type);
3346*38fd1498Szrj 	if (is_a <hsa_op_immed *> (op2))
3347*38fd1498Szrj 	  op2->m_type = utype;
3348*38fd1498Szrj 	if (is_a <hsa_op_immed *> (op3))
3349*38fd1498Szrj 	  op3->m_type = utype;
3350*38fd1498Szrj 
3351*38fd1498Szrj 	hsa_insn_basic *insn
3352*38fd1498Szrj 	  = new hsa_insn_basic (4, BRIG_OPCODE_CMOV,
3353*38fd1498Szrj 				hsa_bittype_for_type (type),
3354*38fd1498Szrj 				NULL, ctrl, op2, op3);
3355*38fd1498Szrj 
3356*38fd1498Szrj 	hbb->append_insn (insn);
3357*38fd1498Szrj 	insn->set_output_in_type (dest, 0, hbb);
3358*38fd1498Szrj 	return;
3359*38fd1498Szrj       }
3360*38fd1498Szrj     case COMPLEX_EXPR:
3361*38fd1498Szrj       {
3362*38fd1498Szrj 	hsa_op_reg *dest
3363*38fd1498Szrj 	  = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
3364*38fd1498Szrj 	hsa_op_with_type *rhs1_reg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
3365*38fd1498Szrj 	rhs1_reg = rhs1_reg->extend_int_to_32bit (hbb);
3366*38fd1498Szrj 	hsa_op_with_type *rhs2_reg = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
3367*38fd1498Szrj 	rhs2_reg = rhs2_reg->extend_int_to_32bit (hbb);
3368*38fd1498Szrj 
3369*38fd1498Szrj 	if (hsa_seen_error ())
3370*38fd1498Szrj 	  return;
3371*38fd1498Szrj 
3372*38fd1498Szrj 	BrigType16_t src_type = hsa_bittype_for_type (rhs1_reg->m_type);
3373*38fd1498Szrj 	rhs1_reg = rhs1_reg->get_in_type (src_type, hbb);
3374*38fd1498Szrj 	rhs2_reg = rhs2_reg->get_in_type (src_type, hbb);
3375*38fd1498Szrj 
3376*38fd1498Szrj 	hsa_insn_packed *insn
3377*38fd1498Szrj 	  = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type, src_type,
3378*38fd1498Szrj 				 dest, rhs1_reg, rhs2_reg);
3379*38fd1498Szrj 	hbb->append_insn (insn);
3380*38fd1498Szrj 
3381*38fd1498Szrj 	return;
3382*38fd1498Szrj       }
3383*38fd1498Szrj     default:
3384*38fd1498Szrj       /* Implement others as we come across them.  */
3385*38fd1498Szrj       HSA_SORRY_ATV (gimple_location (assign),
3386*38fd1498Szrj 		     "support for HSA does not implement operation %s",
3387*38fd1498Szrj 		     get_tree_code_name (code));
3388*38fd1498Szrj       return;
3389*38fd1498Szrj     }
3390*38fd1498Szrj 
3391*38fd1498Szrj 
3392*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
3393*38fd1498Szrj   hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
3394*38fd1498Szrj   hsa_op_with_type *op2
3395*38fd1498Szrj     = rhs2 ? hsa_reg_or_immed_for_gimple_op (rhs2, hbb) : NULL;
3396*38fd1498Szrj 
3397*38fd1498Szrj   if (hsa_seen_error ())
3398*38fd1498Szrj     return;
3399*38fd1498Szrj 
3400*38fd1498Szrj   switch (rhs_class)
3401*38fd1498Szrj     {
3402*38fd1498Szrj     case GIMPLE_TERNARY_RHS:
3403*38fd1498Szrj       {
3404*38fd1498Szrj 	hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
3405*38fd1498Szrj 	op3 = op3->extend_int_to_32bit (hbb);
3406*38fd1498Szrj 	hsa_insn_basic *insn = new hsa_insn_basic (4, opcode, dest->m_type, dest,
3407*38fd1498Szrj 						   op1, op2, op3);
3408*38fd1498Szrj 	hbb->append_insn (insn);
3409*38fd1498Szrj       }
3410*38fd1498Szrj       return;
3411*38fd1498Szrj 
3412*38fd1498Szrj     case GIMPLE_BINARY_RHS:
3413*38fd1498Szrj       gen_hsa_binary_operation (opcode, dest, op1, op2, hbb);
3414*38fd1498Szrj       break;
3415*38fd1498Szrj 
3416*38fd1498Szrj     case GIMPLE_UNARY_RHS:
3417*38fd1498Szrj       gen_hsa_unary_operation (opcode, dest, op1, hbb);
3418*38fd1498Szrj       break;
3419*38fd1498Szrj     default:
3420*38fd1498Szrj       gcc_unreachable ();
3421*38fd1498Szrj     }
3422*38fd1498Szrj }
3423*38fd1498Szrj 
3424*38fd1498Szrj /* Generate HSA instructions for a given gimple condition statement COND.
3425*38fd1498Szrj    Instructions will be appended to HBB, which also needs to be the
3426*38fd1498Szrj    corresponding structure to the basic_block of COND.  */
3427*38fd1498Szrj 
3428*38fd1498Szrj static void
gen_hsa_insns_for_cond_stmt(gimple * cond,hsa_bb * hbb)3429*38fd1498Szrj gen_hsa_insns_for_cond_stmt (gimple *cond, hsa_bb *hbb)
3430*38fd1498Szrj {
3431*38fd1498Szrj   hsa_op_reg *ctrl = new hsa_op_reg (BRIG_TYPE_B1);
3432*38fd1498Szrj   hsa_insn_cbr *cbr;
3433*38fd1498Szrj 
3434*38fd1498Szrj   gen_hsa_cmp_insn_from_gimple (gimple_cond_code (cond),
3435*38fd1498Szrj 				gimple_cond_lhs (cond),
3436*38fd1498Szrj 				gimple_cond_rhs (cond),
3437*38fd1498Szrj 				ctrl, hbb);
3438*38fd1498Szrj 
3439*38fd1498Szrj   cbr = new hsa_insn_cbr (ctrl);
3440*38fd1498Szrj   hbb->append_insn (cbr);
3441*38fd1498Szrj }
3442*38fd1498Szrj 
/* Upper bound on the number of entries in the jump table of an HSA SBR
   instruction.  */

#define HSA_MAXIMUM_SBR_LABELS 16
3446*38fd1498Szrj 
3447*38fd1498Szrj /* Return lowest value of a switch S that is handled in a non-default
3448*38fd1498Szrj    label.  */
3449*38fd1498Szrj 
3450*38fd1498Szrj static tree
get_switch_low(gswitch * s)3451*38fd1498Szrj get_switch_low (gswitch *s)
3452*38fd1498Szrj {
3453*38fd1498Szrj   unsigned labels = gimple_switch_num_labels (s);
3454*38fd1498Szrj   gcc_checking_assert (labels >= 1);
3455*38fd1498Szrj 
3456*38fd1498Szrj   return CASE_LOW (gimple_switch_label (s, 1));
3457*38fd1498Szrj }
3458*38fd1498Szrj 
3459*38fd1498Szrj /* Return highest value of a switch S that is handled in a non-default
3460*38fd1498Szrj    label.  */
3461*38fd1498Szrj 
3462*38fd1498Szrj static tree
get_switch_high(gswitch * s)3463*38fd1498Szrj get_switch_high (gswitch *s)
3464*38fd1498Szrj {
3465*38fd1498Szrj   unsigned labels = gimple_switch_num_labels (s);
3466*38fd1498Szrj 
3467*38fd1498Szrj   /* Compare last label to maximum number of labels.  */
3468*38fd1498Szrj   tree label = gimple_switch_label (s, labels - 1);
3469*38fd1498Szrj   tree low = CASE_LOW (label);
3470*38fd1498Szrj   tree high = CASE_HIGH (label);
3471*38fd1498Szrj 
3472*38fd1498Szrj   return high != NULL_TREE ? high : low;
3473*38fd1498Szrj }
3474*38fd1498Szrj 
3475*38fd1498Szrj static tree
get_switch_size(gswitch * s)3476*38fd1498Szrj get_switch_size (gswitch *s)
3477*38fd1498Szrj {
3478*38fd1498Szrj   return int_const_binop (MINUS_EXPR, get_switch_high (s), get_switch_low (s));
3479*38fd1498Szrj }
3480*38fd1498Szrj 
3481*38fd1498Szrj /* Generate HSA instructions for a given gimple switch.
3482*38fd1498Szrj    Instructions will be appended to HBB.  */
3483*38fd1498Szrj 
3484*38fd1498Szrj static void
gen_hsa_insns_for_switch_stmt(gswitch * s,hsa_bb * hbb)3485*38fd1498Szrj gen_hsa_insns_for_switch_stmt (gswitch *s, hsa_bb *hbb)
3486*38fd1498Szrj {
3487*38fd1498Szrj   gimple_stmt_iterator it = gsi_for_stmt (s);
3488*38fd1498Szrj   gsi_prev (&it);
3489*38fd1498Szrj 
3490*38fd1498Szrj   /* Create preambule that verifies that index - lowest_label >= 0.  */
3491*38fd1498Szrj   edge e = split_block (hbb->m_bb, gsi_stmt (it));
3492*38fd1498Szrj   e->flags &= ~EDGE_FALLTHRU;
3493*38fd1498Szrj   e->flags |= EDGE_TRUE_VALUE;
3494*38fd1498Szrj 
3495*38fd1498Szrj   function *func = DECL_STRUCT_FUNCTION (current_function_decl);
3496*38fd1498Szrj   tree index_tree = gimple_switch_index (s);
3497*38fd1498Szrj   tree lowest = get_switch_low (s);
3498*38fd1498Szrj   tree highest = get_switch_high (s);
3499*38fd1498Szrj 
3500*38fd1498Szrj   hsa_op_reg *index = hsa_cfun->reg_for_gimple_ssa (index_tree);
3501*38fd1498Szrj   index = as_a <hsa_op_reg *> (index->extend_int_to_32bit (hbb));
3502*38fd1498Szrj 
3503*38fd1498Szrj   hsa_op_reg *cmp1_reg = new hsa_op_reg (BRIG_TYPE_B1);
3504*38fd1498Szrj   hsa_op_immed *cmp1_immed = new hsa_op_immed (lowest, true);
3505*38fd1498Szrj   hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_GE, cmp1_reg->m_type,
3506*38fd1498Szrj 				      cmp1_reg, index, cmp1_immed));
3507*38fd1498Szrj 
3508*38fd1498Szrj   hsa_op_reg *cmp2_reg = new hsa_op_reg (BRIG_TYPE_B1);
3509*38fd1498Szrj   hsa_op_immed *cmp2_immed = new hsa_op_immed (highest, true);
3510*38fd1498Szrj   hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_LE, cmp2_reg->m_type,
3511*38fd1498Szrj 				      cmp2_reg, index, cmp2_immed));
3512*38fd1498Szrj 
3513*38fd1498Szrj   hsa_op_reg *cmp_reg = new hsa_op_reg (BRIG_TYPE_B1);
3514*38fd1498Szrj   hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_AND, cmp_reg->m_type,
3515*38fd1498Szrj 					cmp_reg, cmp1_reg, cmp2_reg));
3516*38fd1498Szrj 
3517*38fd1498Szrj   hbb->append_insn (new hsa_insn_cbr (cmp_reg));
3518*38fd1498Szrj 
3519*38fd1498Szrj   tree default_label = gimple_switch_default_label (s);
3520*38fd1498Szrj   basic_block default_label_bb = label_to_block_fn (func,
3521*38fd1498Szrj 						    CASE_LABEL (default_label));
3522*38fd1498Szrj 
3523*38fd1498Szrj   if (!gimple_seq_empty_p (phi_nodes (default_label_bb)))
3524*38fd1498Szrj     {
3525*38fd1498Szrj       default_label_bb = split_edge (find_edge (e->dest, default_label_bb));
3526*38fd1498Szrj       hsa_init_new_bb (default_label_bb);
3527*38fd1498Szrj     }
3528*38fd1498Szrj 
3529*38fd1498Szrj   make_edge (e->src, default_label_bb, EDGE_FALSE_VALUE);
3530*38fd1498Szrj 
3531*38fd1498Szrj   hsa_cfun->m_modified_cfg = true;
3532*38fd1498Szrj 
3533*38fd1498Szrj   /* Basic block with the SBR instruction.  */
3534*38fd1498Szrj   hbb = hsa_init_new_bb (e->dest);
3535*38fd1498Szrj 
3536*38fd1498Szrj   hsa_op_reg *sub_index = new hsa_op_reg (index->m_type);
3537*38fd1498Szrj   hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_SUB, sub_index->m_type,
3538*38fd1498Szrj 					sub_index, index,
3539*38fd1498Szrj 					new hsa_op_immed (lowest, true)));
3540*38fd1498Szrj 
3541*38fd1498Szrj   hsa_op_base *tmp = sub_index->get_in_type (BRIG_TYPE_U64, hbb);
3542*38fd1498Szrj   sub_index = as_a <hsa_op_reg *> (tmp);
3543*38fd1498Szrj   unsigned labels = gimple_switch_num_labels (s);
3544*38fd1498Szrj   unsigned HOST_WIDE_INT size = tree_to_uhwi (get_switch_size (s));
3545*38fd1498Szrj 
3546*38fd1498Szrj   hsa_insn_sbr *sbr = new hsa_insn_sbr (sub_index, size + 1);
3547*38fd1498Szrj 
3548*38fd1498Szrj   /* Prepare array with default label destination.  */
3549*38fd1498Szrj   for (unsigned HOST_WIDE_INT i = 0; i <= size; i++)
3550*38fd1498Szrj     sbr->m_jump_table.safe_push (default_label_bb);
3551*38fd1498Szrj 
3552*38fd1498Szrj   /* Iterate all labels and fill up the jump table.  */
3553*38fd1498Szrj   for (unsigned i = 1; i < labels; i++)
3554*38fd1498Szrj     {
3555*38fd1498Szrj       tree label = gimple_switch_label (s, i);
3556*38fd1498Szrj       basic_block bb = label_to_block_fn (func, CASE_LABEL (label));
3557*38fd1498Szrj 
3558*38fd1498Szrj       unsigned HOST_WIDE_INT sub_low
3559*38fd1498Szrj 	= tree_to_uhwi (int_const_binop (MINUS_EXPR, CASE_LOW (label), lowest));
3560*38fd1498Szrj 
3561*38fd1498Szrj       unsigned HOST_WIDE_INT sub_high = sub_low;
3562*38fd1498Szrj       tree high = CASE_HIGH (label);
3563*38fd1498Szrj       if (high != NULL)
3564*38fd1498Szrj 	sub_high = tree_to_uhwi (int_const_binop (MINUS_EXPR, high, lowest));
3565*38fd1498Szrj 
3566*38fd1498Szrj       for (unsigned HOST_WIDE_INT j = sub_low; j <= sub_high; j++)
3567*38fd1498Szrj 	sbr->m_jump_table[j] = bb;
3568*38fd1498Szrj     }
3569*38fd1498Szrj 
3570*38fd1498Szrj   hbb->append_insn (sbr);
3571*38fd1498Szrj }
3572*38fd1498Szrj 
3573*38fd1498Szrj /* Verify that the function DECL can be handled by HSA.  */
3574*38fd1498Szrj 
3575*38fd1498Szrj static void
verify_function_arguments(tree decl)3576*38fd1498Szrj verify_function_arguments (tree decl)
3577*38fd1498Szrj {
3578*38fd1498Szrj   tree type = TREE_TYPE (decl);
3579*38fd1498Szrj   if (DECL_STATIC_CHAIN (decl))
3580*38fd1498Szrj     {
3581*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (decl),
3582*38fd1498Szrj 		     "HSA does not support nested functions: %qD", decl);
3583*38fd1498Szrj       return;
3584*38fd1498Szrj     }
3585*38fd1498Szrj   else if (!TYPE_ARG_TYPES (type) || stdarg_p (type))
3586*38fd1498Szrj     {
3587*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (decl),
3588*38fd1498Szrj 		     "HSA does not support functions with variadic arguments "
3589*38fd1498Szrj 		     "(or unknown return type): %qD", decl);
3590*38fd1498Szrj       return;
3591*38fd1498Szrj     }
3592*38fd1498Szrj }
3593*38fd1498Szrj 
3594*38fd1498Szrj /* Return BRIG type for FORMAL_ARG_TYPE.  If the formal argument type is NULL,
3595*38fd1498Szrj    return ACTUAL_ARG_TYPE.  */
3596*38fd1498Szrj 
3597*38fd1498Szrj static BrigType16_t
get_format_argument_type(tree formal_arg_type,BrigType16_t actual_arg_type)3598*38fd1498Szrj get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type)
3599*38fd1498Szrj {
3600*38fd1498Szrj   if (formal_arg_type == NULL)
3601*38fd1498Szrj     return actual_arg_type;
3602*38fd1498Szrj 
3603*38fd1498Szrj   BrigType16_t decl_type
3604*38fd1498Szrj     = hsa_type_for_scalar_tree_type (formal_arg_type, false);
3605*38fd1498Szrj   return mem_type_for_type (decl_type);
3606*38fd1498Szrj }
3607*38fd1498Szrj 
3608*38fd1498Szrj /* Generate HSA instructions for a direct call instruction.
3609*38fd1498Szrj    Instructions will be appended to HBB, which also needs to be the
3610*38fd1498Szrj    corresponding structure to the basic_block of STMT.
3611*38fd1498Szrj    If ASSIGN_LHS is false, do not copy HSA function result argument into the
3612*38fd1498Szrj    corresponding HSA representation of the gimple statement LHS.  */
3613*38fd1498Szrj 
3614*38fd1498Szrj static void
3615*38fd1498Szrj gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb,
3616*38fd1498Szrj 			       bool assign_lhs = true)
3617*38fd1498Szrj {
3618*38fd1498Szrj   tree decl = gimple_call_fndecl (stmt);
3619*38fd1498Szrj   verify_function_arguments (decl);
3620*38fd1498Szrj   if (hsa_seen_error ())
3621*38fd1498Szrj     return;
3622*38fd1498Szrj 
3623*38fd1498Szrj   hsa_insn_call *call_insn = new hsa_insn_call (decl);
3624*38fd1498Szrj   hsa_cfun->m_called_functions.safe_push (call_insn->m_called_function);
3625*38fd1498Szrj 
3626*38fd1498Szrj   /* Argument block start.  */
3627*38fd1498Szrj   hsa_insn_arg_block *arg_start
3628*38fd1498Szrj     = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn);
3629*38fd1498Szrj   hbb->append_insn (arg_start);
3630*38fd1498Szrj 
3631*38fd1498Szrj   tree parm_type_chain = TYPE_ARG_TYPES (gimple_call_fntype (stmt));
3632*38fd1498Szrj 
3633*38fd1498Szrj   /* Preparation of arguments that will be passed to function.  */
3634*38fd1498Szrj   const unsigned args = gimple_call_num_args (stmt);
3635*38fd1498Szrj   for (unsigned i = 0; i < args; ++i)
3636*38fd1498Szrj     {
3637*38fd1498Szrj       tree parm = gimple_call_arg (stmt, (int)i);
3638*38fd1498Szrj       tree parm_decl_type = parm_type_chain != NULL_TREE
3639*38fd1498Szrj 	? TREE_VALUE (parm_type_chain) : NULL_TREE;
3640*38fd1498Szrj       hsa_op_address *addr;
3641*38fd1498Szrj 
3642*38fd1498Szrj       if (AGGREGATE_TYPE_P (TREE_TYPE (parm)))
3643*38fd1498Szrj 	{
3644*38fd1498Szrj 	  addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i);
3645*38fd1498Szrj 	  BrigAlignment8_t align;
3646*38fd1498Szrj 	  hsa_op_address *src = gen_hsa_addr_with_align (parm, hbb, &align);
3647*38fd1498Szrj 	  gen_hsa_memory_copy (hbb, addr, src,
3648*38fd1498Szrj 			       addr->m_symbol->total_byte_size (), align);
3649*38fd1498Szrj 	}
3650*38fd1498Szrj       else
3651*38fd1498Szrj 	{
3652*38fd1498Szrj 	  hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb);
3653*38fd1498Szrj 
3654*38fd1498Szrj 	  if (parm_decl_type != NULL && AGGREGATE_TYPE_P (parm_decl_type))
3655*38fd1498Szrj 	    {
3656*38fd1498Szrj 	      HSA_SORRY_AT (gimple_location (stmt),
3657*38fd1498Szrj 			    "support for HSA does not implement an aggregate "
3658*38fd1498Szrj 			    "formal argument in a function call, while actual "
3659*38fd1498Szrj 			    "argument is not an aggregate");
3660*38fd1498Szrj 	      return;
3661*38fd1498Szrj 	    }
3662*38fd1498Szrj 
3663*38fd1498Szrj 	  BrigType16_t formal_arg_type
3664*38fd1498Szrj 	    = get_format_argument_type (parm_decl_type, src->m_type);
3665*38fd1498Szrj 	  if (hsa_seen_error ())
3666*38fd1498Szrj 	    return;
3667*38fd1498Szrj 
3668*38fd1498Szrj 	  if (src->m_type != formal_arg_type)
3669*38fd1498Szrj 	    src = src->get_in_type (formal_arg_type, hbb);
3670*38fd1498Szrj 
3671*38fd1498Szrj 	  addr
3672*38fd1498Szrj 	    = gen_hsa_addr_for_arg (parm_decl_type != NULL_TREE ?
3673*38fd1498Szrj 				    parm_decl_type: TREE_TYPE (parm), i);
3674*38fd1498Szrj 	  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, formal_arg_type,
3675*38fd1498Szrj 						src, addr);
3676*38fd1498Szrj 
3677*38fd1498Szrj 	  hbb->append_insn (mem);
3678*38fd1498Szrj 	}
3679*38fd1498Szrj 
3680*38fd1498Szrj       call_insn->m_input_args.safe_push (addr->m_symbol);
3681*38fd1498Szrj       if (parm_type_chain)
3682*38fd1498Szrj 	parm_type_chain = TREE_CHAIN (parm_type_chain);
3683*38fd1498Szrj     }
3684*38fd1498Szrj 
3685*38fd1498Szrj   call_insn->m_args_code_list = new hsa_op_code_list (args);
3686*38fd1498Szrj   hbb->append_insn (call_insn);
3687*38fd1498Szrj 
3688*38fd1498Szrj   tree result_type = TREE_TYPE (TREE_TYPE (decl));
3689*38fd1498Szrj 
3690*38fd1498Szrj   tree result = gimple_call_lhs (stmt);
3691*38fd1498Szrj   hsa_insn_mem *result_insn = NULL;
3692*38fd1498Szrj   if (!VOID_TYPE_P (result_type))
3693*38fd1498Szrj     {
3694*38fd1498Szrj       hsa_op_address *addr = gen_hsa_addr_for_arg (result_type, -1);
3695*38fd1498Szrj 
3696*38fd1498Szrj       /* Even if result of a function call is unused, we have to emit
3697*38fd1498Szrj 	 declaration for the result.  */
3698*38fd1498Szrj       if (result && assign_lhs)
3699*38fd1498Szrj 	{
3700*38fd1498Szrj 	  tree lhs_type = TREE_TYPE (result);
3701*38fd1498Szrj 
3702*38fd1498Szrj 	  if (hsa_seen_error ())
3703*38fd1498Szrj 	    return;
3704*38fd1498Szrj 
3705*38fd1498Szrj 	  if (AGGREGATE_TYPE_P (lhs_type))
3706*38fd1498Szrj 	    {
3707*38fd1498Szrj 	      BrigAlignment8_t align;
3708*38fd1498Szrj 	      hsa_op_address *result_addr
3709*38fd1498Szrj 		= gen_hsa_addr_with_align (result, hbb, &align);
3710*38fd1498Szrj 	      gen_hsa_memory_copy (hbb, result_addr, addr,
3711*38fd1498Szrj 				   addr->m_symbol->total_byte_size (), align);
3712*38fd1498Szrj 	    }
3713*38fd1498Szrj 	  else
3714*38fd1498Szrj 	    {
3715*38fd1498Szrj 	      BrigType16_t mtype
3716*38fd1498Szrj 		= mem_type_for_type (hsa_type_for_scalar_tree_type (lhs_type,
3717*38fd1498Szrj 								    false));
3718*38fd1498Szrj 
3719*38fd1498Szrj 	      hsa_op_reg *dst = hsa_cfun->reg_for_gimple_ssa (result);
3720*38fd1498Szrj 	      result_insn = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dst, addr);
3721*38fd1498Szrj 	      hbb->append_insn (result_insn);
3722*38fd1498Szrj 	    }
3723*38fd1498Szrj 	}
3724*38fd1498Szrj 
3725*38fd1498Szrj       call_insn->m_output_arg = addr->m_symbol;
3726*38fd1498Szrj       call_insn->m_result_code_list = new hsa_op_code_list (1);
3727*38fd1498Szrj     }
3728*38fd1498Szrj   else
3729*38fd1498Szrj     {
3730*38fd1498Szrj       if (result)
3731*38fd1498Szrj 	{
3732*38fd1498Szrj 	  HSA_SORRY_AT (gimple_location (stmt),
3733*38fd1498Szrj 			"support for HSA does not implement an assignment of "
3734*38fd1498Szrj 			"return value from a void function");
3735*38fd1498Szrj 	  return;
3736*38fd1498Szrj 	}
3737*38fd1498Szrj 
3738*38fd1498Szrj       call_insn->m_result_code_list = new hsa_op_code_list (0);
3739*38fd1498Szrj     }
3740*38fd1498Szrj 
3741*38fd1498Szrj   /* Argument block end.  */
3742*38fd1498Szrj   hsa_insn_arg_block *arg_end
3743*38fd1498Szrj     = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn);
3744*38fd1498Szrj   hbb->append_insn (arg_end);
3745*38fd1498Szrj }
3746*38fd1498Szrj 
3747*38fd1498Szrj /* Generate HSA instructions for a direct call of an internal fn.
3748*38fd1498Szrj    Instructions will be appended to HBB, which also needs to be the
3749*38fd1498Szrj    corresponding structure to the basic_block of STMT.  */
3750*38fd1498Szrj 
3751*38fd1498Szrj static void
gen_hsa_insns_for_call_of_internal_fn(gimple * stmt,hsa_bb * hbb)3752*38fd1498Szrj gen_hsa_insns_for_call_of_internal_fn (gimple *stmt, hsa_bb *hbb)
3753*38fd1498Szrj {
3754*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
3755*38fd1498Szrj   if (!lhs)
3756*38fd1498Szrj     return;
3757*38fd1498Szrj 
3758*38fd1498Szrj   tree lhs_type = TREE_TYPE (lhs);
3759*38fd1498Szrj   tree rhs1 = gimple_call_arg (stmt, 0);
3760*38fd1498Szrj   tree rhs1_type = TREE_TYPE (rhs1);
3761*38fd1498Szrj   enum internal_fn fn = gimple_call_internal_fn (stmt);
3762*38fd1498Szrj   hsa_internal_fn *ifn
3763*38fd1498Szrj     = new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type)));
3764*38fd1498Szrj   hsa_insn_call *call_insn = new hsa_insn_call (ifn);
3765*38fd1498Szrj 
3766*38fd1498Szrj   gcc_checking_assert (FLOAT_TYPE_P (rhs1_type));
3767*38fd1498Szrj 
3768*38fd1498Szrj   if (!hsa_emitted_internal_decls->find (call_insn->m_called_internal_fn))
3769*38fd1498Szrj     hsa_cfun->m_called_internal_fns.safe_push (call_insn->m_called_internal_fn);
3770*38fd1498Szrj 
3771*38fd1498Szrj   hsa_insn_arg_block *arg_start
3772*38fd1498Szrj     = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn);
3773*38fd1498Szrj   hbb->append_insn (arg_start);
3774*38fd1498Szrj 
3775*38fd1498Szrj   unsigned num_args = gimple_call_num_args (stmt);
3776*38fd1498Szrj 
3777*38fd1498Szrj   /* Function arguments.  */
3778*38fd1498Szrj   for (unsigned i = 0; i < num_args; i++)
3779*38fd1498Szrj     {
3780*38fd1498Szrj       tree parm = gimple_call_arg (stmt, (int)i);
3781*38fd1498Szrj       hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb);
3782*38fd1498Szrj 
3783*38fd1498Szrj       hsa_op_address *addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i);
3784*38fd1498Szrj       hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, src->m_type,
3785*38fd1498Szrj 					    src, addr);
3786*38fd1498Szrj 
3787*38fd1498Szrj       call_insn->m_input_args.safe_push (addr->m_symbol);
3788*38fd1498Szrj       hbb->append_insn (mem);
3789*38fd1498Szrj     }
3790*38fd1498Szrj 
3791*38fd1498Szrj   call_insn->m_args_code_list = new hsa_op_code_list (num_args);
3792*38fd1498Szrj   hbb->append_insn (call_insn);
3793*38fd1498Szrj 
3794*38fd1498Szrj   /* Assign returned value.  */
3795*38fd1498Szrj   hsa_op_address *addr = gen_hsa_addr_for_arg (lhs_type, -1);
3796*38fd1498Szrj 
3797*38fd1498Szrj   call_insn->m_output_arg = addr->m_symbol;
3798*38fd1498Szrj   call_insn->m_result_code_list = new hsa_op_code_list (1);
3799*38fd1498Szrj 
3800*38fd1498Szrj   /* Argument block end.  */
3801*38fd1498Szrj   hsa_insn_arg_block *arg_end
3802*38fd1498Szrj     = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn);
3803*38fd1498Szrj   hbb->append_insn (arg_end);
3804*38fd1498Szrj }
3805*38fd1498Szrj 
3806*38fd1498Szrj /* Generate HSA instructions for a return value instruction.
3807*38fd1498Szrj    Instructions will be appended to HBB, which also needs to be the
3808*38fd1498Szrj    corresponding structure to the basic_block of STMT.  */
3809*38fd1498Szrj 
3810*38fd1498Szrj static void
gen_hsa_insns_for_return(greturn * stmt,hsa_bb * hbb)3811*38fd1498Szrj gen_hsa_insns_for_return (greturn *stmt, hsa_bb *hbb)
3812*38fd1498Szrj {
3813*38fd1498Szrj   tree retval = gimple_return_retval (stmt);
3814*38fd1498Szrj   if (retval)
3815*38fd1498Szrj     {
3816*38fd1498Szrj       hsa_op_address *addr = new hsa_op_address (hsa_cfun->m_output_arg);
3817*38fd1498Szrj 
3818*38fd1498Szrj       if (AGGREGATE_TYPE_P (TREE_TYPE (retval)))
3819*38fd1498Szrj 	{
3820*38fd1498Szrj 	  BrigAlignment8_t align;
3821*38fd1498Szrj 	  hsa_op_address *retval_addr = gen_hsa_addr_with_align (retval, hbb,
3822*38fd1498Szrj 								 &align);
3823*38fd1498Szrj 	  gen_hsa_memory_copy (hbb, addr, retval_addr,
3824*38fd1498Szrj 			       hsa_cfun->m_output_arg->total_byte_size (),
3825*38fd1498Szrj 			       align);
3826*38fd1498Szrj 	}
3827*38fd1498Szrj       else
3828*38fd1498Szrj 	{
3829*38fd1498Szrj 	  BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (retval),
3830*38fd1498Szrj 							  false);
3831*38fd1498Szrj 	  BrigType16_t mtype = mem_type_for_type (t);
3832*38fd1498Szrj 
3833*38fd1498Szrj 	  /* Store of return value.  */
3834*38fd1498Szrj 	  hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (retval, hbb);
3835*38fd1498Szrj 	  src = src->get_in_type (mtype, hbb);
3836*38fd1498Szrj 	  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src,
3837*38fd1498Szrj 						addr);
3838*38fd1498Szrj 	  hbb->append_insn (mem);
3839*38fd1498Szrj 	}
3840*38fd1498Szrj     }
3841*38fd1498Szrj 
3842*38fd1498Szrj   /* HSAIL return instruction emission.  */
3843*38fd1498Szrj   hsa_insn_basic *ret = new hsa_insn_basic (0, BRIG_OPCODE_RET);
3844*38fd1498Szrj   hbb->append_insn (ret);
3845*38fd1498Szrj }
3846*38fd1498Szrj 
3847*38fd1498Szrj /* Set OP_INDEX-th operand of the instruction to DEST, as the DEST
3848*38fd1498Szrj    can have a different type, conversion instructions are possibly
3849*38fd1498Szrj    appended to HBB.  */
3850*38fd1498Szrj 
3851*38fd1498Szrj void
set_output_in_type(hsa_op_reg * dest,unsigned op_index,hsa_bb * hbb)3852*38fd1498Szrj hsa_insn_basic::set_output_in_type (hsa_op_reg *dest, unsigned op_index,
3853*38fd1498Szrj 				    hsa_bb *hbb)
3854*38fd1498Szrj {
3855*38fd1498Szrj   gcc_checking_assert (op_output_p (op_index));
3856*38fd1498Szrj 
3857*38fd1498Szrj   if (dest->m_type == m_type)
3858*38fd1498Szrj     {
3859*38fd1498Szrj       set_op (op_index, dest);
3860*38fd1498Szrj       return;
3861*38fd1498Szrj     }
3862*38fd1498Szrj 
3863*38fd1498Szrj   hsa_insn_basic *insn;
3864*38fd1498Szrj   hsa_op_reg *tmp;
3865*38fd1498Szrj   if (hsa_needs_cvt (dest->m_type, m_type))
3866*38fd1498Szrj     {
3867*38fd1498Szrj       tmp = new hsa_op_reg (m_type);
3868*38fd1498Szrj       insn = new hsa_insn_cvt (dest, tmp);
3869*38fd1498Szrj     }
3870*38fd1498Szrj   else if (hsa_type_bit_size (dest->m_type) == hsa_type_bit_size (m_type))
3871*38fd1498Szrj     {
3872*38fd1498Szrj       /* When output, HSA registers do not really have types, only sizes, so if
3873*38fd1498Szrj 	 the sizes match, we can use the register directly.  */
3874*38fd1498Szrj       set_op (op_index, dest);
3875*38fd1498Szrj       return;
3876*38fd1498Szrj     }
3877*38fd1498Szrj   else
3878*38fd1498Szrj     {
3879*38fd1498Szrj       tmp = new hsa_op_reg (m_type);
3880*38fd1498Szrj       insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type,
3881*38fd1498Szrj 				 dest, tmp->get_in_type (dest->m_type, hbb));
3882*38fd1498Szrj       hsa_fixup_mov_insn_type (insn);
3883*38fd1498Szrj     }
3884*38fd1498Szrj   set_op (op_index, tmp);
3885*38fd1498Szrj   hbb->append_insn (insn);
3886*38fd1498Szrj }
3887*38fd1498Szrj 
3888*38fd1498Szrj /* Generate instruction OPCODE to query a property of HSA grid along the
3889*38fd1498Szrj    given DIMENSION.  Store result into DEST and append the instruction to
3890*38fd1498Szrj    HBB.  */
3891*38fd1498Szrj 
3892*38fd1498Szrj static void
query_hsa_grid_dim(hsa_op_reg * dest,int opcode,hsa_op_immed * dimension,hsa_bb * hbb)3893*38fd1498Szrj query_hsa_grid_dim (hsa_op_reg *dest, int opcode, hsa_op_immed *dimension,
3894*38fd1498Szrj 		    hsa_bb *hbb)
3895*38fd1498Szrj {
3896*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (2, opcode, BRIG_TYPE_U32, NULL,
3897*38fd1498Szrj 					     dimension);
3898*38fd1498Szrj   hbb->append_insn (insn);
3899*38fd1498Szrj   insn->set_output_in_type (dest, 0, hbb);
3900*38fd1498Szrj }
3901*38fd1498Szrj 
3902*38fd1498Szrj /* Generate instruction OPCODE to query a property of HSA grid along the given
3903*38fd1498Szrj    dimension which is an immediate in first argument of STMT.  Store result
3904*38fd1498Szrj    into the register corresponding to LHS of STMT and append the instruction to
3905*38fd1498Szrj    HBB.  */
3906*38fd1498Szrj 
3907*38fd1498Szrj static void
query_hsa_grid_dim(gimple * stmt,int opcode,hsa_bb * hbb)3908*38fd1498Szrj query_hsa_grid_dim (gimple *stmt, int opcode, hsa_bb *hbb)
3909*38fd1498Szrj {
3910*38fd1498Szrj   tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt));
3911*38fd1498Szrj   if (lhs == NULL_TREE)
3912*38fd1498Szrj     return;
3913*38fd1498Szrj 
3914*38fd1498Szrj   tree arg = gimple_call_arg (stmt, 0);
3915*38fd1498Szrj   unsigned HOST_WIDE_INT dim = 5;
3916*38fd1498Szrj   if (tree_fits_uhwi_p (arg))
3917*38fd1498Szrj     dim = tree_to_uhwi (arg);
3918*38fd1498Szrj   if (dim > 2)
3919*38fd1498Szrj     {
3920*38fd1498Szrj       HSA_SORRY_AT (gimple_location (stmt),
3921*38fd1498Szrj 		    "HSA grid query dimension must be immediate constant 0, 1 "
3922*38fd1498Szrj 		    "or 2");
3923*38fd1498Szrj       return;
3924*38fd1498Szrj     }
3925*38fd1498Szrj 
3926*38fd1498Szrj   hsa_op_immed *hdim = new hsa_op_immed (dim, (BrigKind16_t) BRIG_TYPE_U32);
3927*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
3928*38fd1498Szrj   query_hsa_grid_dim (dest, opcode, hdim, hbb);
3929*38fd1498Szrj }
3930*38fd1498Szrj 
3931*38fd1498Szrj /* Generate instruction OPCODE to query a property of HSA grid that is
3932*38fd1498Szrj    independent of any dimension.  Store result into the register corresponding
3933*38fd1498Szrj    to LHS of STMT and append the instruction to HBB.  */
3934*38fd1498Szrj 
3935*38fd1498Szrj static void
query_hsa_grid_nodim(gimple * stmt,BrigOpcode16_t opcode,hsa_bb * hbb)3936*38fd1498Szrj query_hsa_grid_nodim (gimple *stmt, BrigOpcode16_t opcode, hsa_bb *hbb)
3937*38fd1498Szrj {
3938*38fd1498Szrj   tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt));
3939*38fd1498Szrj   if (lhs == NULL_TREE)
3940*38fd1498Szrj     return;
3941*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
3942*38fd1498Szrj   BrigType16_t brig_type = hsa_unsigned_type_for_type (dest->m_type);
3943*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (1, opcode, brig_type, dest);
3944*38fd1498Szrj   hbb->append_insn (insn);
3945*38fd1498Szrj }
3946*38fd1498Szrj 
3947*38fd1498Szrj /* Emit instructions that set hsa_num_threads according to provided VALUE.
3948*38fd1498Szrj    Instructions are appended to basic block HBB.  */
3949*38fd1498Szrj 
3950*38fd1498Szrj static void
gen_set_num_threads(tree value,hsa_bb * hbb)3951*38fd1498Szrj gen_set_num_threads (tree value, hsa_bb *hbb)
3952*38fd1498Szrj {
3953*38fd1498Szrj   hbb->append_insn (new hsa_insn_comment ("omp_set_num_threads"));
3954*38fd1498Szrj   hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (value, hbb);
3955*38fd1498Szrj 
3956*38fd1498Szrj   src = src->get_in_type (hsa_num_threads->m_type, hbb);
3957*38fd1498Szrj   hsa_op_address *addr = new hsa_op_address (hsa_num_threads);
3958*38fd1498Szrj 
3959*38fd1498Szrj   hsa_insn_basic *basic
3960*38fd1498Szrj     = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type, src, addr);
3961*38fd1498Szrj   hbb->append_insn (basic);
3962*38fd1498Szrj }
3963*38fd1498Szrj 
3964*38fd1498Szrj /* Return byte offset of a FIELD_NAME in GOMP_hsa_kernel_dispatch which
3965*38fd1498Szrj    is defined in plugin-hsa.c.  */
3966*38fd1498Szrj 
3967*38fd1498Szrj static HOST_WIDE_INT
get_hsa_kernel_dispatch_offset(const char * field_name)3968*38fd1498Szrj get_hsa_kernel_dispatch_offset (const char *field_name)
3969*38fd1498Szrj {
3970*38fd1498Szrj   tree *hsa_kernel_dispatch_type = hsa_get_kernel_dispatch_type ();
3971*38fd1498Szrj   if (*hsa_kernel_dispatch_type == NULL)
3972*38fd1498Szrj     {
3973*38fd1498Szrj       /* Collection of information needed for a dispatch of a kernel from a
3974*38fd1498Szrj 	 kernel.  Keep in sync with libgomp's plugin-hsa.c.  */
3975*38fd1498Szrj 
3976*38fd1498Szrj       *hsa_kernel_dispatch_type = make_node (RECORD_TYPE);
3977*38fd1498Szrj       tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
3978*38fd1498Szrj 			       get_identifier ("queue"), ptr_type_node);
3979*38fd1498Szrj       DECL_CHAIN (id_f1) = NULL_TREE;
3980*38fd1498Szrj       tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
3981*38fd1498Szrj 			       get_identifier ("omp_data_memory"),
3982*38fd1498Szrj 			       ptr_type_node);
3983*38fd1498Szrj       DECL_CHAIN (id_f2) = id_f1;
3984*38fd1498Szrj       tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
3985*38fd1498Szrj 			       get_identifier ("kernarg_address"),
3986*38fd1498Szrj 			       ptr_type_node);
3987*38fd1498Szrj       DECL_CHAIN (id_f3) = id_f2;
3988*38fd1498Szrj       tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
3989*38fd1498Szrj 			       get_identifier ("object"),
3990*38fd1498Szrj 			       uint64_type_node);
3991*38fd1498Szrj       DECL_CHAIN (id_f4) = id_f3;
3992*38fd1498Szrj       tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
3993*38fd1498Szrj 			       get_identifier ("signal"),
3994*38fd1498Szrj 			       uint64_type_node);
3995*38fd1498Szrj       DECL_CHAIN (id_f5) = id_f4;
3996*38fd1498Szrj       tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
3997*38fd1498Szrj 			       get_identifier ("private_segment_size"),
3998*38fd1498Szrj 			       uint32_type_node);
3999*38fd1498Szrj       DECL_CHAIN (id_f6) = id_f5;
4000*38fd1498Szrj       tree id_f7 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
4001*38fd1498Szrj 			       get_identifier ("group_segment_size"),
4002*38fd1498Szrj 			       uint32_type_node);
4003*38fd1498Szrj       DECL_CHAIN (id_f7) = id_f6;
4004*38fd1498Szrj       tree id_f8 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
4005*38fd1498Szrj 			       get_identifier ("kernel_dispatch_count"),
4006*38fd1498Szrj 			       uint64_type_node);
4007*38fd1498Szrj       DECL_CHAIN (id_f8) = id_f7;
4008*38fd1498Szrj       tree id_f9 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
4009*38fd1498Szrj 			       get_identifier ("debug"),
4010*38fd1498Szrj 			       uint64_type_node);
4011*38fd1498Szrj       DECL_CHAIN (id_f9) = id_f8;
4012*38fd1498Szrj       tree id_f10 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
4013*38fd1498Szrj 				get_identifier ("omp_level"),
4014*38fd1498Szrj 				uint64_type_node);
4015*38fd1498Szrj       DECL_CHAIN (id_f10) = id_f9;
4016*38fd1498Szrj       tree id_f11 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
4017*38fd1498Szrj 				get_identifier ("children_dispatches"),
4018*38fd1498Szrj 				ptr_type_node);
4019*38fd1498Szrj       DECL_CHAIN (id_f11) = id_f10;
4020*38fd1498Szrj       tree id_f12 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
4021*38fd1498Szrj 			       get_identifier ("omp_num_threads"),
4022*38fd1498Szrj 			       uint32_type_node);
4023*38fd1498Szrj       DECL_CHAIN (id_f12) = id_f11;
4024*38fd1498Szrj 
4025*38fd1498Szrj 
4026*38fd1498Szrj       finish_builtin_struct (*hsa_kernel_dispatch_type, "__hsa_kernel_dispatch",
4027*38fd1498Szrj 			     id_f12, NULL_TREE);
4028*38fd1498Szrj       TYPE_ARTIFICIAL (*hsa_kernel_dispatch_type) = 1;
4029*38fd1498Szrj     }
4030*38fd1498Szrj 
4031*38fd1498Szrj   for (tree chain = TYPE_FIELDS (*hsa_kernel_dispatch_type);
4032*38fd1498Szrj        chain != NULL_TREE; chain = TREE_CHAIN (chain))
4033*38fd1498Szrj     if (id_equal (DECL_NAME (chain), field_name))
4034*38fd1498Szrj       return int_byte_position (chain);
4035*38fd1498Szrj 
4036*38fd1498Szrj   gcc_unreachable ();
4037*38fd1498Szrj }
4038*38fd1498Szrj 
4039*38fd1498Szrj /* Return an HSA register that will contain number of threads for
4040*38fd1498Szrj    a future dispatched kernel.  Instructions are added to HBB.  */
4041*38fd1498Szrj 
4042*38fd1498Szrj static hsa_op_reg *
gen_num_threads_for_dispatch(hsa_bb * hbb)4043*38fd1498Szrj gen_num_threads_for_dispatch (hsa_bb *hbb)
4044*38fd1498Szrj {
4045*38fd1498Szrj   /* Step 1) Assign to number of threads:
4046*38fd1498Szrj      MIN (HSA_DEFAULT_NUM_THREADS, hsa_num_threads).  */
4047*38fd1498Szrj   hsa_op_reg *threads = new hsa_op_reg (hsa_num_threads->m_type);
4048*38fd1498Szrj   hsa_op_address *addr = new hsa_op_address (hsa_num_threads);
4049*38fd1498Szrj 
4050*38fd1498Szrj   hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, threads->m_type,
4051*38fd1498Szrj 				      threads, addr));
4052*38fd1498Szrj 
4053*38fd1498Szrj   hsa_op_immed *limit = new hsa_op_immed (HSA_DEFAULT_NUM_THREADS,
4054*38fd1498Szrj 					  BRIG_TYPE_U32);
4055*38fd1498Szrj   hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1);
4056*38fd1498Szrj   hsa_insn_cmp * cmp
4057*38fd1498Szrj     = new hsa_insn_cmp (BRIG_COMPARE_LT, r->m_type, r, threads, limit);
4058*38fd1498Szrj   hbb->append_insn (cmp);
4059*38fd1498Szrj 
4060*38fd1498Szrj   BrigType16_t btype = hsa_bittype_for_type (threads->m_type);
4061*38fd1498Szrj   hsa_op_reg *tmp = new hsa_op_reg (threads->m_type);
4062*38fd1498Szrj 
4063*38fd1498Szrj   hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp, r,
4064*38fd1498Szrj 					threads, limit));
4065*38fd1498Szrj 
4066*38fd1498Szrj   /* Step 2) If the number is equal to zero,
4067*38fd1498Szrj      return shadow->omp_num_threads.  */
4068*38fd1498Szrj   hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
4069*38fd1498Szrj 
4070*38fd1498Szrj   hsa_op_reg *shadow_thread_count = new hsa_op_reg (BRIG_TYPE_U32);
4071*38fd1498Szrj   addr
4072*38fd1498Szrj     = new hsa_op_address (shadow_reg_ptr,
4073*38fd1498Szrj 			  get_hsa_kernel_dispatch_offset ("omp_num_threads"));
4074*38fd1498Szrj   hsa_insn_basic *basic
4075*38fd1498Szrj     = new hsa_insn_mem (BRIG_OPCODE_LD, shadow_thread_count->m_type,
4076*38fd1498Szrj 			shadow_thread_count, addr);
4077*38fd1498Szrj   hbb->append_insn (basic);
4078*38fd1498Szrj 
4079*38fd1498Szrj   hsa_op_reg *tmp2 = new hsa_op_reg (threads->m_type);
4080*38fd1498Szrj   r = new hsa_op_reg (BRIG_TYPE_B1);
4081*38fd1498Szrj   hsa_op_immed *imm = new hsa_op_immed (0, shadow_thread_count->m_type);
4082*38fd1498Szrj   hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_EQ, r->m_type, r, tmp, imm));
4083*38fd1498Szrj   hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp2, r,
4084*38fd1498Szrj 					shadow_thread_count, tmp));
4085*38fd1498Szrj 
4086*38fd1498Szrj   hsa_op_base *dest = tmp2->get_in_type (BRIG_TYPE_U16, hbb);
4087*38fd1498Szrj 
4088*38fd1498Szrj   return as_a <hsa_op_reg *> (dest);
4089*38fd1498Szrj }
4090*38fd1498Szrj 
4091*38fd1498Szrj /* Build OPCODE query for all three hsa dimensions, multiply them and store the
4092*38fd1498Szrj    result into DEST.  */
4093*38fd1498Szrj 
4094*38fd1498Szrj static void
multiply_grid_dim_characteristics(hsa_op_reg * dest,int opcode,hsa_bb * hbb)4095*38fd1498Szrj multiply_grid_dim_characteristics (hsa_op_reg *dest, int opcode, hsa_bb *hbb)
4096*38fd1498Szrj {
4097*38fd1498Szrj   hsa_op_reg *dimx = new hsa_op_reg (BRIG_TYPE_U32);
4098*38fd1498Szrj   query_hsa_grid_dim (dimx, opcode,
4099*38fd1498Szrj 		      new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4100*38fd1498Szrj   hsa_op_reg *dimy = new hsa_op_reg (BRIG_TYPE_U32);
4101*38fd1498Szrj   query_hsa_grid_dim (dimy, opcode,
4102*38fd1498Szrj 		      new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4103*38fd1498Szrj   hsa_op_reg *dimz = new hsa_op_reg (BRIG_TYPE_U32);
4104*38fd1498Szrj   query_hsa_grid_dim (dimz, opcode,
4105*38fd1498Szrj 		      new hsa_op_immed (2, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4106*38fd1498Szrj   hsa_op_reg *tmp = new hsa_op_reg (dest->m_type);
4107*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp,
4108*38fd1498Szrj 			    dimx->get_in_type (dest->m_type, hbb),
4109*38fd1498Szrj 			    dimy->get_in_type (dest->m_type, hbb), hbb);
4110*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_MUL, dest, tmp,
4111*38fd1498Szrj 			    dimz->get_in_type (dest->m_type, hbb), hbb);
4112*38fd1498Szrj }
4113*38fd1498Szrj 
4114*38fd1498Szrj /* Emit instructions that assign number of threads to lhs of gimple STMT.
4115*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4116*38fd1498Szrj 
4117*38fd1498Szrj static void
gen_get_num_threads(gimple * stmt,hsa_bb * hbb)4118*38fd1498Szrj gen_get_num_threads (gimple *stmt, hsa_bb *hbb)
4119*38fd1498Szrj {
4120*38fd1498Szrj   if (gimple_call_lhs (stmt) == NULL_TREE)
4121*38fd1498Szrj     return;
4122*38fd1498Szrj 
4123*38fd1498Szrj   hbb->append_insn (new hsa_insn_comment ("omp_get_num_threads"));
4124*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4125*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4126*38fd1498Szrj   multiply_grid_dim_characteristics (dest, BRIG_OPCODE_CURRENTWORKGROUPSIZE,
4127*38fd1498Szrj 				     hbb);
4128*38fd1498Szrj }
4129*38fd1498Szrj 
4130*38fd1498Szrj /* Emit instructions that assign number of teams to lhs of gimple STMT.
4131*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4132*38fd1498Szrj 
4133*38fd1498Szrj static void
gen_get_num_teams(gimple * stmt,hsa_bb * hbb)4134*38fd1498Szrj gen_get_num_teams (gimple *stmt, hsa_bb *hbb)
4135*38fd1498Szrj {
4136*38fd1498Szrj   if (gimple_call_lhs (stmt) == NULL_TREE)
4137*38fd1498Szrj     return;
4138*38fd1498Szrj 
4139*38fd1498Szrj   hbb->append_insn (new hsa_insn_comment ("omp_get_num_teams"));
4140*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4141*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4142*38fd1498Szrj   multiply_grid_dim_characteristics (dest, BRIG_OPCODE_GRIDGROUPS, hbb);
4143*38fd1498Szrj }
4144*38fd1498Szrj 
4145*38fd1498Szrj /* Emit instructions that assign a team number to lhs of gimple STMT.
4146*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4147*38fd1498Szrj 
4148*38fd1498Szrj static void
gen_get_team_num(gimple * stmt,hsa_bb * hbb)4149*38fd1498Szrj gen_get_team_num (gimple *stmt, hsa_bb *hbb)
4150*38fd1498Szrj {
4151*38fd1498Szrj   if (gimple_call_lhs (stmt) == NULL_TREE)
4152*38fd1498Szrj     return;
4153*38fd1498Szrj 
4154*38fd1498Szrj   hbb->append_insn (new hsa_insn_comment ("omp_get_team_num"));
4155*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4156*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4157*38fd1498Szrj 
4158*38fd1498Szrj   hsa_op_reg *gnum_x = new hsa_op_reg (BRIG_TYPE_U32);
4159*38fd1498Szrj   query_hsa_grid_dim (gnum_x, BRIG_OPCODE_GRIDGROUPS,
4160*38fd1498Szrj 		      new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4161*38fd1498Szrj   hsa_op_reg *gnum_y = new hsa_op_reg (BRIG_TYPE_U32);
4162*38fd1498Szrj   query_hsa_grid_dim (gnum_y, BRIG_OPCODE_GRIDGROUPS,
4163*38fd1498Szrj 		      new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4164*38fd1498Szrj 
4165*38fd1498Szrj   hsa_op_reg *gno_z = new hsa_op_reg (BRIG_TYPE_U32);
4166*38fd1498Szrj   query_hsa_grid_dim (gno_z, BRIG_OPCODE_WORKGROUPID,
4167*38fd1498Szrj 		      new hsa_op_immed (2, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4168*38fd1498Szrj 
4169*38fd1498Szrj   hsa_op_reg *tmp1 = new hsa_op_reg (dest->m_type);
4170*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp1,
4171*38fd1498Szrj 			    gnum_x->get_in_type (dest->m_type, hbb),
4172*38fd1498Szrj 			    gnum_y->get_in_type (dest->m_type, hbb), hbb);
4173*38fd1498Szrj   hsa_op_reg *tmp2 = new hsa_op_reg (dest->m_type);
4174*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp2, tmp1,
4175*38fd1498Szrj 			    gno_z->get_in_type (dest->m_type, hbb), hbb);
4176*38fd1498Szrj 
4177*38fd1498Szrj   hsa_op_reg *gno_y = new hsa_op_reg (BRIG_TYPE_U32);
4178*38fd1498Szrj   query_hsa_grid_dim (gno_y, BRIG_OPCODE_WORKGROUPID,
4179*38fd1498Szrj 		      new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4180*38fd1498Szrj   hsa_op_reg *tmp3 = new hsa_op_reg (dest->m_type);
4181*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp3,
4182*38fd1498Szrj 			    gnum_x->get_in_type (dest->m_type, hbb),
4183*38fd1498Szrj 			    gno_y->get_in_type (dest->m_type, hbb), hbb);
4184*38fd1498Szrj   hsa_op_reg *tmp4 = new hsa_op_reg (dest->m_type);
4185*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_ADD, tmp4, tmp3, tmp2, hbb);
4186*38fd1498Szrj   hsa_op_reg *gno_x = new hsa_op_reg (BRIG_TYPE_U32);
4187*38fd1498Szrj   query_hsa_grid_dim (gno_x, BRIG_OPCODE_WORKGROUPID,
4188*38fd1498Szrj 		      new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb);
4189*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_ADD, dest, tmp4,
4190*38fd1498Szrj 			    gno_x->get_in_type (dest->m_type, hbb), hbb);
4191*38fd1498Szrj }
4192*38fd1498Szrj 
4193*38fd1498Szrj /* Emit instructions that get levels-var ICV to lhs of gimple STMT.
4194*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4195*38fd1498Szrj 
4196*38fd1498Szrj static void
gen_get_level(gimple * stmt,hsa_bb * hbb)4197*38fd1498Szrj gen_get_level (gimple *stmt, hsa_bb *hbb)
4198*38fd1498Szrj {
4199*38fd1498Szrj   if (gimple_call_lhs (stmt) == NULL_TREE)
4200*38fd1498Szrj     return;
4201*38fd1498Szrj 
4202*38fd1498Szrj   hbb->append_insn (new hsa_insn_comment ("omp_get_level"));
4203*38fd1498Szrj 
4204*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4205*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4206*38fd1498Szrj 
4207*38fd1498Szrj   hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
4208*38fd1498Szrj   if (shadow_reg_ptr == NULL)
4209*38fd1498Szrj     {
4210*38fd1498Szrj       HSA_SORRY_AT (gimple_location (stmt),
4211*38fd1498Szrj 		    "support for HSA does not implement omp_get_level called "
4212*38fd1498Szrj 		    "from a function not being inlined within a kernel");
4213*38fd1498Szrj       return;
4214*38fd1498Szrj     }
4215*38fd1498Szrj 
4216*38fd1498Szrj   hsa_op_address *addr
4217*38fd1498Szrj     = new hsa_op_address (shadow_reg_ptr,
4218*38fd1498Szrj 			  get_hsa_kernel_dispatch_offset ("omp_level"));
4219*38fd1498Szrj 
4220*38fd1498Szrj   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64,
4221*38fd1498Szrj 					(hsa_op_base *) NULL, addr);
4222*38fd1498Szrj   hbb->append_insn (mem);
4223*38fd1498Szrj   mem->set_output_in_type (dest, 0, hbb);
4224*38fd1498Szrj }
4225*38fd1498Szrj 
4226*38fd1498Szrj /* Emit instruction that implement omp_get_max_threads of gimple STMT.  */
4227*38fd1498Szrj 
4228*38fd1498Szrj static void
gen_get_max_threads(gimple * stmt,hsa_bb * hbb)4229*38fd1498Szrj gen_get_max_threads (gimple *stmt, hsa_bb *hbb)
4230*38fd1498Szrj {
4231*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4232*38fd1498Szrj   if (!lhs)
4233*38fd1498Szrj     return;
4234*38fd1498Szrj 
4235*38fd1498Szrj   hbb->append_insn (new hsa_insn_comment ("omp_get_max_threads"));
4236*38fd1498Szrj 
4237*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4238*38fd1498Szrj   hsa_op_with_type *num_theads_reg = gen_num_threads_for_dispatch (hbb)
4239*38fd1498Szrj     ->get_in_type (dest->m_type, hbb);
4240*38fd1498Szrj   hsa_build_append_simple_mov (dest, num_theads_reg, hbb);
4241*38fd1498Szrj }
4242*38fd1498Szrj 
4243*38fd1498Szrj /* Emit instructions that implement alloca builtin gimple STMT.
4244*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4245*38fd1498Szrj 
4246*38fd1498Szrj static void
gen_hsa_alloca(gcall * call,hsa_bb * hbb)4247*38fd1498Szrj gen_hsa_alloca (gcall *call, hsa_bb *hbb)
4248*38fd1498Szrj {
4249*38fd1498Szrj   tree lhs = gimple_call_lhs (call);
4250*38fd1498Szrj   if (lhs == NULL_TREE)
4251*38fd1498Szrj     return;
4252*38fd1498Szrj 
4253*38fd1498Szrj   built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
4254*38fd1498Szrj 
4255*38fd1498Szrj   gcc_checking_assert (ALLOCA_FUNCTION_CODE_P (fn));
4256*38fd1498Szrj 
4257*38fd1498Szrj   unsigned bit_alignment = 0;
4258*38fd1498Szrj 
4259*38fd1498Szrj   if (fn != BUILT_IN_ALLOCA)
4260*38fd1498Szrj     {
4261*38fd1498Szrj       tree alignment_tree = gimple_call_arg (call, 1);
4262*38fd1498Szrj       if (TREE_CODE (alignment_tree) != INTEGER_CST)
4263*38fd1498Szrj 	{
4264*38fd1498Szrj 	  HSA_SORRY_ATV (gimple_location (call),
4265*38fd1498Szrj 			 "support for HSA does not implement "
4266*38fd1498Szrj 			 "__builtin_alloca_with_align with a non-constant "
4267*38fd1498Szrj 			 "alignment: %E", alignment_tree);
4268*38fd1498Szrj 	}
4269*38fd1498Szrj 
4270*38fd1498Szrj       bit_alignment = tree_to_uhwi (alignment_tree);
4271*38fd1498Szrj     }
4272*38fd1498Szrj 
4273*38fd1498Szrj   tree rhs1 = gimple_call_arg (call, 0);
4274*38fd1498Szrj   hsa_op_with_type *size = hsa_reg_or_immed_for_gimple_op (rhs1, hbb)
4275*38fd1498Szrj     ->get_in_type (BRIG_TYPE_U32, hbb);
4276*38fd1498Szrj   hsa_op_with_type *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4277*38fd1498Szrj 
4278*38fd1498Szrj   hsa_op_reg *tmp
4279*38fd1498Szrj     = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE));
4280*38fd1498Szrj   hsa_insn_alloca *a = new hsa_insn_alloca (tmp, size, bit_alignment);
4281*38fd1498Szrj   hbb->append_insn (a);
4282*38fd1498Szrj 
4283*38fd1498Szrj   hsa_insn_seg *seg
4284*38fd1498Szrj     = new hsa_insn_seg (BRIG_OPCODE_STOF,
4285*38fd1498Szrj 			hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT),
4286*38fd1498Szrj 			tmp->m_type, BRIG_SEGMENT_PRIVATE, dest, tmp);
4287*38fd1498Szrj   hbb->append_insn (seg);
4288*38fd1498Szrj }
4289*38fd1498Szrj 
4290*38fd1498Szrj /* Emit instructions that implement clrsb builtin STMT:
4291*38fd1498Szrj    Returns the number of leading redundant sign bits in x, i.e. the number
4292*38fd1498Szrj    of bits following the most significant bit that are identical to it.
4293*38fd1498Szrj    There are no special cases for 0 or other values.
4294*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4295*38fd1498Szrj 
4296*38fd1498Szrj static void
gen_hsa_clrsb(gcall * call,hsa_bb * hbb)4297*38fd1498Szrj gen_hsa_clrsb (gcall *call, hsa_bb *hbb)
4298*38fd1498Szrj {
4299*38fd1498Szrj   tree lhs = gimple_call_lhs (call);
4300*38fd1498Szrj   if (lhs == NULL_TREE)
4301*38fd1498Szrj     return;
4302*38fd1498Szrj 
4303*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4304*38fd1498Szrj   tree rhs1 = gimple_call_arg (call, 0);
4305*38fd1498Szrj   hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
4306*38fd1498Szrj   arg->extend_int_to_32bit (hbb);
4307*38fd1498Szrj   BrigType16_t bittype = hsa_bittype_for_type (arg->m_type);
4308*38fd1498Szrj   unsigned bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs1)));
4309*38fd1498Szrj 
4310*38fd1498Szrj   /* FIRSTBIT instruction is defined just for 32 and 64-bits wide integers.  */
4311*38fd1498Szrj   gcc_checking_assert (bitsize == 32 || bitsize == 64);
4312*38fd1498Szrj 
4313*38fd1498Szrj   /* Set true to MOST_SIG if the most significant bit is set to one.  */
4314*38fd1498Szrj   hsa_op_immed *c = new hsa_op_immed (1ul << (bitsize - 1),
4315*38fd1498Szrj 				      hsa_uint_for_bitsize (bitsize));
4316*38fd1498Szrj 
4317*38fd1498Szrj   hsa_op_reg *and_reg = new hsa_op_reg (bittype);
4318*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_AND, and_reg, arg, c, hbb);
4319*38fd1498Szrj 
4320*38fd1498Szrj   hsa_op_reg *most_sign = new hsa_op_reg (BRIG_TYPE_B1);
4321*38fd1498Szrj   hsa_insn_cmp *cmp
4322*38fd1498Szrj     = new hsa_insn_cmp (BRIG_COMPARE_EQ, most_sign->m_type, most_sign,
4323*38fd1498Szrj 			and_reg, c);
4324*38fd1498Szrj   hbb->append_insn (cmp);
4325*38fd1498Szrj 
4326*38fd1498Szrj   /* If the most significant bit is one, negate the input.  Otherwise
4327*38fd1498Szrj      shift the input value to left by one bit.  */
4328*38fd1498Szrj   hsa_op_reg *arg_neg = new hsa_op_reg (arg->m_type);
4329*38fd1498Szrj   gen_hsa_unary_operation (BRIG_OPCODE_NEG, arg_neg, arg, hbb);
4330*38fd1498Szrj 
4331*38fd1498Szrj   hsa_op_reg *shifted_arg = new hsa_op_reg (arg->m_type);
4332*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_SHL, shifted_arg, arg,
4333*38fd1498Szrj 			    new hsa_op_immed (1, BRIG_TYPE_U64), hbb);
4334*38fd1498Szrj 
4335*38fd1498Szrj   /* Assign the value that can be used for FIRSTBIT instruction according
4336*38fd1498Szrj      to the most significant bit.  */
4337*38fd1498Szrj   hsa_op_reg *tmp = new hsa_op_reg (bittype);
4338*38fd1498Szrj   hsa_insn_basic *cmov
4339*38fd1498Szrj     = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, bittype, tmp, most_sign,
4340*38fd1498Szrj 			  arg_neg, shifted_arg);
4341*38fd1498Szrj   hbb->append_insn (cmov);
4342*38fd1498Szrj 
4343*38fd1498Szrj   hsa_op_reg *leading_bits = new hsa_op_reg (BRIG_TYPE_S32);
4344*38fd1498Szrj   gen_hsa_unary_operation (BRIG_OPCODE_FIRSTBIT, leading_bits,
4345*38fd1498Szrj 			   tmp->get_in_type (hsa_uint_for_bitsize (bitsize),
4346*38fd1498Szrj 					     hbb), hbb);
4347*38fd1498Szrj 
4348*38fd1498Szrj   /* Set flag if the input value is equal to zero.  */
4349*38fd1498Szrj   hsa_op_reg *is_zero = new hsa_op_reg (BRIG_TYPE_B1);
4350*38fd1498Szrj   cmp = new hsa_insn_cmp (BRIG_COMPARE_EQ, is_zero->m_type, is_zero, arg,
4351*38fd1498Szrj 			  new hsa_op_immed (0, arg->m_type));
4352*38fd1498Szrj   hbb->append_insn (cmp);
4353*38fd1498Szrj 
4354*38fd1498Szrj   /* Return the number of leading bits,
4355*38fd1498Szrj      or (bitsize - 1) if the input value is zero.  */
4356*38fd1498Szrj   cmov = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, BRIG_TYPE_B32, NULL, is_zero,
4357*38fd1498Szrj 			     new hsa_op_immed (bitsize - 1, BRIG_TYPE_U32),
4358*38fd1498Szrj 			     leading_bits->get_in_type (BRIG_TYPE_B32, hbb));
4359*38fd1498Szrj   hbb->append_insn (cmov);
4360*38fd1498Szrj   cmov->set_output_in_type (dest, 0, hbb);
4361*38fd1498Szrj }
4362*38fd1498Szrj 
4363*38fd1498Szrj /* Emit instructions that implement ffs builtin STMT:
4364*38fd1498Szrj    Returns one plus the index of the least significant 1-bit of x,
4365*38fd1498Szrj    or if x is zero, returns zero.
4366*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4367*38fd1498Szrj 
4368*38fd1498Szrj static void
gen_hsa_ffs(gcall * call,hsa_bb * hbb)4369*38fd1498Szrj gen_hsa_ffs (gcall *call, hsa_bb *hbb)
4370*38fd1498Szrj {
4371*38fd1498Szrj   tree lhs = gimple_call_lhs (call);
4372*38fd1498Szrj   if (lhs == NULL_TREE)
4373*38fd1498Szrj     return;
4374*38fd1498Szrj 
4375*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4376*38fd1498Szrj 
4377*38fd1498Szrj   tree rhs1 = gimple_call_arg (call, 0);
4378*38fd1498Szrj   hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
4379*38fd1498Szrj   arg = arg->extend_int_to_32bit (hbb);
4380*38fd1498Szrj 
4381*38fd1498Szrj   hsa_op_reg *tmp = new hsa_op_reg (BRIG_TYPE_U32);
4382*38fd1498Szrj   hsa_insn_srctype *insn = new hsa_insn_srctype (2, BRIG_OPCODE_LASTBIT,
4383*38fd1498Szrj 						 tmp->m_type, arg->m_type,
4384*38fd1498Szrj 						 tmp, arg);
4385*38fd1498Szrj   hbb->append_insn (insn);
4386*38fd1498Szrj 
4387*38fd1498Szrj   hsa_insn_basic *addition
4388*38fd1498Szrj     = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, NULL, tmp,
4389*38fd1498Szrj 			  new hsa_op_immed (1, tmp->m_type));
4390*38fd1498Szrj   hbb->append_insn (addition);
4391*38fd1498Szrj   addition->set_output_in_type (dest, 0, hbb);
4392*38fd1498Szrj }
4393*38fd1498Szrj 
4394*38fd1498Szrj static void
gen_hsa_popcount_to_dest(hsa_op_reg * dest,hsa_op_with_type * arg,hsa_bb * hbb)4395*38fd1498Szrj gen_hsa_popcount_to_dest (hsa_op_reg *dest, hsa_op_with_type *arg, hsa_bb *hbb)
4396*38fd1498Szrj {
4397*38fd1498Szrj   gcc_checking_assert (hsa_type_integer_p (arg->m_type));
4398*38fd1498Szrj 
4399*38fd1498Szrj   if (hsa_type_bit_size (arg->m_type) < 32)
4400*38fd1498Szrj     arg = arg->get_in_type (BRIG_TYPE_B32, hbb);
4401*38fd1498Szrj 
4402*38fd1498Szrj   BrigType16_t srctype = hsa_bittype_for_type (arg->m_type);
4403*38fd1498Szrj   if (!hsa_btype_p (arg->m_type))
4404*38fd1498Szrj     arg = arg->get_in_type (srctype, hbb);
4405*38fd1498Szrj 
4406*38fd1498Szrj   hsa_insn_srctype *popcount
4407*38fd1498Szrj     = new hsa_insn_srctype (2, BRIG_OPCODE_POPCOUNT, BRIG_TYPE_U32,
4408*38fd1498Szrj 			    srctype, NULL, arg);
4409*38fd1498Szrj   hbb->append_insn (popcount);
4410*38fd1498Szrj   popcount->set_output_in_type (dest, 0, hbb);
4411*38fd1498Szrj }
4412*38fd1498Szrj 
4413*38fd1498Szrj /* Emit instructions that implement parity builtin STMT:
4414*38fd1498Szrj    Returns the parity of x, i.e. the number of 1-bits in x modulo 2.
4415*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4416*38fd1498Szrj 
4417*38fd1498Szrj static void
gen_hsa_parity(gcall * call,hsa_bb * hbb)4418*38fd1498Szrj gen_hsa_parity (gcall *call, hsa_bb *hbb)
4419*38fd1498Szrj {
4420*38fd1498Szrj   tree lhs = gimple_call_lhs (call);
4421*38fd1498Szrj   if (lhs == NULL_TREE)
4422*38fd1498Szrj     return;
4423*38fd1498Szrj 
4424*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4425*38fd1498Szrj   tree rhs1 = gimple_call_arg (call, 0);
4426*38fd1498Szrj   hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
4427*38fd1498Szrj 
4428*38fd1498Szrj   hsa_op_reg *popcount = new hsa_op_reg (BRIG_TYPE_U32);
4429*38fd1498Szrj   gen_hsa_popcount_to_dest (popcount, arg, hbb);
4430*38fd1498Szrj 
4431*38fd1498Szrj   hsa_insn_basic *insn
4432*38fd1498Szrj     = new hsa_insn_basic (3, BRIG_OPCODE_REM, popcount->m_type, NULL, popcount,
4433*38fd1498Szrj 			  new hsa_op_immed (2, popcount->m_type));
4434*38fd1498Szrj   hbb->append_insn (insn);
4435*38fd1498Szrj   insn->set_output_in_type (dest, 0, hbb);
4436*38fd1498Szrj }
4437*38fd1498Szrj 
4438*38fd1498Szrj /* Emit instructions that implement popcount builtin STMT.
4439*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4440*38fd1498Szrj 
4441*38fd1498Szrj static void
gen_hsa_popcount(gcall * call,hsa_bb * hbb)4442*38fd1498Szrj gen_hsa_popcount (gcall *call, hsa_bb *hbb)
4443*38fd1498Szrj {
4444*38fd1498Szrj   tree lhs = gimple_call_lhs (call);
4445*38fd1498Szrj   if (lhs == NULL_TREE)
4446*38fd1498Szrj     return;
4447*38fd1498Szrj 
4448*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4449*38fd1498Szrj   tree rhs1 = gimple_call_arg (call, 0);
4450*38fd1498Szrj   hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
4451*38fd1498Szrj 
4452*38fd1498Szrj   gen_hsa_popcount_to_dest (dest, arg, hbb);
4453*38fd1498Szrj }
4454*38fd1498Szrj 
4455*38fd1498Szrj /* Emit instructions that implement DIVMOD builtin STMT.
4456*38fd1498Szrj    Instructions are appended to basic block HBB.  */
4457*38fd1498Szrj 
4458*38fd1498Szrj static void
gen_hsa_divmod(gcall * call,hsa_bb * hbb)4459*38fd1498Szrj gen_hsa_divmod (gcall *call, hsa_bb *hbb)
4460*38fd1498Szrj {
4461*38fd1498Szrj   tree lhs = gimple_call_lhs (call);
4462*38fd1498Szrj   if (lhs == NULL_TREE)
4463*38fd1498Szrj     return;
4464*38fd1498Szrj 
4465*38fd1498Szrj   tree rhs0 = gimple_call_arg (call, 0);
4466*38fd1498Szrj   tree rhs1 = gimple_call_arg (call, 1);
4467*38fd1498Szrj 
4468*38fd1498Szrj   hsa_op_with_type *arg0 = hsa_reg_or_immed_for_gimple_op (rhs0, hbb);
4469*38fd1498Szrj   arg0 = arg0->extend_int_to_32bit (hbb);
4470*38fd1498Szrj   hsa_op_with_type *arg1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
4471*38fd1498Szrj   arg1 = arg1->extend_int_to_32bit (hbb);
4472*38fd1498Szrj 
4473*38fd1498Szrj   hsa_op_reg *dest0 = new hsa_op_reg (arg0->m_type);
4474*38fd1498Szrj   hsa_op_reg *dest1 = new hsa_op_reg (arg1->m_type);
4475*38fd1498Szrj 
4476*38fd1498Szrj   hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_DIV, dest0->m_type,
4477*38fd1498Szrj 					     dest0, arg0, arg1);
4478*38fd1498Szrj   hbb->append_insn (insn);
4479*38fd1498Szrj   insn = new hsa_insn_basic (3, BRIG_OPCODE_REM, dest1->m_type, dest1, arg0,
4480*38fd1498Szrj 			     arg1);
4481*38fd1498Szrj   hbb->append_insn (insn);
4482*38fd1498Szrj 
4483*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4484*38fd1498Szrj   BrigType16_t dst_type = hsa_extend_inttype_to_32bit (dest->m_type);
4485*38fd1498Szrj   BrigType16_t src_type = hsa_bittype_for_type (dest0->m_type);
4486*38fd1498Szrj 
4487*38fd1498Szrj   insn = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dst_type,
4488*38fd1498Szrj 			      src_type, NULL, dest0, dest1);
4489*38fd1498Szrj   hbb->append_insn (insn);
4490*38fd1498Szrj   insn->set_output_in_type (dest, 0, hbb);
4491*38fd1498Szrj }
4492*38fd1498Szrj 
4493*38fd1498Szrj /* Set VALUE to a shadow kernel debug argument and append a new instruction
4494*38fd1498Szrj    to HBB basic block.  */
4495*38fd1498Szrj 
4496*38fd1498Szrj static void
set_debug_value(hsa_bb * hbb,hsa_op_with_type * value)4497*38fd1498Szrj set_debug_value (hsa_bb *hbb, hsa_op_with_type *value)
4498*38fd1498Szrj {
4499*38fd1498Szrj   hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
4500*38fd1498Szrj   if (shadow_reg_ptr == NULL)
4501*38fd1498Szrj     return;
4502*38fd1498Szrj 
4503*38fd1498Szrj   hsa_op_address *addr
4504*38fd1498Szrj     = new hsa_op_address (shadow_reg_ptr,
4505*38fd1498Szrj 			  get_hsa_kernel_dispatch_offset ("debug"));
4506*38fd1498Szrj   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, BRIG_TYPE_U64, value,
4507*38fd1498Szrj 					addr);
4508*38fd1498Szrj   hbb->append_insn (mem);
4509*38fd1498Szrj }
4510*38fd1498Szrj 
4511*38fd1498Szrj void
generate(gimple * stmt,hsa_bb * hbb)4512*38fd1498Szrj omp_simple_builtin::generate (gimple *stmt, hsa_bb *hbb)
4513*38fd1498Szrj {
4514*38fd1498Szrj   if (m_sorry)
4515*38fd1498Szrj     {
4516*38fd1498Szrj       if (m_warning_message)
4517*38fd1498Szrj 	HSA_SORRY_AT (gimple_location (stmt), m_warning_message);
4518*38fd1498Szrj       else
4519*38fd1498Szrj 	HSA_SORRY_ATV (gimple_location (stmt),
4520*38fd1498Szrj 		       "Support for HSA does not implement calls to %s\n",
4521*38fd1498Szrj 		       m_name);
4522*38fd1498Szrj     }
4523*38fd1498Szrj   else if (m_warning_message != NULL)
4524*38fd1498Szrj     warning_at (gimple_location (stmt), OPT_Whsa, m_warning_message);
4525*38fd1498Szrj 
4526*38fd1498Szrj   if (m_return_value != NULL)
4527*38fd1498Szrj     {
4528*38fd1498Szrj       tree lhs = gimple_call_lhs (stmt);
4529*38fd1498Szrj       if (!lhs)
4530*38fd1498Szrj 	return;
4531*38fd1498Szrj 
4532*38fd1498Szrj       hbb->append_insn (new hsa_insn_comment (m_name));
4533*38fd1498Szrj 
4534*38fd1498Szrj       hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4535*38fd1498Szrj       hsa_op_with_type *op = m_return_value->get_in_type (dest->m_type, hbb);
4536*38fd1498Szrj       hsa_build_append_simple_mov (dest, op, hbb);
4537*38fd1498Szrj     }
4538*38fd1498Szrj }
4539*38fd1498Szrj 
4540*38fd1498Szrj /* If STMT is a call of a known library function, generate code to perform
4541*38fd1498Szrj    it and return true.  */
4542*38fd1498Szrj 
4543*38fd1498Szrj static bool
gen_hsa_insns_for_known_library_call(gimple * stmt,hsa_bb * hbb)4544*38fd1498Szrj gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb)
4545*38fd1498Szrj {
4546*38fd1498Szrj   bool handled = false;
4547*38fd1498Szrj   const char *name = hsa_get_declaration_name (gimple_call_fndecl (stmt));
4548*38fd1498Szrj 
4549*38fd1498Szrj   char *copy = NULL;
4550*38fd1498Szrj   size_t len = strlen (name);
4551*38fd1498Szrj   if (len > 0 && name[len - 1] == '_')
4552*38fd1498Szrj     {
4553*38fd1498Szrj       copy = XNEWVEC (char, len + 1);
4554*38fd1498Szrj       strcpy (copy, name);
4555*38fd1498Szrj       copy[len - 1] = '\0';
4556*38fd1498Szrj       name = copy;
4557*38fd1498Szrj     }
4558*38fd1498Szrj 
4559*38fd1498Szrj   /* Handle omp_* routines.  */
4560*38fd1498Szrj   if (strstr (name, "omp_") == name)
4561*38fd1498Szrj     {
4562*38fd1498Szrj       hsa_init_simple_builtins ();
4563*38fd1498Szrj       omp_simple_builtin *builtin = omp_simple_builtins->get (name);
4564*38fd1498Szrj       if (builtin)
4565*38fd1498Szrj 	{
4566*38fd1498Szrj 	  builtin->generate (stmt, hbb);
4567*38fd1498Szrj 	  return true;
4568*38fd1498Szrj 	}
4569*38fd1498Szrj 
4570*38fd1498Szrj       handled = true;
4571*38fd1498Szrj       if (strcmp (name, "omp_set_num_threads") == 0)
4572*38fd1498Szrj 	gen_set_num_threads (gimple_call_arg (stmt, 0), hbb);
4573*38fd1498Szrj       else if (strcmp (name, "omp_get_thread_num") == 0)
4574*38fd1498Szrj 	{
4575*38fd1498Szrj 	  hbb->append_insn (new hsa_insn_comment (name));
4576*38fd1498Szrj 	  query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb);
4577*38fd1498Szrj 	}
4578*38fd1498Szrj       else if (strcmp (name, "omp_get_num_threads") == 0)
4579*38fd1498Szrj 	{
4580*38fd1498Szrj 	  hbb->append_insn (new hsa_insn_comment (name));
4581*38fd1498Szrj 	  gen_get_num_threads (stmt, hbb);
4582*38fd1498Szrj 	}
4583*38fd1498Szrj       else if (strcmp (name, "omp_get_num_teams") == 0)
4584*38fd1498Szrj 	gen_get_num_teams (stmt, hbb);
4585*38fd1498Szrj       else if (strcmp (name, "omp_get_team_num") == 0)
4586*38fd1498Szrj 	gen_get_team_num (stmt, hbb);
4587*38fd1498Szrj       else if (strcmp (name, "omp_get_level") == 0)
4588*38fd1498Szrj 	gen_get_level (stmt, hbb);
4589*38fd1498Szrj       else if (strcmp (name, "omp_get_active_level") == 0)
4590*38fd1498Szrj 	gen_get_level (stmt, hbb);
4591*38fd1498Szrj       else if (strcmp (name, "omp_in_parallel") == 0)
4592*38fd1498Szrj 	gen_get_level (stmt, hbb);
4593*38fd1498Szrj       else if (strcmp (name, "omp_get_max_threads") == 0)
4594*38fd1498Szrj 	gen_get_max_threads (stmt, hbb);
4595*38fd1498Szrj       else
4596*38fd1498Szrj 	handled = false;
4597*38fd1498Szrj 
4598*38fd1498Szrj       if (handled)
4599*38fd1498Szrj 	{
4600*38fd1498Szrj 	  if (copy)
4601*38fd1498Szrj 	    free (copy);
4602*38fd1498Szrj 	  return true;
4603*38fd1498Szrj 	}
4604*38fd1498Szrj     }
4605*38fd1498Szrj 
4606*38fd1498Szrj   if (strcmp (name, "__hsa_set_debug_value") == 0)
4607*38fd1498Szrj     {
4608*38fd1498Szrj       handled = true;
4609*38fd1498Szrj       if (hsa_cfun->has_shadow_reg_p ())
4610*38fd1498Szrj 	{
4611*38fd1498Szrj 	  tree rhs1 = gimple_call_arg (stmt, 0);
4612*38fd1498Szrj 	  hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
4613*38fd1498Szrj 
4614*38fd1498Szrj 	  src = src->get_in_type (BRIG_TYPE_U64, hbb);
4615*38fd1498Szrj 	  set_debug_value (hbb, src);
4616*38fd1498Szrj 	}
4617*38fd1498Szrj     }
4618*38fd1498Szrj 
4619*38fd1498Szrj   if (copy)
4620*38fd1498Szrj     free (copy);
4621*38fd1498Szrj   return handled;
4622*38fd1498Szrj }
4623*38fd1498Szrj 
4624*38fd1498Szrj /* Helper functions to create a single unary HSA operations out of calls to
4625*38fd1498Szrj    builtins.  OPCODE is the HSA operation to be generated.  STMT is a gimple
4626*38fd1498Szrj    call to a builtin.  HBB is the HSA BB to which the instruction should be
4627*38fd1498Szrj    added.  Note that nothing will be created if STMT does not have a LHS.  */
4628*38fd1498Szrj 
4629*38fd1498Szrj static void
gen_hsa_unaryop_for_builtin(BrigOpcode opcode,gimple * stmt,hsa_bb * hbb)4630*38fd1498Szrj gen_hsa_unaryop_for_builtin (BrigOpcode opcode, gimple *stmt, hsa_bb *hbb)
4631*38fd1498Szrj {
4632*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4633*38fd1498Szrj   if (!lhs)
4634*38fd1498Szrj     return;
4635*38fd1498Szrj   hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4636*38fd1498Szrj   hsa_op_with_type *op
4637*38fd1498Szrj     = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
4638*38fd1498Szrj   gen_hsa_unary_operation (opcode, dest, op, hbb);
4639*38fd1498Szrj }
4640*38fd1498Szrj 
4641*38fd1498Szrj /* Helper functions to create a call to standard library if LHS of the
4642*38fd1498Szrj    STMT is used.  HBB is the HSA BB to which the instruction should be
4643*38fd1498Szrj    added.  */
4644*38fd1498Szrj 
4645*38fd1498Szrj static void
gen_hsa_unaryop_builtin_call(gimple * stmt,hsa_bb * hbb)4646*38fd1498Szrj gen_hsa_unaryop_builtin_call (gimple *stmt, hsa_bb *hbb)
4647*38fd1498Szrj {
4648*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4649*38fd1498Szrj   if (!lhs)
4650*38fd1498Szrj     return;
4651*38fd1498Szrj 
4652*38fd1498Szrj   if (gimple_call_internal_p (stmt))
4653*38fd1498Szrj     gen_hsa_insns_for_call_of_internal_fn (stmt, hbb);
4654*38fd1498Szrj   else
4655*38fd1498Szrj     gen_hsa_insns_for_direct_call (stmt, hbb);
4656*38fd1498Szrj }
4657*38fd1498Szrj 
4658*38fd1498Szrj /* Helper functions to create a single unary HSA operations out of calls to
4659*38fd1498Szrj    builtins (if unsafe math optimizations are enable). Otherwise, create
4660*38fd1498Szrj    a call to standard library function.
4661*38fd1498Szrj    OPCODE is the HSA operation to be generated.  STMT is a gimple
4662*38fd1498Szrj    call to a builtin.  HBB is the HSA BB to which the instruction should be
4663*38fd1498Szrj    added.  Note that nothing will be created if STMT does not have a LHS.  */
4664*38fd1498Szrj 
4665*38fd1498Szrj static void
gen_hsa_unaryop_or_call_for_builtin(BrigOpcode opcode,gimple * stmt,hsa_bb * hbb)4666*38fd1498Szrj gen_hsa_unaryop_or_call_for_builtin (BrigOpcode opcode, gimple *stmt,
4667*38fd1498Szrj 				     hsa_bb *hbb)
4668*38fd1498Szrj {
4669*38fd1498Szrj   if (flag_unsafe_math_optimizations)
4670*38fd1498Szrj     gen_hsa_unaryop_for_builtin (opcode, stmt, hbb);
4671*38fd1498Szrj   else
4672*38fd1498Szrj     gen_hsa_unaryop_builtin_call (stmt, hbb);
4673*38fd1498Szrj }
4674*38fd1498Szrj 
4675*38fd1498Szrj /* Generate HSA address corresponding to a value VAL (as opposed to a memory
4676*38fd1498Szrj    reference tree), for example an SSA_NAME or an ADDR_EXPR.  HBB is the HSA BB
4677*38fd1498Szrj    to which the instruction should be added.  */
4678*38fd1498Szrj 
4679*38fd1498Szrj static hsa_op_address *
get_address_from_value(tree val,hsa_bb * hbb)4680*38fd1498Szrj get_address_from_value (tree val, hsa_bb *hbb)
4681*38fd1498Szrj {
4682*38fd1498Szrj   switch (TREE_CODE (val))
4683*38fd1498Szrj     {
4684*38fd1498Szrj     case SSA_NAME:
4685*38fd1498Szrj       {
4686*38fd1498Szrj 	BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
4687*38fd1498Szrj 	hsa_op_base *reg
4688*38fd1498Szrj 	  = hsa_cfun->reg_for_gimple_ssa (val)->get_in_type (addrtype, hbb);
4689*38fd1498Szrj 	return new hsa_op_address (NULL, as_a <hsa_op_reg *> (reg), 0);
4690*38fd1498Szrj       }
4691*38fd1498Szrj     case ADDR_EXPR:
4692*38fd1498Szrj       return gen_hsa_addr (TREE_OPERAND (val, 0), hbb);
4693*38fd1498Szrj 
4694*38fd1498Szrj     case INTEGER_CST:
4695*38fd1498Szrj       if (tree_fits_shwi_p (val))
4696*38fd1498Szrj 	return new hsa_op_address (NULL, NULL, tree_to_shwi (val));
4697*38fd1498Szrj       /* fall-through */
4698*38fd1498Szrj 
4699*38fd1498Szrj     default:
4700*38fd1498Szrj       HSA_SORRY_ATV (EXPR_LOCATION (val),
4701*38fd1498Szrj 		     "support for HSA does not implement memory access to %E",
4702*38fd1498Szrj 		     val);
4703*38fd1498Szrj       return new hsa_op_address (NULL, NULL, 0);
4704*38fd1498Szrj     }
4705*38fd1498Szrj }
4706*38fd1498Szrj 
4707*38fd1498Szrj /* Expand assignment of a result of a string BUILTIN to DST.
4708*38fd1498Szrj    Size of the operation is N bytes, where instructions
4709*38fd1498Szrj    will be append to HBB.  */
4710*38fd1498Szrj 
4711*38fd1498Szrj static void
expand_lhs_of_string_op(gimple * stmt,unsigned HOST_WIDE_INT n,hsa_bb * hbb,enum built_in_function builtin)4712*38fd1498Szrj expand_lhs_of_string_op (gimple *stmt,
4713*38fd1498Szrj 			 unsigned HOST_WIDE_INT n, hsa_bb *hbb,
4714*38fd1498Szrj 			 enum built_in_function builtin)
4715*38fd1498Szrj {
4716*38fd1498Szrj   /* If LHS is expected, we need to emit a PHI instruction.  */
4717*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4718*38fd1498Szrj   if (!lhs)
4719*38fd1498Szrj     return;
4720*38fd1498Szrj 
4721*38fd1498Szrj   hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs);
4722*38fd1498Szrj 
4723*38fd1498Szrj   hsa_op_with_type *dst_reg
4724*38fd1498Szrj     = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
4725*38fd1498Szrj   hsa_op_with_type *tmp;
4726*38fd1498Szrj 
4727*38fd1498Szrj   switch (builtin)
4728*38fd1498Szrj     {
4729*38fd1498Szrj     case BUILT_IN_MEMPCPY:
4730*38fd1498Szrj       {
4731*38fd1498Szrj 	tmp = new hsa_op_reg (dst_reg->m_type);
4732*38fd1498Szrj 	hsa_insn_basic *add
4733*38fd1498Szrj 	  = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type,
4734*38fd1498Szrj 				tmp, dst_reg,
4735*38fd1498Szrj 				new hsa_op_immed (n, dst_reg->m_type));
4736*38fd1498Szrj 	hbb->append_insn (add);
4737*38fd1498Szrj 	break;
4738*38fd1498Szrj       }
4739*38fd1498Szrj     case BUILT_IN_MEMCPY:
4740*38fd1498Szrj     case BUILT_IN_MEMSET:
4741*38fd1498Szrj       tmp = dst_reg;
4742*38fd1498Szrj       break;
4743*38fd1498Szrj     default:
4744*38fd1498Szrj       gcc_unreachable ();
4745*38fd1498Szrj     }
4746*38fd1498Szrj 
4747*38fd1498Szrj   hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV, lhs_reg->m_type,
4748*38fd1498Szrj 					lhs_reg, tmp));
4749*38fd1498Szrj }
4750*38fd1498Szrj 
4751*38fd1498Szrj #define HSA_MEMORY_BUILTINS_LIMIT     128
4752*38fd1498Szrj 
4753*38fd1498Szrj /* Expand a string builtin (from a gimple STMT) in a way that
4754*38fd1498Szrj    according to MISALIGNED_FLAG we process either direct emission
4755*38fd1498Szrj    (a bunch of memory load and store instructions), or we emit a function call
4756*38fd1498Szrj    of a library function (for instance 'memcpy'). Actually, a basic block
4757*38fd1498Szrj    for direct emission is just prepared, where caller is responsible
4758*38fd1498Szrj    for emission of corresponding instructions.
4759*38fd1498Szrj    All instruction are appended to HBB.  */
4760*38fd1498Szrj 
4761*38fd1498Szrj hsa_bb *
expand_string_operation_builtin(gimple * stmt,hsa_bb * hbb,hsa_op_reg * misaligned_flag)4762*38fd1498Szrj expand_string_operation_builtin (gimple *stmt, hsa_bb *hbb,
4763*38fd1498Szrj 				 hsa_op_reg *misaligned_flag)
4764*38fd1498Szrj {
4765*38fd1498Szrj   edge e = split_block (hbb->m_bb, stmt);
4766*38fd1498Szrj   basic_block condition_bb = e->src;
4767*38fd1498Szrj   hbb->append_insn (new hsa_insn_cbr (misaligned_flag));
4768*38fd1498Szrj 
4769*38fd1498Szrj   /* Prepare the control flow.  */
4770*38fd1498Szrj   edge condition_edge = EDGE_SUCC (condition_bb, 0);
4771*38fd1498Szrj   basic_block call_bb = split_edge (condition_edge);
4772*38fd1498Szrj 
4773*38fd1498Szrj   basic_block expanded_bb = split_edge (EDGE_SUCC (call_bb, 0));
4774*38fd1498Szrj   basic_block cont_bb = EDGE_SUCC (expanded_bb, 0)->dest;
4775*38fd1498Szrj   basic_block merge_bb = split_edge (EDGE_PRED (cont_bb, 0));
4776*38fd1498Szrj 
4777*38fd1498Szrj   condition_edge->flags &= ~EDGE_FALLTHRU;
4778*38fd1498Szrj   condition_edge->flags |= EDGE_TRUE_VALUE;
4779*38fd1498Szrj   make_edge (condition_bb, expanded_bb, EDGE_FALSE_VALUE);
4780*38fd1498Szrj 
4781*38fd1498Szrj   redirect_edge_succ (EDGE_SUCC (call_bb, 0), merge_bb);
4782*38fd1498Szrj 
4783*38fd1498Szrj   hsa_cfun->m_modified_cfg = true;
4784*38fd1498Szrj 
4785*38fd1498Szrj   hsa_init_new_bb (expanded_bb);
4786*38fd1498Szrj 
4787*38fd1498Szrj   /* Slow path: function call.  */
4788*38fd1498Szrj   gen_hsa_insns_for_direct_call (stmt, hsa_init_new_bb (call_bb), false);
4789*38fd1498Szrj 
4790*38fd1498Szrj   return hsa_bb_for_bb (expanded_bb);
4791*38fd1498Szrj }
4792*38fd1498Szrj 
4793*38fd1498Szrj /* Expand a memory copy BUILTIN (BUILT_IN_MEMCPY, BUILT_IN_MEMPCPY) from
4794*38fd1498Szrj    a gimple STMT and store all necessary instruction to HBB basic block.  */
4795*38fd1498Szrj 
4796*38fd1498Szrj static void
expand_memory_copy(gimple * stmt,hsa_bb * hbb,enum built_in_function builtin)4797*38fd1498Szrj expand_memory_copy (gimple *stmt, hsa_bb *hbb, enum built_in_function builtin)
4798*38fd1498Szrj {
4799*38fd1498Szrj   tree byte_size = gimple_call_arg (stmt, 2);
4800*38fd1498Szrj 
4801*38fd1498Szrj   if (!tree_fits_uhwi_p (byte_size))
4802*38fd1498Szrj     {
4803*38fd1498Szrj       gen_hsa_insns_for_direct_call (stmt, hbb);
4804*38fd1498Szrj       return;
4805*38fd1498Szrj     }
4806*38fd1498Szrj 
4807*38fd1498Szrj   unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
4808*38fd1498Szrj 
4809*38fd1498Szrj   if (n > HSA_MEMORY_BUILTINS_LIMIT)
4810*38fd1498Szrj     {
4811*38fd1498Szrj       gen_hsa_insns_for_direct_call (stmt, hbb);
4812*38fd1498Szrj       return;
4813*38fd1498Szrj     }
4814*38fd1498Szrj 
4815*38fd1498Szrj   tree dst = gimple_call_arg (stmt, 0);
4816*38fd1498Szrj   tree src = gimple_call_arg (stmt, 1);
4817*38fd1498Szrj 
4818*38fd1498Szrj   hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
4819*38fd1498Szrj   hsa_op_address *src_addr = get_address_from_value (src, hbb);
4820*38fd1498Szrj 
4821*38fd1498Szrj   /* As gen_hsa_memory_copy relies on memory alignment
4822*38fd1498Szrj      greater or equal to 8 bytes, we need to verify the alignment.  */
4823*38fd1498Szrj   BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
4824*38fd1498Szrj   hsa_op_reg *src_addr_reg = new hsa_op_reg (addrtype);
4825*38fd1498Szrj   hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
4826*38fd1498Szrj 
4827*38fd1498Szrj   convert_addr_to_flat_segment (src_addr, src_addr_reg, hbb);
4828*38fd1498Szrj   convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
4829*38fd1498Szrj 
4830*38fd1498Szrj   /* Process BIT OR for source and destination addresses.  */
4831*38fd1498Szrj   hsa_op_reg *or_reg = new hsa_op_reg (addrtype);
4832*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_OR, or_reg, src_addr_reg,
4833*38fd1498Szrj 			    dst_addr_reg, hbb);
4834*38fd1498Szrj 
4835*38fd1498Szrj   /* Process BIT AND with 0x7 to identify the desired alignment
4836*38fd1498Szrj      of 8 bytes.  */
4837*38fd1498Szrj   hsa_op_reg *masked = new hsa_op_reg (addrtype);
4838*38fd1498Szrj 
4839*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, or_reg,
4840*38fd1498Szrj 			    new hsa_op_immed (7, addrtype), hbb);
4841*38fd1498Szrj 
4842*38fd1498Szrj   hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
4843*38fd1498Szrj   hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
4844*38fd1498Szrj 				      misaligned, masked,
4845*38fd1498Szrj 				      new hsa_op_immed (0, masked->m_type)));
4846*38fd1498Szrj 
4847*38fd1498Szrj   hsa_bb *native_impl_bb
4848*38fd1498Szrj     = expand_string_operation_builtin (stmt, hbb, misaligned);
4849*38fd1498Szrj 
4850*38fd1498Szrj   gen_hsa_memory_copy (native_impl_bb, dst_addr, src_addr, n, BRIG_ALIGNMENT_8);
4851*38fd1498Szrj   hsa_bb *merge_bb
4852*38fd1498Szrj     = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
4853*38fd1498Szrj   expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
4854*38fd1498Szrj }
4855*38fd1498Szrj 
4856*38fd1498Szrj 
4857*38fd1498Szrj /* Expand a memory set BUILTIN (BUILT_IN_MEMSET, BUILT_IN_BZERO) from
4858*38fd1498Szrj    a gimple STMT and store all necessary instruction to HBB basic block.
4859*38fd1498Szrj    The operation set N bytes with a CONSTANT value.  */
4860*38fd1498Szrj 
4861*38fd1498Szrj static void
expand_memory_set(gimple * stmt,unsigned HOST_WIDE_INT n,unsigned HOST_WIDE_INT constant,hsa_bb * hbb,enum built_in_function builtin)4862*38fd1498Szrj expand_memory_set (gimple *stmt, unsigned HOST_WIDE_INT n,
4863*38fd1498Szrj 		   unsigned HOST_WIDE_INT constant, hsa_bb *hbb,
4864*38fd1498Szrj 		   enum built_in_function builtin)
4865*38fd1498Szrj {
4866*38fd1498Szrj   tree dst = gimple_call_arg (stmt, 0);
4867*38fd1498Szrj   hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
4868*38fd1498Szrj 
4869*38fd1498Szrj   /* As gen_hsa_memory_set relies on memory alignment
4870*38fd1498Szrj      greater or equal to 8 bytes, we need to verify the alignment.  */
4871*38fd1498Szrj   BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
4872*38fd1498Szrj   hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
4873*38fd1498Szrj   convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
4874*38fd1498Szrj 
4875*38fd1498Szrj   /* Process BIT AND with 0x7 to identify the desired alignment
4876*38fd1498Szrj      of 8 bytes.  */
4877*38fd1498Szrj   hsa_op_reg *masked = new hsa_op_reg (addrtype);
4878*38fd1498Szrj 
4879*38fd1498Szrj   gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, dst_addr_reg,
4880*38fd1498Szrj 			    new hsa_op_immed (7, addrtype), hbb);
4881*38fd1498Szrj 
4882*38fd1498Szrj   hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
4883*38fd1498Szrj   hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
4884*38fd1498Szrj 				      misaligned, masked,
4885*38fd1498Szrj 				      new hsa_op_immed (0, masked->m_type)));
4886*38fd1498Szrj 
4887*38fd1498Szrj   hsa_bb *native_impl_bb
4888*38fd1498Szrj     = expand_string_operation_builtin (stmt, hbb, misaligned);
4889*38fd1498Szrj 
4890*38fd1498Szrj   gen_hsa_memory_set (native_impl_bb, dst_addr, constant, n, BRIG_ALIGNMENT_8);
4891*38fd1498Szrj   hsa_bb *merge_bb
4892*38fd1498Szrj     = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
4893*38fd1498Szrj   expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
4894*38fd1498Szrj }
4895*38fd1498Szrj 
4896*38fd1498Szrj /* Store into MEMORDER the memory order specified by tree T, which must be an
4897*38fd1498Szrj    integer constant representing a C++ memory order.  If it isn't, issue an HSA
4898*38fd1498Szrj    sorry message using LOC and return true, otherwise return false and store
4899*38fd1498Szrj    the name of the requested order to *MNAME.  */
4900*38fd1498Szrj 
4901*38fd1498Szrj static bool
hsa_memorder_from_tree(tree t,BrigMemoryOrder * memorder,const char ** mname,location_t loc)4902*38fd1498Szrj hsa_memorder_from_tree (tree t, BrigMemoryOrder *memorder, const char **mname,
4903*38fd1498Szrj 			location_t loc)
4904*38fd1498Szrj {
4905*38fd1498Szrj   if (!tree_fits_uhwi_p (t))
4906*38fd1498Szrj     {
4907*38fd1498Szrj       HSA_SORRY_ATV (loc, "support for HSA does not implement memory model %E",
4908*38fd1498Szrj 		     t);
4909*38fd1498Szrj       return true;
4910*38fd1498Szrj     }
4911*38fd1498Szrj 
4912*38fd1498Szrj   unsigned HOST_WIDE_INT mm = tree_to_uhwi (t);
4913*38fd1498Szrj   switch (mm & MEMMODEL_BASE_MASK)
4914*38fd1498Szrj     {
4915*38fd1498Szrj     case MEMMODEL_RELAXED:
4916*38fd1498Szrj       *memorder = BRIG_MEMORY_ORDER_RELAXED;
4917*38fd1498Szrj       *mname = "relaxed";
4918*38fd1498Szrj       break;
4919*38fd1498Szrj     case MEMMODEL_CONSUME:
4920*38fd1498Szrj       /* HSA does not have an equivalent, but we can use the slightly stronger
4921*38fd1498Szrj 	 ACQUIRE.  */
4922*38fd1498Szrj       *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
4923*38fd1498Szrj       *mname = "consume";
4924*38fd1498Szrj       break;
4925*38fd1498Szrj     case MEMMODEL_ACQUIRE:
4926*38fd1498Szrj       *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
4927*38fd1498Szrj       *mname = "acquire";
4928*38fd1498Szrj       break;
4929*38fd1498Szrj     case MEMMODEL_RELEASE:
4930*38fd1498Szrj       *memorder = BRIG_MEMORY_ORDER_SC_RELEASE;
4931*38fd1498Szrj       *mname = "release";
4932*38fd1498Szrj       break;
4933*38fd1498Szrj     case MEMMODEL_ACQ_REL:
4934*38fd1498Szrj       *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
4935*38fd1498Szrj       *mname = "acq_rel";
4936*38fd1498Szrj       break;
4937*38fd1498Szrj     case MEMMODEL_SEQ_CST:
4938*38fd1498Szrj       /* Callers implementing a simple load or store need to remove the release
4939*38fd1498Szrj 	 or acquire part respectively.  */
4940*38fd1498Szrj       *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
4941*38fd1498Szrj       *mname = "seq_cst";
4942*38fd1498Szrj       break;
4943*38fd1498Szrj     default:
4944*38fd1498Szrj       {
4945*38fd1498Szrj 	HSA_SORRY_AT (loc, "support for HSA does not implement the specified "
4946*38fd1498Szrj 		      "memory model");
4947*38fd1498Szrj 	return true;
4948*38fd1498Szrj       }
4949*38fd1498Szrj     }
4950*38fd1498Szrj   return false;
4951*38fd1498Szrj }
4952*38fd1498Szrj 
4953*38fd1498Szrj /* Helper function to create an HSA atomic operation instruction out of calls
4954*38fd1498Szrj    to atomic builtins.  RET_ORIG is true if the built-in is the variant that
4955*38fd1498Szrj    return s the value before applying operation, and false if it should return
4956*38fd1498Szrj    the value after applying the operation (if it returns value at all).  ACODE
4957*38fd1498Szrj    is the atomic operation code, STMT is a gimple call to a builtin.  HBB is
4958*38fd1498Szrj    the HSA BB to which the instruction should be added.  If SIGNAL is true, the
4959*38fd1498Szrj    created operation will work on HSA signals rather than atomic variables.  */
4960*38fd1498Szrj 
4961*38fd1498Szrj static void
gen_hsa_atomic_for_builtin(bool ret_orig,enum BrigAtomicOperation acode,gimple * stmt,hsa_bb * hbb,bool signal)4962*38fd1498Szrj gen_hsa_atomic_for_builtin (bool ret_orig, enum BrigAtomicOperation acode,
4963*38fd1498Szrj 			    gimple *stmt, hsa_bb *hbb, bool signal)
4964*38fd1498Szrj {
4965*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
4966*38fd1498Szrj 
4967*38fd1498Szrj   tree type = TREE_TYPE (gimple_call_arg (stmt, 1));
4968*38fd1498Szrj   BrigType16_t hsa_type = hsa_type_for_scalar_tree_type (type, false);
4969*38fd1498Szrj   BrigType16_t mtype = mem_type_for_type (hsa_type);
4970*38fd1498Szrj   BrigMemoryOrder memorder;
4971*38fd1498Szrj   const char *mmname;
4972*38fd1498Szrj 
4973*38fd1498Szrj   if (hsa_memorder_from_tree (gimple_call_arg (stmt, 2), &memorder, &mmname,
4974*38fd1498Szrj 			      gimple_location (stmt)))
4975*38fd1498Szrj     return;
4976*38fd1498Szrj 
4977*38fd1498Szrj   /* Certain atomic insns must have Bx memory types.  */
4978*38fd1498Szrj   switch (acode)
4979*38fd1498Szrj     {
4980*38fd1498Szrj     case BRIG_ATOMIC_LD:
4981*38fd1498Szrj     case BRIG_ATOMIC_ST:
4982*38fd1498Szrj     case BRIG_ATOMIC_AND:
4983*38fd1498Szrj     case BRIG_ATOMIC_OR:
4984*38fd1498Szrj     case BRIG_ATOMIC_XOR:
4985*38fd1498Szrj     case BRIG_ATOMIC_EXCH:
4986*38fd1498Szrj       mtype = hsa_bittype_for_type (mtype);
4987*38fd1498Szrj       break;
4988*38fd1498Szrj     default:
4989*38fd1498Szrj       break;
4990*38fd1498Szrj     }
4991*38fd1498Szrj 
4992*38fd1498Szrj   hsa_op_reg *dest;
4993*38fd1498Szrj   int nops, opcode;
4994*38fd1498Szrj   if (lhs)
4995*38fd1498Szrj     {
4996*38fd1498Szrj       if (ret_orig)
4997*38fd1498Szrj 	dest = hsa_cfun->reg_for_gimple_ssa (lhs);
4998*38fd1498Szrj       else
4999*38fd1498Szrj 	dest = new hsa_op_reg (hsa_type);
5000*38fd1498Szrj       opcode = signal ? BRIG_OPCODE_SIGNAL : BRIG_OPCODE_ATOMIC;
5001*38fd1498Szrj       nops = 3;
5002*38fd1498Szrj     }
5003*38fd1498Szrj   else
5004*38fd1498Szrj     {
5005*38fd1498Szrj       dest = NULL;
5006*38fd1498Szrj       opcode = signal ? BRIG_OPCODE_SIGNALNORET : BRIG_OPCODE_ATOMICNORET;
5007*38fd1498Szrj       nops = 2;
5008*38fd1498Szrj     }
5009*38fd1498Szrj 
5010*38fd1498Szrj   if (acode == BRIG_ATOMIC_ST)
5011*38fd1498Szrj     {
5012*38fd1498Szrj       if (memorder == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE)
5013*38fd1498Szrj 	memorder = BRIG_MEMORY_ORDER_SC_RELEASE;
5014*38fd1498Szrj 
5015*38fd1498Szrj       if (memorder != BRIG_MEMORY_ORDER_RELAXED
5016*38fd1498Szrj 	  && memorder != BRIG_MEMORY_ORDER_SC_RELEASE
5017*38fd1498Szrj 	  && memorder != BRIG_MEMORY_ORDER_NONE)
5018*38fd1498Szrj 	{
5019*38fd1498Szrj 	  HSA_SORRY_ATV (gimple_location (stmt),
5020*38fd1498Szrj 			 "support for HSA does not implement memory model for "
5021*38fd1498Szrj 			 "ATOMIC_ST: %s", mmname);
5022*38fd1498Szrj 	  return;
5023*38fd1498Szrj 	}
5024*38fd1498Szrj     }
5025*38fd1498Szrj 
5026*38fd1498Szrj   hsa_insn_basic *atominsn;
5027*38fd1498Szrj   hsa_op_base *tgt;
5028*38fd1498Szrj   if (signal)
5029*38fd1498Szrj     {
5030*38fd1498Szrj       atominsn = new hsa_insn_signal (nops, opcode, acode, mtype, memorder);
5031*38fd1498Szrj       tgt = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
5032*38fd1498Szrj     }
5033*38fd1498Szrj   else
5034*38fd1498Szrj     {
5035*38fd1498Szrj       atominsn = new hsa_insn_atomic (nops, opcode, acode, mtype, memorder);
5036*38fd1498Szrj       hsa_op_address *addr;
5037*38fd1498Szrj       addr = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
5038*38fd1498Szrj       if (addr->m_symbol && addr->m_symbol->m_segment == BRIG_SEGMENT_PRIVATE)
5039*38fd1498Szrj 	{
5040*38fd1498Szrj 	  HSA_SORRY_AT (gimple_location (stmt),
5041*38fd1498Szrj 			"HSA does not implement atomic operations in private "
5042*38fd1498Szrj 			"segment");
5043*38fd1498Szrj 	  return;
5044*38fd1498Szrj 	}
5045*38fd1498Szrj       tgt = addr;
5046*38fd1498Szrj     }
5047*38fd1498Szrj 
5048*38fd1498Szrj   hsa_op_with_type *op
5049*38fd1498Szrj     = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb);
5050*38fd1498Szrj   if (lhs)
5051*38fd1498Szrj     {
5052*38fd1498Szrj       atominsn->set_op (0, dest);
5053*38fd1498Szrj       atominsn->set_op (1, tgt);
5054*38fd1498Szrj       atominsn->set_op (2, op);
5055*38fd1498Szrj     }
5056*38fd1498Szrj   else
5057*38fd1498Szrj     {
5058*38fd1498Szrj       atominsn->set_op (0, tgt);
5059*38fd1498Szrj       atominsn->set_op (1, op);
5060*38fd1498Szrj     }
5061*38fd1498Szrj 
5062*38fd1498Szrj   hbb->append_insn (atominsn);
5063*38fd1498Szrj 
5064*38fd1498Szrj   /* HSA does not natively support the variants that return the modified value,
5065*38fd1498Szrj      so re-do the operation again non-atomically if that is what was
5066*38fd1498Szrj      requested.  */
5067*38fd1498Szrj   if (lhs && !ret_orig)
5068*38fd1498Szrj     {
5069*38fd1498Szrj       int arith;
5070*38fd1498Szrj       switch (acode)
5071*38fd1498Szrj 	{
5072*38fd1498Szrj 	case BRIG_ATOMIC_ADD:
5073*38fd1498Szrj 	  arith = BRIG_OPCODE_ADD;
5074*38fd1498Szrj 	  break;
5075*38fd1498Szrj 	case BRIG_ATOMIC_AND:
5076*38fd1498Szrj 	  arith = BRIG_OPCODE_AND;
5077*38fd1498Szrj 	  break;
5078*38fd1498Szrj 	case BRIG_ATOMIC_OR:
5079*38fd1498Szrj 	  arith = BRIG_OPCODE_OR;
5080*38fd1498Szrj 	  break;
5081*38fd1498Szrj 	case BRIG_ATOMIC_SUB:
5082*38fd1498Szrj 	  arith = BRIG_OPCODE_SUB;
5083*38fd1498Szrj 	  break;
5084*38fd1498Szrj 	case BRIG_ATOMIC_XOR:
5085*38fd1498Szrj 	  arith = BRIG_OPCODE_XOR;
5086*38fd1498Szrj 	  break;
5087*38fd1498Szrj 	default:
5088*38fd1498Szrj 	  gcc_unreachable ();
5089*38fd1498Szrj 	}
5090*38fd1498Szrj       hsa_op_reg *real_dest = hsa_cfun->reg_for_gimple_ssa (lhs);
5091*38fd1498Szrj       gen_hsa_binary_operation (arith, real_dest, dest, op, hbb);
5092*38fd1498Szrj     }
5093*38fd1498Szrj }
5094*38fd1498Szrj 
5095*38fd1498Szrj /* Generate HSA instructions for an internal fn.
5096*38fd1498Szrj    Instructions will be appended to HBB, which also needs to be the
5097*38fd1498Szrj    corresponding structure to the basic_block of STMT.  */
5098*38fd1498Szrj 
5099*38fd1498Szrj static void
gen_hsa_insn_for_internal_fn_call(gcall * stmt,hsa_bb * hbb)5100*38fd1498Szrj gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb)
5101*38fd1498Szrj {
5102*38fd1498Szrj   gcc_checking_assert (gimple_call_internal_fn (stmt));
5103*38fd1498Szrj   internal_fn fn = gimple_call_internal_fn (stmt);
5104*38fd1498Szrj 
5105*38fd1498Szrj   bool is_float_type_p = false;
5106*38fd1498Szrj   if (gimple_call_lhs (stmt) != NULL
5107*38fd1498Szrj       && TREE_TYPE (gimple_call_lhs (stmt)) == float_type_node)
5108*38fd1498Szrj     is_float_type_p = true;
5109*38fd1498Szrj 
5110*38fd1498Szrj   switch (fn)
5111*38fd1498Szrj     {
5112*38fd1498Szrj     case IFN_CEIL:
5113*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb);
5114*38fd1498Szrj       break;
5115*38fd1498Szrj 
5116*38fd1498Szrj     case IFN_FLOOR:
5117*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb);
5118*38fd1498Szrj       break;
5119*38fd1498Szrj 
5120*38fd1498Szrj     case IFN_RINT:
5121*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb);
5122*38fd1498Szrj       break;
5123*38fd1498Szrj 
5124*38fd1498Szrj     case IFN_SQRT:
5125*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb);
5126*38fd1498Szrj       break;
5127*38fd1498Szrj 
5128*38fd1498Szrj     case IFN_RSQRT:
5129*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_NRSQRT, stmt, hbb);
5130*38fd1498Szrj       break;
5131*38fd1498Szrj 
5132*38fd1498Szrj     case IFN_TRUNC:
5133*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb);
5134*38fd1498Szrj       break;
5135*38fd1498Szrj 
5136*38fd1498Szrj     case IFN_COS:
5137*38fd1498Szrj       {
5138*38fd1498Szrj 	if (is_float_type_p)
5139*38fd1498Szrj 	  gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb);
5140*38fd1498Szrj 	else
5141*38fd1498Szrj 	  gen_hsa_unaryop_builtin_call (stmt, hbb);
5142*38fd1498Szrj 
5143*38fd1498Szrj 	break;
5144*38fd1498Szrj       }
5145*38fd1498Szrj     case IFN_EXP2:
5146*38fd1498Szrj       {
5147*38fd1498Szrj 	if (is_float_type_p)
5148*38fd1498Szrj 	  gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb);
5149*38fd1498Szrj 	else
5150*38fd1498Szrj 	  gen_hsa_unaryop_builtin_call (stmt, hbb);
5151*38fd1498Szrj 
5152*38fd1498Szrj 	break;
5153*38fd1498Szrj       }
5154*38fd1498Szrj 
5155*38fd1498Szrj     case IFN_LOG2:
5156*38fd1498Szrj       {
5157*38fd1498Szrj 	if (is_float_type_p)
5158*38fd1498Szrj 	  gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb);
5159*38fd1498Szrj 	else
5160*38fd1498Szrj 	  gen_hsa_unaryop_builtin_call (stmt, hbb);
5161*38fd1498Szrj 
5162*38fd1498Szrj 	break;
5163*38fd1498Szrj       }
5164*38fd1498Szrj 
5165*38fd1498Szrj     case IFN_SIN:
5166*38fd1498Szrj       {
5167*38fd1498Szrj 	if (is_float_type_p)
5168*38fd1498Szrj 	  gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb);
5169*38fd1498Szrj 	else
5170*38fd1498Szrj 	  gen_hsa_unaryop_builtin_call (stmt, hbb);
5171*38fd1498Szrj 	break;
5172*38fd1498Szrj       }
5173*38fd1498Szrj 
5174*38fd1498Szrj     case IFN_CLRSB:
5175*38fd1498Szrj       gen_hsa_clrsb (stmt, hbb);
5176*38fd1498Szrj       break;
5177*38fd1498Szrj 
5178*38fd1498Szrj     case IFN_CLZ:
5179*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb);
5180*38fd1498Szrj       break;
5181*38fd1498Szrj 
5182*38fd1498Szrj     case IFN_CTZ:
5183*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb);
5184*38fd1498Szrj       break;
5185*38fd1498Szrj 
5186*38fd1498Szrj     case IFN_FFS:
5187*38fd1498Szrj       gen_hsa_ffs (stmt, hbb);
5188*38fd1498Szrj       break;
5189*38fd1498Szrj 
5190*38fd1498Szrj     case IFN_PARITY:
5191*38fd1498Szrj       gen_hsa_parity (stmt, hbb);
5192*38fd1498Szrj       break;
5193*38fd1498Szrj 
5194*38fd1498Szrj     case IFN_POPCOUNT:
5195*38fd1498Szrj       gen_hsa_popcount (stmt, hbb);
5196*38fd1498Szrj       break;
5197*38fd1498Szrj 
5198*38fd1498Szrj     case IFN_DIVMOD:
5199*38fd1498Szrj       gen_hsa_divmod (stmt, hbb);
5200*38fd1498Szrj       break;
5201*38fd1498Szrj 
5202*38fd1498Szrj     case IFN_ACOS:
5203*38fd1498Szrj     case IFN_ASIN:
5204*38fd1498Szrj     case IFN_ATAN:
5205*38fd1498Szrj     case IFN_EXP:
5206*38fd1498Szrj     case IFN_EXP10:
5207*38fd1498Szrj     case IFN_EXPM1:
5208*38fd1498Szrj     case IFN_LOG:
5209*38fd1498Szrj     case IFN_LOG10:
5210*38fd1498Szrj     case IFN_LOG1P:
5211*38fd1498Szrj     case IFN_LOGB:
5212*38fd1498Szrj     case IFN_SIGNIFICAND:
5213*38fd1498Szrj     case IFN_TAN:
5214*38fd1498Szrj     case IFN_NEARBYINT:
5215*38fd1498Szrj     case IFN_ROUND:
5216*38fd1498Szrj     case IFN_ATAN2:
5217*38fd1498Szrj     case IFN_COPYSIGN:
5218*38fd1498Szrj     case IFN_FMOD:
5219*38fd1498Szrj     case IFN_POW:
5220*38fd1498Szrj     case IFN_REMAINDER:
5221*38fd1498Szrj     case IFN_SCALB:
5222*38fd1498Szrj     case IFN_FMIN:
5223*38fd1498Szrj     case IFN_FMAX:
5224*38fd1498Szrj       gen_hsa_insns_for_call_of_internal_fn (stmt, hbb);
5225*38fd1498Szrj       break;
5226*38fd1498Szrj 
5227*38fd1498Szrj     default:
5228*38fd1498Szrj       HSA_SORRY_ATV (gimple_location (stmt),
5229*38fd1498Szrj 		     "support for HSA does not implement internal function: %s",
5230*38fd1498Szrj 		     internal_fn_name (fn));
5231*38fd1498Szrj       break;
5232*38fd1498Szrj     }
5233*38fd1498Szrj }
5234*38fd1498Szrj 
5235*38fd1498Szrj /* Generate HSA instructions for the given call statement STMT.  Instructions
5236*38fd1498Szrj    will be appended to HBB.  */
5237*38fd1498Szrj 
5238*38fd1498Szrj static void
gen_hsa_insns_for_call(gimple * stmt,hsa_bb * hbb)5239*38fd1498Szrj gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
5240*38fd1498Szrj {
5241*38fd1498Szrj   gcall *call = as_a <gcall *> (stmt);
5242*38fd1498Szrj   tree lhs = gimple_call_lhs (stmt);
5243*38fd1498Szrj   hsa_op_reg *dest;
5244*38fd1498Szrj 
5245*38fd1498Szrj   if (gimple_call_internal_p (stmt))
5246*38fd1498Szrj     {
5247*38fd1498Szrj       gen_hsa_insn_for_internal_fn_call (call, hbb);
5248*38fd1498Szrj       return;
5249*38fd1498Szrj     }
5250*38fd1498Szrj 
5251*38fd1498Szrj   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5252*38fd1498Szrj     {
5253*38fd1498Szrj       tree function_decl = gimple_call_fndecl (stmt);
5254*38fd1498Szrj       /* Prefetch pass can create type-mismatching prefetch builtin calls which
5255*38fd1498Szrj 	 fail the gimple_call_builtin_p test above.  Handle them here.  */
5256*38fd1498Szrj       if (DECL_BUILT_IN_CLASS (function_decl)
5257*38fd1498Szrj 	  && DECL_FUNCTION_CODE (function_decl) == BUILT_IN_PREFETCH)
5258*38fd1498Szrj 	return;
5259*38fd1498Szrj 
5260*38fd1498Szrj       if (function_decl == NULL_TREE)
5261*38fd1498Szrj 	{
5262*38fd1498Szrj 	  HSA_SORRY_AT (gimple_location (stmt),
5263*38fd1498Szrj 			"support for HSA does not implement indirect calls");
5264*38fd1498Szrj 	  return;
5265*38fd1498Szrj 	}
5266*38fd1498Szrj 
5267*38fd1498Szrj       if (hsa_callable_function_p (function_decl))
5268*38fd1498Szrj 	gen_hsa_insns_for_direct_call (stmt, hbb);
5269*38fd1498Szrj       else if (!gen_hsa_insns_for_known_library_call (stmt, hbb))
5270*38fd1498Szrj 	HSA_SORRY_AT (gimple_location (stmt),
5271*38fd1498Szrj 		      "HSA supports only calls of functions marked with pragma "
5272*38fd1498Szrj 		      "omp declare target");
5273*38fd1498Szrj       return;
5274*38fd1498Szrj     }
5275*38fd1498Szrj 
5276*38fd1498Szrj   tree fndecl = gimple_call_fndecl (stmt);
5277*38fd1498Szrj   enum built_in_function builtin = DECL_FUNCTION_CODE (fndecl);
5278*38fd1498Szrj   switch (builtin)
5279*38fd1498Szrj     {
5280*38fd1498Szrj     case BUILT_IN_FABS:
5281*38fd1498Szrj     case BUILT_IN_FABSF:
5282*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_ABS, stmt, hbb);
5283*38fd1498Szrj       break;
5284*38fd1498Szrj 
5285*38fd1498Szrj     case BUILT_IN_CEIL:
5286*38fd1498Szrj     case BUILT_IN_CEILF:
5287*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb);
5288*38fd1498Szrj       break;
5289*38fd1498Szrj 
5290*38fd1498Szrj     case BUILT_IN_FLOOR:
5291*38fd1498Szrj     case BUILT_IN_FLOORF:
5292*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb);
5293*38fd1498Szrj       break;
5294*38fd1498Szrj 
5295*38fd1498Szrj     case BUILT_IN_RINT:
5296*38fd1498Szrj     case BUILT_IN_RINTF:
5297*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb);
5298*38fd1498Szrj       break;
5299*38fd1498Szrj 
5300*38fd1498Szrj     case BUILT_IN_SQRT:
5301*38fd1498Szrj     case BUILT_IN_SQRTF:
5302*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb);
5303*38fd1498Szrj       break;
5304*38fd1498Szrj 
5305*38fd1498Szrj     case BUILT_IN_TRUNC:
5306*38fd1498Szrj     case BUILT_IN_TRUNCF:
5307*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb);
5308*38fd1498Szrj       break;
5309*38fd1498Szrj 
5310*38fd1498Szrj     case BUILT_IN_COS:
5311*38fd1498Szrj     case BUILT_IN_SIN:
5312*38fd1498Szrj     case BUILT_IN_EXP2:
5313*38fd1498Szrj     case BUILT_IN_LOG2:
5314*38fd1498Szrj       /* HSAIL does not provide an instruction for double argument type.  */
5315*38fd1498Szrj       gen_hsa_unaryop_builtin_call (stmt, hbb);
5316*38fd1498Szrj       break;
5317*38fd1498Szrj 
5318*38fd1498Szrj     case BUILT_IN_COSF:
5319*38fd1498Szrj       gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb);
5320*38fd1498Szrj       break;
5321*38fd1498Szrj 
5322*38fd1498Szrj     case BUILT_IN_EXP2F:
5323*38fd1498Szrj       gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb);
5324*38fd1498Szrj       break;
5325*38fd1498Szrj 
5326*38fd1498Szrj     case BUILT_IN_LOG2F:
5327*38fd1498Szrj       gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb);
5328*38fd1498Szrj       break;
5329*38fd1498Szrj 
5330*38fd1498Szrj     case BUILT_IN_SINF:
5331*38fd1498Szrj       gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb);
5332*38fd1498Szrj       break;
5333*38fd1498Szrj 
5334*38fd1498Szrj     case BUILT_IN_CLRSB:
5335*38fd1498Szrj     case BUILT_IN_CLRSBL:
5336*38fd1498Szrj     case BUILT_IN_CLRSBLL:
5337*38fd1498Szrj       gen_hsa_clrsb (call, hbb);
5338*38fd1498Szrj       break;
5339*38fd1498Szrj 
5340*38fd1498Szrj     case BUILT_IN_CLZ:
5341*38fd1498Szrj     case BUILT_IN_CLZL:
5342*38fd1498Szrj     case BUILT_IN_CLZLL:
5343*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb);
5344*38fd1498Szrj       break;
5345*38fd1498Szrj 
5346*38fd1498Szrj     case BUILT_IN_CTZ:
5347*38fd1498Szrj     case BUILT_IN_CTZL:
5348*38fd1498Szrj     case BUILT_IN_CTZLL:
5349*38fd1498Szrj       gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb);
5350*38fd1498Szrj       break;
5351*38fd1498Szrj 
5352*38fd1498Szrj     case BUILT_IN_FFS:
5353*38fd1498Szrj     case BUILT_IN_FFSL:
5354*38fd1498Szrj     case BUILT_IN_FFSLL:
5355*38fd1498Szrj       gen_hsa_ffs (call, hbb);
5356*38fd1498Szrj       break;
5357*38fd1498Szrj 
5358*38fd1498Szrj     case BUILT_IN_PARITY:
5359*38fd1498Szrj     case BUILT_IN_PARITYL:
5360*38fd1498Szrj     case BUILT_IN_PARITYLL:
5361*38fd1498Szrj       gen_hsa_parity (call, hbb);
5362*38fd1498Szrj       break;
5363*38fd1498Szrj 
5364*38fd1498Szrj     case BUILT_IN_POPCOUNT:
5365*38fd1498Szrj     case BUILT_IN_POPCOUNTL:
5366*38fd1498Szrj     case BUILT_IN_POPCOUNTLL:
5367*38fd1498Szrj       gen_hsa_popcount (call, hbb);
5368*38fd1498Szrj       break;
5369*38fd1498Szrj 
5370*38fd1498Szrj     case BUILT_IN_ATOMIC_LOAD_1:
5371*38fd1498Szrj     case BUILT_IN_ATOMIC_LOAD_2:
5372*38fd1498Szrj     case BUILT_IN_ATOMIC_LOAD_4:
5373*38fd1498Szrj     case BUILT_IN_ATOMIC_LOAD_8:
5374*38fd1498Szrj     case BUILT_IN_ATOMIC_LOAD_16:
5375*38fd1498Szrj       {
5376*38fd1498Szrj 	BrigType16_t mtype;
5377*38fd1498Szrj 	hsa_op_base *src;
5378*38fd1498Szrj 	src = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
5379*38fd1498Szrj 
5380*38fd1498Szrj 	BrigMemoryOrder memorder;
5381*38fd1498Szrj 	const char *mmname;
5382*38fd1498Szrj 	if (hsa_memorder_from_tree (gimple_call_arg (stmt, 1), &memorder,
5383*38fd1498Szrj 				    &mmname, gimple_location (stmt)))
5384*38fd1498Szrj 	  return;
5385*38fd1498Szrj 
5386*38fd1498Szrj 	if (memorder == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE)
5387*38fd1498Szrj 	  memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
5388*38fd1498Szrj 
5389*38fd1498Szrj 	if (memorder != BRIG_MEMORY_ORDER_RELAXED
5390*38fd1498Szrj 	    && memorder != BRIG_MEMORY_ORDER_SC_ACQUIRE
5391*38fd1498Szrj 	    && memorder != BRIG_MEMORY_ORDER_NONE)
5392*38fd1498Szrj 	  {
5393*38fd1498Szrj 	    HSA_SORRY_ATV (gimple_location (stmt),
5394*38fd1498Szrj 			   "support for HSA does not implement "
5395*38fd1498Szrj 			   "memory model for atomic loads: %s", mmname);
5396*38fd1498Szrj 	    return;
5397*38fd1498Szrj 	  }
5398*38fd1498Szrj 
5399*38fd1498Szrj 	if (lhs)
5400*38fd1498Szrj 	  {
5401*38fd1498Szrj 	    BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
5402*38fd1498Szrj 							    false);
5403*38fd1498Szrj 	    mtype = mem_type_for_type (t);
5404*38fd1498Szrj 	    mtype = hsa_bittype_for_type (mtype);
5405*38fd1498Szrj 	    dest = hsa_cfun->reg_for_gimple_ssa (lhs);
5406*38fd1498Szrj 	  }
5407*38fd1498Szrj 	else
5408*38fd1498Szrj 	  {
5409*38fd1498Szrj 	    mtype = BRIG_TYPE_B64;
5410*38fd1498Szrj 	    dest = new hsa_op_reg (mtype);
5411*38fd1498Szrj 	  }
5412*38fd1498Szrj 
5413*38fd1498Szrj 	hsa_insn_basic *atominsn;
5414*38fd1498Szrj 	atominsn = new hsa_insn_atomic (2, BRIG_OPCODE_ATOMIC, BRIG_ATOMIC_LD,
5415*38fd1498Szrj 					mtype, memorder, dest, src);
5416*38fd1498Szrj 
5417*38fd1498Szrj 	hbb->append_insn (atominsn);
5418*38fd1498Szrj 	break;
5419*38fd1498Szrj       }
5420*38fd1498Szrj 
5421*38fd1498Szrj     case BUILT_IN_ATOMIC_EXCHANGE_1:
5422*38fd1498Szrj     case BUILT_IN_ATOMIC_EXCHANGE_2:
5423*38fd1498Szrj     case BUILT_IN_ATOMIC_EXCHANGE_4:
5424*38fd1498Szrj     case BUILT_IN_ATOMIC_EXCHANGE_8:
5425*38fd1498Szrj     case BUILT_IN_ATOMIC_EXCHANGE_16:
5426*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_EXCH, stmt, hbb, false);
5427*38fd1498Szrj       break;
5428*38fd1498Szrj       break;
5429*38fd1498Szrj 
5430*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_ADD_1:
5431*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_ADD_2:
5432*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_ADD_4:
5433*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_ADD_8:
5434*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_ADD_16:
5435*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_ADD, stmt, hbb, false);
5436*38fd1498Szrj       break;
5437*38fd1498Szrj       break;
5438*38fd1498Szrj 
5439*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_SUB_1:
5440*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_SUB_2:
5441*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_SUB_4:
5442*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_SUB_8:
5443*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_SUB_16:
5444*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_SUB, stmt, hbb, false);
5445*38fd1498Szrj       break;
5446*38fd1498Szrj       break;
5447*38fd1498Szrj 
5448*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_AND_1:
5449*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_AND_2:
5450*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_AND_4:
5451*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_AND_8:
5452*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_AND_16:
5453*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_AND, stmt, hbb, false);
5454*38fd1498Szrj       break;
5455*38fd1498Szrj       break;
5456*38fd1498Szrj 
5457*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_XOR_1:
5458*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_XOR_2:
5459*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_XOR_4:
5460*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_XOR_8:
5461*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_XOR_16:
5462*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_XOR, stmt, hbb, false);
5463*38fd1498Szrj       break;
5464*38fd1498Szrj       break;
5465*38fd1498Szrj 
5466*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_OR_1:
5467*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_OR_2:
5468*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_OR_4:
5469*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_OR_8:
5470*38fd1498Szrj     case BUILT_IN_ATOMIC_FETCH_OR_16:
5471*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_OR, stmt, hbb, false);
5472*38fd1498Szrj       break;
5473*38fd1498Szrj       break;
5474*38fd1498Szrj 
5475*38fd1498Szrj     case BUILT_IN_ATOMIC_STORE_1:
5476*38fd1498Szrj     case BUILT_IN_ATOMIC_STORE_2:
5477*38fd1498Szrj     case BUILT_IN_ATOMIC_STORE_4:
5478*38fd1498Szrj     case BUILT_IN_ATOMIC_STORE_8:
5479*38fd1498Szrj     case BUILT_IN_ATOMIC_STORE_16:
5480*38fd1498Szrj       /* Since there cannot be any LHS, the first parameter is meaningless.  */
5481*38fd1498Szrj       gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_ST, stmt, hbb, false);
5482*38fd1498Szrj       break;
5483*38fd1498Szrj       break;
5484*38fd1498Szrj 
5485*38fd1498Szrj     case BUILT_IN_ATOMIC_ADD_FETCH_1:
5486*38fd1498Szrj     case BUILT_IN_ATOMIC_ADD_FETCH_2:
5487*38fd1498Szrj     case BUILT_IN_ATOMIC_ADD_FETCH_4:
5488*38fd1498Szrj     case BUILT_IN_ATOMIC_ADD_FETCH_8:
5489*38fd1498Szrj     case BUILT_IN_ATOMIC_ADD_FETCH_16:
5490*38fd1498Szrj       gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_ADD, stmt, hbb, false);
5491*38fd1498Szrj       break;
5492*38fd1498Szrj 
5493*38fd1498Szrj     case BUILT_IN_ATOMIC_SUB_FETCH_1:
5494*38fd1498Szrj     case BUILT_IN_ATOMIC_SUB_FETCH_2:
5495*38fd1498Szrj     case BUILT_IN_ATOMIC_SUB_FETCH_4:
5496*38fd1498Szrj     case BUILT_IN_ATOMIC_SUB_FETCH_8:
5497*38fd1498Szrj     case BUILT_IN_ATOMIC_SUB_FETCH_16:
5498*38fd1498Szrj       gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_SUB, stmt, hbb, false);
5499*38fd1498Szrj       break;
5500*38fd1498Szrj 
5501*38fd1498Szrj     case BUILT_IN_ATOMIC_AND_FETCH_1:
5502*38fd1498Szrj     case BUILT_IN_ATOMIC_AND_FETCH_2:
5503*38fd1498Szrj     case BUILT_IN_ATOMIC_AND_FETCH_4:
5504*38fd1498Szrj     case BUILT_IN_ATOMIC_AND_FETCH_8:
5505*38fd1498Szrj     case BUILT_IN_ATOMIC_AND_FETCH_16:
5506*38fd1498Szrj       gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_AND, stmt, hbb, false);
5507*38fd1498Szrj       break;
5508*38fd1498Szrj 
5509*38fd1498Szrj     case BUILT_IN_ATOMIC_XOR_FETCH_1:
5510*38fd1498Szrj     case BUILT_IN_ATOMIC_XOR_FETCH_2:
5511*38fd1498Szrj     case BUILT_IN_ATOMIC_XOR_FETCH_4:
5512*38fd1498Szrj     case BUILT_IN_ATOMIC_XOR_FETCH_8:
5513*38fd1498Szrj     case BUILT_IN_ATOMIC_XOR_FETCH_16:
5514*38fd1498Szrj       gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_XOR, stmt, hbb, false);
5515*38fd1498Szrj       break;
5516*38fd1498Szrj 
5517*38fd1498Szrj     case BUILT_IN_ATOMIC_OR_FETCH_1:
5518*38fd1498Szrj     case BUILT_IN_ATOMIC_OR_FETCH_2:
5519*38fd1498Szrj     case BUILT_IN_ATOMIC_OR_FETCH_4:
5520*38fd1498Szrj     case BUILT_IN_ATOMIC_OR_FETCH_8:
5521*38fd1498Szrj     case BUILT_IN_ATOMIC_OR_FETCH_16:
5522*38fd1498Szrj       gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_OR, stmt, hbb, false);
5523*38fd1498Szrj       break;
5524*38fd1498Szrj 
5525*38fd1498Szrj     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_1:
5526*38fd1498Szrj     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_2:
5527*38fd1498Szrj     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4:
5528*38fd1498Szrj     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_8:
5529*38fd1498Szrj     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_16:
5530*38fd1498Szrj       {
5531*38fd1498Szrj 	tree type = TREE_TYPE (gimple_call_arg (stmt, 1));
5532*38fd1498Szrj 	BrigType16_t atype
5533*38fd1498Szrj 	  = hsa_bittype_for_type (hsa_type_for_scalar_tree_type (type, false));
5534*38fd1498Szrj 	BrigMemoryOrder memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
5535*38fd1498Szrj 	hsa_insn_basic *atominsn;
5536*38fd1498Szrj 	hsa_op_base *tgt;
5537*38fd1498Szrj 	atominsn = new hsa_insn_atomic (4, BRIG_OPCODE_ATOMIC,
5538*38fd1498Szrj 					BRIG_ATOMIC_CAS, atype, memorder);
5539*38fd1498Szrj 	tgt = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
5540*38fd1498Szrj 
5541*38fd1498Szrj 	if (lhs != NULL)
5542*38fd1498Szrj 	  dest = hsa_cfun->reg_for_gimple_ssa (lhs);
5543*38fd1498Szrj 	else
5544*38fd1498Szrj 	  dest = new hsa_op_reg (atype);
5545*38fd1498Szrj 
5546*38fd1498Szrj 	atominsn->set_op (0, dest);
5547*38fd1498Szrj 	atominsn->set_op (1, tgt);
5548*38fd1498Szrj 
5549*38fd1498Szrj 	hsa_op_with_type *op
5550*38fd1498Szrj 	  = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb);
5551*38fd1498Szrj 	atominsn->set_op (2, op);
5552*38fd1498Szrj 	op = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 2), hbb);
5553*38fd1498Szrj 	atominsn->set_op (3, op);
5554*38fd1498Szrj 
5555*38fd1498Szrj 	hbb->append_insn (atominsn);
5556*38fd1498Szrj 	break;
5557*38fd1498Szrj       }
5558*38fd1498Szrj 
5559*38fd1498Szrj     case BUILT_IN_HSA_WORKGROUPID:
5560*38fd1498Szrj       query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKGROUPID, hbb);
5561*38fd1498Szrj       break;
5562*38fd1498Szrj     case BUILT_IN_HSA_WORKITEMID:
5563*38fd1498Szrj       query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKITEMID, hbb);
5564*38fd1498Szrj       break;
5565*38fd1498Szrj     case BUILT_IN_HSA_WORKITEMABSID:
5566*38fd1498Szrj       query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKITEMABSID, hbb);
5567*38fd1498Szrj       break;
5568*38fd1498Szrj     case BUILT_IN_HSA_GRIDSIZE:
5569*38fd1498Szrj       query_hsa_grid_dim (stmt, BRIG_OPCODE_GRIDSIZE, hbb);
5570*38fd1498Szrj       break;
5571*38fd1498Szrj     case BUILT_IN_HSA_CURRENTWORKGROUPSIZE:
5572*38fd1498Szrj       query_hsa_grid_dim (stmt, BRIG_OPCODE_CURRENTWORKGROUPSIZE, hbb);
5573*38fd1498Szrj       break;
5574*38fd1498Szrj 
5575*38fd1498Szrj     case BUILT_IN_GOMP_BARRIER:
5576*38fd1498Szrj       hbb->append_insn (new hsa_insn_br (0, BRIG_OPCODE_BARRIER, BRIG_TYPE_NONE,
5577*38fd1498Szrj 					 BRIG_WIDTH_ALL));
5578*38fd1498Szrj       break;
5579*38fd1498Szrj     case BUILT_IN_GOMP_PARALLEL:
5580*38fd1498Szrj       HSA_SORRY_AT (gimple_location (stmt),
5581*38fd1498Szrj 		    "support for HSA does not implement non-gridified "
5582*38fd1498Szrj 		    "OpenMP parallel constructs.");
5583*38fd1498Szrj       break;
5584*38fd1498Szrj 
5585*38fd1498Szrj     case BUILT_IN_OMP_GET_THREAD_NUM:
5586*38fd1498Szrj       {
5587*38fd1498Szrj 	query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb);
5588*38fd1498Szrj 	break;
5589*38fd1498Szrj       }
5590*38fd1498Szrj 
5591*38fd1498Szrj     case BUILT_IN_OMP_GET_NUM_THREADS:
5592*38fd1498Szrj       {
5593*38fd1498Szrj 	gen_get_num_threads (stmt, hbb);
5594*38fd1498Szrj 	break;
5595*38fd1498Szrj       }
5596*38fd1498Szrj     case BUILT_IN_GOMP_TEAMS:
5597*38fd1498Szrj       {
5598*38fd1498Szrj 	gen_set_num_threads (gimple_call_arg (stmt, 1), hbb);
5599*38fd1498Szrj 	break;
5600*38fd1498Szrj       }
5601*38fd1498Szrj     case BUILT_IN_OMP_GET_NUM_TEAMS:
5602*38fd1498Szrj       {
5603*38fd1498Szrj 	gen_get_num_teams (stmt, hbb);
5604*38fd1498Szrj 	break;
5605*38fd1498Szrj       }
5606*38fd1498Szrj     case BUILT_IN_OMP_GET_TEAM_NUM:
5607*38fd1498Szrj       {
5608*38fd1498Szrj 	gen_get_team_num (stmt, hbb);
5609*38fd1498Szrj 	break;
5610*38fd1498Szrj       }
5611*38fd1498Szrj     case BUILT_IN_MEMCPY:
5612*38fd1498Szrj     case BUILT_IN_MEMPCPY:
5613*38fd1498Szrj       {
5614*38fd1498Szrj 	expand_memory_copy (stmt, hbb, builtin);
5615*38fd1498Szrj 	break;
5616*38fd1498Szrj       }
5617*38fd1498Szrj     case BUILT_IN_MEMSET:
5618*38fd1498Szrj       {
5619*38fd1498Szrj 	tree c = gimple_call_arg (stmt, 1);
5620*38fd1498Szrj 
5621*38fd1498Szrj 	if (TREE_CODE (c) != INTEGER_CST)
5622*38fd1498Szrj 	  {
5623*38fd1498Szrj 	    gen_hsa_insns_for_direct_call (stmt, hbb);
5624*38fd1498Szrj 	    return;
5625*38fd1498Szrj 	  }
5626*38fd1498Szrj 
5627*38fd1498Szrj 	tree byte_size = gimple_call_arg (stmt, 2);
5628*38fd1498Szrj 
5629*38fd1498Szrj 	if (!tree_fits_uhwi_p (byte_size))
5630*38fd1498Szrj 	  {
5631*38fd1498Szrj 	    gen_hsa_insns_for_direct_call (stmt, hbb);
5632*38fd1498Szrj 	    return;
5633*38fd1498Szrj 	  }
5634*38fd1498Szrj 
5635*38fd1498Szrj 	unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
5636*38fd1498Szrj 
5637*38fd1498Szrj 	if (n > HSA_MEMORY_BUILTINS_LIMIT)
5638*38fd1498Szrj 	  {
5639*38fd1498Szrj 	    gen_hsa_insns_for_direct_call (stmt, hbb);
5640*38fd1498Szrj 	    return;
5641*38fd1498Szrj 	  }
5642*38fd1498Szrj 
5643*38fd1498Szrj 	unsigned HOST_WIDE_INT constant
5644*38fd1498Szrj 	  = tree_to_uhwi (fold_convert (unsigned_char_type_node, c));
5645*38fd1498Szrj 
5646*38fd1498Szrj 	expand_memory_set (stmt, n, constant, hbb, builtin);
5647*38fd1498Szrj 
5648*38fd1498Szrj 	break;
5649*38fd1498Szrj       }
5650*38fd1498Szrj     case BUILT_IN_BZERO:
5651*38fd1498Szrj       {
5652*38fd1498Szrj 	tree byte_size = gimple_call_arg (stmt, 1);
5653*38fd1498Szrj 
5654*38fd1498Szrj 	if (!tree_fits_uhwi_p (byte_size))
5655*38fd1498Szrj 	  {
5656*38fd1498Szrj 	    gen_hsa_insns_for_direct_call (stmt, hbb);
5657*38fd1498Szrj 	    return;
5658*38fd1498Szrj 	  }
5659*38fd1498Szrj 
5660*38fd1498Szrj 	unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
5661*38fd1498Szrj 
5662*38fd1498Szrj 	if (n > HSA_MEMORY_BUILTINS_LIMIT)
5663*38fd1498Szrj 	  {
5664*38fd1498Szrj 	    gen_hsa_insns_for_direct_call (stmt, hbb);
5665*38fd1498Szrj 	    return;
5666*38fd1498Szrj 	  }
5667*38fd1498Szrj 
5668*38fd1498Szrj 	expand_memory_set (stmt, n, 0, hbb, builtin);
5669*38fd1498Szrj 
5670*38fd1498Szrj 	break;
5671*38fd1498Szrj       }
5672*38fd1498Szrj     CASE_BUILT_IN_ALLOCA:
5673*38fd1498Szrj       {
5674*38fd1498Szrj 	gen_hsa_alloca (call, hbb);
5675*38fd1498Szrj 	break;
5676*38fd1498Szrj       }
5677*38fd1498Szrj     case BUILT_IN_PREFETCH:
5678*38fd1498Szrj       break;
5679*38fd1498Szrj     default:
5680*38fd1498Szrj       {
5681*38fd1498Szrj 	tree name_tree = DECL_NAME (fndecl);
5682*38fd1498Szrj 	const char *s = IDENTIFIER_POINTER (name_tree);
5683*38fd1498Szrj 	size_t len = strlen (s);
5684*38fd1498Szrj 	if (len > 4 && (strncmp (s, "__builtin_GOMP_", 15) == 0))
5685*38fd1498Szrj 	  HSA_SORRY_ATV (gimple_location (stmt),
5686*38fd1498Szrj 			 "support for HSA does not implement GOMP function %s",
5687*38fd1498Szrj 			 s);
5688*38fd1498Szrj 	else
5689*38fd1498Szrj 	  gen_hsa_insns_for_direct_call (stmt, hbb);
5690*38fd1498Szrj 	return;
5691*38fd1498Szrj       }
5692*38fd1498Szrj     }
5693*38fd1498Szrj }
5694*38fd1498Szrj 
5695*38fd1498Szrj /* Generate HSA instructions for a given gimple statement.  Instructions will be
5696*38fd1498Szrj    appended to HBB.  */
5697*38fd1498Szrj 
5698*38fd1498Szrj static void
gen_hsa_insns_for_gimple_stmt(gimple * stmt,hsa_bb * hbb)5699*38fd1498Szrj gen_hsa_insns_for_gimple_stmt (gimple *stmt, hsa_bb *hbb)
5700*38fd1498Szrj {
5701*38fd1498Szrj   switch (gimple_code (stmt))
5702*38fd1498Szrj     {
5703*38fd1498Szrj     case GIMPLE_ASSIGN:
5704*38fd1498Szrj       if (gimple_clobber_p (stmt))
5705*38fd1498Szrj 	break;
5706*38fd1498Szrj 
5707*38fd1498Szrj       if (gimple_assign_single_p (stmt))
5708*38fd1498Szrj 	{
5709*38fd1498Szrj 	  tree lhs = gimple_assign_lhs (stmt);
5710*38fd1498Szrj 	  tree rhs = gimple_assign_rhs1 (stmt);
5711*38fd1498Szrj 	  gen_hsa_insns_for_single_assignment (lhs, rhs, hbb);
5712*38fd1498Szrj 	}
5713*38fd1498Szrj       else
5714*38fd1498Szrj 	gen_hsa_insns_for_operation_assignment (stmt, hbb);
5715*38fd1498Szrj       break;
5716*38fd1498Szrj     case GIMPLE_RETURN:
5717*38fd1498Szrj       gen_hsa_insns_for_return (as_a <greturn *> (stmt), hbb);
5718*38fd1498Szrj       break;
5719*38fd1498Szrj     case GIMPLE_COND:
5720*38fd1498Szrj       gen_hsa_insns_for_cond_stmt (stmt, hbb);
5721*38fd1498Szrj       break;
5722*38fd1498Szrj     case GIMPLE_CALL:
5723*38fd1498Szrj       gen_hsa_insns_for_call (stmt, hbb);
5724*38fd1498Szrj       break;
5725*38fd1498Szrj     case GIMPLE_DEBUG:
5726*38fd1498Szrj       /* ??? HSA supports some debug facilities.  */
5727*38fd1498Szrj       break;
5728*38fd1498Szrj     case GIMPLE_LABEL:
5729*38fd1498Szrj     {
5730*38fd1498Szrj       tree label = gimple_label_label (as_a <glabel *> (stmt));
5731*38fd1498Szrj       if (FORCED_LABEL (label))
5732*38fd1498Szrj 	HSA_SORRY_AT (gimple_location (stmt),
5733*38fd1498Szrj 		      "support for HSA does not implement gimple label with "
5734*38fd1498Szrj 		      "address taken");
5735*38fd1498Szrj 
5736*38fd1498Szrj       break;
5737*38fd1498Szrj     }
5738*38fd1498Szrj     case GIMPLE_NOP:
5739*38fd1498Szrj     {
5740*38fd1498Szrj       hbb->append_insn (new hsa_insn_basic (0, BRIG_OPCODE_NOP));
5741*38fd1498Szrj       break;
5742*38fd1498Szrj     }
5743*38fd1498Szrj     case GIMPLE_SWITCH:
5744*38fd1498Szrj     {
5745*38fd1498Szrj       gen_hsa_insns_for_switch_stmt (as_a <gswitch *> (stmt), hbb);
5746*38fd1498Szrj       break;
5747*38fd1498Szrj     }
5748*38fd1498Szrj     default:
5749*38fd1498Szrj       HSA_SORRY_ATV (gimple_location (stmt),
5750*38fd1498Szrj 		     "support for HSA does not implement gimple statement %s",
5751*38fd1498Szrj 		     gimple_code_name[(int) gimple_code (stmt)]);
5752*38fd1498Szrj     }
5753*38fd1498Szrj }
5754*38fd1498Szrj 
5755*38fd1498Szrj /* Generate a HSA PHI from a gimple PHI.  */
5756*38fd1498Szrj 
5757*38fd1498Szrj static void
gen_hsa_phi_from_gimple_phi(gimple * phi_stmt,hsa_bb * hbb)5758*38fd1498Szrj gen_hsa_phi_from_gimple_phi (gimple *phi_stmt, hsa_bb *hbb)
5759*38fd1498Szrj {
5760*38fd1498Szrj   hsa_insn_phi *hphi;
5761*38fd1498Szrj   unsigned count = gimple_phi_num_args (phi_stmt);
5762*38fd1498Szrj 
5763*38fd1498Szrj   hsa_op_reg *dest
5764*38fd1498Szrj     = hsa_cfun->reg_for_gimple_ssa (gimple_phi_result (phi_stmt));
5765*38fd1498Szrj   hphi = new hsa_insn_phi (count, dest);
5766*38fd1498Szrj   hphi->m_bb = hbb->m_bb;
5767*38fd1498Szrj 
5768*38fd1498Szrj   auto_vec <tree, 8> aexprs;
5769*38fd1498Szrj   auto_vec <hsa_op_reg *, 8> aregs;
5770*38fd1498Szrj 
5771*38fd1498Szrj   /* Calling split_edge when processing a PHI node messes up with the order of
5772*38fd1498Szrj      gimple phi node arguments (it moves the one associated with the edge to
5773*38fd1498Szrj      the end).  We need to keep the order of edges and arguments of HSA phi
5774*38fd1498Szrj      node arguments consistent, so we do all required splitting as the first
5775*38fd1498Szrj      step, and in reverse order as to not be affected by the re-orderings.  */
5776*38fd1498Szrj   for (unsigned j = count; j != 0; j--)
5777*38fd1498Szrj     {
5778*38fd1498Szrj       unsigned i = j - 1;
5779*38fd1498Szrj       tree op = gimple_phi_arg_def (phi_stmt, i);
5780*38fd1498Szrj       if (TREE_CODE (op) != ADDR_EXPR)
5781*38fd1498Szrj 	continue;
5782*38fd1498Szrj 
5783*38fd1498Szrj       edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
5784*38fd1498Szrj       hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
5785*38fd1498Szrj       hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
5786*38fd1498Szrj 					   hbb_src);
5787*38fd1498Szrj 
5788*38fd1498Szrj       hsa_op_reg *dest
5789*38fd1498Szrj 	= new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
5790*38fd1498Szrj       hsa_insn_basic *insn
5791*38fd1498Szrj 	= new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
5792*38fd1498Szrj 			      dest, addr);
5793*38fd1498Szrj       hbb_src->append_insn (insn);
5794*38fd1498Szrj       aexprs.safe_push (op);
5795*38fd1498Szrj       aregs.safe_push (dest);
5796*38fd1498Szrj     }
5797*38fd1498Szrj 
5798*38fd1498Szrj   tree lhs = gimple_phi_result (phi_stmt);
5799*38fd1498Szrj   for (unsigned i = 0; i < count; i++)
5800*38fd1498Szrj     {
5801*38fd1498Szrj       tree op = gimple_phi_arg_def (phi_stmt, i);
5802*38fd1498Szrj 
5803*38fd1498Szrj       if (TREE_CODE (op) == SSA_NAME)
5804*38fd1498Szrj 	{
5805*38fd1498Szrj 	  hsa_op_reg *hreg = hsa_cfun->reg_for_gimple_ssa (op);
5806*38fd1498Szrj 	  hphi->set_op (i, hreg);
5807*38fd1498Szrj 	}
5808*38fd1498Szrj       else
5809*38fd1498Szrj 	{
5810*38fd1498Szrj 	  gcc_assert (is_gimple_min_invariant (op));
5811*38fd1498Szrj 	  tree t = TREE_TYPE (op);
5812*38fd1498Szrj 	  if (!POINTER_TYPE_P (t)
5813*38fd1498Szrj 	      || (TREE_CODE (op) == STRING_CST
5814*38fd1498Szrj 		  && TREE_CODE (TREE_TYPE (t)) == INTEGER_TYPE))
5815*38fd1498Szrj 	    hphi->set_op (i, new hsa_op_immed (op));
5816*38fd1498Szrj 	  else if (POINTER_TYPE_P (TREE_TYPE (lhs))
5817*38fd1498Szrj 		   && TREE_CODE (op) == INTEGER_CST)
5818*38fd1498Szrj 	    {
5819*38fd1498Szrj 	      /* Handle assignment of NULL value to a pointer type.  */
5820*38fd1498Szrj 	      hphi->set_op (i, new hsa_op_immed (op));
5821*38fd1498Szrj 	    }
5822*38fd1498Szrj 	  else if (TREE_CODE (op) == ADDR_EXPR)
5823*38fd1498Szrj 	    {
5824*38fd1498Szrj 	      hsa_op_reg *dest = NULL;
5825*38fd1498Szrj 	      for (unsigned a_idx = 0; a_idx < aexprs.length (); a_idx++)
5826*38fd1498Szrj 		if (aexprs[a_idx] == op)
5827*38fd1498Szrj 		  {
5828*38fd1498Szrj 		    dest = aregs[a_idx];
5829*38fd1498Szrj 		    break;
5830*38fd1498Szrj 		  }
5831*38fd1498Szrj 	      gcc_assert (dest);
5832*38fd1498Szrj 	      hphi->set_op (i, dest);
5833*38fd1498Szrj 	    }
5834*38fd1498Szrj 	  else
5835*38fd1498Szrj 	    {
5836*38fd1498Szrj 	      HSA_SORRY_AT (gimple_location (phi_stmt),
5837*38fd1498Szrj 			    "support for HSA does not handle PHI nodes with "
5838*38fd1498Szrj 			    "constant address operands");
5839*38fd1498Szrj 	      return;
5840*38fd1498Szrj 	    }
5841*38fd1498Szrj 	}
5842*38fd1498Szrj     }
5843*38fd1498Szrj 
5844*38fd1498Szrj   hbb->append_phi (hphi);
5845*38fd1498Szrj }
5846*38fd1498Szrj 
5847*38fd1498Szrj /* Constructor of class containing HSA-specific information about a basic
5848*38fd1498Szrj    block.  CFG_BB is the CFG BB this HSA BB is associated with.  IDX is the new
5849*38fd1498Szrj    index of this BB (so that the constructor does not attempt to use
5850*38fd1498Szrj    hsa_cfun during its construction).  */
5851*38fd1498Szrj 
hsa_bb(basic_block cfg_bb,int idx)5852*38fd1498Szrj hsa_bb::hsa_bb (basic_block cfg_bb, int idx)
5853*38fd1498Szrj   : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL),
5854*38fd1498Szrj     m_last_phi (NULL), m_index (idx)
5855*38fd1498Szrj {
5856*38fd1498Szrj   gcc_assert (!cfg_bb->aux);
5857*38fd1498Szrj   cfg_bb->aux = this;
5858*38fd1498Szrj }
5859*38fd1498Szrj 
5860*38fd1498Szrj /* Constructor of class containing HSA-specific information about a basic
5861*38fd1498Szrj    block.  CFG_BB is the CFG BB this HSA BB is associated with.  */
5862*38fd1498Szrj 
hsa_bb(basic_block cfg_bb)5863*38fd1498Szrj hsa_bb::hsa_bb (basic_block cfg_bb)
5864*38fd1498Szrj   : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL),
5865*38fd1498Szrj     m_last_phi (NULL), m_index (hsa_cfun->m_hbb_count++)
5866*38fd1498Szrj {
5867*38fd1498Szrj   gcc_assert (!cfg_bb->aux);
5868*38fd1498Szrj   cfg_bb->aux = this;
5869*38fd1498Szrj }
5870*38fd1498Szrj 
5871*38fd1498Szrj /* Create and initialize and return a new hsa_bb structure for a given CFG
5872*38fd1498Szrj    basic block BB.  */
5873*38fd1498Szrj 
5874*38fd1498Szrj hsa_bb *
hsa_init_new_bb(basic_block bb)5875*38fd1498Szrj hsa_init_new_bb (basic_block bb)
5876*38fd1498Szrj {
5877*38fd1498Szrj   void *m = obstack_alloc (&hsa_obstack, sizeof (hsa_bb));
5878*38fd1498Szrj   return new (m) hsa_bb (bb);
5879*38fd1498Szrj }
5880*38fd1498Szrj 
5881*38fd1498Szrj /* Initialize OMP in an HSA basic block PROLOGUE.  */
5882*38fd1498Szrj 
5883*38fd1498Szrj static void
init_prologue(void)5884*38fd1498Szrj init_prologue (void)
5885*38fd1498Szrj {
5886*38fd1498Szrj   if (!hsa_cfun->m_kern_p)
5887*38fd1498Szrj     return;
5888*38fd1498Szrj 
5889*38fd1498Szrj   hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5890*38fd1498Szrj 
5891*38fd1498Szrj   /* Create a magic number that is going to be printed by libgomp.  */
5892*38fd1498Szrj   unsigned index = hsa_get_number_decl_kernel_mappings ();
5893*38fd1498Szrj 
5894*38fd1498Szrj   /* Emit store to debug argument.  */
5895*38fd1498Szrj   if (PARAM_VALUE (PARAM_HSA_GEN_DEBUG_STORES) > 0)
5896*38fd1498Szrj     set_debug_value (prologue, new hsa_op_immed (1000 + index, BRIG_TYPE_U64));
5897*38fd1498Szrj }
5898*38fd1498Szrj 
5899*38fd1498Szrj /* Initialize hsa_num_threads to a default value.  */
5900*38fd1498Szrj 
5901*38fd1498Szrj static void
init_hsa_num_threads(void)5902*38fd1498Szrj init_hsa_num_threads (void)
5903*38fd1498Szrj {
5904*38fd1498Szrj   hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5905*38fd1498Szrj 
5906*38fd1498Szrj   /* Save the default value to private variable hsa_num_threads.  */
5907*38fd1498Szrj   hsa_insn_basic *basic
5908*38fd1498Szrj     = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type,
5909*38fd1498Szrj 			new hsa_op_immed (0, hsa_num_threads->m_type),
5910*38fd1498Szrj 			new hsa_op_address (hsa_num_threads));
5911*38fd1498Szrj   prologue->append_insn (basic);
5912*38fd1498Szrj }
5913*38fd1498Szrj 
5914*38fd1498Szrj /* Go over gimple representation and generate our internal HSA one.  */
5915*38fd1498Szrj 
5916*38fd1498Szrj static void
gen_body_from_gimple()5917*38fd1498Szrj gen_body_from_gimple ()
5918*38fd1498Szrj {
5919*38fd1498Szrj   basic_block bb;
5920*38fd1498Szrj 
5921*38fd1498Szrj   /* Verify CFG for complex edges we are unable to handle.  */
5922*38fd1498Szrj   edge_iterator ei;
5923*38fd1498Szrj   edge e;
5924*38fd1498Szrj 
5925*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
5926*38fd1498Szrj     {
5927*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
5928*38fd1498Szrj 	{
5929*38fd1498Szrj 	  /* Verify all unsupported flags for edges that point
5930*38fd1498Szrj 	     to the same basic block.  */
5931*38fd1498Szrj 	  if (e->flags & EDGE_EH)
5932*38fd1498Szrj 	    {
5933*38fd1498Szrj 	      HSA_SORRY_AT (UNKNOWN_LOCATION,
5934*38fd1498Szrj 			    "support for HSA does not implement exception "
5935*38fd1498Szrj 			    "handling");
5936*38fd1498Szrj 	      return;
5937*38fd1498Szrj 	    }
5938*38fd1498Szrj 	}
5939*38fd1498Szrj     }
5940*38fd1498Szrj 
5941*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
5942*38fd1498Szrj     {
5943*38fd1498Szrj       gimple_stmt_iterator gsi;
5944*38fd1498Szrj       hsa_bb *hbb = hsa_bb_for_bb (bb);
5945*38fd1498Szrj       if (hbb)
5946*38fd1498Szrj 	continue;
5947*38fd1498Szrj 
5948*38fd1498Szrj       hbb = hsa_init_new_bb (bb);
5949*38fd1498Szrj 
5950*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5951*38fd1498Szrj 	{
5952*38fd1498Szrj 	  gen_hsa_insns_for_gimple_stmt (gsi_stmt (gsi), hbb);
5953*38fd1498Szrj 	  if (hsa_seen_error ())
5954*38fd1498Szrj 	    return;
5955*38fd1498Szrj 	}
5956*38fd1498Szrj     }
5957*38fd1498Szrj 
5958*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
5959*38fd1498Szrj     {
5960*38fd1498Szrj       gimple_stmt_iterator gsi;
5961*38fd1498Szrj       hsa_bb *hbb = hsa_bb_for_bb (bb);
5962*38fd1498Szrj       gcc_assert (hbb != NULL);
5963*38fd1498Szrj 
5964*38fd1498Szrj       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5965*38fd1498Szrj 	if (!virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
5966*38fd1498Szrj 	  gen_hsa_phi_from_gimple_phi (gsi_stmt (gsi), hbb);
5967*38fd1498Szrj     }
5968*38fd1498Szrj 
5969*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5970*38fd1498Szrj     {
5971*38fd1498Szrj       fprintf (dump_file, "------- Generated SSA form -------\n");
5972*38fd1498Szrj       dump_hsa_cfun (dump_file);
5973*38fd1498Szrj     }
5974*38fd1498Szrj }
5975*38fd1498Szrj 
5976*38fd1498Szrj static void
gen_function_decl_parameters(hsa_function_representation * f,tree decl)5977*38fd1498Szrj gen_function_decl_parameters (hsa_function_representation *f,
5978*38fd1498Szrj 			      tree decl)
5979*38fd1498Szrj {
5980*38fd1498Szrj   tree parm;
5981*38fd1498Szrj   unsigned i;
5982*38fd1498Szrj 
5983*38fd1498Szrj   for (parm = TYPE_ARG_TYPES (TREE_TYPE (decl)), i = 0;
5984*38fd1498Szrj        parm;
5985*38fd1498Szrj        parm = TREE_CHAIN (parm), i++)
5986*38fd1498Szrj     {
5987*38fd1498Szrj       /* Result type if last in the tree list.  */
5988*38fd1498Szrj       if (TREE_CHAIN (parm) == NULL)
5989*38fd1498Szrj 	break;
5990*38fd1498Szrj 
5991*38fd1498Szrj       tree v = TREE_VALUE (parm);
5992*38fd1498Szrj 
5993*38fd1498Szrj       hsa_symbol *arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
5994*38fd1498Szrj 					BRIG_LINKAGE_NONE);
5995*38fd1498Szrj       arg->m_type = hsa_type_for_tree_type (v, &arg->m_dim);
5996*38fd1498Szrj       arg->m_name_number = i;
5997*38fd1498Szrj 
5998*38fd1498Szrj       f->m_input_args.safe_push (arg);
5999*38fd1498Szrj     }
6000*38fd1498Szrj 
6001*38fd1498Szrj   tree result_type = TREE_TYPE (TREE_TYPE (decl));
6002*38fd1498Szrj   if (!VOID_TYPE_P (result_type))
6003*38fd1498Szrj     {
6004*38fd1498Szrj       f->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
6005*38fd1498Szrj 					BRIG_LINKAGE_NONE);
6006*38fd1498Szrj       f->m_output_arg->m_type
6007*38fd1498Szrj 	= hsa_type_for_tree_type (result_type, &f->m_output_arg->m_dim);
6008*38fd1498Szrj       f->m_output_arg->m_name = "res";
6009*38fd1498Szrj     }
6010*38fd1498Szrj }
6011*38fd1498Szrj 
6012*38fd1498Szrj /* Generate the vector of parameters of the HSA representation of the current
6013*38fd1498Szrj    function.  This also includes the output parameter representing the
6014*38fd1498Szrj    result.  */
6015*38fd1498Szrj 
6016*38fd1498Szrj static void
gen_function_def_parameters()6017*38fd1498Szrj gen_function_def_parameters ()
6018*38fd1498Szrj {
6019*38fd1498Szrj   tree parm;
6020*38fd1498Szrj 
6021*38fd1498Szrj   hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6022*38fd1498Szrj 
6023*38fd1498Szrj   for (parm = DECL_ARGUMENTS (cfun->decl); parm;
6024*38fd1498Szrj        parm = DECL_CHAIN (parm))
6025*38fd1498Szrj     {
6026*38fd1498Szrj       struct hsa_symbol **slot;
6027*38fd1498Szrj 
6028*38fd1498Szrj       hsa_symbol *arg
6029*38fd1498Szrj 	= new hsa_symbol (BRIG_TYPE_NONE, hsa_cfun->m_kern_p
6030*38fd1498Szrj 			  ? BRIG_SEGMENT_KERNARG : BRIG_SEGMENT_ARG,
6031*38fd1498Szrj 			  BRIG_LINKAGE_FUNCTION);
6032*38fd1498Szrj       arg->fillup_for_decl (parm);
6033*38fd1498Szrj 
6034*38fd1498Szrj       hsa_cfun->m_input_args.safe_push (arg);
6035*38fd1498Szrj 
6036*38fd1498Szrj       if (hsa_seen_error ())
6037*38fd1498Szrj 	return;
6038*38fd1498Szrj 
6039*38fd1498Szrj       arg->m_name = hsa_get_declaration_name (parm);
6040*38fd1498Szrj 
6041*38fd1498Szrj       /* Copy all input arguments and create corresponding private symbols
6042*38fd1498Szrj 	 for them.  */
6043*38fd1498Szrj       hsa_symbol *private_arg;
6044*38fd1498Szrj       hsa_op_address *parm_addr = new hsa_op_address (arg);
6045*38fd1498Szrj 
6046*38fd1498Szrj       if (TREE_ADDRESSABLE (parm)
6047*38fd1498Szrj 	  || (!is_gimple_reg (parm) && !TREE_READONLY (parm)))
6048*38fd1498Szrj 	{
6049*38fd1498Szrj 	  private_arg = hsa_cfun->create_hsa_temporary (arg->m_type);
6050*38fd1498Szrj 	  private_arg->fillup_for_decl (parm);
6051*38fd1498Szrj 
6052*38fd1498Szrj 	  BrigAlignment8_t align = MIN (arg->m_align, private_arg->m_align);
6053*38fd1498Szrj 
6054*38fd1498Szrj 	  hsa_op_address *private_arg_addr = new hsa_op_address (private_arg);
6055*38fd1498Szrj 	  gen_hsa_memory_copy (prologue, private_arg_addr, parm_addr,
6056*38fd1498Szrj 			       arg->total_byte_size (), align);
6057*38fd1498Szrj 	}
6058*38fd1498Szrj       else
6059*38fd1498Szrj 	private_arg = arg;
6060*38fd1498Szrj 
6061*38fd1498Szrj       slot = hsa_cfun->m_local_symbols->find_slot (private_arg, INSERT);
6062*38fd1498Szrj       gcc_assert (!*slot);
6063*38fd1498Szrj       *slot = private_arg;
6064*38fd1498Szrj 
6065*38fd1498Szrj       if (is_gimple_reg (parm))
6066*38fd1498Szrj 	{
6067*38fd1498Szrj 	  tree ddef = ssa_default_def (cfun, parm);
6068*38fd1498Szrj 	  if (ddef && !has_zero_uses (ddef))
6069*38fd1498Szrj 	    {
6070*38fd1498Szrj 	      BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (ddef),
6071*38fd1498Szrj 							      false);
6072*38fd1498Szrj 	      BrigType16_t mtype = mem_type_for_type (t);
6073*38fd1498Szrj 	      hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (ddef);
6074*38fd1498Szrj 	      hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype,
6075*38fd1498Szrj 						    dest, parm_addr);
6076*38fd1498Szrj 	      gcc_assert (!parm_addr->m_reg);
6077*38fd1498Szrj 	      prologue->append_insn (mem);
6078*38fd1498Szrj 	    }
6079*38fd1498Szrj 	}
6080*38fd1498Szrj     }
6081*38fd1498Szrj 
6082*38fd1498Szrj   if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (cfun->decl))))
6083*38fd1498Szrj     {
6084*38fd1498Szrj       struct hsa_symbol **slot;
6085*38fd1498Szrj 
6086*38fd1498Szrj       hsa_cfun->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
6087*38fd1498Szrj 					       BRIG_LINKAGE_FUNCTION);
6088*38fd1498Szrj       hsa_cfun->m_output_arg->fillup_for_decl (DECL_RESULT (cfun->decl));
6089*38fd1498Szrj 
6090*38fd1498Szrj       if (hsa_seen_error ())
6091*38fd1498Szrj 	return;
6092*38fd1498Szrj 
6093*38fd1498Szrj       hsa_cfun->m_output_arg->m_name = "res";
6094*38fd1498Szrj       slot = hsa_cfun->m_local_symbols->find_slot (hsa_cfun->m_output_arg,
6095*38fd1498Szrj 						   INSERT);
6096*38fd1498Szrj       gcc_assert (!*slot);
6097*38fd1498Szrj       *slot = hsa_cfun->m_output_arg;
6098*38fd1498Szrj     }
6099*38fd1498Szrj }
6100*38fd1498Szrj 
6101*38fd1498Szrj /* Generate function representation that corresponds to
6102*38fd1498Szrj    a function declaration.  */
6103*38fd1498Szrj 
6104*38fd1498Szrj hsa_function_representation *
hsa_generate_function_declaration(tree decl)6105*38fd1498Szrj hsa_generate_function_declaration (tree decl)
6106*38fd1498Szrj {
6107*38fd1498Szrj   hsa_function_representation *fun
6108*38fd1498Szrj     = new hsa_function_representation (decl, false, 0);
6109*38fd1498Szrj 
6110*38fd1498Szrj   fun->m_declaration_p = true;
6111*38fd1498Szrj   fun->m_name = get_brig_function_name (decl);
6112*38fd1498Szrj   gen_function_decl_parameters (fun, decl);
6113*38fd1498Szrj 
6114*38fd1498Szrj   return fun;
6115*38fd1498Szrj }
6116*38fd1498Szrj 
6117*38fd1498Szrj 
6118*38fd1498Szrj /* Generate function representation that corresponds to
6119*38fd1498Szrj    an internal FN.  */
6120*38fd1498Szrj 
6121*38fd1498Szrj hsa_function_representation *
hsa_generate_internal_fn_decl(hsa_internal_fn * fn)6122*38fd1498Szrj hsa_generate_internal_fn_decl (hsa_internal_fn *fn)
6123*38fd1498Szrj {
6124*38fd1498Szrj   hsa_function_representation *fun = new hsa_function_representation (fn);
6125*38fd1498Szrj 
6126*38fd1498Szrj   fun->m_name = fn->name ();
6127*38fd1498Szrj 
6128*38fd1498Szrj   for (unsigned i = 0; i < fn->get_arity (); i++)
6129*38fd1498Szrj     {
6130*38fd1498Szrj       hsa_symbol *arg
6131*38fd1498Szrj 	= new hsa_symbol (fn->get_argument_type (i), BRIG_SEGMENT_ARG,
6132*38fd1498Szrj 			  BRIG_LINKAGE_NONE);
6133*38fd1498Szrj       arg->m_name_number = i;
6134*38fd1498Szrj       fun->m_input_args.safe_push (arg);
6135*38fd1498Szrj     }
6136*38fd1498Szrj 
6137*38fd1498Szrj   fun->m_output_arg = new hsa_symbol (fn->get_argument_type (-1),
6138*38fd1498Szrj 				      BRIG_SEGMENT_ARG, BRIG_LINKAGE_NONE);
6139*38fd1498Szrj   fun->m_output_arg->m_name = "res";
6140*38fd1498Szrj 
6141*38fd1498Szrj   return fun;
6142*38fd1498Szrj }
6143*38fd1498Szrj 
6144*38fd1498Szrj /* Return true if switch statement S can be transformed
6145*38fd1498Szrj    to a SBR instruction in HSAIL.  */
6146*38fd1498Szrj 
6147*38fd1498Szrj static bool
transformable_switch_to_sbr_p(gswitch * s)6148*38fd1498Szrj transformable_switch_to_sbr_p (gswitch *s)
6149*38fd1498Szrj {
6150*38fd1498Szrj   /* Identify if a switch statement can be transformed to
6151*38fd1498Szrj      SBR instruction, like:
6152*38fd1498Szrj 
6153*38fd1498Szrj      sbr_u32 $s1 [@label1, @label2, @label3];
6154*38fd1498Szrj   */
6155*38fd1498Szrj 
6156*38fd1498Szrj   tree size = get_switch_size (s);
6157*38fd1498Szrj   if (!tree_fits_uhwi_p (size))
6158*38fd1498Szrj     return false;
6159*38fd1498Szrj 
6160*38fd1498Szrj   if (tree_to_uhwi (size) > HSA_MAXIMUM_SBR_LABELS)
6161*38fd1498Szrj     return false;
6162*38fd1498Szrj 
6163*38fd1498Szrj   return true;
6164*38fd1498Szrj }
6165*38fd1498Szrj 
6166*38fd1498Szrj /* Structure hold connection between PHI nodes and immediate
6167*38fd1498Szrj    values hold by there nodes.  */
6168*38fd1498Szrj 
6169*38fd1498Szrj struct phi_definition
6170*38fd1498Szrj {
phi_definitionphi_definition6171*38fd1498Szrj   phi_definition (unsigned phi_i, unsigned label_i, tree imm):
6172*38fd1498Szrj     phi_index (phi_i), label_index (label_i), phi_value (imm)
6173*38fd1498Szrj   {}
6174*38fd1498Szrj 
6175*38fd1498Szrj   unsigned phi_index;
6176*38fd1498Szrj   unsigned label_index;
6177*38fd1498Szrj   tree phi_value;
6178*38fd1498Szrj };
6179*38fd1498Szrj 
6180*38fd1498Szrj /* Sum slice of a vector V, starting from index START and ending
6181*38fd1498Szrj    at the index END - 1.  */
6182*38fd1498Szrj 
6183*38fd1498Szrj template <typename T>
6184*38fd1498Szrj static
sum_slice(const auto_vec<T> & v,unsigned start,unsigned end,T zero)6185*38fd1498Szrj T sum_slice (const auto_vec <T> &v, unsigned start, unsigned end,
6186*38fd1498Szrj 	     T zero)
6187*38fd1498Szrj {
6188*38fd1498Szrj   T s = zero;
6189*38fd1498Szrj 
6190*38fd1498Szrj   for (unsigned i = start; i < end; i++)
6191*38fd1498Szrj     s += v[i];
6192*38fd1498Szrj 
6193*38fd1498Szrj   return s;
6194*38fd1498Szrj }
6195*38fd1498Szrj 
6196*38fd1498Szrj /* Function transforms GIMPLE SWITCH statements to a series of IF statements.
6197*38fd1498Szrj    Let's assume following example:
6198*38fd1498Szrj 
6199*38fd1498Szrj L0:
6200*38fd1498Szrj    switch (index)
6201*38fd1498Szrj      case C1:
6202*38fd1498Szrj L1:    hard_work_1 ();
6203*38fd1498Szrj        break;
6204*38fd1498Szrj      case C2..C3:
6205*38fd1498Szrj L2:    hard_work_2 ();
6206*38fd1498Szrj        break;
6207*38fd1498Szrj      default:
6208*38fd1498Szrj LD:    hard_work_3 ();
6209*38fd1498Szrj        break;
6210*38fd1498Szrj 
6211*38fd1498Szrj   The transformation encompasses following steps:
6212*38fd1498Szrj     1) all immediate values used by edges coming from the switch basic block
6213*38fd1498Szrj        are saved
6214*38fd1498Szrj     2) all these edges are removed
6215*38fd1498Szrj     3) the switch statement (in L0) is replaced by:
6216*38fd1498Szrj 	 if (index == C1)
6217*38fd1498Szrj 	   goto L1;
6218*38fd1498Szrj 	 else
6219*38fd1498Szrj 	   goto L1';
6220*38fd1498Szrj 
6221*38fd1498Szrj     4) newly created basic block Lx' is used for generation of
6222*38fd1498Szrj        a next condition
6223*38fd1498Szrj     5) else branch of the last condition goes to LD
6224*38fd1498Szrj     6) fix all immediate values in PHI nodes that were propagated though
6225*38fd1498Szrj        edges that were removed in step 2
6226*38fd1498Szrj 
6227*38fd1498Szrj   Note: if a case is made by a range C1..C2, then process
6228*38fd1498Szrj 	following transformation:
6229*38fd1498Szrj 
6230*38fd1498Szrj   switch_cond_op1 = C1 <= index;
6231*38fd1498Szrj   switch_cond_op2 = index <= C2;
6232*38fd1498Szrj   switch_cond_and = switch_cond_op1 & switch_cond_op2;
6233*38fd1498Szrj   if (switch_cond_and != 0)
6234*38fd1498Szrj     goto Lx;
6235*38fd1498Szrj   else
6236*38fd1498Szrj     goto Ly;
6237*38fd1498Szrj 
6238*38fd1498Szrj */
6239*38fd1498Szrj 
6240*38fd1498Szrj static bool
convert_switch_statements(void)6241*38fd1498Szrj convert_switch_statements (void)
6242*38fd1498Szrj {
6243*38fd1498Szrj   function *func = DECL_STRUCT_FUNCTION (current_function_decl);
6244*38fd1498Szrj   basic_block bb;
6245*38fd1498Szrj 
6246*38fd1498Szrj   bool modified_cfg = false;
6247*38fd1498Szrj 
6248*38fd1498Szrj   FOR_EACH_BB_FN (bb, func)
6249*38fd1498Szrj   {
6250*38fd1498Szrj     gimple_stmt_iterator gsi = gsi_last_bb (bb);
6251*38fd1498Szrj     if (gsi_end_p (gsi))
6252*38fd1498Szrj       continue;
6253*38fd1498Szrj 
6254*38fd1498Szrj     gimple *stmt = gsi_stmt (gsi);
6255*38fd1498Szrj 
6256*38fd1498Szrj     if (gimple_code (stmt) == GIMPLE_SWITCH)
6257*38fd1498Szrj       {
6258*38fd1498Szrj 	gswitch *s = as_a <gswitch *> (stmt);
6259*38fd1498Szrj 
6260*38fd1498Szrj 	/* If the switch can utilize SBR insn, skip the statement.  */
6261*38fd1498Szrj 	if (transformable_switch_to_sbr_p (s))
6262*38fd1498Szrj 	  continue;
6263*38fd1498Szrj 
6264*38fd1498Szrj 	modified_cfg = true;
6265*38fd1498Szrj 
6266*38fd1498Szrj 	unsigned labels = gimple_switch_num_labels (s);
6267*38fd1498Szrj 	tree index = gimple_switch_index (s);
6268*38fd1498Szrj 	tree index_type = TREE_TYPE (index);
6269*38fd1498Szrj 	tree default_label = gimple_switch_default_label (s);
6270*38fd1498Szrj 	basic_block default_label_bb
6271*38fd1498Szrj 	  = label_to_block_fn (func, CASE_LABEL (default_label));
6272*38fd1498Szrj 	basic_block cur_bb = bb;
6273*38fd1498Szrj 
6274*38fd1498Szrj 	auto_vec <edge> new_edges;
6275*38fd1498Szrj 	auto_vec <phi_definition *> phi_todo_list;
6276*38fd1498Szrj 	auto_vec <profile_count> edge_counts;
6277*38fd1498Szrj 	auto_vec <profile_probability> edge_probabilities;
6278*38fd1498Szrj 
6279*38fd1498Szrj 	/* Investigate all labels that and PHI nodes in these edges which
6280*38fd1498Szrj 	   should be fixed after we add new collection of edges.  */
6281*38fd1498Szrj 	for (unsigned i = 0; i < labels; i++)
6282*38fd1498Szrj 	  {
6283*38fd1498Szrj 	    tree label = gimple_switch_label (s, i);
6284*38fd1498Szrj 	    basic_block label_bb = label_to_block_fn (func, CASE_LABEL (label));
6285*38fd1498Szrj 	    edge e = find_edge (bb, label_bb);
6286*38fd1498Szrj 	    edge_counts.safe_push (e->count ());
6287*38fd1498Szrj 	    edge_probabilities.safe_push (e->probability);
6288*38fd1498Szrj 	    gphi_iterator phi_gsi;
6289*38fd1498Szrj 
6290*38fd1498Szrj 	    /* Save PHI definitions that will be destroyed because of an edge
6291*38fd1498Szrj 	       is going to be removed.  */
6292*38fd1498Szrj 	    unsigned phi_index = 0;
6293*38fd1498Szrj 	    for (phi_gsi = gsi_start_phis (e->dest);
6294*38fd1498Szrj 		 !gsi_end_p (phi_gsi); gsi_next (&phi_gsi))
6295*38fd1498Szrj 	      {
6296*38fd1498Szrj 		gphi *phi = phi_gsi.phi ();
6297*38fd1498Szrj 		for (unsigned j = 0; j < gimple_phi_num_args (phi); j++)
6298*38fd1498Szrj 		  {
6299*38fd1498Szrj 		    if (gimple_phi_arg_edge (phi, j) == e)
6300*38fd1498Szrj 		      {
6301*38fd1498Szrj 			tree imm = gimple_phi_arg_def (phi, j);
6302*38fd1498Szrj 			phi_definition *p = new phi_definition (phi_index, i,
6303*38fd1498Szrj 								imm);
6304*38fd1498Szrj 			phi_todo_list.safe_push (p);
6305*38fd1498Szrj 			break;
6306*38fd1498Szrj 		      }
6307*38fd1498Szrj 		  }
6308*38fd1498Szrj 		phi_index++;
6309*38fd1498Szrj 	      }
6310*38fd1498Szrj 	  }
6311*38fd1498Szrj 
6312*38fd1498Szrj 	/* Remove all edges for the current basic block.  */
6313*38fd1498Szrj 	for (int i = EDGE_COUNT (bb->succs) - 1; i >= 0; i--)
6314*38fd1498Szrj  	  {
6315*38fd1498Szrj 	    edge e = EDGE_SUCC (bb, i);
6316*38fd1498Szrj 	    remove_edge (e);
6317*38fd1498Szrj 	  }
6318*38fd1498Szrj 
6319*38fd1498Szrj 	/* Iterate all non-default labels.  */
6320*38fd1498Szrj 	for (unsigned i = 1; i < labels; i++)
6321*38fd1498Szrj 	  {
6322*38fd1498Szrj 	    tree label = gimple_switch_label (s, i);
6323*38fd1498Szrj 	    tree low = CASE_LOW (label);
6324*38fd1498Szrj 	    tree high = CASE_HIGH (label);
6325*38fd1498Szrj 
6326*38fd1498Szrj 	    if (!useless_type_conversion_p (TREE_TYPE (low), index_type))
6327*38fd1498Szrj 	      low = fold_convert (index_type, low);
6328*38fd1498Szrj 
6329*38fd1498Szrj 	    gimple_stmt_iterator cond_gsi = gsi_last_bb (cur_bb);
6330*38fd1498Szrj 	    gimple *c = NULL;
6331*38fd1498Szrj 	    if (high)
6332*38fd1498Szrj 	      {
6333*38fd1498Szrj 		tree tmp1 = make_temp_ssa_name (boolean_type_node, NULL,
6334*38fd1498Szrj 						"switch_cond_op1");
6335*38fd1498Szrj 
6336*38fd1498Szrj 		gimple *assign1 = gimple_build_assign (tmp1, LE_EXPR, low,
6337*38fd1498Szrj 						      index);
6338*38fd1498Szrj 
6339*38fd1498Szrj 		tree tmp2 = make_temp_ssa_name (boolean_type_node, NULL,
6340*38fd1498Szrj 						"switch_cond_op2");
6341*38fd1498Szrj 
6342*38fd1498Szrj 		if (!useless_type_conversion_p (TREE_TYPE (high), index_type))
6343*38fd1498Szrj 		  high = fold_convert (index_type, high);
6344*38fd1498Szrj 		gimple *assign2 = gimple_build_assign (tmp2, LE_EXPR, index,
6345*38fd1498Szrj 						      high);
6346*38fd1498Szrj 
6347*38fd1498Szrj 		tree tmp3 = make_temp_ssa_name (boolean_type_node, NULL,
6348*38fd1498Szrj 						"switch_cond_and");
6349*38fd1498Szrj 		gimple *assign3 = gimple_build_assign (tmp3, BIT_AND_EXPR, tmp1,
6350*38fd1498Szrj 						      tmp2);
6351*38fd1498Szrj 
6352*38fd1498Szrj 		gsi_insert_before (&cond_gsi, assign1, GSI_SAME_STMT);
6353*38fd1498Szrj 		gsi_insert_before (&cond_gsi, assign2, GSI_SAME_STMT);
6354*38fd1498Szrj 		gsi_insert_before (&cond_gsi, assign3, GSI_SAME_STMT);
6355*38fd1498Szrj 
6356*38fd1498Szrj 		tree b = constant_boolean_node (false, boolean_type_node);
6357*38fd1498Szrj 		c = gimple_build_cond (NE_EXPR, tmp3, b, NULL, NULL);
6358*38fd1498Szrj 	      }
6359*38fd1498Szrj 	    else
6360*38fd1498Szrj 	      c = gimple_build_cond (EQ_EXPR, index, low, NULL, NULL);
6361*38fd1498Szrj 
6362*38fd1498Szrj 	    gimple_set_location (c, gimple_location (stmt));
6363*38fd1498Szrj 
6364*38fd1498Szrj 	    gsi_insert_before (&cond_gsi, c, GSI_SAME_STMT);
6365*38fd1498Szrj 
6366*38fd1498Szrj 	    basic_block label_bb
6367*38fd1498Szrj 	      = label_to_block_fn (func, CASE_LABEL (label));
6368*38fd1498Szrj 	    edge new_edge = make_edge (cur_bb, label_bb, EDGE_TRUE_VALUE);
6369*38fd1498Szrj 	    profile_probability prob_sum = sum_slice <profile_probability>
6370*38fd1498Szrj 		 (edge_probabilities, i, labels, profile_probability::never ())
6371*38fd1498Szrj 		  + edge_probabilities[0];
6372*38fd1498Szrj 
6373*38fd1498Szrj 	    if (prob_sum.initialized_p ())
6374*38fd1498Szrj 	      new_edge->probability = edge_probabilities[i] / prob_sum;
6375*38fd1498Szrj 
6376*38fd1498Szrj 	    new_edges.safe_push (new_edge);
6377*38fd1498Szrj 
6378*38fd1498Szrj 	    if (i < labels - 1)
6379*38fd1498Szrj 	      {
6380*38fd1498Szrj 		/* Prepare another basic block that will contain
6381*38fd1498Szrj 		   next condition.  */
6382*38fd1498Szrj 		basic_block next_bb = create_empty_bb (cur_bb);
6383*38fd1498Szrj 		if (current_loops)
6384*38fd1498Szrj 		  {
6385*38fd1498Szrj 		    add_bb_to_loop (next_bb, cur_bb->loop_father);
6386*38fd1498Szrj 		    loops_state_set (LOOPS_NEED_FIXUP);
6387*38fd1498Szrj 		  }
6388*38fd1498Szrj 
6389*38fd1498Szrj 		edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
6390*38fd1498Szrj 		next_edge->probability = new_edge->probability.invert ();
6391*38fd1498Szrj 		next_bb->count = next_edge->count ();
6392*38fd1498Szrj 		cur_bb = next_bb;
6393*38fd1498Szrj 	      }
6394*38fd1498Szrj 	    else /* Link last IF statement and default label
6395*38fd1498Szrj 		    of the switch.  */
6396*38fd1498Szrj 	      {
6397*38fd1498Szrj 		edge e = make_edge (cur_bb, default_label_bb, EDGE_FALSE_VALUE);
6398*38fd1498Szrj 		e->probability = new_edge->probability.invert ();
6399*38fd1498Szrj 		new_edges.safe_insert (0, e);
6400*38fd1498Szrj 	      }
6401*38fd1498Szrj 	  }
6402*38fd1498Szrj 
6403*38fd1498Szrj 	  /* Restore original PHI immediate value.  */
6404*38fd1498Szrj 	  for (unsigned i = 0; i < phi_todo_list.length (); i++)
6405*38fd1498Szrj 	    {
6406*38fd1498Szrj 	      phi_definition *phi_def = phi_todo_list[i];
6407*38fd1498Szrj 	      edge new_edge = new_edges[phi_def->label_index];
6408*38fd1498Szrj 
6409*38fd1498Szrj 	      gphi_iterator it = gsi_start_phis (new_edge->dest);
6410*38fd1498Szrj 	      for (unsigned i = 0; i < phi_def->phi_index; i++)
6411*38fd1498Szrj 		gsi_next (&it);
6412*38fd1498Szrj 
6413*38fd1498Szrj 	      gphi *phi = it.phi ();
6414*38fd1498Szrj 	      add_phi_arg (phi, phi_def->phi_value, new_edge, UNKNOWN_LOCATION);
6415*38fd1498Szrj 	      delete phi_def;
6416*38fd1498Szrj 	    }
6417*38fd1498Szrj 
6418*38fd1498Szrj 	/* Remove the original GIMPLE switch statement.  */
6419*38fd1498Szrj 	gsi_remove (&gsi, true);
6420*38fd1498Szrj       }
6421*38fd1498Szrj   }
6422*38fd1498Szrj 
6423*38fd1498Szrj   if (dump_file)
6424*38fd1498Szrj     dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS);
6425*38fd1498Szrj 
6426*38fd1498Szrj   return modified_cfg;
6427*38fd1498Szrj }
6428*38fd1498Szrj 
6429*38fd1498Szrj /* Expand builtins that can't be handled by HSA back-end.  */
6430*38fd1498Szrj 
6431*38fd1498Szrj static void
expand_builtins()6432*38fd1498Szrj expand_builtins ()
6433*38fd1498Szrj {
6434*38fd1498Szrj   function *func = DECL_STRUCT_FUNCTION (current_function_decl);
6435*38fd1498Szrj   basic_block bb;
6436*38fd1498Szrj 
6437*38fd1498Szrj   FOR_EACH_BB_FN (bb, func)
6438*38fd1498Szrj   {
6439*38fd1498Szrj     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
6440*38fd1498Szrj 	 gsi_next (&gsi))
6441*38fd1498Szrj       {
6442*38fd1498Szrj 	gimple *stmt = gsi_stmt (gsi);
6443*38fd1498Szrj 
6444*38fd1498Szrj 	if (gimple_code (stmt) != GIMPLE_CALL)
6445*38fd1498Szrj 	  continue;
6446*38fd1498Szrj 
6447*38fd1498Szrj 	gcall *call = as_a <gcall *> (stmt);
6448*38fd1498Szrj 
6449*38fd1498Szrj 	if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL))
6450*38fd1498Szrj 	  continue;
6451*38fd1498Szrj 
6452*38fd1498Szrj 	tree fndecl = gimple_call_fndecl (stmt);
6453*38fd1498Szrj 	enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
6454*38fd1498Szrj 	switch (fn)
6455*38fd1498Szrj 	  {
6456*38fd1498Szrj 	  case BUILT_IN_CEXPF:
6457*38fd1498Szrj 	  case BUILT_IN_CEXPIF:
6458*38fd1498Szrj 	  case BUILT_IN_CEXPI:
6459*38fd1498Szrj 	    {
6460*38fd1498Szrj 	      /* Similar to builtins.c (expand_builtin_cexpi), the builtin
6461*38fd1498Szrj 		 can be transformed to: cexp(I * z) = ccos(z) + I * csin(z).  */
6462*38fd1498Szrj 	      tree lhs = gimple_call_lhs (stmt);
6463*38fd1498Szrj 	      tree rhs = gimple_call_arg (stmt, 0);
6464*38fd1498Szrj 	      tree rhs_type = TREE_TYPE (rhs);
6465*38fd1498Szrj 	      bool float_type_p = rhs_type == float_type_node;
6466*38fd1498Szrj 	      tree real_part = make_temp_ssa_name (rhs_type, NULL,
6467*38fd1498Szrj 						   "cexp_real_part");
6468*38fd1498Szrj 	      tree imag_part = make_temp_ssa_name (rhs_type, NULL,
6469*38fd1498Szrj 						   "cexp_imag_part");
6470*38fd1498Szrj 
6471*38fd1498Szrj 	      tree cos_fndecl
6472*38fd1498Szrj 		= mathfn_built_in (rhs_type, fn == float_type_p
6473*38fd1498Szrj 				   ? BUILT_IN_COSF : BUILT_IN_COS);
6474*38fd1498Szrj 	      gcall *cos = gimple_build_call (cos_fndecl, 1, rhs);
6475*38fd1498Szrj 	      gimple_call_set_lhs (cos, real_part);
6476*38fd1498Szrj 	      gsi_insert_before (&gsi, cos, GSI_SAME_STMT);
6477*38fd1498Szrj 
6478*38fd1498Szrj 	      tree sin_fndecl
6479*38fd1498Szrj 		= mathfn_built_in (rhs_type, fn == float_type_p
6480*38fd1498Szrj 				   ? BUILT_IN_SINF : BUILT_IN_SIN);
6481*38fd1498Szrj 	      gcall *sin = gimple_build_call (sin_fndecl, 1, rhs);
6482*38fd1498Szrj 	      gimple_call_set_lhs (sin, imag_part);
6483*38fd1498Szrj 	      gsi_insert_before (&gsi, sin, GSI_SAME_STMT);
6484*38fd1498Szrj 
6485*38fd1498Szrj 
6486*38fd1498Szrj 	      gassign *assign = gimple_build_assign (lhs, COMPLEX_EXPR,
6487*38fd1498Szrj 						     real_part, imag_part);
6488*38fd1498Szrj 	      gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
6489*38fd1498Szrj 	      gsi_remove (&gsi, true);
6490*38fd1498Szrj 
6491*38fd1498Szrj 	      break;
6492*38fd1498Szrj 	    }
6493*38fd1498Szrj 	  default:
6494*38fd1498Szrj 	    break;
6495*38fd1498Szrj 	  }
6496*38fd1498Szrj       }
6497*38fd1498Szrj   }
6498*38fd1498Szrj }
6499*38fd1498Szrj 
6500*38fd1498Szrj /* Emit HSA module variables that are global for the entire module.  */
6501*38fd1498Szrj 
6502*38fd1498Szrj static void
emit_hsa_module_variables(void)6503*38fd1498Szrj emit_hsa_module_variables (void)
6504*38fd1498Szrj {
6505*38fd1498Szrj   hsa_num_threads = new hsa_symbol (BRIG_TYPE_U32, BRIG_SEGMENT_PRIVATE,
6506*38fd1498Szrj 				    BRIG_LINKAGE_MODULE, true);
6507*38fd1498Szrj 
6508*38fd1498Szrj   hsa_num_threads->m_name = "hsa_num_threads";
6509*38fd1498Szrj 
6510*38fd1498Szrj   hsa_brig_emit_omp_symbols ();
6511*38fd1498Szrj }
6512*38fd1498Szrj 
6513*38fd1498Szrj /* Generate HSAIL representation of the current function and write into a
6514*38fd1498Szrj    special section of the output file.  If KERNEL is set, the function will be
6515*38fd1498Szrj    considered an HSA kernel callable from the host, otherwise it will be
6516*38fd1498Szrj    compiled as an HSA function callable from other HSA code.  */
6517*38fd1498Szrj 
6518*38fd1498Szrj static void
generate_hsa(bool kernel)6519*38fd1498Szrj generate_hsa (bool kernel)
6520*38fd1498Szrj {
6521*38fd1498Szrj   hsa_init_data_for_cfun ();
6522*38fd1498Szrj 
6523*38fd1498Szrj   if (hsa_num_threads == NULL)
6524*38fd1498Szrj     emit_hsa_module_variables ();
6525*38fd1498Szrj 
6526*38fd1498Szrj   bool modified_cfg = convert_switch_statements ();
6527*38fd1498Szrj   /* Initialize hsa_cfun.  */
6528*38fd1498Szrj   hsa_cfun = new hsa_function_representation (cfun->decl, kernel,
6529*38fd1498Szrj 					      SSANAMES (cfun)->length (),
6530*38fd1498Szrj 					      modified_cfg);
6531*38fd1498Szrj   hsa_cfun->init_extra_bbs ();
6532*38fd1498Szrj 
6533*38fd1498Szrj   if (flag_tm)
6534*38fd1498Szrj     {
6535*38fd1498Szrj       HSA_SORRY_AT (UNKNOWN_LOCATION,
6536*38fd1498Szrj 		    "support for HSA does not implement transactional memory");
6537*38fd1498Szrj       goto fail;
6538*38fd1498Szrj     }
6539*38fd1498Szrj 
6540*38fd1498Szrj   verify_function_arguments (cfun->decl);
6541*38fd1498Szrj   if (hsa_seen_error ())
6542*38fd1498Szrj     goto fail;
6543*38fd1498Szrj 
6544*38fd1498Szrj   hsa_cfun->m_name = get_brig_function_name (cfun->decl);
6545*38fd1498Szrj 
6546*38fd1498Szrj   gen_function_def_parameters ();
6547*38fd1498Szrj   if (hsa_seen_error ())
6548*38fd1498Szrj     goto fail;
6549*38fd1498Szrj 
6550*38fd1498Szrj   init_prologue ();
6551*38fd1498Szrj 
6552*38fd1498Szrj   gen_body_from_gimple ();
6553*38fd1498Szrj   if (hsa_seen_error ())
6554*38fd1498Szrj     goto fail;
6555*38fd1498Szrj 
6556*38fd1498Szrj   if (hsa_cfun->m_kernel_dispatch_count)
6557*38fd1498Szrj     init_hsa_num_threads ();
6558*38fd1498Szrj 
6559*38fd1498Szrj   if (hsa_cfun->m_kern_p)
6560*38fd1498Szrj     {
6561*38fd1498Szrj       hsa_function_summary *s
6562*38fd1498Szrj 	= hsa_summaries->get (cgraph_node::get (hsa_cfun->m_decl));
6563*38fd1498Szrj       hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name,
6564*38fd1498Szrj 				 hsa_cfun->m_maximum_omp_data_size,
6565*38fd1498Szrj 				 s->m_gridified_kernel_p);
6566*38fd1498Szrj     }
6567*38fd1498Szrj 
6568*38fd1498Szrj   if (flag_checking)
6569*38fd1498Szrj     {
6570*38fd1498Szrj       for (unsigned i = 0; i < hsa_cfun->m_ssa_map.length (); i++)
6571*38fd1498Szrj 	if (hsa_cfun->m_ssa_map[i])
6572*38fd1498Szrj 	  hsa_cfun->m_ssa_map[i]->verify_ssa ();
6573*38fd1498Szrj 
6574*38fd1498Szrj       basic_block bb;
6575*38fd1498Szrj       FOR_EACH_BB_FN (bb, cfun)
6576*38fd1498Szrj 	{
6577*38fd1498Szrj 	  hsa_bb *hbb = hsa_bb_for_bb (bb);
6578*38fd1498Szrj 
6579*38fd1498Szrj 	  for (hsa_insn_basic *insn = hbb->m_first_insn; insn;
6580*38fd1498Szrj 	       insn = insn->m_next)
6581*38fd1498Szrj 	    insn->verify ();
6582*38fd1498Szrj 	}
6583*38fd1498Szrj     }
6584*38fd1498Szrj 
6585*38fd1498Szrj   hsa_regalloc ();
6586*38fd1498Szrj   hsa_brig_emit_function ();
6587*38fd1498Szrj 
6588*38fd1498Szrj  fail:
6589*38fd1498Szrj   hsa_deinit_data_for_cfun ();
6590*38fd1498Szrj }
6591*38fd1498Szrj 
6592*38fd1498Szrj namespace {
6593*38fd1498Szrj 
6594*38fd1498Szrj const pass_data pass_data_gen_hsail =
6595*38fd1498Szrj {
6596*38fd1498Szrj   GIMPLE_PASS,
6597*38fd1498Szrj   "hsagen",	 			/* name */
6598*38fd1498Szrj   OPTGROUP_OMP,				/* optinfo_flags */
6599*38fd1498Szrj   TV_NONE,				/* tv_id */
6600*38fd1498Szrj   PROP_cfg | PROP_ssa,			/* properties_required */
6601*38fd1498Szrj   0,					/* properties_provided */
6602*38fd1498Szrj   0,					/* properties_destroyed */
6603*38fd1498Szrj   0,					/* todo_flags_start */
6604*38fd1498Szrj   0					/* todo_flags_finish */
6605*38fd1498Szrj };
6606*38fd1498Szrj 
6607*38fd1498Szrj class pass_gen_hsail : public gimple_opt_pass
6608*38fd1498Szrj {
6609*38fd1498Szrj public:
pass_gen_hsail(gcc::context * ctxt)6610*38fd1498Szrj   pass_gen_hsail (gcc::context *ctxt)
6611*38fd1498Szrj     : gimple_opt_pass(pass_data_gen_hsail, ctxt)
6612*38fd1498Szrj   {}
6613*38fd1498Szrj 
6614*38fd1498Szrj   /* opt_pass methods: */
6615*38fd1498Szrj   bool gate (function *);
6616*38fd1498Szrj   unsigned int execute (function *);
6617*38fd1498Szrj 
6618*38fd1498Szrj }; // class pass_gen_hsail
6619*38fd1498Szrj 
6620*38fd1498Szrj /* Determine whether or not to run generation of HSAIL.  */
6621*38fd1498Szrj 
6622*38fd1498Szrj bool
gate(function * f)6623*38fd1498Szrj pass_gen_hsail::gate (function *f)
6624*38fd1498Szrj {
6625*38fd1498Szrj   return hsa_gen_requested_p ()
6626*38fd1498Szrj     && hsa_gpu_implementation_p (f->decl);
6627*38fd1498Szrj }
6628*38fd1498Szrj 
6629*38fd1498Szrj unsigned int
execute(function *)6630*38fd1498Szrj pass_gen_hsail::execute (function *)
6631*38fd1498Szrj {
6632*38fd1498Szrj   hsa_function_summary *s
6633*38fd1498Szrj     = hsa_summaries->get (cgraph_node::get_create (current_function_decl));
6634*38fd1498Szrj 
6635*38fd1498Szrj   expand_builtins ();
6636*38fd1498Szrj   generate_hsa (s->m_kind == HSA_KERNEL);
6637*38fd1498Szrj   TREE_ASM_WRITTEN (current_function_decl) = 1;
6638*38fd1498Szrj   return TODO_discard_function;
6639*38fd1498Szrj }
6640*38fd1498Szrj 
6641*38fd1498Szrj } // anon namespace
6642*38fd1498Szrj 
6643*38fd1498Szrj /* Create the instance of hsa gen pass.  */
6644*38fd1498Szrj 
6645*38fd1498Szrj gimple_opt_pass *
make_pass_gen_hsail(gcc::context * ctxt)6646*38fd1498Szrj make_pass_gen_hsail (gcc::context *ctxt)
6647*38fd1498Szrj {
6648*38fd1498Szrj   return new pass_gen_hsail (ctxt);
6649*38fd1498Szrj }
6650