1*38fd1498Szrj /* HSAIL IL Register allocation and out-of-SSA.
2*38fd1498Szrj    Copyright (C) 2013-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Michael Matz <matz@suse.de>
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj 
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify
8*38fd1498Szrj it under the terms of the GNU General Public License as published by
9*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
10*38fd1498Szrj any later version.
11*38fd1498Szrj 
12*38fd1498Szrj GCC is distributed in the hope that it will be useful,
13*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
14*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*38fd1498Szrj GNU General Public License for more details.
16*38fd1498Szrj 
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
20*38fd1498Szrj 
21*38fd1498Szrj #include "config.h"
22*38fd1498Szrj #include "system.h"
23*38fd1498Szrj #include "coretypes.h"
24*38fd1498Szrj #include "tm.h"
25*38fd1498Szrj #include "is-a.h"
26*38fd1498Szrj #include "vec.h"
27*38fd1498Szrj #include "tree.h"
28*38fd1498Szrj #include "dominance.h"
29*38fd1498Szrj #include "basic-block.h"
30*38fd1498Szrj #include "cfg.h"
31*38fd1498Szrj #include "cfganal.h"
32*38fd1498Szrj #include "function.h"
33*38fd1498Szrj #include "bitmap.h"
34*38fd1498Szrj #include "dumpfile.h"
35*38fd1498Szrj #include "cgraph.h"
36*38fd1498Szrj #include "print-tree.h"
37*38fd1498Szrj #include "cfghooks.h"
38*38fd1498Szrj #include "symbol-summary.h"
39*38fd1498Szrj #include "hsa-common.h"
40*38fd1498Szrj 
41*38fd1498Szrj 
42*38fd1498Szrj /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa.  */
43*38fd1498Szrj 
44*38fd1498Szrj static void
naive_process_phi(hsa_insn_phi * phi,const vec<edge> & predecessors)45*38fd1498Szrj naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
46*38fd1498Szrj {
47*38fd1498Szrj   unsigned count = phi->operand_count ();
48*38fd1498Szrj   for (unsigned i = 0; i < count; i++)
49*38fd1498Szrj     {
50*38fd1498Szrj       gcc_checking_assert (phi->get_op (i));
51*38fd1498Szrj       hsa_op_base *op = phi->get_op (i);
52*38fd1498Szrj       hsa_bb *hbb;
53*38fd1498Szrj       edge e;
54*38fd1498Szrj 
55*38fd1498Szrj       if (!op)
56*38fd1498Szrj 	break;
57*38fd1498Szrj 
58*38fd1498Szrj       e = predecessors[i];
59*38fd1498Szrj       if (single_succ_p (e->src))
60*38fd1498Szrj 	hbb = hsa_bb_for_bb (e->src);
61*38fd1498Szrj       else
62*38fd1498Szrj 	{
63*38fd1498Szrj 	  basic_block old_dest = e->dest;
64*38fd1498Szrj 	  hbb = hsa_init_new_bb (split_edge (e));
65*38fd1498Szrj 
66*38fd1498Szrj 	  /* If switch insn used this edge, fix jump table.  */
67*38fd1498Szrj 	  hsa_bb *source = hsa_bb_for_bb (e->src);
68*38fd1498Szrj 	  hsa_insn_sbr *sbr;
69*38fd1498Szrj 	  if (source->m_last_insn
70*38fd1498Szrj 	      && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
71*38fd1498Szrj 	    sbr->replace_all_labels (old_dest, hbb->m_bb);
72*38fd1498Szrj 	}
73*38fd1498Szrj 
74*38fd1498Szrj       hsa_build_append_simple_mov (phi->m_dest, op, hbb);
75*38fd1498Szrj     }
76*38fd1498Szrj }
77*38fd1498Szrj 
78*38fd1498Szrj /* Naive out-of SSA.  */
79*38fd1498Szrj 
80*38fd1498Szrj static void
naive_outof_ssa(void)81*38fd1498Szrj naive_outof_ssa (void)
82*38fd1498Szrj {
83*38fd1498Szrj   basic_block bb;
84*38fd1498Szrj 
85*38fd1498Szrj   hsa_cfun->m_in_ssa = false;
86*38fd1498Szrj 
87*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
88*38fd1498Szrj   {
89*38fd1498Szrj     hsa_bb *hbb = hsa_bb_for_bb (bb);
90*38fd1498Szrj     hsa_insn_phi *phi;
91*38fd1498Szrj 
92*38fd1498Szrj     /* naive_process_phi can call split_edge on an incoming edge which order if
93*38fd1498Szrj        the incoming edges to the basic block and thus make it inconsistent with
94*38fd1498Szrj        the ordering of PHI arguments, so we collect them in advance.  */
95*38fd1498Szrj     auto_vec<edge, 8> predecessors;
96*38fd1498Szrj     unsigned pred_count = EDGE_COUNT (bb->preds);
97*38fd1498Szrj     for (unsigned i = 0; i < pred_count; i++)
98*38fd1498Szrj       predecessors.safe_push (EDGE_PRED (bb, i));
99*38fd1498Szrj 
100*38fd1498Szrj     for (phi = hbb->m_first_phi;
101*38fd1498Szrj 	 phi;
102*38fd1498Szrj 	 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
103*38fd1498Szrj       naive_process_phi (phi, predecessors);
104*38fd1498Szrj 
105*38fd1498Szrj     /* Zap PHI nodes, they will be deallocated when everything else will.  */
106*38fd1498Szrj     hbb->m_first_phi = NULL;
107*38fd1498Szrj     hbb->m_last_phi = NULL;
108*38fd1498Szrj   }
109*38fd1498Szrj }
110*38fd1498Szrj 
111*38fd1498Szrj /* Return register class number for the given HSA TYPE.  0 means the 'c' one
112*38fd1498Szrj    bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
113*38fd1498Szrj    and 3 for 'q' 128 bit class.  */
114*38fd1498Szrj 
115*38fd1498Szrj static int
m_reg_class_for_type(BrigType16_t type)116*38fd1498Szrj m_reg_class_for_type (BrigType16_t type)
117*38fd1498Szrj {
118*38fd1498Szrj   switch (type)
119*38fd1498Szrj     {
120*38fd1498Szrj     case BRIG_TYPE_B1:
121*38fd1498Szrj       return 0;
122*38fd1498Szrj 
123*38fd1498Szrj     case BRIG_TYPE_U8:
124*38fd1498Szrj     case BRIG_TYPE_U16:
125*38fd1498Szrj     case BRIG_TYPE_U32:
126*38fd1498Szrj     case BRIG_TYPE_S8:
127*38fd1498Szrj     case BRIG_TYPE_S16:
128*38fd1498Szrj     case BRIG_TYPE_S32:
129*38fd1498Szrj     case BRIG_TYPE_F16:
130*38fd1498Szrj     case BRIG_TYPE_F32:
131*38fd1498Szrj     case BRIG_TYPE_B8:
132*38fd1498Szrj     case BRIG_TYPE_B16:
133*38fd1498Szrj     case BRIG_TYPE_B32:
134*38fd1498Szrj     case BRIG_TYPE_U8X4:
135*38fd1498Szrj     case BRIG_TYPE_S8X4:
136*38fd1498Szrj     case BRIG_TYPE_U16X2:
137*38fd1498Szrj     case BRIG_TYPE_S16X2:
138*38fd1498Szrj     case BRIG_TYPE_F16X2:
139*38fd1498Szrj       return 1;
140*38fd1498Szrj 
141*38fd1498Szrj     case BRIG_TYPE_U64:
142*38fd1498Szrj     case BRIG_TYPE_S64:
143*38fd1498Szrj     case BRIG_TYPE_F64:
144*38fd1498Szrj     case BRIG_TYPE_B64:
145*38fd1498Szrj     case BRIG_TYPE_U8X8:
146*38fd1498Szrj     case BRIG_TYPE_S8X8:
147*38fd1498Szrj     case BRIG_TYPE_U16X4:
148*38fd1498Szrj     case BRIG_TYPE_S16X4:
149*38fd1498Szrj     case BRIG_TYPE_F16X4:
150*38fd1498Szrj     case BRIG_TYPE_U32X2:
151*38fd1498Szrj     case BRIG_TYPE_S32X2:
152*38fd1498Szrj     case BRIG_TYPE_F32X2:
153*38fd1498Szrj       return 2;
154*38fd1498Szrj 
155*38fd1498Szrj     case BRIG_TYPE_B128:
156*38fd1498Szrj     case BRIG_TYPE_U8X16:
157*38fd1498Szrj     case BRIG_TYPE_S8X16:
158*38fd1498Szrj     case BRIG_TYPE_U16X8:
159*38fd1498Szrj     case BRIG_TYPE_S16X8:
160*38fd1498Szrj     case BRIG_TYPE_F16X8:
161*38fd1498Szrj     case BRIG_TYPE_U32X4:
162*38fd1498Szrj     case BRIG_TYPE_U64X2:
163*38fd1498Szrj     case BRIG_TYPE_S32X4:
164*38fd1498Szrj     case BRIG_TYPE_S64X2:
165*38fd1498Szrj     case BRIG_TYPE_F32X4:
166*38fd1498Szrj     case BRIG_TYPE_F64X2:
167*38fd1498Szrj       return 3;
168*38fd1498Szrj 
169*38fd1498Szrj     default:
170*38fd1498Szrj       gcc_unreachable ();
171*38fd1498Szrj     }
172*38fd1498Szrj }
173*38fd1498Szrj 
174*38fd1498Szrj /* If the Ith operands of INSN is or contains a register (in an address),
175*38fd1498Szrj    return the address of that register operand.  If not return NULL.  */
176*38fd1498Szrj 
177*38fd1498Szrj static hsa_op_reg **
insn_reg_addr(hsa_insn_basic * insn,int i)178*38fd1498Szrj insn_reg_addr (hsa_insn_basic *insn, int i)
179*38fd1498Szrj {
180*38fd1498Szrj   hsa_op_base *op = insn->get_op (i);
181*38fd1498Szrj   if (!op)
182*38fd1498Szrj     return NULL;
183*38fd1498Szrj   hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
184*38fd1498Szrj   if (reg)
185*38fd1498Szrj     return (hsa_op_reg **) insn->get_op_addr (i);
186*38fd1498Szrj   hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
187*38fd1498Szrj   if (addr && addr->m_reg)
188*38fd1498Szrj     return &addr->m_reg;
189*38fd1498Szrj   return NULL;
190*38fd1498Szrj }
191*38fd1498Szrj 
192*38fd1498Szrj struct m_reg_class_desc
193*38fd1498Szrj {
194*38fd1498Szrj   unsigned next_avail, max_num;
195*38fd1498Szrj   unsigned used_num, max_used;
196*38fd1498Szrj   uint64_t used[2];
197*38fd1498Szrj   char cl_char;
198*38fd1498Szrj };
199*38fd1498Szrj 
200*38fd1498Szrj /* Rewrite the instructions in BB to observe spilled live ranges.
201*38fd1498Szrj    CLASSES is the global register class state.  */
202*38fd1498Szrj 
203*38fd1498Szrj static void
rewrite_code_bb(basic_block bb,struct m_reg_class_desc * classes)204*38fd1498Szrj rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
205*38fd1498Szrj {
206*38fd1498Szrj   hsa_bb *hbb = hsa_bb_for_bb (bb);
207*38fd1498Szrj   hsa_insn_basic *insn, *next_insn;
208*38fd1498Szrj 
209*38fd1498Szrj   for (insn = hbb->m_first_insn; insn; insn = next_insn)
210*38fd1498Szrj     {
211*38fd1498Szrj       next_insn = insn->m_next;
212*38fd1498Szrj       unsigned count = insn->operand_count ();
213*38fd1498Szrj       for (unsigned i = 0; i < count; i++)
214*38fd1498Szrj 	{
215*38fd1498Szrj 	  gcc_checking_assert (insn->get_op (i));
216*38fd1498Szrj 	  hsa_op_reg **regaddr = insn_reg_addr (insn, i);
217*38fd1498Szrj 
218*38fd1498Szrj 	  if (regaddr)
219*38fd1498Szrj 	    {
220*38fd1498Szrj 	      hsa_op_reg *reg = *regaddr;
221*38fd1498Szrj 	      if (reg->m_reg_class)
222*38fd1498Szrj 		continue;
223*38fd1498Szrj 	      gcc_assert (reg->m_spill_sym);
224*38fd1498Szrj 
225*38fd1498Szrj 	      int cl = m_reg_class_for_type (reg->m_type);
226*38fd1498Szrj 	      hsa_op_reg *tmp, *tmp2;
227*38fd1498Szrj 	      if (insn->op_output_p (i))
228*38fd1498Szrj 		tmp = hsa_spill_out (insn, reg, &tmp2);
229*38fd1498Szrj 	      else
230*38fd1498Szrj 		tmp = hsa_spill_in (insn, reg, &tmp2);
231*38fd1498Szrj 
232*38fd1498Szrj 	      *regaddr = tmp;
233*38fd1498Szrj 
234*38fd1498Szrj 	      tmp->m_reg_class = classes[cl].cl_char;
235*38fd1498Szrj 	      tmp->m_hard_num = (char) (classes[cl].max_num + i);
236*38fd1498Szrj 	      if (tmp2)
237*38fd1498Szrj 		{
238*38fd1498Szrj 		  gcc_assert (cl == 0);
239*38fd1498Szrj 		  tmp2->m_reg_class = classes[1].cl_char;
240*38fd1498Szrj 		  tmp2->m_hard_num = (char) (classes[1].max_num + i);
241*38fd1498Szrj 		}
242*38fd1498Szrj 	    }
243*38fd1498Szrj 	}
244*38fd1498Szrj     }
245*38fd1498Szrj }
246*38fd1498Szrj 
247*38fd1498Szrj /* Dump current function to dump file F, with info specific
248*38fd1498Szrj    to register allocation.  */
249*38fd1498Szrj 
250*38fd1498Szrj void
dump_hsa_cfun_regalloc(FILE * f)251*38fd1498Szrj dump_hsa_cfun_regalloc (FILE *f)
252*38fd1498Szrj {
253*38fd1498Szrj   basic_block bb;
254*38fd1498Szrj 
255*38fd1498Szrj   fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
256*38fd1498Szrj 
257*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
258*38fd1498Szrj   {
259*38fd1498Szrj     hsa_bb *hbb = (struct hsa_bb *) bb->aux;
260*38fd1498Szrj     bitmap_print (dump_file, hbb->m_livein, "m_livein  ", "\n");
261*38fd1498Szrj     dump_hsa_bb (f, hbb);
262*38fd1498Szrj     bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
263*38fd1498Szrj   }
264*38fd1498Szrj }
265*38fd1498Szrj 
266*38fd1498Szrj /* Given the global register allocation state CLASSES and a
267*38fd1498Szrj    register REG, try to give it a hardware register.  If successful,
268*38fd1498Szrj    store that hardreg in REG and return it, otherwise return -1.
269*38fd1498Szrj    Also changes CLASSES to accommodate for the allocated register.  */
270*38fd1498Szrj 
271*38fd1498Szrj static int
try_alloc_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)272*38fd1498Szrj try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
273*38fd1498Szrj {
274*38fd1498Szrj   int cl = m_reg_class_for_type (reg->m_type);
275*38fd1498Szrj   int ret = -1;
276*38fd1498Szrj   if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
277*38fd1498Szrj       >= 128 - 5)
278*38fd1498Szrj     return -1;
279*38fd1498Szrj   if (classes[cl].used_num < classes[cl].max_num)
280*38fd1498Szrj     {
281*38fd1498Szrj       unsigned int i;
282*38fd1498Szrj       classes[cl].used_num++;
283*38fd1498Szrj       if (classes[cl].used_num > classes[cl].max_used)
284*38fd1498Szrj 	classes[cl].max_used = classes[cl].used_num;
285*38fd1498Szrj       for (i = 0; i < classes[cl].used_num; i++)
286*38fd1498Szrj 	if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
287*38fd1498Szrj 	  break;
288*38fd1498Szrj       ret = i;
289*38fd1498Szrj       classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
290*38fd1498Szrj       reg->m_reg_class = classes[cl].cl_char;
291*38fd1498Szrj       reg->m_hard_num = i;
292*38fd1498Szrj     }
293*38fd1498Szrj   return ret;
294*38fd1498Szrj }
295*38fd1498Szrj 
296*38fd1498Szrj /* Free up hardregs used by REG, into allocation state CLASSES.  */
297*38fd1498Szrj 
298*38fd1498Szrj static void
free_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)299*38fd1498Szrj free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
300*38fd1498Szrj {
301*38fd1498Szrj   int cl = m_reg_class_for_type (reg->m_type);
302*38fd1498Szrj   int ret = reg->m_hard_num;
303*38fd1498Szrj   gcc_assert (reg->m_reg_class == classes[cl].cl_char);
304*38fd1498Szrj   classes[cl].used_num--;
305*38fd1498Szrj   classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
306*38fd1498Szrj }
307*38fd1498Szrj 
308*38fd1498Szrj /* Note that the live range for REG ends at least at END.  */
309*38fd1498Szrj 
310*38fd1498Szrj static void
note_lr_end(hsa_op_reg * reg,int end)311*38fd1498Szrj note_lr_end (hsa_op_reg *reg, int end)
312*38fd1498Szrj {
313*38fd1498Szrj   if (reg->m_lr_end < end)
314*38fd1498Szrj     reg->m_lr_end = end;
315*38fd1498Szrj }
316*38fd1498Szrj 
317*38fd1498Szrj /* Note that the live range for REG starts at least at BEGIN.  */
318*38fd1498Szrj 
319*38fd1498Szrj static void
note_lr_begin(hsa_op_reg * reg,int begin)320*38fd1498Szrj note_lr_begin (hsa_op_reg *reg, int begin)
321*38fd1498Szrj {
322*38fd1498Szrj   if (reg->m_lr_begin > begin)
323*38fd1498Szrj     reg->m_lr_begin = begin;
324*38fd1498Szrj }
325*38fd1498Szrj 
326*38fd1498Szrj /* Given two registers A and B, return -1, 0 or 1 if A's live range
327*38fd1498Szrj    starts before, at or after B's live range.  */
328*38fd1498Szrj 
329*38fd1498Szrj static int
cmp_begin(const void * a,const void * b)330*38fd1498Szrj cmp_begin (const void *a, const void *b)
331*38fd1498Szrj {
332*38fd1498Szrj   const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
333*38fd1498Szrj   const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
334*38fd1498Szrj   int ret;
335*38fd1498Szrj   if (rega == regb)
336*38fd1498Szrj     return 0;
337*38fd1498Szrj   ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
338*38fd1498Szrj   if (ret)
339*38fd1498Szrj     return ret;
340*38fd1498Szrj   return ((*rega)->m_order - (*regb)->m_order);
341*38fd1498Szrj }
342*38fd1498Szrj 
343*38fd1498Szrj /* Given two registers REGA and REGB, return true if REGA's
344*38fd1498Szrj    live range ends after REGB's.  This results in a sorting order
345*38fd1498Szrj    with earlier end points at the end.  */
346*38fd1498Szrj 
347*38fd1498Szrj static bool
cmp_end(hsa_op_reg * const & rega,hsa_op_reg * const & regb)348*38fd1498Szrj cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
349*38fd1498Szrj {
350*38fd1498Szrj   int ret;
351*38fd1498Szrj   if (rega == regb)
352*38fd1498Szrj     return false;
353*38fd1498Szrj   ret = (regb)->m_lr_end - (rega)->m_lr_end;
354*38fd1498Szrj   if (ret)
355*38fd1498Szrj     return ret < 0;
356*38fd1498Szrj   return (((regb)->m_order - (rega)->m_order)) < 0;
357*38fd1498Szrj }
358*38fd1498Szrj 
359*38fd1498Szrj /* Expire all old intervals in ACTIVE (a per-regclass vector),
360*38fd1498Szrj    that is, those that end before the interval REG starts.  Give
361*38fd1498Szrj    back resources freed so into the state CLASSES.  */
362*38fd1498Szrj 
363*38fd1498Szrj static void
expire_old_intervals(hsa_op_reg * reg,vec<hsa_op_reg * > * active,struct m_reg_class_desc * classes)364*38fd1498Szrj expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
365*38fd1498Szrj 		      struct m_reg_class_desc *classes)
366*38fd1498Szrj {
367*38fd1498Szrj   for (int i = 0; i < 4; i++)
368*38fd1498Szrj     while (!active[i].is_empty ())
369*38fd1498Szrj       {
370*38fd1498Szrj 	hsa_op_reg *a = active[i].pop ();
371*38fd1498Szrj 	if (a->m_lr_end > reg->m_lr_begin)
372*38fd1498Szrj 	  {
373*38fd1498Szrj 	    active[i].quick_push (a);
374*38fd1498Szrj 	    break;
375*38fd1498Szrj 	  }
376*38fd1498Szrj 	free_reg (classes, a);
377*38fd1498Szrj       }
378*38fd1498Szrj }
379*38fd1498Szrj 
380*38fd1498Szrj /* The interval REG didn't get a hardreg.  Spill it or one of those
381*38fd1498Szrj    from ACTIVE (if the latter, then REG will become allocated to the
382*38fd1498Szrj    hardreg that formerly was used by it).  */
383*38fd1498Szrj 
384*38fd1498Szrj static void
spill_at_interval(hsa_op_reg * reg,vec<hsa_op_reg * > * active)385*38fd1498Szrj spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
386*38fd1498Szrj {
387*38fd1498Szrj   int cl = m_reg_class_for_type (reg->m_type);
388*38fd1498Szrj   gcc_assert (!active[cl].is_empty ());
389*38fd1498Szrj   hsa_op_reg *cand = active[cl][0];
390*38fd1498Szrj   if (cand->m_lr_end > reg->m_lr_end)
391*38fd1498Szrj     {
392*38fd1498Szrj       reg->m_reg_class = cand->m_reg_class;
393*38fd1498Szrj       reg->m_hard_num = cand->m_hard_num;
394*38fd1498Szrj       active[cl].ordered_remove (0);
395*38fd1498Szrj       unsigned place = active[cl].lower_bound (reg, cmp_end);
396*38fd1498Szrj       active[cl].quick_insert (place, reg);
397*38fd1498Szrj     }
398*38fd1498Szrj   else
399*38fd1498Szrj     cand = reg;
400*38fd1498Szrj 
401*38fd1498Szrj   gcc_assert (!cand->m_spill_sym);
402*38fd1498Szrj   BrigType16_t type = cand->m_type;
403*38fd1498Szrj   if (type == BRIG_TYPE_B1)
404*38fd1498Szrj     type = BRIG_TYPE_U8;
405*38fd1498Szrj   cand->m_reg_class = 0;
406*38fd1498Szrj   cand->m_spill_sym = hsa_get_spill_symbol (type);
407*38fd1498Szrj   cand->m_spill_sym->m_name_number = cand->m_order;
408*38fd1498Szrj }
409*38fd1498Szrj 
410*38fd1498Szrj /* Given the global register state CLASSES allocate all HSA virtual
411*38fd1498Szrj    registers either to hardregs or to a spill symbol.  */
412*38fd1498Szrj 
413*38fd1498Szrj static void
linear_scan_regalloc(struct m_reg_class_desc * classes)414*38fd1498Szrj linear_scan_regalloc (struct m_reg_class_desc *classes)
415*38fd1498Szrj {
416*38fd1498Szrj   /* Compute liveness.  */
417*38fd1498Szrj   bool changed;
418*38fd1498Szrj   int i, n;
419*38fd1498Szrj   int insn_order;
420*38fd1498Szrj   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
421*38fd1498Szrj   bitmap work = BITMAP_ALLOC (NULL);
422*38fd1498Szrj   vec<hsa_op_reg*> ind2reg = vNULL;
423*38fd1498Szrj   vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
424*38fd1498Szrj   hsa_insn_basic *m_last_insn;
425*38fd1498Szrj 
426*38fd1498Szrj   /* We will need the reverse post order for linearization,
427*38fd1498Szrj      and the post order for liveness analysis, which is the same
428*38fd1498Szrj      backward.  */
429*38fd1498Szrj   n = pre_and_rev_post_order_compute (NULL, bbs, true);
430*38fd1498Szrj   ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
431*38fd1498Szrj 
432*38fd1498Szrj   /* Give all instructions a linearized number, at the same time
433*38fd1498Szrj      build a mapping from register index to register.  */
434*38fd1498Szrj   insn_order = 1;
435*38fd1498Szrj   for (i = 0; i < n; i++)
436*38fd1498Szrj     {
437*38fd1498Szrj       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
438*38fd1498Szrj       hsa_bb *hbb = hsa_bb_for_bb (bb);
439*38fd1498Szrj       hsa_insn_basic *insn;
440*38fd1498Szrj       for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
441*38fd1498Szrj 	{
442*38fd1498Szrj 	  unsigned opi;
443*38fd1498Szrj 	  insn->m_number = insn_order++;
444*38fd1498Szrj 	  for (opi = 0; opi < insn->operand_count (); opi++)
445*38fd1498Szrj 	    {
446*38fd1498Szrj 	      gcc_checking_assert (insn->get_op (opi));
447*38fd1498Szrj 	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
448*38fd1498Szrj 	      if (regaddr)
449*38fd1498Szrj 		ind2reg[(*regaddr)->m_order] = *regaddr;
450*38fd1498Szrj 	    }
451*38fd1498Szrj 	}
452*38fd1498Szrj     }
453*38fd1498Szrj 
454*38fd1498Szrj   /* Initialize all live ranges to [after-end, 0).  */
455*38fd1498Szrj   for (i = 0; i < hsa_cfun->m_reg_count; i++)
456*38fd1498Szrj     if (ind2reg[i])
457*38fd1498Szrj       ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
458*38fd1498Szrj 
459*38fd1498Szrj   /* Classic liveness analysis, as long as something changes:
460*38fd1498Szrj        m_liveout is union (m_livein of successors)
461*38fd1498Szrj        m_livein is m_liveout minus defs plus uses.  */
462*38fd1498Szrj   do
463*38fd1498Szrj     {
464*38fd1498Szrj       changed = false;
465*38fd1498Szrj       for (i = n - 1; i >= 0; i--)
466*38fd1498Szrj 	{
467*38fd1498Szrj 	  edge e;
468*38fd1498Szrj 	  edge_iterator ei;
469*38fd1498Szrj 	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
470*38fd1498Szrj 	  hsa_bb *hbb = hsa_bb_for_bb (bb);
471*38fd1498Szrj 
472*38fd1498Szrj 	  /* Union of successors m_livein (or empty if none).  */
473*38fd1498Szrj 	  bool first = true;
474*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, bb->succs)
475*38fd1498Szrj 	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
476*38fd1498Szrj 	      {
477*38fd1498Szrj 		hsa_bb *succ = hsa_bb_for_bb (e->dest);
478*38fd1498Szrj 		if (first)
479*38fd1498Szrj 		  {
480*38fd1498Szrj 		    bitmap_copy (work, succ->m_livein);
481*38fd1498Szrj 		    first = false;
482*38fd1498Szrj 		  }
483*38fd1498Szrj 		else
484*38fd1498Szrj 		  bitmap_ior_into (work, succ->m_livein);
485*38fd1498Szrj 	      }
486*38fd1498Szrj 	  if (first)
487*38fd1498Szrj 	    bitmap_clear (work);
488*38fd1498Szrj 
489*38fd1498Szrj 	  bitmap_copy (hbb->m_liveout, work);
490*38fd1498Szrj 
491*38fd1498Szrj 	  /* Remove defs, include uses in a backward insn walk.  */
492*38fd1498Szrj 	  hsa_insn_basic *insn;
493*38fd1498Szrj 	  for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
494*38fd1498Szrj 	    {
495*38fd1498Szrj 	      unsigned opi;
496*38fd1498Szrj 	      unsigned ndefs = insn->input_count ();
497*38fd1498Szrj 	      for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
498*38fd1498Szrj 		{
499*38fd1498Szrj 		  gcc_checking_assert (insn->get_op (opi));
500*38fd1498Szrj 		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
501*38fd1498Szrj 		  if (regaddr)
502*38fd1498Szrj 		    bitmap_clear_bit (work, (*regaddr)->m_order);
503*38fd1498Szrj 		}
504*38fd1498Szrj 	      for (; opi < insn->operand_count (); opi++)
505*38fd1498Szrj 		{
506*38fd1498Szrj 		  gcc_checking_assert (insn->get_op (opi));
507*38fd1498Szrj 		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
508*38fd1498Szrj 		  if (regaddr)
509*38fd1498Szrj 		    bitmap_set_bit (work, (*regaddr)->m_order);
510*38fd1498Szrj 		}
511*38fd1498Szrj 	    }
512*38fd1498Szrj 
513*38fd1498Szrj 	  /* Note if that changed something.  */
514*38fd1498Szrj 	  if (bitmap_ior_into (hbb->m_livein, work))
515*38fd1498Szrj 	    changed = true;
516*38fd1498Szrj 	}
517*38fd1498Szrj     }
518*38fd1498Szrj   while (changed);
519*38fd1498Szrj 
520*38fd1498Szrj   /* Make one pass through all instructions in linear order,
521*38fd1498Szrj      noting and merging possible live range start and end points.  */
522*38fd1498Szrj   m_last_insn = NULL;
523*38fd1498Szrj   for (i = n - 1; i >= 0; i--)
524*38fd1498Szrj     {
525*38fd1498Szrj       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
526*38fd1498Szrj       hsa_bb *hbb = hsa_bb_for_bb (bb);
527*38fd1498Szrj       hsa_insn_basic *insn;
528*38fd1498Szrj       int after_end_number;
529*38fd1498Szrj       unsigned bit;
530*38fd1498Szrj       bitmap_iterator bi;
531*38fd1498Szrj 
532*38fd1498Szrj       if (m_last_insn)
533*38fd1498Szrj 	after_end_number = m_last_insn->m_number;
534*38fd1498Szrj       else
535*38fd1498Szrj 	after_end_number = insn_order;
536*38fd1498Szrj       /* Everything live-out in this BB has at least an end point
537*38fd1498Szrj 	 after us.  */
538*38fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
539*38fd1498Szrj 	note_lr_end (ind2reg[bit], after_end_number);
540*38fd1498Szrj 
541*38fd1498Szrj       for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
542*38fd1498Szrj 	{
543*38fd1498Szrj 	  unsigned opi;
544*38fd1498Szrj 	  unsigned ndefs = insn->input_count ();
545*38fd1498Szrj 	  for (opi = 0; opi < insn->operand_count (); opi++)
546*38fd1498Szrj 	    {
547*38fd1498Szrj 	      gcc_checking_assert (insn->get_op (opi));
548*38fd1498Szrj 	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
549*38fd1498Szrj 	      if (regaddr)
550*38fd1498Szrj 		{
551*38fd1498Szrj 		  hsa_op_reg *reg = *regaddr;
552*38fd1498Szrj 		  if (opi < ndefs)
553*38fd1498Szrj 		    note_lr_begin (reg, insn->m_number);
554*38fd1498Szrj 		  else
555*38fd1498Szrj 		    note_lr_end (reg, insn->m_number);
556*38fd1498Szrj 		}
557*38fd1498Szrj 	    }
558*38fd1498Szrj 	}
559*38fd1498Szrj 
560*38fd1498Szrj       /* Everything live-in in this BB has a start point before
561*38fd1498Szrj 	 our first insn.  */
562*38fd1498Szrj       int before_start_number;
563*38fd1498Szrj       if (hbb->m_first_insn)
564*38fd1498Szrj 	before_start_number = hbb->m_first_insn->m_number;
565*38fd1498Szrj       else
566*38fd1498Szrj 	before_start_number = after_end_number;
567*38fd1498Szrj       before_start_number--;
568*38fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
569*38fd1498Szrj 	note_lr_begin (ind2reg[bit], before_start_number);
570*38fd1498Szrj 
571*38fd1498Szrj       if (hbb->m_first_insn)
572*38fd1498Szrj 	m_last_insn = hbb->m_first_insn;
573*38fd1498Szrj     }
574*38fd1498Szrj 
575*38fd1498Szrj   for (i = 0; i < hsa_cfun->m_reg_count; i++)
576*38fd1498Szrj     if (ind2reg[i])
577*38fd1498Szrj       {
578*38fd1498Szrj 	/* All regs that have still their start at after all code actually
579*38fd1498Szrj 	   are defined at the start of the routine (prologue).  */
580*38fd1498Szrj 	if (ind2reg[i]->m_lr_begin == insn_order)
581*38fd1498Szrj 	  ind2reg[i]->m_lr_begin = 0;
582*38fd1498Szrj 	/* All regs that have no use but a def will have lr_end == 0,
583*38fd1498Szrj 	   they are actually live from def until after the insn they are
584*38fd1498Szrj 	   defined in.  */
585*38fd1498Szrj 	if (ind2reg[i]->m_lr_end == 0)
586*38fd1498Szrj 	  ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
587*38fd1498Szrj       }
588*38fd1498Szrj 
589*38fd1498Szrj   /* Sort all intervals by increasing start point.  */
590*38fd1498Szrj   gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
591*38fd1498Szrj 
592*38fd1498Szrj   if (flag_checking)
593*38fd1498Szrj     for (unsigned i = 0; i < ind2reg.length (); i++)
594*38fd1498Szrj       gcc_assert (ind2reg[i]);
595*38fd1498Szrj 
596*38fd1498Szrj   ind2reg.qsort (cmp_begin);
597*38fd1498Szrj   for (i = 0; i < 4; i++)
598*38fd1498Szrj     active[i].reserve_exact (hsa_cfun->m_reg_count);
599*38fd1498Szrj 
600*38fd1498Szrj   /* Now comes the linear scan allocation.  */
601*38fd1498Szrj   for (i = 0; i < hsa_cfun->m_reg_count; i++)
602*38fd1498Szrj     {
603*38fd1498Szrj       hsa_op_reg *reg = ind2reg[i];
604*38fd1498Szrj       if (!reg)
605*38fd1498Szrj 	continue;
606*38fd1498Szrj       expire_old_intervals (reg, active, classes);
607*38fd1498Szrj       int cl = m_reg_class_for_type (reg->m_type);
608*38fd1498Szrj       if (try_alloc_reg (classes, reg) >= 0)
609*38fd1498Szrj 	{
610*38fd1498Szrj 	  unsigned place = active[cl].lower_bound (reg, cmp_end);
611*38fd1498Szrj 	  active[cl].quick_insert (place, reg);
612*38fd1498Szrj 	}
613*38fd1498Szrj       else
614*38fd1498Szrj 	spill_at_interval (reg, active);
615*38fd1498Szrj 
616*38fd1498Szrj       /* Some interesting dumping as we go.  */
617*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
618*38fd1498Szrj 	{
619*38fd1498Szrj 	  fprintf (dump_file, "  reg%d: [%5d, %5d)->",
620*38fd1498Szrj 		   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
621*38fd1498Szrj 	  if (reg->m_reg_class)
622*38fd1498Szrj 	    fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
623*38fd1498Szrj 	  else
624*38fd1498Szrj 	    fprintf (dump_file, "[%%__%s_%i]",
625*38fd1498Szrj 		     hsa_seg_name (reg->m_spill_sym->m_segment),
626*38fd1498Szrj 		     reg->m_spill_sym->m_name_number);
627*38fd1498Szrj 	  for (int cl = 0; cl < 4; cl++)
628*38fd1498Szrj 	    {
629*38fd1498Szrj 	      bool first = true;
630*38fd1498Szrj 	      hsa_op_reg *r;
631*38fd1498Szrj 	      fprintf (dump_file, " {");
632*38fd1498Szrj 	      for (int j = 0; active[cl].iterate (j, &r); j++)
633*38fd1498Szrj 		if (first)
634*38fd1498Szrj 		  {
635*38fd1498Szrj 		    fprintf (dump_file, "%d", r->m_order);
636*38fd1498Szrj 		    first = false;
637*38fd1498Szrj 		  }
638*38fd1498Szrj 		else
639*38fd1498Szrj 		  fprintf (dump_file, ", %d", r->m_order);
640*38fd1498Szrj 	      fprintf (dump_file, "}");
641*38fd1498Szrj 	    }
642*38fd1498Szrj 	  fprintf (dump_file, "\n");
643*38fd1498Szrj 	}
644*38fd1498Szrj     }
645*38fd1498Szrj 
646*38fd1498Szrj   BITMAP_FREE (work);
647*38fd1498Szrj   free (bbs);
648*38fd1498Szrj 
649*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
650*38fd1498Szrj     {
651*38fd1498Szrj       fprintf (dump_file, "------- After liveness: -------\n");
652*38fd1498Szrj       dump_hsa_cfun_regalloc (dump_file);
653*38fd1498Szrj       fprintf (dump_file, "  ----- Intervals:\n");
654*38fd1498Szrj       for (i = 0; i < hsa_cfun->m_reg_count; i++)
655*38fd1498Szrj 	{
656*38fd1498Szrj 	  hsa_op_reg *reg = ind2reg[i];
657*38fd1498Szrj 	  if (!reg)
658*38fd1498Szrj 	    continue;
659*38fd1498Szrj 	  fprintf (dump_file, "  reg%d: [%5d, %5d)->", reg->m_order,
660*38fd1498Szrj 		   reg->m_lr_begin, reg->m_lr_end);
661*38fd1498Szrj 	  if (reg->m_reg_class)
662*38fd1498Szrj 	    fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
663*38fd1498Szrj 	  else
664*38fd1498Szrj 	    fprintf (dump_file, "[%%__%s_%i]\n",
665*38fd1498Szrj 		     hsa_seg_name (reg->m_spill_sym->m_segment),
666*38fd1498Szrj 		     reg->m_spill_sym->m_name_number);
667*38fd1498Szrj 	}
668*38fd1498Szrj     }
669*38fd1498Szrj 
670*38fd1498Szrj   for (i = 0; i < 4; i++)
671*38fd1498Szrj     active[i].release ();
672*38fd1498Szrj   ind2reg.release ();
673*38fd1498Szrj }
674*38fd1498Szrj 
675*38fd1498Szrj /* Entry point for register allocation.  */
676*38fd1498Szrj 
677*38fd1498Szrj static void
regalloc(void)678*38fd1498Szrj regalloc (void)
679*38fd1498Szrj {
680*38fd1498Szrj   basic_block bb;
681*38fd1498Szrj   m_reg_class_desc classes[4];
682*38fd1498Szrj 
683*38fd1498Szrj   /* If there are no registers used in the function, exit right away.  */
684*38fd1498Szrj   if (hsa_cfun->m_reg_count == 0)
685*38fd1498Szrj     return;
686*38fd1498Szrj 
687*38fd1498Szrj   memset (classes, 0, sizeof (classes));
688*38fd1498Szrj   classes[0].next_avail = 0;
689*38fd1498Szrj   classes[0].max_num = 7;
690*38fd1498Szrj   classes[0].cl_char = 'c';
691*38fd1498Szrj   classes[1].cl_char = 's';
692*38fd1498Szrj   classes[2].cl_char = 'd';
693*38fd1498Szrj   classes[3].cl_char = 'q';
694*38fd1498Szrj 
695*38fd1498Szrj   for (int i = 1; i < 4; i++)
696*38fd1498Szrj     {
697*38fd1498Szrj       classes[i].next_avail = 0;
698*38fd1498Szrj       classes[i].max_num = 20;
699*38fd1498Szrj     }
700*38fd1498Szrj 
701*38fd1498Szrj   linear_scan_regalloc (classes);
702*38fd1498Szrj 
703*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
704*38fd1498Szrj     rewrite_code_bb (bb, classes);
705*38fd1498Szrj }
706*38fd1498Szrj 
707*38fd1498Szrj /* Out of SSA and register allocation on HSAIL IL.  */
708*38fd1498Szrj 
709*38fd1498Szrj void
hsa_regalloc(void)710*38fd1498Szrj hsa_regalloc (void)
711*38fd1498Szrj {
712*38fd1498Szrj   hsa_cfun->update_dominance ();
713*38fd1498Szrj   naive_outof_ssa ();
714*38fd1498Szrj 
715*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
716*38fd1498Szrj     {
717*38fd1498Szrj       fprintf (dump_file, "------- After out-of-SSA: -------\n");
718*38fd1498Szrj       dump_hsa_cfun (dump_file);
719*38fd1498Szrj     }
720*38fd1498Szrj 
721*38fd1498Szrj   regalloc ();
722*38fd1498Szrj 
723*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
724*38fd1498Szrj     {
725*38fd1498Szrj       fprintf (dump_file, "------- After register allocation: -------\n");
726*38fd1498Szrj       dump_hsa_cfun (dump_file);
727*38fd1498Szrj     }
728*38fd1498Szrj }
729