1 /* HSAIL IL Register allocation and out-of-SSA.
2    Copyright (C) 2013-2020 Free Software Foundation, Inc.
3    Contributed by Michael Matz <matz@suse.de>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "is-a.h"
26 #include "vec.h"
27 #include "tree.h"
28 #include "dominance.h"
29 #include "basic-block.h"
30 #include "function.h"
31 #include "cfganal.h"
32 #include "cfg.h"
33 #include "bitmap.h"
34 #include "dumpfile.h"
35 #include "cgraph.h"
36 #include "print-tree.h"
37 #include "cfghooks.h"
38 #include "alloc-pool.h"
39 #include "symbol-summary.h"
40 #include "hsa-common.h"
41 
42 
/* Process a PHI node PHI as a part of naive out-of-SSA: append a copy of
   each PHI argument into PHI's destination register at the end of the
   corresponding predecessor.  PREDECESSORS are the incoming edges of the
   block containing PHI, collected before any edge splitting so that their
   order still matches the order of PHI's operands.  */

static void
naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
{
  unsigned count = phi->operand_count ();
  for (unsigned i = 0; i < count; i++)
    {
      gcc_checking_assert (phi->get_op (i));
      hsa_op_base *op = phi->get_op (i);
      hsa_bb *hbb;
      edge e;

      if (!op)
	break;

      e = predecessors[i];
      if (single_succ_p (e->src))
	hbb = hsa_bb_for_bb (e->src);
      else
	{
	  /* The predecessor has multiple successors, so the copy cannot
	     simply be appended to it; split the edge and put the copy into
	     the newly created block.  */
	  basic_block old_dest = e->dest;
	  hbb = hsa_init_new_bb (split_edge (e));

	  /* If switch insn used this edge, fix jump table.  */
	  hsa_bb *source = hsa_bb_for_bb (e->src);
	  hsa_insn_sbr *sbr;
	  if (source->m_last_insn
	      && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
	    sbr->replace_all_labels (old_dest, hbb->m_bb);
	}

      hsa_build_append_simple_mov (phi->m_dest, op, hbb);
    }
}
78 
/* Naive out-of-SSA: for every PHI node in the current function, materialize
   its arguments as copies in the predecessor blocks and then drop the PHI
   nodes themselves.  */

static void
naive_outof_ssa (void)
{
  basic_block bb;

  hsa_cfun->m_in_ssa = false;

  FOR_ALL_BB_FN (bb, cfun)
  {
    hsa_bb *hbb = hsa_bb_for_bb (bb);
    hsa_insn_phi *phi;

    /* naive_process_phi can call split_edge on an incoming edge, which can
       change the order of the incoming edges of the basic block and thus
       make it inconsistent with the ordering of PHI arguments, so we
       collect them in advance.  */
    auto_vec<edge, 8> predecessors;
    unsigned pred_count = EDGE_COUNT (bb->preds);
    for (unsigned i = 0; i < pred_count; i++)
      predecessors.safe_push (EDGE_PRED (bb, i));

    for (phi = hbb->m_first_phi;
	 phi;
	 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
      naive_process_phi (phi, predecessors);

    /* Zap PHI nodes, they will be deallocated when everything else will.  */
    hbb->m_first_phi = NULL;
    hbb->m_last_phi = NULL;
  }
}
111 
/* Return register class number for the given HSA TYPE.  0 means the 'c' one
   bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
   and 3 for 'q' 128 bit class.  */

static int
m_reg_class_for_type (BrigType16_t type)
{
  switch (type)
    {
    /* One-bit predicates.  */
    case BRIG_TYPE_B1:
      return 0;

    /* Scalar and packed types of at most 32 bits.  */
    case BRIG_TYPE_U8:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S8:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F16:
    case BRIG_TYPE_F32:
    case BRIG_TYPE_B8:
    case BRIG_TYPE_B16:
    case BRIG_TYPE_B32:
    case BRIG_TYPE_U8X4:
    case BRIG_TYPE_S8X4:
    case BRIG_TYPE_U16X2:
    case BRIG_TYPE_S16X2:
    case BRIG_TYPE_F16X2:
      return 1;

    /* Scalar and packed types of 64 bits.  */
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
    case BRIG_TYPE_B64:
    case BRIG_TYPE_U8X8:
    case BRIG_TYPE_S8X8:
    case BRIG_TYPE_U16X4:
    case BRIG_TYPE_S16X4:
    case BRIG_TYPE_F16X4:
    case BRIG_TYPE_U32X2:
    case BRIG_TYPE_S32X2:
    case BRIG_TYPE_F32X2:
      return 2;

    /* 128-bit types.  */
    case BRIG_TYPE_B128:
    case BRIG_TYPE_U8X16:
    case BRIG_TYPE_S8X16:
    case BRIG_TYPE_U16X8:
    case BRIG_TYPE_S16X8:
    case BRIG_TYPE_F16X8:
    case BRIG_TYPE_U32X4:
    case BRIG_TYPE_U64X2:
    case BRIG_TYPE_S32X4:
    case BRIG_TYPE_S64X2:
    case BRIG_TYPE_F32X4:
    case BRIG_TYPE_F64X2:
      return 3;

    default:
      gcc_unreachable ();
    }
}
174 
175 /* If the Ith operands of INSN is or contains a register (in an address),
176    return the address of that register operand.  If not return NULL.  */
177 
178 static hsa_op_reg **
insn_reg_addr(hsa_insn_basic * insn,int i)179 insn_reg_addr (hsa_insn_basic *insn, int i)
180 {
181   hsa_op_base *op = insn->get_op (i);
182   if (!op)
183     return NULL;
184   hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
185   if (reg)
186     return (hsa_op_reg **) insn->get_op_addr (i);
187   hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
188   if (addr && addr->m_reg)
189     return &addr->m_reg;
190   return NULL;
191 }
192 
/* Allocation state of one register class.  */

struct m_reg_class_desc
{
  /* NEXT_AVAIL is only initialized (to 0) in this file -- presumably the
     next never-used index; TODO confirm against other users.  MAX_NUM is
     the number of hardware registers available in this class.  */
  unsigned next_avail, max_num;
  /* Number of registers currently allocated, and the high-water mark of
     simultaneously allocated registers.  */
  unsigned used_num, max_used;
  /* Bitmap (128 bits) of hardware register numbers currently in use.  */
  uint64_t used[2];
  /* Letter identifying the class: 'c', 's', 'd' or 'q'.  */
  char cl_char;
};
200 
/* Rewrite the instructions in BB to observe spilled live ranges.
   CLASSES is the global register class state.  */

static void
rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
{
  hsa_bb *hbb = hsa_bb_for_bb (bb);
  hsa_insn_basic *insn, *next_insn;

  /* Remember NEXT_INSN up front, since spilling inserts new instructions
     around INSN.  */
  for (insn = hbb->m_first_insn; insn; insn = next_insn)
    {
      next_insn = insn->m_next;
      unsigned count = insn->operand_count ();
      for (unsigned i = 0; i < count; i++)
	{
	  gcc_checking_assert (insn->get_op (i));
	  hsa_op_reg **regaddr = insn_reg_addr (insn, i);

	  if (regaddr)
	    {
	      hsa_op_reg *reg = *regaddr;
	      /* Registers that got a hardware register (non-zero class)
		 need no rewriting; only spilled ones (class 0) do.  */
	      if (reg->m_reg_class)
		continue;
	      gcc_assert (reg->m_spill_sym);

	      int cl = m_reg_class_for_type (reg->m_type);
	      hsa_op_reg *tmp, *tmp2;
	      /* TMP is the temporary register standing in for REG around
		 INSN; TMP2, when set, is an extra temporary (apparently a
		 32-bit one used for 'c' class values -- see the CL == 0
		 assert below).  */
	      if (insn->op_output_p (i))
		tmp = hsa_spill_out (insn, reg, &tmp2);
	      else
		tmp = hsa_spill_in (insn, reg, &tmp2);

	      *regaddr = tmp;

	      /* Hard register numbers MAX_NUM and above were never handed
		 out by try_alloc_reg, so MAX_NUM + I is free; using the
		 operand index I keeps temporaries for different operands
		 of the same insn apart.  */
	      tmp->m_reg_class = classes[cl].cl_char;
	      tmp->m_hard_num = (char) (classes[cl].max_num + i);
	      if (tmp2)
		{
		  gcc_assert (cl == 0);
		  tmp2->m_reg_class = classes[1].cl_char;
		  tmp2->m_hard_num = (char) (classes[1].max_num + i);
		}
	    }
	}
    }
}
247 
248 /* Dump current function to dump file F, with info specific
249    to register allocation.  */
250 
251 void
dump_hsa_cfun_regalloc(FILE * f)252 dump_hsa_cfun_regalloc (FILE *f)
253 {
254   basic_block bb;
255 
256   fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
257 
258   FOR_ALL_BB_FN (bb, cfun)
259   {
260     hsa_bb *hbb = (class hsa_bb *) bb->aux;
261     bitmap_print (dump_file, hbb->m_livein, "m_livein  ", "\n");
262     dump_hsa_bb (f, hbb);
263     bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
264   }
265 }
266 
/* Given the global register allocation state CLASSES and a
   register REG, try to give it a hardware register.  If successful,
   store that hardreg in REG and return it, otherwise return -1.
   Also changes CLASSES to accommodate for the allocated register.  */

static int
try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
{
  int cl = m_reg_class_for_type (reg->m_type);
  int ret = -1;
  /* Refuse allocation once the 's'/'d'/'q' classes together occupy nearly
     128 32-bit units ('d' counts double, 'q' quadruple).  NOTE(review):
     the five units held back are presumably reserved for the spill-code
     temporaries handed out in rewrite_code_bb -- confirm.  */
  if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
      >= 128 - 5)
    return -1;
  if (classes[cl].used_num < classes[cl].max_num)
    {
      unsigned int i;
      classes[cl].used_num++;
      if (classes[cl].used_num > classes[cl].max_used)
	classes[cl].max_used = classes[cl].used_num;
      /* Find the lowest free hardreg number.  After the increment above at
	 most USED_NUM - 1 bits are set, so a clear bit must exist below
	 USED_NUM and the loop always breaks with a valid I.  */
      for (i = 0; i < classes[cl].used_num; i++)
	if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
	  break;
      ret = i;
      classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
      reg->m_reg_class = classes[cl].cl_char;
      reg->m_hard_num = i;
    }
  return ret;
}
296 
297 /* Free up hardregs used by REG, into allocation state CLASSES.  */
298 
299 static void
free_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)300 free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
301 {
302   int cl = m_reg_class_for_type (reg->m_type);
303   int ret = reg->m_hard_num;
304   gcc_assert (reg->m_reg_class == classes[cl].cl_char);
305   classes[cl].used_num--;
306   classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
307 }
308 
309 /* Note that the live range for REG ends at least at END.  */
310 
311 static void
note_lr_end(hsa_op_reg * reg,int end)312 note_lr_end (hsa_op_reg *reg, int end)
313 {
314   if (reg->m_lr_end < end)
315     reg->m_lr_end = end;
316 }
317 
318 /* Note that the live range for REG starts at least at BEGIN.  */
319 
320 static void
note_lr_begin(hsa_op_reg * reg,int begin)321 note_lr_begin (hsa_op_reg *reg, int begin)
322 {
323   if (reg->m_lr_begin > begin)
324     reg->m_lr_begin = begin;
325 }
326 
327 /* Given two registers A and B, return -1, 0 or 1 if A's live range
328    starts before, at or after B's live range.  */
329 
330 static int
cmp_begin(const void * a,const void * b)331 cmp_begin (const void *a, const void *b)
332 {
333   const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
334   const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
335   int ret;
336   if (rega == regb)
337     return 0;
338   ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
339   if (ret)
340     return ret;
341   return ((*rega)->m_order - (*regb)->m_order);
342 }
343 
344 /* Given two registers REGA and REGB, return true if REGA's
345    live range ends after REGB's.  This results in a sorting order
346    with earlier end points at the end.  */
347 
348 static bool
cmp_end(hsa_op_reg * const & rega,hsa_op_reg * const & regb)349 cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
350 {
351   int ret;
352   if (rega == regb)
353     return false;
354   ret = (regb)->m_lr_end - (rega)->m_lr_end;
355   if (ret)
356     return ret < 0;
357   return (((regb)->m_order - (rega)->m_order)) < 0;
358 }
359 
360 /* Expire all old intervals in ACTIVE (a per-regclass vector),
361    that is, those that end before the interval REG starts.  Give
362    back resources freed so into the state CLASSES.  */
363 
364 static void
expire_old_intervals(hsa_op_reg * reg,vec<hsa_op_reg * > * active,struct m_reg_class_desc * classes)365 expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
366 		      struct m_reg_class_desc *classes)
367 {
368   for (int i = 0; i < 4; i++)
369     while (!active[i].is_empty ())
370       {
371 	hsa_op_reg *a = active[i].pop ();
372 	if (a->m_lr_end > reg->m_lr_begin)
373 	  {
374 	    active[i].quick_push (a);
375 	    break;
376 	  }
377 	free_reg (classes, a);
378       }
379 }
380 
/* The interval REG didn't get a hardreg.  Spill it or one of those
   from ACTIVE (if the latter, then REG will become allocated to the
   hardreg that formerly was used by it).  */

static void
spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
{
  int cl = m_reg_class_for_type (reg->m_type);
  gcc_assert (!active[cl].is_empty ());
  /* ACTIVE[CL] is sorted by decreasing end point (see cmp_end), so element
     0 is the active interval living longest.  */
  hsa_op_reg *cand = active[cl][0];
  if (cand->m_lr_end > reg->m_lr_end)
    {
      /* The candidate outlives REG: steal its hardreg for REG, spill the
	 candidate instead, and keep ACTIVE sorted by re-inserting REG at
	 its proper position.  */
      reg->m_reg_class = cand->m_reg_class;
      reg->m_hard_num = cand->m_hard_num;
      active[cl].ordered_remove (0);
      unsigned place = active[cl].lower_bound (reg, cmp_end);
      active[cl].quick_insert (place, reg);
    }
  else
    cand = reg;

  gcc_assert (!cand->m_spill_sym);
  BrigType16_t type = cand->m_type;
  /* One-bit values are spilled to memory as bytes.  */
  if (type == BRIG_TYPE_B1)
    type = BRIG_TYPE_U8;
  /* Class 0 marks CAND as spilled; its memory slot is identified by the
     spill symbol, numbered after the register's ordinal.  */
  cand->m_reg_class = 0;
  cand->m_spill_sym = hsa_get_spill_symbol (type);
  cand->m_spill_sym->m_name_number = cand->m_order;
}
410 
/* Given the global register state CLASSES allocate all HSA virtual
   registers either to hardregs or to a spill symbol.  Works in four
   phases: linearize all instructions, compute classic backward liveness,
   derive per-register live-range intervals, then run a linear scan over
   the intervals sorted by start point.  */

static void
linear_scan_regalloc (struct m_reg_class_desc *classes)
{
  /* Compute liveness.  */
  bool changed;
  int i, n;
  int insn_order;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  bitmap work = BITMAP_ALLOC (NULL);
  /* Mapping from register ordinal (m_order) to the register itself.  */
  vec<hsa_op_reg*> ind2reg = vNULL;
  /* Per-class vectors of currently live intervals, each kept sorted by
     decreasing end point (cmp_end order).  */
  vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
  hsa_insn_basic *m_last_insn;

  /* We will need the reverse post order for linearization,
     and the post order for liveness analysis, which is the same
     backward.  */
  n = pre_and_rev_post_order_compute (NULL, bbs, true);
  ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);

  /* Give all instructions a linearized number, at the same time
     build a mapping from register index to register.  */
  insn_order = 1;
  for (i = 0; i < n; i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
	{
	  unsigned opi;
	  insn->m_number = insn_order++;
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		ind2reg[(*regaddr)->m_order] = *regaddr;
	    }
	}
    }

  /* Initialize all live ranges to [after-end, 0).  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;

  /* Classic liveness analysis, as long as something changes:
       m_liveout is union (m_livein of successors)
       m_livein is m_liveout minus defs plus uses.  */
  do
    {
      changed = false;
      /* Iterate in post order (the reverse of BBS) for faster
	 convergence of the backward data-flow problem.  */
      for (i = n - 1; i >= 0; i--)
	{
	  edge e;
	  edge_iterator ei;
	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
	  hsa_bb *hbb = hsa_bb_for_bb (bb);

	  /* Union of successors m_livein (or empty if none).  */
	  bool first = true;
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	      {
		hsa_bb *succ = hsa_bb_for_bb (e->dest);
		if (first)
		  {
		    bitmap_copy (work, succ->m_livein);
		    first = false;
		  }
		else
		  bitmap_ior_into (work, succ->m_livein);
	      }
	  if (first)
	    bitmap_clear (work);

	  bitmap_copy (hbb->m_liveout, work);

	  /* Remove defs, include uses in a backward insn walk.  The first
	     input_count () operands of an insn are its outputs (defs), the
	     rest are uses.  */
	  hsa_insn_basic *insn;
	  for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	    {
	      unsigned opi;
	      unsigned ndefs = insn->input_count ();
	      for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_clear_bit (work, (*regaddr)->m_order);
		}
	      for (; opi < insn->operand_count (); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_set_bit (work, (*regaddr)->m_order);
		}
	    }

	  /* Note if that changed something.  */
	  if (bitmap_ior_into (hbb->m_livein, work))
	    changed = true;
	}
    }
  while (changed);

  /* Make one pass through all instructions in linear order,
     noting and merging possible live range start and end points.
     M_LAST_INSN tracks the first insn of the block following the one being
     processed (blocks are visited backwards).  */
  m_last_insn = NULL;
  for (i = n - 1; i >= 0; i--)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      int after_end_number;
      unsigned bit;
      bitmap_iterator bi;

      if (m_last_insn)
	after_end_number = m_last_insn->m_number;
      else
	after_end_number = insn_order;
      /* Everything live-out in this BB has at least an end point
	 after us.  */
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
	note_lr_end (ind2reg[bit], after_end_number);

      for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	{
	  unsigned opi;
	  unsigned ndefs = insn->input_count ();
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		{
		  hsa_op_reg *reg = *regaddr;
		  /* Defs start a live range, uses extend it.  */
		  if (opi < ndefs)
		    note_lr_begin (reg, insn->m_number);
		  else
		    note_lr_end (reg, insn->m_number);
		}
	    }
	}

      /* Everything live-in in this BB has a start point before
	 our first insn.  */
      int before_start_number;
      if (hbb->m_first_insn)
	before_start_number = hbb->m_first_insn->m_number;
      else
	before_start_number = after_end_number;
      before_start_number--;
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
	note_lr_begin (ind2reg[bit], before_start_number);

      if (hbb->m_first_insn)
	m_last_insn = hbb->m_first_insn;
    }

  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      {
	/* All regs that have still their start at after all code actually
	   are defined at the start of the routine (prologue).  */
	if (ind2reg[i]->m_lr_begin == insn_order)
	  ind2reg[i]->m_lr_begin = 0;
	/* All regs that have no use but a def will have lr_end == 0,
	   they are actually live from def until after the insn they are
	   defined in.  */
	if (ind2reg[i]->m_lr_end == 0)
	  ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
      }

  /* Sort all intervals by increasing start point.  */
  gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);

  if (flag_checking)
    for (unsigned i = 0; i < ind2reg.length (); i++)
      gcc_assert (ind2reg[i]);

  ind2reg.qsort (cmp_begin);
  for (i = 0; i < 4; i++)
    active[i].reserve_exact (hsa_cfun->m_reg_count);

  /* Now comes the linear scan allocation.  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    {
      hsa_op_reg *reg = ind2reg[i];
      if (!reg)
	continue;
      expire_old_intervals (reg, active, classes);
      int cl = m_reg_class_for_type (reg->m_type);
      if (try_alloc_reg (classes, reg) >= 0)
	{
	  /* Keep ACTIVE[CL] sorted by decreasing end point.  */
	  unsigned place = active[cl].lower_bound (reg, cmp_end);
	  active[cl].quick_insert (place, reg);
	}
      else
	spill_at_interval (reg, active);

      /* Some interesting dumping as we go.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  reg%d: [%5d, %5d)->",
		   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	  for (int cl = 0; cl < 4; cl++)
	    {
	      bool first = true;
	      hsa_op_reg *r;
	      fprintf (dump_file, " {");
	      for (int j = 0; active[cl].iterate (j, &r); j++)
		if (first)
		  {
		    fprintf (dump_file, "%d", r->m_order);
		    first = false;
		  }
		else
		  fprintf (dump_file, ", %d", r->m_order);
	      fprintf (dump_file, "}");
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (work);
  free (bbs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "------- After liveness: -------\n");
      dump_hsa_cfun_regalloc (dump_file);
      fprintf (dump_file, "  ----- Intervals:\n");
      for (i = 0; i < hsa_cfun->m_reg_count; i++)
	{
	  hsa_op_reg *reg = ind2reg[i];
	  if (!reg)
	    continue;
	  fprintf (dump_file, "  reg%d: [%5d, %5d)->", reg->m_order,
		   reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]\n",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	}
    }

  for (i = 0; i < 4; i++)
    active[i].release ();
  ind2reg.release ();
}
675 
676 /* Entry point for register allocation.  */
677 
678 static void
regalloc(void)679 regalloc (void)
680 {
681   basic_block bb;
682   m_reg_class_desc classes[4];
683 
684   /* If there are no registers used in the function, exit right away.  */
685   if (hsa_cfun->m_reg_count == 0)
686     return;
687 
688   memset (classes, 0, sizeof (classes));
689   classes[0].next_avail = 0;
690   classes[0].max_num = 7;
691   classes[0].cl_char = 'c';
692   classes[1].cl_char = 's';
693   classes[2].cl_char = 'd';
694   classes[3].cl_char = 'q';
695 
696   for (int i = 1; i < 4; i++)
697     {
698       classes[i].next_avail = 0;
699       classes[i].max_num = 20;
700     }
701 
702   linear_scan_regalloc (classes);
703 
704   FOR_ALL_BB_FN (bb, cfun)
705     rewrite_code_bb (bb, classes);
706 }
707 
708 /* Out of SSA and register allocation on HSAIL IL.  */
709 
710 void
hsa_regalloc(void)711 hsa_regalloc (void)
712 {
713   hsa_cfun->update_dominance ();
714   naive_outof_ssa ();
715 
716   if (dump_file && (dump_flags & TDF_DETAILS))
717     {
718       fprintf (dump_file, "------- After out-of-SSA: -------\n");
719       dump_hsa_cfun (dump_file);
720     }
721 
722   regalloc ();
723 
724   if (dump_file && (dump_flags & TDF_DETAILS))
725     {
726       fprintf (dump_file, "------- After register allocation: -------\n");
727       dump_hsa_cfun (dump_file);
728     }
729 }
730