1 /* HSAIL IL Register allocation and out-of-SSA.
2    Copyright (C) 2013-2018 Free Software Foundation, Inc.
3    Contributed by Michael Matz <matz@suse.de>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "is-a.h"
26 #include "vec.h"
27 #include "tree.h"
28 #include "dominance.h"
29 #include "basic-block.h"
30 #include "cfg.h"
31 #include "cfganal.h"
32 #include "function.h"
33 #include "bitmap.h"
34 #include "dumpfile.h"
35 #include "cgraph.h"
36 #include "print-tree.h"
37 #include "cfghooks.h"
38 #include "symbol-summary.h"
39 #include "hsa-common.h"
40 
41 
42 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa.  */
43 
44 static void
naive_process_phi(hsa_insn_phi * phi,const vec<edge> & predecessors)45 naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
46 {
47   unsigned count = phi->operand_count ();
48   for (unsigned i = 0; i < count; i++)
49     {
50       gcc_checking_assert (phi->get_op (i));
51       hsa_op_base *op = phi->get_op (i);
52       hsa_bb *hbb;
53       edge e;
54 
55       if (!op)
56 	break;
57 
58       e = predecessors[i];
59       if (single_succ_p (e->src))
60 	hbb = hsa_bb_for_bb (e->src);
61       else
62 	{
63 	  basic_block old_dest = e->dest;
64 	  hbb = hsa_init_new_bb (split_edge (e));
65 
66 	  /* If switch insn used this edge, fix jump table.  */
67 	  hsa_bb *source = hsa_bb_for_bb (e->src);
68 	  hsa_insn_sbr *sbr;
69 	  if (source->m_last_insn
70 	      && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
71 	    sbr->replace_all_labels (old_dest, hbb->m_bb);
72 	}
73 
74       hsa_build_append_simple_mov (phi->m_dest, op, hbb);
75     }
76 }
77 
78 /* Naive out-of SSA.  */
79 
80 static void
naive_outof_ssa(void)81 naive_outof_ssa (void)
82 {
83   basic_block bb;
84 
85   hsa_cfun->m_in_ssa = false;
86 
87   FOR_ALL_BB_FN (bb, cfun)
88   {
89     hsa_bb *hbb = hsa_bb_for_bb (bb);
90     hsa_insn_phi *phi;
91 
92     /* naive_process_phi can call split_edge on an incoming edge which order if
93        the incoming edges to the basic block and thus make it inconsistent with
94        the ordering of PHI arguments, so we collect them in advance.  */
95     auto_vec<edge, 8> predecessors;
96     unsigned pred_count = EDGE_COUNT (bb->preds);
97     for (unsigned i = 0; i < pred_count; i++)
98       predecessors.safe_push (EDGE_PRED (bb, i));
99 
100     for (phi = hbb->m_first_phi;
101 	 phi;
102 	 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
103       naive_process_phi (phi, predecessors);
104 
105     /* Zap PHI nodes, they will be deallocated when everything else will.  */
106     hbb->m_first_phi = NULL;
107     hbb->m_last_phi = NULL;
108   }
109 }
110 
111 /* Return register class number for the given HSA TYPE.  0 means the 'c' one
112    bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
113    and 3 for 'q' 128 bit class.  */
114 
115 static int
m_reg_class_for_type(BrigType16_t type)116 m_reg_class_for_type (BrigType16_t type)
117 {
118   switch (type)
119     {
120     case BRIG_TYPE_B1:
121       return 0;
122 
123     case BRIG_TYPE_U8:
124     case BRIG_TYPE_U16:
125     case BRIG_TYPE_U32:
126     case BRIG_TYPE_S8:
127     case BRIG_TYPE_S16:
128     case BRIG_TYPE_S32:
129     case BRIG_TYPE_F16:
130     case BRIG_TYPE_F32:
131     case BRIG_TYPE_B8:
132     case BRIG_TYPE_B16:
133     case BRIG_TYPE_B32:
134     case BRIG_TYPE_U8X4:
135     case BRIG_TYPE_S8X4:
136     case BRIG_TYPE_U16X2:
137     case BRIG_TYPE_S16X2:
138     case BRIG_TYPE_F16X2:
139       return 1;
140 
141     case BRIG_TYPE_U64:
142     case BRIG_TYPE_S64:
143     case BRIG_TYPE_F64:
144     case BRIG_TYPE_B64:
145     case BRIG_TYPE_U8X8:
146     case BRIG_TYPE_S8X8:
147     case BRIG_TYPE_U16X4:
148     case BRIG_TYPE_S16X4:
149     case BRIG_TYPE_F16X4:
150     case BRIG_TYPE_U32X2:
151     case BRIG_TYPE_S32X2:
152     case BRIG_TYPE_F32X2:
153       return 2;
154 
155     case BRIG_TYPE_B128:
156     case BRIG_TYPE_U8X16:
157     case BRIG_TYPE_S8X16:
158     case BRIG_TYPE_U16X8:
159     case BRIG_TYPE_S16X8:
160     case BRIG_TYPE_F16X8:
161     case BRIG_TYPE_U32X4:
162     case BRIG_TYPE_U64X2:
163     case BRIG_TYPE_S32X4:
164     case BRIG_TYPE_S64X2:
165     case BRIG_TYPE_F32X4:
166     case BRIG_TYPE_F64X2:
167       return 3;
168 
169     default:
170       gcc_unreachable ();
171     }
172 }
173 
174 /* If the Ith operands of INSN is or contains a register (in an address),
175    return the address of that register operand.  If not return NULL.  */
176 
177 static hsa_op_reg **
insn_reg_addr(hsa_insn_basic * insn,int i)178 insn_reg_addr (hsa_insn_basic *insn, int i)
179 {
180   hsa_op_base *op = insn->get_op (i);
181   if (!op)
182     return NULL;
183   hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
184   if (reg)
185     return (hsa_op_reg **) insn->get_op_addr (i);
186   hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
187   if (addr && addr->m_reg)
188     return &addr->m_reg;
189   return NULL;
190 }
191 
/* Allocation book-keeping for one hardware register class.  */

struct m_reg_class_desc
{
  /* NEXT_AVAIL is initialized but not read in this file — presumably
     the next register index to hand out; verify against other users.
     MAX_NUM is the number of allocatable hardregs in this class.  */
  unsigned next_avail, max_num;
  /* Number of hardregs currently in use, and the high watermark of
     simultaneous use.  */
  unsigned used_num, max_used;
  /* Bitmap (128 bits) of hardregs currently in use.  */
  uint64_t used[2];
  /* Class letter ('c', 's', 'd' or 'q') used in register names.  */
  char cl_char;
};
199 
/* Rewrite the instructions in BB to observe spilled live ranges.
   CLASSES is the global register class state.  */

static void
rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
{
  hsa_bb *hbb = hsa_bb_for_bb (bb);
  hsa_insn_basic *insn, *next_insn;

  for (insn = hbb->m_first_insn; insn; insn = next_insn)
    {
      /* hsa_spill_in/out insert instructions around INSN, so remember
	 the successor before rewriting.  */
      next_insn = insn->m_next;
      unsigned count = insn->operand_count ();
      for (unsigned i = 0; i < count; i++)
	{
	  gcc_checking_assert (insn->get_op (i));
	  hsa_op_reg **regaddr = insn_reg_addr (insn, i);

	  if (regaddr)
	    {
	      hsa_op_reg *reg = *regaddr;
	      /* Registers with a class got a hardreg; only spilled ones
		 (class 0, with a spill symbol) need rewriting.  */
	      if (reg->m_reg_class)
		continue;
	      gcc_assert (reg->m_spill_sym);

	      int cl = m_reg_class_for_type (reg->m_type);
	      hsa_op_reg *tmp, *tmp2;
	      /* Store back after a definition, reload before a use.  */
	      if (insn->op_output_p (i))
		tmp = hsa_spill_out (insn, reg, &tmp2);
	      else
		tmp = hsa_spill_in (insn, reg, &tmp2);

	      *regaddr = tmp;

	      /* Place the temporary above the allocatable range; the
		 operand index I keeps temporaries of different operands
		 of the same insn apart.  */
	      tmp->m_reg_class = classes[cl].cl_char;
	      tmp->m_hard_num = (char) (classes[cl].max_num + i);
	      if (tmp2)
		{
		  /* TMP2 only appears for class 0 (one bit) registers,
		     which are spilled through a 32 bit 's' register.  */
		  gcc_assert (cl == 0);
		  tmp2->m_reg_class = classes[1].cl_char;
		  tmp2->m_hard_num = (char) (classes[1].max_num + i);
		}
	    }
	}
    }
}
246 
247 /* Dump current function to dump file F, with info specific
248    to register allocation.  */
249 
250 void
dump_hsa_cfun_regalloc(FILE * f)251 dump_hsa_cfun_regalloc (FILE *f)
252 {
253   basic_block bb;
254 
255   fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
256 
257   FOR_ALL_BB_FN (bb, cfun)
258   {
259     hsa_bb *hbb = (struct hsa_bb *) bb->aux;
260     bitmap_print (dump_file, hbb->m_livein, "m_livein  ", "\n");
261     dump_hsa_bb (f, hbb);
262     bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
263   }
264 }
265 
266 /* Given the global register allocation state CLASSES and a
267    register REG, try to give it a hardware register.  If successful,
268    store that hardreg in REG and return it, otherwise return -1.
269    Also changes CLASSES to accommodate for the allocated register.  */
270 
271 static int
try_alloc_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)272 try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
273 {
274   int cl = m_reg_class_for_type (reg->m_type);
275   int ret = -1;
276   if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
277       >= 128 - 5)
278     return -1;
279   if (classes[cl].used_num < classes[cl].max_num)
280     {
281       unsigned int i;
282       classes[cl].used_num++;
283       if (classes[cl].used_num > classes[cl].max_used)
284 	classes[cl].max_used = classes[cl].used_num;
285       for (i = 0; i < classes[cl].used_num; i++)
286 	if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
287 	  break;
288       ret = i;
289       classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
290       reg->m_reg_class = classes[cl].cl_char;
291       reg->m_hard_num = i;
292     }
293   return ret;
294 }
295 
296 /* Free up hardregs used by REG, into allocation state CLASSES.  */
297 
298 static void
free_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)299 free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
300 {
301   int cl = m_reg_class_for_type (reg->m_type);
302   int ret = reg->m_hard_num;
303   gcc_assert (reg->m_reg_class == classes[cl].cl_char);
304   classes[cl].used_num--;
305   classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
306 }
307 
308 /* Note that the live range for REG ends at least at END.  */
309 
310 static void
note_lr_end(hsa_op_reg * reg,int end)311 note_lr_end (hsa_op_reg *reg, int end)
312 {
313   if (reg->m_lr_end < end)
314     reg->m_lr_end = end;
315 }
316 
317 /* Note that the live range for REG starts at least at BEGIN.  */
318 
319 static void
note_lr_begin(hsa_op_reg * reg,int begin)320 note_lr_begin (hsa_op_reg *reg, int begin)
321 {
322   if (reg->m_lr_begin > begin)
323     reg->m_lr_begin = begin;
324 }
325 
326 /* Given two registers A and B, return -1, 0 or 1 if A's live range
327    starts before, at or after B's live range.  */
328 
329 static int
cmp_begin(const void * a,const void * b)330 cmp_begin (const void *a, const void *b)
331 {
332   const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
333   const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
334   int ret;
335   if (rega == regb)
336     return 0;
337   ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
338   if (ret)
339     return ret;
340   return ((*rega)->m_order - (*regb)->m_order);
341 }
342 
343 /* Given two registers REGA and REGB, return true if REGA's
344    live range ends after REGB's.  This results in a sorting order
345    with earlier end points at the end.  */
346 
347 static bool
cmp_end(hsa_op_reg * const & rega,hsa_op_reg * const & regb)348 cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
349 {
350   int ret;
351   if (rega == regb)
352     return false;
353   ret = (regb)->m_lr_end - (rega)->m_lr_end;
354   if (ret)
355     return ret < 0;
356   return (((regb)->m_order - (rega)->m_order)) < 0;
357 }
358 
359 /* Expire all old intervals in ACTIVE (a per-regclass vector),
360    that is, those that end before the interval REG starts.  Give
361    back resources freed so into the state CLASSES.  */
362 
363 static void
expire_old_intervals(hsa_op_reg * reg,vec<hsa_op_reg * > * active,struct m_reg_class_desc * classes)364 expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
365 		      struct m_reg_class_desc *classes)
366 {
367   for (int i = 0; i < 4; i++)
368     while (!active[i].is_empty ())
369       {
370 	hsa_op_reg *a = active[i].pop ();
371 	if (a->m_lr_end > reg->m_lr_begin)
372 	  {
373 	    active[i].quick_push (a);
374 	    break;
375 	  }
376 	free_reg (classes, a);
377       }
378 }
379 
/* The interval REG didn't get a hardreg.  Spill it or one of those
   from ACTIVE (if the latter, then REG will become allocated to the
   hardreg that formerly was used by it).  */

static void
spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
{
  int cl = m_reg_class_for_type (reg->m_type);
  gcc_assert (!active[cl].is_empty ());
  /* ACTIVE is sorted by decreasing end point, so element 0 is the
     interval living the longest.  */
  hsa_op_reg *cand = active[cl][0];
  if (cand->m_lr_end > reg->m_lr_end)
    {
      /* CAND outlives REG: spill CAND instead, hand its hardreg over
	 to REG and insert REG into ACTIVE keeping the sort order.  */
      reg->m_reg_class = cand->m_reg_class;
      reg->m_hard_num = cand->m_hard_num;
      active[cl].ordered_remove (0);
      unsigned place = active[cl].lower_bound (reg, cmp_end);
      active[cl].quick_insert (place, reg);
    }
  else
    cand = reg;

  /* Attach a spill symbol to whichever register ended up spilled.  */
  gcc_assert (!cand->m_spill_sym);
  BrigType16_t type = cand->m_type;
  /* One bit registers are spilled through byte-sized memory.  */
  if (type == BRIG_TYPE_B1)
    type = BRIG_TYPE_U8;
  cand->m_reg_class = 0;
  cand->m_spill_sym = hsa_get_spill_symbol (type);
  cand->m_spill_sym->m_name_number = cand->m_order;
}
409 
/* Given the global register state CLASSES allocate all HSA virtual
   registers either to hardregs or to a spill symbol.  */

static void
linear_scan_regalloc (struct m_reg_class_desc *classes)
{
  /* Compute liveness.  */
  bool changed;
  int i, n;
  int insn_order;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  bitmap work = BITMAP_ALLOC (NULL);
  /* Mapping from register index (m_order) to the register itself.  */
  vec<hsa_op_reg*> ind2reg = vNULL;
  /* Per-class vectors of intervals currently holding a hardreg, kept
     sorted by decreasing live range end point.  */
  vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
  /* NOTE(review): despite the m_ prefix this is a local variable; it
     tracks the first insn of the previously visited (later) block in
     the backward walk below.  */
  hsa_insn_basic *m_last_insn;

  /* We will need the reverse post order for linearization,
     and the post order for liveness analysis, which is the same
     backward.  */
  n = pre_and_rev_post_order_compute (NULL, bbs, true);
  ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);

  /* Give all instructions a linearized number, at the same time
     build a mapping from register index to register.  */
  insn_order = 1;
  for (i = 0; i < n; i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
	{
	  unsigned opi;
	  insn->m_number = insn_order++;
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		ind2reg[(*regaddr)->m_order] = *regaddr;
	    }
	}
    }

  /* Initialize all live ranges to [after-end, 0).  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;

  /* Classic liveness analysis, as long as something changes:
       m_liveout is union (m_livein of successors)
       m_livein is m_liveout minus defs plus uses.  */
  do
    {
      changed = false;
      /* Iterate in post order (the reverse post order backwards) so
	 successors tend to be processed before their predecessors.  */
      for (i = n - 1; i >= 0; i--)
	{
	  edge e;
	  edge_iterator ei;
	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
	  hsa_bb *hbb = hsa_bb_for_bb (bb);

	  /* Union of successors m_livein (or empty if none).  */
	  bool first = true;
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	      {
		hsa_bb *succ = hsa_bb_for_bb (e->dest);
		if (first)
		  {
		    bitmap_copy (work, succ->m_livein);
		    first = false;
		  }
		else
		  bitmap_ior_into (work, succ->m_livein);
	      }
	  if (first)
	    bitmap_clear (work);

	  bitmap_copy (hbb->m_liveout, work);

	  /* Remove defs, include uses in a backward insn walk.  */
	  hsa_insn_basic *insn;
	  for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	    {
	      unsigned opi;
	      /* Operands [0, ndefs) are treated as definitions here,
		 the rest as uses.  */
	      unsigned ndefs = insn->input_count ();
	      for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_clear_bit (work, (*regaddr)->m_order);
		}
	      for (; opi < insn->operand_count (); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_set_bit (work, (*regaddr)->m_order);
		}
	    }

	  /* Note if that changed something.  */
	  if (bitmap_ior_into (hbb->m_livein, work))
	    changed = true;
	}
    }
  while (changed);

  /* Make one pass through all instructions in linear order,
     noting and merging possible live range start and end points.  */
  m_last_insn = NULL;
  for (i = n - 1; i >= 0; i--)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      int after_end_number;
      unsigned bit;
      bitmap_iterator bi;

      /* M_LAST_INSN is the first insn of the block handled in the
	 previous iteration, i.e. the first insn following this block
	 in linear order (none for the very last block).  */
      if (m_last_insn)
	after_end_number = m_last_insn->m_number;
      else
	after_end_number = insn_order;
      /* Everything live-out in this BB has at least an end point
	 after us.  */
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
	note_lr_end (ind2reg[bit], after_end_number);

      for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	{
	  unsigned opi;
	  unsigned ndefs = insn->input_count ();
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		{
		  hsa_op_reg *reg = *regaddr;
		  /* Definitions start a live range, uses end one.  */
		  if (opi < ndefs)
		    note_lr_begin (reg, insn->m_number);
		  else
		    note_lr_end (reg, insn->m_number);
		}
	    }
	}

      /* Everything live-in in this BB has a start point before
	 our first insn.  */
      int before_start_number;
      if (hbb->m_first_insn)
	before_start_number = hbb->m_first_insn->m_number;
      else
	before_start_number = after_end_number;
      before_start_number--;
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
	note_lr_begin (ind2reg[bit], before_start_number);

      if (hbb->m_first_insn)
	m_last_insn = hbb->m_first_insn;
    }

  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      {
	/* All regs that have still their start at after all code actually
	   are defined at the start of the routine (prologue).  */
	if (ind2reg[i]->m_lr_begin == insn_order)
	  ind2reg[i]->m_lr_begin = 0;
	/* All regs that have no use but a def will have lr_end == 0,
	   they are actually live from def until after the insn they are
	   defined in.  */
	if (ind2reg[i]->m_lr_end == 0)
	  ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
      }

  /* Sort all intervals by increasing start point.  */
  gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);

  if (flag_checking)
    for (unsigned i = 0; i < ind2reg.length (); i++)
      gcc_assert (ind2reg[i]);

  ind2reg.qsort (cmp_begin);
  for (i = 0; i < 4; i++)
    active[i].reserve_exact (hsa_cfun->m_reg_count);

  /* Now comes the linear scan allocation.  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    {
      hsa_op_reg *reg = ind2reg[i];
      if (!reg)
	continue;
      /* Retire intervals that ended before REG starts, freeing their
	 hardregs.  */
      expire_old_intervals (reg, active, classes);
      int cl = m_reg_class_for_type (reg->m_type);
      if (try_alloc_reg (classes, reg) >= 0)
	{
	  /* Got a hardreg: insert REG into ACTIVE, keeping it sorted
	     by decreasing end point.  */
	  unsigned place = active[cl].lower_bound (reg, cmp_end);
	  active[cl].quick_insert (place, reg);
	}
      else
	spill_at_interval (reg, active);

      /* Some interesting dumping as we go.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  reg%d: [%5d, %5d)->",
		   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	  for (int cl = 0; cl < 4; cl++)
	    {
	      bool first = true;
	      hsa_op_reg *r;
	      fprintf (dump_file, " {");
	      for (int j = 0; active[cl].iterate (j, &r); j++)
		if (first)
		  {
		    fprintf (dump_file, "%d", r->m_order);
		    first = false;
		  }
		else
		  fprintf (dump_file, ", %d", r->m_order);
	      fprintf (dump_file, "}");
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (work);
  free (bbs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "------- After liveness: -------\n");
      dump_hsa_cfun_regalloc (dump_file);
      fprintf (dump_file, "  ----- Intervals:\n");
      for (i = 0; i < hsa_cfun->m_reg_count; i++)
	{
	  hsa_op_reg *reg = ind2reg[i];
	  if (!reg)
	    continue;
	  fprintf (dump_file, "  reg%d: [%5d, %5d)->", reg->m_order,
		   reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]\n",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	}
    }

  for (i = 0; i < 4; i++)
    active[i].release ();
  ind2reg.release ();
}
674 
675 /* Entry point for register allocation.  */
676 
677 static void
regalloc(void)678 regalloc (void)
679 {
680   basic_block bb;
681   m_reg_class_desc classes[4];
682 
683   /* If there are no registers used in the function, exit right away.  */
684   if (hsa_cfun->m_reg_count == 0)
685     return;
686 
687   memset (classes, 0, sizeof (classes));
688   classes[0].next_avail = 0;
689   classes[0].max_num = 7;
690   classes[0].cl_char = 'c';
691   classes[1].cl_char = 's';
692   classes[2].cl_char = 'd';
693   classes[3].cl_char = 'q';
694 
695   for (int i = 1; i < 4; i++)
696     {
697       classes[i].next_avail = 0;
698       classes[i].max_num = 20;
699     }
700 
701   linear_scan_regalloc (classes);
702 
703   FOR_ALL_BB_FN (bb, cfun)
704     rewrite_code_bb (bb, classes);
705 }
706 
707 /* Out of SSA and register allocation on HSAIL IL.  */
708 
709 void
hsa_regalloc(void)710 hsa_regalloc (void)
711 {
712   hsa_cfun->update_dominance ();
713   naive_outof_ssa ();
714 
715   if (dump_file && (dump_flags & TDF_DETAILS))
716     {
717       fprintf (dump_file, "------- After out-of-SSA: -------\n");
718       dump_hsa_cfun (dump_file);
719     }
720 
721   regalloc ();
722 
723   if (dump_file && (dump_flags & TDF_DETAILS))
724     {
725       fprintf (dump_file, "------- After register allocation: -------\n");
726       dump_hsa_cfun (dump_file);
727     }
728 }
729