1 /* Perform branch target register load optimizations.
2    Copyright (C) 2001-2016 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "df.h"
28 #include "insn-config.h"
29 #include "regs.h"
30 #include "emit-rtl.h"
31 #include "recog.h"
32 #include "diagnostic-core.h"
33 #include "expr.h"
34 #include "insn-attr.h"
35 #include "tree-pass.h"
36 #include "cfgrtl.h"
37 #include "cfganal.h"
38 #include "cfgcleanup.h"
39 #include "cfgloop.h"
40 #include "rtl-iter.h"
41 #include "fibonacci_heap.h"
42 
43 struct btr_def;
44 
45 /* Target register optimizations - these are performed after reload.  */
46 
47 struct btr_def_group
48 {
49   btr_def_group *next;
50   rtx src;
51   btr_def *members;
52 };
53 
54 struct btr_user
55 {
56   btr_user *next;
57   basic_block bb;
58   int luid;
59   rtx_insn *insn;
60   /* If INSN has a single use of a single branch register, then
61      USE points to it within INSN.  If there is more than
62      one branch register use, or the use is in some way ambiguous,
63      then USE is NULL.  */
64   rtx use;
65   int n_reaching_defs;
66   int first_reaching_def;
67   char other_use_this_block;
68 };
69 
70 /* btr_def structs appear on three lists:
71      1. A list of all btr_def structures (head is
72 	ALL_BTR_DEFS, linked by the NEXT field).
73      2. A list of branch reg definitions per basic block (head is
74 	BB_BTR_DEFS[i], linked by the NEXT_THIS_BB field).
75      3. A list of all branch reg definitions belonging to the same
76 	group (head is in a BTR_DEF_GROUP struct, linked by
77 	NEXT_THIS_GROUP field).  */
78 
79 struct btr_def
80 {
81   btr_def *next_this_bb;
82   btr_def *next_this_group;
83   basic_block bb;
84   int luid;
85   rtx_insn *insn;
86   int btr;
87   int cost;
88   /* For a branch register setting insn that has a constant
89      source (i.e. a label), group links together all the
90      insns with the same source.  For other branch register
91      setting insns, group is NULL.  */
92   btr_def_group *group;
93   btr_user *uses;
94   /* If this def has a reaching use which is not a simple use
95      in a branch instruction, then has_ambiguous_use will be true,
96      and we will not attempt to migrate this definition.  */
97   char has_ambiguous_use;
98   /* live_range is an approximation to the true live range for this
99      def/use web, because it records the set of blocks that contain
100      the live range.  There could be other live ranges for the same
101      branch register in that set of blocks, either in the block
102      containing the def (before the def), or in a block containing
103      a use (after the use).  If there are such other live ranges, then
104      other_btr_uses_before_def or other_btr_uses_after_use must be set true
105      as appropriate.  */
106   char other_btr_uses_before_def;
107   char other_btr_uses_after_use;
108   /* We set own_end when we have moved a definition into a dominator.
109      Thus, when a later combination removes this definition again, we know
110      to clear out trs_live_at_end again.  */
111   char own_end;
112   bitmap live_range;
113 };
114 
115 typedef fibonacci_heap <long, btr_def> btr_heap_t;
116 typedef fibonacci_node <long, btr_def> btr_heap_node_t;
117 
118 static int issue_rate;
119 
120 static int basic_block_freq (const_basic_block);
121 static int insn_sets_btr_p (const rtx_insn *, int, int *);
122 static void find_btr_def_group (btr_def_group **, btr_def *);
123 static btr_def *add_btr_def (btr_heap_t *, basic_block, int, rtx_insn *,
124 			    unsigned int, int, btr_def_group **);
125 static btr_user *new_btr_user (basic_block, int, rtx_insn *);
126 static void dump_hard_reg_set (HARD_REG_SET);
127 static void dump_btrs_live (int);
128 static void note_other_use_this_block (unsigned int, btr_user *);
129 static void compute_defs_uses_and_gen (btr_heap_t *, btr_def **, btr_user **,
130 				       sbitmap *, sbitmap *, HARD_REG_SET *);
131 static void compute_kill (sbitmap *, sbitmap *, HARD_REG_SET *);
132 static void compute_out (sbitmap *bb_out, sbitmap *, sbitmap *, int);
133 static void link_btr_uses (btr_def **, btr_user **, sbitmap *, sbitmap *, int);
134 static void build_btr_def_use_webs (btr_heap_t *);
135 static int block_at_edge_of_live_range_p (int, btr_def *);
136 static void clear_btr_from_live_range (btr_def *def);
137 static void add_btr_to_live_range (btr_def *, int);
138 static void augment_live_range (bitmap, HARD_REG_SET *, basic_block,
139 				basic_block, int);
140 static int choose_btr (HARD_REG_SET);
141 static void combine_btr_defs (btr_def *, HARD_REG_SET *);
142 static void btr_def_live_range (btr_def *, HARD_REG_SET *);
143 static void move_btr_def (basic_block, int, btr_def *, bitmap, HARD_REG_SET *);
144 static int migrate_btr_def (btr_def *, int);
145 static void migrate_btr_defs (enum reg_class, int);
146 static int can_move_up (const_basic_block, const rtx_insn *, int);
147 static void note_btr_set (rtx, const_rtx, void *);
148 
149 /* The following code performs code motion of target load instructions
150    (instructions that set branch target registers), to move them
151    forward away from the branch instructions and out of loops (or,
152    more generally, from a more frequently executed place to a less
153    frequently executed place).
154    Moving target load instructions further in front of the branch
155    instruction that uses the target register value means that the hardware
156    has a better chance of preloading the instructions at the branch
157    target by the time the branch is reached.  This avoids bubbles
158    when a taken branch needs to flush out the pipeline.
159    Moving target load instructions out of loops means they are executed
160    less frequently.  */
161 
162 /* An obstack to hold the def-use web data structures built up for
163    migrating branch target load instructions.  */
164 static struct obstack migrate_btrl_obstack;
165 
166 /* Array indexed by basic block number, giving the set of registers
167    live in that block.  */
168 static HARD_REG_SET *btrs_live;
169 
170 /* Array indexed by basic block number, giving the set of registers live at
171   the end of that block, including any uses by a final jump insn, if any.  */
172 static HARD_REG_SET *btrs_live_at_end;
173 
174 /* Set of all target registers that we are willing to allocate.  */
175 static HARD_REG_SET all_btrs;
176 
177 /* Provide lower and upper bounds for target register numbers, so that
178    we don't need to search through all the hard registers all the time.  */
179 static int first_btr, last_btr;
180 
181 
182 
183 /* Return an estimate of the frequency of execution of block bb.  */
184 static int
basic_block_freq(const_basic_block bb)185 basic_block_freq (const_basic_block bb)
186 {
187   return bb->frequency;
188 }
189 
190 /* If the rtx at *XP references (sets or reads) any branch target
191    register, return one such register.  If EXCLUDEP is set, disregard
192    any references within that location.  */
193 static rtx *
194 find_btr_use (rtx *xp, rtx *excludep = 0)
195 {
196   subrtx_ptr_iterator::array_type array;
FOR_EACH_SUBRTX_PTR(iter,array,xp,NONCONST)197   FOR_EACH_SUBRTX_PTR (iter, array, xp, NONCONST)
198     {
199       rtx *loc = *iter;
200       if (loc == excludep)
201 	iter.skip_subrtxes ();
202       else
203 	{
204 	  const_rtx x = *loc;
205 	  if (REG_P (x)
206 	      && overlaps_hard_reg_set_p (all_btrs, GET_MODE (x), REGNO (x)))
207 	    return loc;
208 	}
209     }
210   return 0;
211 }
212 
213 /* Return true if insn is an instruction that sets a target register.
214    if CHECK_CONST is true, only return true if the source is constant.
215    If such a set is found and REGNO is nonzero, assign the register number
216    of the destination register to *REGNO.  */
217 static int
insn_sets_btr_p(const rtx_insn * insn,int check_const,int * regno)218 insn_sets_btr_p (const rtx_insn *insn, int check_const, int *regno)
219 {
220   rtx set;
221 
222   if (NONJUMP_INSN_P (insn)
223       && (set = single_set (insn)))
224     {
225       rtx dest = SET_DEST (set);
226       rtx src = SET_SRC (set);
227 
228       if (GET_CODE (dest) == SUBREG)
229 	dest = XEXP (dest, 0);
230 
231       if (REG_P (dest)
232 	  && TEST_HARD_REG_BIT (all_btrs, REGNO (dest)))
233 	{
234 	  gcc_assert (!find_btr_use (&src));
235 
236 	  if (!check_const || CONSTANT_P (src))
237 	    {
238 	      if (regno)
239 		*regno = REGNO (dest);
240 	      return 1;
241 	    }
242 	}
243     }
244   return 0;
245 }
246 
247 /* Find the group that the target register definition DEF belongs
248    to in the list starting with *ALL_BTR_DEF_GROUPS.  If no such
249    group exists, create one.  Add def to the group.  */
250 static void
find_btr_def_group(btr_def_group ** all_btr_def_groups,btr_def * def)251 find_btr_def_group (btr_def_group **all_btr_def_groups, btr_def *def)
252 {
253   if (insn_sets_btr_p (def->insn, 1, NULL))
254     {
255       btr_def_group *this_group;
256       rtx def_src = SET_SRC (single_set (def->insn));
257 
258       /* ?? This linear search is an efficiency concern, particularly
259 	 as the search will almost always fail to find a match.  */
260       for (this_group = *all_btr_def_groups;
261 	   this_group != NULL;
262 	   this_group = this_group->next)
263 	if (rtx_equal_p (def_src, this_group->src))
264 	  break;
265 
266       if (!this_group)
267 	{
268 	  this_group = XOBNEW (&migrate_btrl_obstack, btr_def_group);
269 	  this_group->src = def_src;
270 	  this_group->members = NULL;
271 	  this_group->next = *all_btr_def_groups;
272 	  *all_btr_def_groups = this_group;
273 	}
274       def->group = this_group;
275       def->next_this_group = this_group->members;
276       this_group->members = def;
277     }
278   else
279     def->group = NULL;
280 }
281 
282 /* Create a new target register definition structure, for a definition in
283    block BB, instruction INSN, and insert it into ALL_BTR_DEFS.  Return
284    the new definition.  */
285 static btr_def *
add_btr_def(btr_heap_t * all_btr_defs,basic_block bb,int insn_luid,rtx_insn * insn,unsigned int dest_reg,int other_btr_uses_before_def,btr_def_group ** all_btr_def_groups)286 add_btr_def (btr_heap_t *all_btr_defs, basic_block bb, int insn_luid,
287 	     rtx_insn *insn,
288 	     unsigned int dest_reg, int other_btr_uses_before_def,
289 	     btr_def_group **all_btr_def_groups)
290 {
291   btr_def *this_def = XOBNEW (&migrate_btrl_obstack, btr_def);
292   this_def->bb = bb;
293   this_def->luid = insn_luid;
294   this_def->insn = insn;
295   this_def->btr = dest_reg;
296   this_def->cost = basic_block_freq (bb);
297   this_def->has_ambiguous_use = 0;
298   this_def->other_btr_uses_before_def = other_btr_uses_before_def;
299   this_def->other_btr_uses_after_use = 0;
300   this_def->next_this_bb = NULL;
301   this_def->next_this_group = NULL;
302   this_def->uses = NULL;
303   this_def->live_range = NULL;
304   find_btr_def_group (all_btr_def_groups, this_def);
305 
306   all_btr_defs->insert (-this_def->cost, this_def);
307 
308   if (dump_file)
309     fprintf (dump_file,
310       "Found target reg definition: sets %u { bb %d, insn %d }%s priority %d\n",
311 	     dest_reg, bb->index, INSN_UID (insn),
312 	     (this_def->group ? "" : ":not const"), this_def->cost);
313 
314   return this_def;
315 }
316 
317 /* Create a new target register user structure, for a use in block BB,
318    instruction INSN.  Return the new user.  */
319 static btr_user *
new_btr_user(basic_block bb,int insn_luid,rtx_insn * insn)320 new_btr_user (basic_block bb, int insn_luid, rtx_insn *insn)
321 {
322   /* This instruction reads target registers.  We need
323      to decide whether we can replace all target register
324      uses easily.
325    */
326   rtx *usep = find_btr_use (&PATTERN (insn));
327   rtx use;
328   btr_user *user = NULL;
329 
330   if (usep)
331     {
332       int unambiguous_single_use;
333 
334       /* We want to ensure that USE is the only use of a target
335 	 register in INSN, so that we know that to rewrite INSN to use
336 	 a different target register, all we have to do is replace USE.  */
337       unambiguous_single_use = !find_btr_use (&PATTERN (insn), usep);
338       if (!unambiguous_single_use)
339 	usep = NULL;
340     }
341   use = usep ? *usep : NULL_RTX;
342   user = XOBNEW (&migrate_btrl_obstack, btr_user);
343   user->bb = bb;
344   user->luid = insn_luid;
345   user->insn = insn;
346   user->use = use;
347   user->other_use_this_block = 0;
348   user->next = NULL;
349   user->n_reaching_defs = 0;
350   user->first_reaching_def = -1;
351 
352   if (dump_file)
353     {
354       fprintf (dump_file, "Uses target reg: { bb %d, insn %d }",
355 	       bb->index, INSN_UID (insn));
356 
357       if (user->use)
358 	fprintf (dump_file, ": unambiguous use of reg %d\n",
359 		 REGNO (user->use));
360     }
361 
362   return user;
363 }
364 
365 /* Write the contents of S to the dump file.  */
366 static void
dump_hard_reg_set(HARD_REG_SET s)367 dump_hard_reg_set (HARD_REG_SET s)
368 {
369   int reg;
370   for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
371     if (TEST_HARD_REG_BIT (s, reg))
372       fprintf (dump_file, " %d", reg);
373 }
374 
375 /* Write the set of target regs live in block BB to the dump file.  */
376 static void
dump_btrs_live(int bb)377 dump_btrs_live (int bb)
378 {
379   fprintf (dump_file, "BB%d live:", bb);
380   dump_hard_reg_set (btrs_live[bb]);
381   fprintf (dump_file, "\n");
382 }
383 
384 /* REGNO is the number of a branch target register that is being used or
385    set.  USERS_THIS_BB is a list of preceding branch target register users;
386    If any of them use the same register, set their other_use_this_block
387    flag.  */
388 static void
note_other_use_this_block(unsigned int regno,btr_user * users_this_bb)389 note_other_use_this_block (unsigned int regno, btr_user *users_this_bb)
390 {
391   btr_user *user;
392 
393   for (user = users_this_bb; user != NULL; user = user->next)
394     if (user->use && REGNO (user->use) == regno)
395       user->other_use_this_block = 1;
396 }
397 
398 struct defs_uses_info {
399   btr_user *users_this_bb;
400   HARD_REG_SET btrs_written_in_block;
401   HARD_REG_SET btrs_live_in_block;
402   sbitmap bb_gen;
403   sbitmap *btr_defset;
404 };
405 
406 /* Called via note_stores or directly to register stores into /
407    clobbers of a branch target register DEST that are not recognized as
408    straightforward definitions.  DATA points to information about the
409    current basic block that needs updating.  */
410 static void
note_btr_set(rtx dest,const_rtx set ATTRIBUTE_UNUSED,void * data)411 note_btr_set (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
412 {
413   defs_uses_info *info = (defs_uses_info *) data;
414   int regno, end_regno;
415 
416   if (!REG_P (dest))
417     return;
418   regno = REGNO (dest);
419   end_regno = END_REGNO (dest);
420   for (; regno < end_regno; regno++)
421     if (TEST_HARD_REG_BIT (all_btrs, regno))
422       {
423 	note_other_use_this_block (regno, info->users_this_bb);
424 	SET_HARD_REG_BIT (info->btrs_written_in_block, regno);
425 	SET_HARD_REG_BIT (info->btrs_live_in_block, regno);
426 	bitmap_and_compl (info->bb_gen, info->bb_gen,
427 			    info->btr_defset[regno - first_btr]);
428       }
429 }
430 
431 static void
compute_defs_uses_and_gen(btr_heap_t * all_btr_defs,btr_def ** def_array,btr_user ** use_array,sbitmap * btr_defset,sbitmap * bb_gen,HARD_REG_SET * btrs_written)432 compute_defs_uses_and_gen (btr_heap_t *all_btr_defs, btr_def **def_array,
433 			   btr_user **use_array, sbitmap *btr_defset,
434 			   sbitmap *bb_gen, HARD_REG_SET *btrs_written)
435 {
436   /* Scan the code building up the set of all defs and all uses.
437      For each target register, build the set of defs of that register.
438      For each block, calculate the set of target registers
439      written in that block.
440      Also calculate the set of btrs ever live in that block.
441   */
442   int i;
443   int insn_luid = 0;
444   btr_def_group *all_btr_def_groups = NULL;
445   defs_uses_info info;
446 
447   bitmap_vector_clear (bb_gen, last_basic_block_for_fn (cfun));
448   for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
449     {
450       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
451       int reg;
452       btr_def *defs_this_bb = NULL;
453       rtx_insn *insn;
454       rtx_insn *last;
455       int can_throw = 0;
456 
457       info.users_this_bb = NULL;
458       info.bb_gen = bb_gen[i];
459       info.btr_defset = btr_defset;
460 
461       CLEAR_HARD_REG_SET (info.btrs_live_in_block);
462       CLEAR_HARD_REG_SET (info.btrs_written_in_block);
463       for (reg = first_btr; reg <= last_btr; reg++)
464 	if (TEST_HARD_REG_BIT (all_btrs, reg)
465 	    && REGNO_REG_SET_P (df_get_live_in (bb), reg))
466 	  SET_HARD_REG_BIT (info.btrs_live_in_block, reg);
467 
468       for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb));
469 	   insn != last;
470 	   insn = NEXT_INSN (insn), insn_luid++)
471 	{
472 	  if (INSN_P (insn))
473 	    {
474 	      int regno;
475 	      int insn_uid = INSN_UID (insn);
476 
477 	      if (insn_sets_btr_p (insn, 0, &regno))
478 		{
479 		  btr_def *def = add_btr_def (
480 		      all_btr_defs, bb, insn_luid, insn, regno,
481 		      TEST_HARD_REG_BIT (info.btrs_live_in_block, regno),
482 		      &all_btr_def_groups);
483 
484 		  def_array[insn_uid] = def;
485 		  SET_HARD_REG_BIT (info.btrs_written_in_block, regno);
486 		  SET_HARD_REG_BIT (info.btrs_live_in_block, regno);
487 		  bitmap_and_compl (bb_gen[i], bb_gen[i],
488 				      btr_defset[regno - first_btr]);
489 		  bitmap_set_bit (bb_gen[i], insn_uid);
490 		  def->next_this_bb = defs_this_bb;
491 		  defs_this_bb = def;
492 		  bitmap_set_bit (btr_defset[regno - first_btr], insn_uid);
493 		  note_other_use_this_block (regno, info.users_this_bb);
494 		}
495 	      /* Check for the blockage emitted by expand_nl_goto_receiver.  */
496 	      else if (cfun->has_nonlocal_label
497 		       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE)
498 		{
499 		  btr_user *user;
500 
501 		  /* Do the equivalent of calling note_other_use_this_block
502 		     for every target register.  */
503 		  for (user = info.users_this_bb; user != NULL;
504 		       user = user->next)
505 		    if (user->use)
506 		      user->other_use_this_block = 1;
507 		  IOR_HARD_REG_SET (info.btrs_written_in_block, all_btrs);
508 		  IOR_HARD_REG_SET (info.btrs_live_in_block, all_btrs);
509 		  bitmap_clear (info.bb_gen);
510 		}
511 	      else
512 		{
513 		  if (find_btr_use (&PATTERN (insn)))
514 		    {
515 		      btr_user *user = new_btr_user (bb, insn_luid, insn);
516 
517 		      use_array[insn_uid] = user;
518 		      if (user->use)
519 			SET_HARD_REG_BIT (info.btrs_live_in_block,
520 					  REGNO (user->use));
521 		      else
522 			{
523 			  int reg;
524 			  for (reg = first_btr; reg <= last_btr; reg++)
525 			    if (TEST_HARD_REG_BIT (all_btrs, reg)
526 				&& refers_to_regno_p (reg, user->insn))
527 			      {
528 				note_other_use_this_block (reg,
529 							   info.users_this_bb);
530 				SET_HARD_REG_BIT (info.btrs_live_in_block, reg);
531 			      }
532 			  note_stores (PATTERN (insn), note_btr_set, &info);
533 			}
534 		      user->next = info.users_this_bb;
535 		      info.users_this_bb = user;
536 		    }
537 		  if (CALL_P (insn))
538 		    {
539 		      HARD_REG_SET *clobbered = &call_used_reg_set;
540 		      HARD_REG_SET call_saved;
541 		      rtx pat = PATTERN (insn);
542 		      int i;
543 
544 		      /* Check for sibcall.  */
545 		      if (GET_CODE (pat) == PARALLEL)
546 			for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
547 			  if (ANY_RETURN_P (XVECEXP (pat, 0, i)))
548 			    {
549 			      COMPL_HARD_REG_SET (call_saved,
550 						  call_used_reg_set);
551 			      clobbered = &call_saved;
552 			    }
553 
554 		      for (regno = first_btr; regno <= last_btr; regno++)
555 			if (TEST_HARD_REG_BIT (*clobbered, regno))
556 			  note_btr_set (regno_reg_rtx[regno], NULL_RTX, &info);
557 		    }
558 		}
559 	    }
560 	}
561 
562       COPY_HARD_REG_SET (btrs_live[i], info.btrs_live_in_block);
563       COPY_HARD_REG_SET (btrs_written[i], info.btrs_written_in_block);
564 
565       REG_SET_TO_HARD_REG_SET (btrs_live_at_end[i], df_get_live_out (bb));
566       /* If this block ends in a jump insn, add any uses or even clobbers
567 	 of branch target registers that it might have.  */
568       for (insn = BB_END (bb); insn != BB_HEAD (bb) && ! INSN_P (insn); )
569 	insn = PREV_INSN (insn);
570       /* ??? for the fall-through edge, it would make sense to insert the
571 	 btr set on the edge, but that would require to split the block
572 	 early on so that we can distinguish between dominance from the fall
573 	 through edge - which can use the call-clobbered registers - from
574 	 dominance by the throw edge.  */
575       if (can_throw_internal (insn))
576 	{
577 	  HARD_REG_SET tmp;
578 
579 	  COPY_HARD_REG_SET (tmp, call_used_reg_set);
580 	  AND_HARD_REG_SET (tmp, all_btrs);
581 	  IOR_HARD_REG_SET (btrs_live_at_end[i], tmp);
582 	  can_throw = 1;
583 	}
584       if (can_throw || JUMP_P (insn))
585 	{
586 	  int regno;
587 
588 	  for (regno = first_btr; regno <= last_btr; regno++)
589 	    if (refers_to_regno_p (regno, insn))
590 	      SET_HARD_REG_BIT (btrs_live_at_end[i], regno);
591 	}
592 
593       if (dump_file)
594 	dump_btrs_live (i);
595     }
596 }
597 
598 static void
compute_kill(sbitmap * bb_kill,sbitmap * btr_defset,HARD_REG_SET * btrs_written)599 compute_kill (sbitmap *bb_kill, sbitmap *btr_defset,
600 	      HARD_REG_SET *btrs_written)
601 {
602   int i;
603   int regno;
604 
605   /* For each basic block, form the set BB_KILL - the set
606      of definitions that the block kills.  */
607   bitmap_vector_clear (bb_kill, last_basic_block_for_fn (cfun));
608   for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
609     {
610       for (regno = first_btr; regno <= last_btr; regno++)
611 	if (TEST_HARD_REG_BIT (all_btrs, regno)
612 	    && TEST_HARD_REG_BIT (btrs_written[i], regno))
613 	  bitmap_ior (bb_kill[i], bb_kill[i],
614 			  btr_defset[regno - first_btr]);
615     }
616 }
617 
618 static void
compute_out(sbitmap * bb_out,sbitmap * bb_gen,sbitmap * bb_kill,int max_uid)619 compute_out (sbitmap *bb_out, sbitmap *bb_gen, sbitmap *bb_kill, int max_uid)
620 {
621   /* Perform iterative dataflow:
622       Initially, for all blocks, BB_OUT = BB_GEN.
623       For each block,
624 	BB_IN  = union over predecessors of BB_OUT(pred)
625 	BB_OUT = (BB_IN - BB_KILL) + BB_GEN
626      Iterate until the bb_out sets stop growing.  */
627   int i;
628   int changed;
629   sbitmap bb_in = sbitmap_alloc (max_uid);
630 
631   for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
632     bitmap_copy (bb_out[i], bb_gen[i]);
633 
634   changed = 1;
635   while (changed)
636     {
637       changed = 0;
638       for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
639 	{
640 	  bitmap_union_of_preds (bb_in, bb_out, BASIC_BLOCK_FOR_FN (cfun, i));
641 	  changed |= bitmap_ior_and_compl (bb_out[i], bb_gen[i],
642 					       bb_in, bb_kill[i]);
643 	}
644     }
645   sbitmap_free (bb_in);
646 }
647 
648 static void
link_btr_uses(btr_def ** def_array,btr_user ** use_array,sbitmap * bb_out,sbitmap * btr_defset,int max_uid)649 link_btr_uses (btr_def **def_array, btr_user **use_array, sbitmap *bb_out,
650 	       sbitmap *btr_defset, int max_uid)
651 {
652   int i;
653   sbitmap reaching_defs = sbitmap_alloc (max_uid);
654 
655   /* Link uses to the uses lists of all of their reaching defs.
656      Count up the number of reaching defs of each use.  */
657   for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
658     {
659       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
660       rtx_insn *insn;
661       rtx_insn *last;
662 
663       bitmap_union_of_preds (reaching_defs, bb_out, BASIC_BLOCK_FOR_FN (cfun, i));
664       for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb));
665 	   insn != last;
666 	   insn = NEXT_INSN (insn))
667 	{
668 	  if (INSN_P (insn))
669 	    {
670 	      int insn_uid = INSN_UID (insn);
671 
672 	      btr_def *def   = def_array[insn_uid];
673 	      btr_user *user = use_array[insn_uid];
674 	      if (def != NULL)
675 		{
676 		  /* Remove all reaching defs of regno except
677 		     for this one.  */
678 		  bitmap_and_compl (reaching_defs, reaching_defs,
679 				      btr_defset[def->btr - first_btr]);
680 		  bitmap_set_bit (reaching_defs, insn_uid);
681 		}
682 
683 	      if (user != NULL)
684 		{
685 		  /* Find all the reaching defs for this use.  */
686 		  sbitmap reaching_defs_of_reg = sbitmap_alloc (max_uid);
687 		  unsigned int uid = 0;
688 		  sbitmap_iterator sbi;
689 
690 		  if (user->use)
691 		    bitmap_and (
692 		      reaching_defs_of_reg,
693 		      reaching_defs,
694 		      btr_defset[REGNO (user->use) - first_btr]);
695 		  else
696 		    {
697 		      int reg;
698 
699 		      bitmap_clear (reaching_defs_of_reg);
700 		      for (reg = first_btr; reg <= last_btr; reg++)
701 			if (TEST_HARD_REG_BIT (all_btrs, reg)
702 			    && refers_to_regno_p (reg, user->insn))
703 			  bitmap_or_and (reaching_defs_of_reg,
704 			    reaching_defs_of_reg,
705 			    reaching_defs,
706 			    btr_defset[reg - first_btr]);
707 		    }
708 		  EXECUTE_IF_SET_IN_BITMAP (reaching_defs_of_reg, 0, uid, sbi)
709 		    {
710 		      btr_def *def = def_array[uid];
711 
712 		      /* We now know that def reaches user.  */
713 
714 		      if (dump_file)
715 			fprintf (dump_file,
716 			  "Def in insn %d reaches use in insn %d\n",
717 			  uid, insn_uid);
718 
719 		      user->n_reaching_defs++;
720 		      if (!user->use)
721 			def->has_ambiguous_use = 1;
722 		      if (user->first_reaching_def != -1)
723 			{ /* There is more than one reaching def.  This is
724 			     a rare case, so just give up on this def/use
725 			     web when it occurs.  */
726 			  def->has_ambiguous_use = 1;
727 			  def_array[user->first_reaching_def]
728 			    ->has_ambiguous_use = 1;
729 			  if (dump_file)
730 			    fprintf (dump_file,
731 				     "(use %d has multiple reaching defs)\n",
732 				     insn_uid);
733 			}
734 		      else
735 			user->first_reaching_def = uid;
736 		      if (user->other_use_this_block)
737 			def->other_btr_uses_after_use = 1;
738 		      user->next = def->uses;
739 		      def->uses = user;
740 		    }
741 		  sbitmap_free (reaching_defs_of_reg);
742 		}
743 
744 	      if (CALL_P (insn))
745 		{
746 		  int regno;
747 
748 		  for (regno = first_btr; regno <= last_btr; regno++)
749 		    if (TEST_HARD_REG_BIT (all_btrs, regno)
750 			&& TEST_HARD_REG_BIT (call_used_reg_set, regno))
751 		      bitmap_and_compl (reaching_defs, reaching_defs,
752 					  btr_defset[regno - first_btr]);
753 		}
754 	    }
755 	}
756     }
757   sbitmap_free (reaching_defs);
758 }
759 
760 static void
build_btr_def_use_webs(btr_heap_t * all_btr_defs)761 build_btr_def_use_webs (btr_heap_t *all_btr_defs)
762 {
763   const int max_uid = get_max_uid ();
764   btr_def  **def_array   = XCNEWVEC (btr_def *, max_uid);
765   btr_user **use_array   = XCNEWVEC (btr_user *, max_uid);
766   sbitmap *btr_defset   = sbitmap_vector_alloc (
767 			   (last_btr - first_btr) + 1, max_uid);
768   sbitmap *bb_gen = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
769 					  max_uid);
770   HARD_REG_SET *btrs_written = XCNEWVEC (HARD_REG_SET,
771 					 last_basic_block_for_fn (cfun));
772   sbitmap *bb_kill;
773   sbitmap *bb_out;
774 
775   bitmap_vector_clear (btr_defset, (last_btr - first_btr) + 1);
776 
777   compute_defs_uses_and_gen (all_btr_defs, def_array, use_array, btr_defset,
778 			     bb_gen, btrs_written);
779 
780   bb_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid);
781   compute_kill (bb_kill, btr_defset, btrs_written);
782   free (btrs_written);
783 
784   bb_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid);
785   compute_out (bb_out, bb_gen, bb_kill, max_uid);
786 
787   sbitmap_vector_free (bb_gen);
788   sbitmap_vector_free (bb_kill);
789 
790   link_btr_uses (def_array, use_array, bb_out, btr_defset, max_uid);
791 
792   sbitmap_vector_free (bb_out);
793   sbitmap_vector_free (btr_defset);
794   free (use_array);
795   free (def_array);
796 }
797 
798 /* Return true if basic block BB contains the start or end of the
799    live range of the definition DEF, AND there are other live
800    ranges of the same target register that include BB.  */
801 static int
block_at_edge_of_live_range_p(int bb,btr_def * def)802 block_at_edge_of_live_range_p (int bb, btr_def *def)
803 {
804   if (def->other_btr_uses_before_def
805       && BASIC_BLOCK_FOR_FN (cfun, bb) == def->bb)
806     return 1;
807   else if (def->other_btr_uses_after_use)
808     {
809       btr_user *user;
810       for (user = def->uses; user != NULL; user = user->next)
811 	if (BASIC_BLOCK_FOR_FN (cfun, bb) == user->bb)
812 	  return 1;
813     }
814   return 0;
815 }
816 
817 /* We are removing the def/use web DEF.  The target register
818    used in this web is therefore no longer live in the live range
819    of this web, so remove it from the live set of all basic blocks
820    in the live range of the web.
821    Blocks at the boundary of the live range may contain other live
822    ranges for the same target register, so we have to be careful
823    to remove the target register from the live set of these blocks
824    only if they do not contain other live ranges for the same register.  */
825 static void
clear_btr_from_live_range(btr_def * def)826 clear_btr_from_live_range (btr_def *def)
827 {
828   unsigned bb;
829   bitmap_iterator bi;
830 
831   EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
832     {
833       if ((!def->other_btr_uses_before_def
834 	   && !def->other_btr_uses_after_use)
835 	  || !block_at_edge_of_live_range_p (bb, def))
836 	{
837 	  CLEAR_HARD_REG_BIT (btrs_live[bb], def->btr);
838 	  CLEAR_HARD_REG_BIT (btrs_live_at_end[bb], def->btr);
839 	  if (dump_file)
840 	    dump_btrs_live (bb);
841 	}
842     }
843  if (def->own_end)
844    CLEAR_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr);
845 }
846 
847 
848 /* We are adding the def/use web DEF.  Add the target register used
849    in this web to the live set of all of the basic blocks that contain
850    the live range of the web.
851    If OWN_END is set, also show that the register is live from our
852    definitions at the end of the basic block where it is defined.  */
853 static void
add_btr_to_live_range(btr_def * def,int own_end)854 add_btr_to_live_range (btr_def *def, int own_end)
855 {
856   unsigned bb;
857   bitmap_iterator bi;
858 
859   EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
860     {
861       SET_HARD_REG_BIT (btrs_live[bb], def->btr);
862       SET_HARD_REG_BIT (btrs_live_at_end[bb], def->btr);
863       if (dump_file)
864 	dump_btrs_live (bb);
865     }
866   if (own_end)
867     {
868       SET_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr);
869       def->own_end = 1;
870     }
871 }
872 
873 /* Update a live range to contain the basic block NEW_BLOCK, and all
874    blocks on paths between the existing live range and NEW_BLOCK.
875    HEAD is a block contained in the existing live range that dominates
876    all other blocks in the existing live range.
877    Also add to the set BTRS_LIVE_IN_RANGE all target registers that
878    are live in the blocks that we add to the live range.
879    If FULL_RANGE is set, include the full live range of NEW_BB;
880    otherwise, if NEW_BB dominates HEAD_BB, only add registers that
881    are life at the end of NEW_BB for NEW_BB itself.
882    It is a precondition that either NEW_BLOCK dominates HEAD,or
883    HEAD dom NEW_BLOCK.  This is used to speed up the
884    implementation of this function.  */
885 static void
augment_live_range(bitmap live_range,HARD_REG_SET * btrs_live_in_range,basic_block head_bb,basic_block new_bb,int full_range)886 augment_live_range (bitmap live_range, HARD_REG_SET *btrs_live_in_range,
887 		    basic_block head_bb, basic_block new_bb, int full_range)
888 {
889   basic_block *worklist, *tos;
890 
891   tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1);
892 
893   if (dominated_by_p (CDI_DOMINATORS, new_bb, head_bb))
894     {
895       if (new_bb == head_bb)
896 	{
897 	  if (full_range)
898 	    IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_bb->index]);
899 	  free (tos);
900 	  return;
901 	}
902       *tos++ = new_bb;
903     }
904   else
905     {
906       edge e;
907       edge_iterator ei;
908       int new_block = new_bb->index;
909 
910       gcc_assert (dominated_by_p (CDI_DOMINATORS, head_bb, new_bb));
911 
912       IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[head_bb->index]);
913       bitmap_set_bit (live_range, new_block);
914       /* A previous btr migration could have caused a register to be
915 	live just at the end of new_block which we need in full, so
916 	use trs_live_at_end even if full_range is set.  */
917       IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live_at_end[new_block]);
918       if (full_range)
919 	IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_block]);
920       if (dump_file)
921 	{
922 	  fprintf (dump_file,
923 		   "Adding end of block %d and rest of %d to live range\n",
924 		   new_block, head_bb->index);
925 	  fprintf (dump_file,"Now live btrs are ");
926 	  dump_hard_reg_set (*btrs_live_in_range);
927 	  fprintf (dump_file, "\n");
928 	}
929       FOR_EACH_EDGE (e, ei, head_bb->preds)
930 	*tos++ = e->src;
931     }
932 
933   while (tos != worklist)
934     {
935       basic_block bb = *--tos;
936       if (!bitmap_bit_p (live_range, bb->index))
937 	{
938 	  edge e;
939 	  edge_iterator ei;
940 
941 	  bitmap_set_bit (live_range, bb->index);
942 	  IOR_HARD_REG_SET (*btrs_live_in_range,
943 	    btrs_live[bb->index]);
944 	  /* A previous btr migration could have caused a register to be
945 	     live just at the end of a block which we need in full.  */
946 	  IOR_HARD_REG_SET (*btrs_live_in_range,
947 	    btrs_live_at_end[bb->index]);
948 	  if (dump_file)
949 	    {
950 	      fprintf (dump_file,
951 		"Adding block %d to live range\n", bb->index);
952 	      fprintf (dump_file,"Now live btrs are ");
953 	      dump_hard_reg_set (*btrs_live_in_range);
954 	      fprintf (dump_file, "\n");
955 	    }
956 
957 	  FOR_EACH_EDGE (e, ei, bb->preds)
958 	    {
959 	      basic_block pred = e->src;
960 	      if (!bitmap_bit_p (live_range, pred->index))
961 		*tos++ = pred;
962 	    }
963 	}
964     }
965 
966   free (worklist);
967 }
968 
969 /*  Return the most desirable target register that is not in
970     the set USED_BTRS.  */
971 static int
choose_btr(HARD_REG_SET used_btrs)972 choose_btr (HARD_REG_SET used_btrs)
973 {
974   int i;
975 
976   if (!hard_reg_set_subset_p (all_btrs, used_btrs))
977     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
978       {
979 #ifdef REG_ALLOC_ORDER
980 	int regno = reg_alloc_order[i];
981 #else
982 	int regno = i;
983 #endif
984 	if (TEST_HARD_REG_BIT (all_btrs, regno)
985 	    && !TEST_HARD_REG_BIT (used_btrs, regno))
986 	  return regno;
987       }
988   return -1;
989 }
990 
991 /* Calculate the set of basic blocks that contain the live range of
992    the def/use web DEF.
993    Also calculate the set of target registers that are live at time
994    in this live range, but ignore the live range represented by DEF
995    when calculating this set.  */
996 static void
btr_def_live_range(btr_def * def,HARD_REG_SET * btrs_live_in_range)997 btr_def_live_range (btr_def *def, HARD_REG_SET *btrs_live_in_range)
998 {
999   if (!def->live_range)
1000     {
1001       btr_user *user;
1002 
1003       def->live_range = BITMAP_ALLOC (NULL);
1004 
1005       bitmap_set_bit (def->live_range, def->bb->index);
1006       COPY_HARD_REG_SET (*btrs_live_in_range,
1007 			 (flag_btr_bb_exclusive
1008 			  ? btrs_live : btrs_live_at_end)[def->bb->index]);
1009 
1010       for (user = def->uses; user != NULL; user = user->next)
1011 	augment_live_range (def->live_range, btrs_live_in_range,
1012 			    def->bb, user->bb,
1013 			    (flag_btr_bb_exclusive
1014 			     || user->insn != BB_END (def->bb)
1015 			     || !JUMP_P (user->insn)));
1016     }
1017   else
1018     {
1019       /* def->live_range is accurate, but we need to recompute
1020 	 the set of target registers live over it, because migration
1021 	 of other PT instructions may have affected it.
1022       */
1023       unsigned bb;
1024       unsigned def_bb = flag_btr_bb_exclusive ? -1 : def->bb->index;
1025       bitmap_iterator bi;
1026 
1027       CLEAR_HARD_REG_SET (*btrs_live_in_range);
1028       EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
1029 	{
1030 	  IOR_HARD_REG_SET (*btrs_live_in_range,
1031 			    (def_bb == bb
1032 			     ? btrs_live_at_end : btrs_live) [bb]);
1033 	}
1034     }
1035   if (!def->other_btr_uses_before_def &&
1036       !def->other_btr_uses_after_use)
1037     CLEAR_HARD_REG_BIT (*btrs_live_in_range, def->btr);
1038 }
1039 
1040 /* Merge into the def/use web DEF any other def/use webs in the same
1041    group that are dominated by DEF, provided that there is a target
1042    register available to allocate to the merged web.  */
1043 static void
combine_btr_defs(btr_def * def,HARD_REG_SET * btrs_live_in_range)1044 combine_btr_defs (btr_def *def, HARD_REG_SET *btrs_live_in_range)
1045 {
1046   btr_def *other_def;
1047 
1048   for (other_def = def->group->members;
1049        other_def != NULL;
1050        other_def = other_def->next_this_group)
1051     {
1052       if (other_def != def
1053 	  && other_def->uses != NULL
1054 	  && ! other_def->has_ambiguous_use
1055 	  && dominated_by_p (CDI_DOMINATORS, other_def->bb, def->bb))
1056 	{
1057 	  /* def->bb dominates the other def, so def and other_def could
1058 	     be combined.  */
1059 	  /* Merge their live ranges, and get the set of
1060 	     target registers live over the merged range.  */
1061 	  int btr;
1062 	  HARD_REG_SET combined_btrs_live;
1063 	  bitmap combined_live_range = BITMAP_ALLOC (NULL);
1064 	  btr_user *user;
1065 
1066 	  if (other_def->live_range == NULL)
1067 	    {
1068 	      HARD_REG_SET dummy_btrs_live_in_range;
1069 	      btr_def_live_range (other_def, &dummy_btrs_live_in_range);
1070 	    }
1071 	  COPY_HARD_REG_SET (combined_btrs_live, *btrs_live_in_range);
1072 	  bitmap_copy (combined_live_range, def->live_range);
1073 
1074 	  for (user = other_def->uses; user != NULL; user = user->next)
1075 	    augment_live_range (combined_live_range, &combined_btrs_live,
1076 				def->bb, user->bb,
1077 				(flag_btr_bb_exclusive
1078 				 || user->insn != BB_END (def->bb)
1079 				 || !JUMP_P (user->insn)));
1080 
1081 	  btr = choose_btr (combined_btrs_live);
1082 	  if (btr != -1)
1083 	    {
1084 	      /* We can combine them.  */
1085 	      if (dump_file)
1086 		fprintf (dump_file,
1087 			 "Combining def in insn %d with def in insn %d\n",
1088 			 INSN_UID (other_def->insn), INSN_UID (def->insn));
1089 
1090 	      def->btr = btr;
1091 	      user = other_def->uses;
1092 	      while (user != NULL)
1093 		{
1094 		  btr_user *next = user->next;
1095 
1096 		  user->next = def->uses;
1097 		  def->uses = user;
1098 		  user = next;
1099 		}
1100 	      /* Combining def/use webs can make target registers live
1101 		 after uses where they previously were not.  This means
1102 		 some REG_DEAD notes may no longer be correct.  We could
1103 		 be more precise about this if we looked at the combined
1104 		 live range, but here I just delete any REG_DEAD notes
1105 		 in case they are no longer correct.  */
1106 	      for (user = def->uses; user != NULL; user = user->next)
1107 		remove_note (user->insn,
1108 			     find_regno_note (user->insn, REG_DEAD,
1109 					      REGNO (user->use)));
1110 	      clear_btr_from_live_range (other_def);
1111 	      other_def->uses = NULL;
1112 	      bitmap_copy (def->live_range, combined_live_range);
1113 	      if (other_def->btr == btr && other_def->other_btr_uses_after_use)
1114 		def->other_btr_uses_after_use = 1;
1115 	      COPY_HARD_REG_SET (*btrs_live_in_range, combined_btrs_live);
1116 
1117 	      /* Delete the old target register initialization.  */
1118 	      delete_insn (other_def->insn);
1119 
1120 	    }
1121 	  BITMAP_FREE (combined_live_range);
1122 	}
1123     }
1124 }
1125 
1126 /* Move the definition DEF from its current position to basic
1127    block NEW_DEF_BB, and modify it to use branch target register BTR.
1128    Delete the old defining insn, and insert a new one in NEW_DEF_BB.
1129    Update all reaching uses of DEF in the RTL to use BTR.
1130    If this new position means that other defs in the
1131    same group can be combined with DEF then combine them.  */
1132 static void
move_btr_def(basic_block new_def_bb,int btr,btr_def * def,bitmap live_range,HARD_REG_SET * btrs_live_in_range)1133 move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range,
1134 	     HARD_REG_SET *btrs_live_in_range)
1135 {
1136   /* We can move the instruction.
1137      Set a target register in block NEW_DEF_BB to the value
1138      needed for this target register definition.
1139      Replace all uses of the old target register definition by
1140      uses of the new definition.  Delete the old definition.  */
1141   basic_block b = new_def_bb;
1142   rtx_insn *insp = BB_HEAD (b);
1143   rtx_insn *old_insn = def->insn;
1144   rtx src;
1145   rtx btr_rtx;
1146   rtx_insn *new_insn;
1147   machine_mode btr_mode;
1148   btr_user *user;
1149   rtx set;
1150 
1151   if (dump_file)
1152     fprintf(dump_file, "migrating to basic block %d, using reg %d\n",
1153 	    new_def_bb->index, btr);
1154 
1155   clear_btr_from_live_range (def);
1156   def->btr = btr;
1157   def->bb = new_def_bb;
1158   def->luid = 0;
1159   def->cost = basic_block_freq (new_def_bb);
1160   bitmap_copy (def->live_range, live_range);
1161   combine_btr_defs (def, btrs_live_in_range);
1162   btr = def->btr;
1163   def->other_btr_uses_before_def
1164     = TEST_HARD_REG_BIT (btrs_live[b->index], btr) ? 1 : 0;
1165   add_btr_to_live_range (def, 1);
1166   if (LABEL_P (insp))
1167     insp = NEXT_INSN (insp);
1168   /* N.B.: insp is expected to be NOTE_INSN_BASIC_BLOCK now.  Some
1169      optimizations can result in insp being both first and last insn of
1170      its basic block.  */
1171   /* ?? some assertions to check that insp is sensible? */
1172 
1173   if (def->other_btr_uses_before_def)
1174     {
1175       insp = BB_END (b);
1176       for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp))
1177 	gcc_assert (insp != BB_HEAD (b));
1178 
1179       if (JUMP_P (insp) || can_throw_internal (insp))
1180 	insp = PREV_INSN (insp);
1181     }
1182 
1183   set = single_set (old_insn);
1184   src = SET_SRC (set);
1185   btr_mode = GET_MODE (SET_DEST (set));
1186   btr_rtx = gen_rtx_REG (btr_mode, btr);
1187 
1188   new_insn = gen_move_insn (btr_rtx, src);
1189 
1190   /* Insert target register initialization at head of basic block.  */
1191   def->insn = emit_insn_after (new_insn, insp);
1192 
1193   df_set_regs_ever_live (btr, true);
1194 
1195   if (dump_file)
1196     fprintf (dump_file, "New pt is insn %d, inserted after insn %d\n",
1197 	     INSN_UID (def->insn), INSN_UID (insp));
1198 
1199   /* Delete the old target register initialization.  */
1200   delete_insn (old_insn);
1201 
1202   /* Replace each use of the old target register by a use of the new target
1203      register.  */
1204   for (user = def->uses; user != NULL; user = user->next)
1205     {
1206       /* Some extra work here to ensure consistent modes, because
1207 	 it seems that a target register REG rtx can be given a different
1208 	 mode depending on the context (surely that should not be
1209 	 the case?).  */
1210       rtx replacement_rtx;
1211       if (GET_MODE (user->use) == GET_MODE (btr_rtx)
1212 	  || GET_MODE (user->use) == VOIDmode)
1213 	replacement_rtx = btr_rtx;
1214       else
1215 	replacement_rtx = gen_rtx_REG (GET_MODE (user->use), btr);
1216       validate_replace_rtx (user->use, replacement_rtx, user->insn);
1217       user->use = replacement_rtx;
1218     }
1219 }
1220 
1221 /* We anticipate intra-block scheduling to be done.  See if INSN could move
1222    up within BB by N_INSNS.  */
1223 static int
can_move_up(const_basic_block bb,const rtx_insn * insn,int n_insns)1224 can_move_up (const_basic_block bb, const rtx_insn *insn, int n_insns)
1225 {
1226   while (insn != BB_HEAD (bb) && n_insns > 0)
1227     {
1228       insn = PREV_INSN (insn);
1229       /* ??? What if we have an anti-dependency that actually prevents the
1230 	 scheduler from doing the move?  We'd like to re-allocate the register,
1231 	 but not necessarily put the load into another basic block.  */
1232       if (INSN_P (insn))
1233 	n_insns--;
1234     }
1235   return n_insns <= 0;
1236 }
1237 
1238 /* Attempt to migrate the target register definition DEF to an
1239    earlier point in the flowgraph.
1240 
1241    It is a precondition of this function that DEF is migratable:
1242    i.e. it has a constant source, and all uses are unambiguous.
1243 
1244    Only migrations that reduce the cost of DEF will be made.
1245    MIN_COST is the lower bound on the cost of the DEF after migration.
1246    If we migrate DEF so that its cost falls below MIN_COST,
1247    then we do not attempt to migrate further.  The idea is that
1248    we migrate definitions in a priority order based on their cost,
1249    when the cost of this definition falls below MIN_COST, then
1250    there is another definition with cost == MIN_COST which now
1251    has a higher priority than this definition.
1252 
1253    Return nonzero if there may be benefit from attempting to
1254    migrate this DEF further (i.e. we have reduced the cost below
1255    MIN_COST, but we may be able to reduce it further).
1256    Return zero if no further migration is possible.  */
1257 static int
migrate_btr_def(btr_def * def,int min_cost)1258 migrate_btr_def (btr_def *def, int min_cost)
1259 {
1260   bitmap live_range;
1261   HARD_REG_SET btrs_live_in_range;
1262   int btr_used_near_def = 0;
1263   int def_basic_block_freq;
1264   basic_block attempt;
1265   int give_up = 0;
1266   int def_moved = 0;
1267   btr_user *user;
1268   int def_latency;
1269 
1270   if (dump_file)
1271     fprintf (dump_file,
1272 	     "Attempting to migrate pt from insn %d (cost = %d, min_cost = %d) ... ",
1273 	     INSN_UID (def->insn), def->cost, min_cost);
1274 
1275   if (!def->group || def->has_ambiguous_use)
1276     /* These defs are not migratable.  */
1277     {
1278       if (dump_file)
1279 	fprintf (dump_file, "it's not migratable\n");
1280       return 0;
1281     }
1282 
1283   if (!def->uses)
1284     /* We have combined this def with another in the same group, so
1285        no need to consider it further.
1286     */
1287     {
1288       if (dump_file)
1289 	fprintf (dump_file, "it's already combined with another pt\n");
1290       return 0;
1291     }
1292 
1293   btr_def_live_range (def, &btrs_live_in_range);
1294   live_range = BITMAP_ALLOC (NULL);
1295   bitmap_copy (live_range, def->live_range);
1296 
1297 #ifdef INSN_SCHEDULING
1298   def_latency = insn_default_latency (def->insn) * issue_rate;
1299 #else
1300   def_latency = issue_rate;
1301 #endif
1302 
1303   for (user = def->uses; user != NULL; user = user->next)
1304     {
1305       if (user->bb == def->bb
1306 	  && user->luid > def->luid
1307 	  && (def->luid + def_latency) > user->luid
1308 	  && ! can_move_up (def->bb, def->insn,
1309 			    (def->luid + def_latency) - user->luid))
1310 	{
1311 	  btr_used_near_def = 1;
1312 	  break;
1313 	}
1314     }
1315 
1316   def_basic_block_freq = basic_block_freq (def->bb);
1317 
1318   for (attempt = get_immediate_dominator (CDI_DOMINATORS, def->bb);
1319        !give_up && attempt && attempt != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1320        && def->cost >= min_cost;
1321        attempt = get_immediate_dominator (CDI_DOMINATORS, attempt))
1322     {
1323       /* Try to move the instruction that sets the target register into
1324 	 basic block ATTEMPT.  */
1325       int try_freq = basic_block_freq (attempt);
1326       edge_iterator ei;
1327       edge e;
1328 
1329       /* If ATTEMPT has abnormal edges, skip it.  */
1330       FOR_EACH_EDGE (e, ei, attempt->succs)
1331 	if (e->flags & EDGE_COMPLEX)
1332 	  break;
1333       if (e)
1334 	continue;
1335 
1336       if (dump_file)
1337 	fprintf (dump_file, "trying block %d ...", attempt->index);
1338 
1339       if (try_freq < def_basic_block_freq
1340 	  || (try_freq == def_basic_block_freq && btr_used_near_def))
1341 	{
1342 	  int btr;
1343 	  augment_live_range (live_range, &btrs_live_in_range, def->bb, attempt,
1344 			      flag_btr_bb_exclusive);
1345 	  if (dump_file)
1346 	    {
1347 	      fprintf (dump_file, "Now btrs live in range are: ");
1348 	      dump_hard_reg_set (btrs_live_in_range);
1349 	      fprintf (dump_file, "\n");
1350 	    }
1351 	  btr = choose_btr (btrs_live_in_range);
1352 	  if (btr != -1)
1353 	    {
1354 	      move_btr_def (attempt, btr, def, live_range, &btrs_live_in_range);
1355 	      bitmap_copy (live_range, def->live_range);
1356 	      btr_used_near_def = 0;
1357 	      def_moved = 1;
1358 	      def_basic_block_freq = basic_block_freq (def->bb);
1359 	    }
1360 	  else
1361 	    {
1362 	      /* There are no free target registers available to move
1363 		 this far forward, so give up */
1364 	      give_up = 1;
1365 	      if (dump_file)
1366 		fprintf (dump_file,
1367 			 "giving up because there are no free target registers\n");
1368 	    }
1369 
1370 	}
1371     }
1372   if (!def_moved)
1373     {
1374       give_up = 1;
1375       if (dump_file)
1376 	fprintf (dump_file, "failed to move\n");
1377     }
1378   BITMAP_FREE (live_range);
1379   return !give_up;
1380 }
1381 
1382 /* Attempt to move instructions that set target registers earlier
1383    in the flowgraph, away from their corresponding uses.  */
1384 static void
migrate_btr_defs(enum reg_class btr_class,int allow_callee_save)1385 migrate_btr_defs (enum reg_class btr_class, int allow_callee_save)
1386 {
1387   btr_heap_t all_btr_defs (LONG_MIN);
1388   int reg;
1389 
1390   gcc_obstack_init (&migrate_btrl_obstack);
1391   if (dump_file)
1392     {
1393       int i;
1394 
1395       for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
1396 	{
1397 	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
1398 	  fprintf (dump_file,
1399 		   "Basic block %d: count = %" PRId64
1400 		   " loop-depth = %d idom = %d\n",
1401 		   i, (int64_t) bb->count, bb_loop_depth (bb),
1402 		   get_immediate_dominator (CDI_DOMINATORS, bb)->index);
1403 	}
1404     }
1405 
1406   CLEAR_HARD_REG_SET (all_btrs);
1407   for (first_btr = -1, reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
1408     if (TEST_HARD_REG_BIT (reg_class_contents[(int) btr_class], reg)
1409 	&& (allow_callee_save || call_used_regs[reg]
1410 	    || df_regs_ever_live_p (reg)))
1411       {
1412 	SET_HARD_REG_BIT (all_btrs, reg);
1413 	last_btr = reg;
1414 	if (first_btr < 0)
1415 	  first_btr = reg;
1416       }
1417 
1418   btrs_live = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun));
1419   btrs_live_at_end = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun));
1420 
1421   build_btr_def_use_webs (&all_btr_defs);
1422 
1423   while (!all_btr_defs.empty ())
1424     {
1425       int min_cost = -all_btr_defs.min_key ();
1426       btr_def *def = all_btr_defs.extract_min ();
1427       if (migrate_btr_def (def, min_cost))
1428 	{
1429 	  all_btr_defs.insert (-def->cost, def);
1430 	  if (dump_file)
1431 	    {
1432 	      fprintf (dump_file,
1433 		"Putting insn %d back on queue with priority %d\n",
1434 		INSN_UID (def->insn), def->cost);
1435 	    }
1436 	}
1437       else
1438 	BITMAP_FREE (def->live_range);
1439     }
1440 
1441   free (btrs_live);
1442   free (btrs_live_at_end);
1443   obstack_free (&migrate_btrl_obstack, NULL);
1444 }
1445 
1446 static void
branch_target_load_optimize(bool after_prologue_epilogue_gen)1447 branch_target_load_optimize (bool after_prologue_epilogue_gen)
1448 {
1449   enum reg_class klass
1450     = (enum reg_class) targetm.branch_target_register_class ();
1451   if (klass != NO_REGS)
1452     {
1453       /* Initialize issue_rate.  */
1454       if (targetm.sched.issue_rate)
1455 	issue_rate = targetm.sched.issue_rate ();
1456       else
1457 	issue_rate = 1;
1458 
1459       if (!after_prologue_epilogue_gen)
1460 	{
1461 	  /* Build the CFG for migrate_btr_defs.  */
1462 #if 1
1463 	  /* This may or may not be needed, depending on where we
1464 	     run this phase.  */
1465 	  cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
1466 #endif
1467 	}
1468       df_analyze ();
1469 
1470 
1471       /* Dominator info is also needed for migrate_btr_def.  */
1472       calculate_dominance_info (CDI_DOMINATORS);
1473       migrate_btr_defs (klass,
1474 		       (targetm.branch_target_register_callee_saved
1475 			(after_prologue_epilogue_gen)));
1476 
1477       free_dominance_info (CDI_DOMINATORS);
1478     }
1479 }
1480 
1481 namespace {
1482 
1483 const pass_data pass_data_branch_target_load_optimize1 =
1484 {
1485   RTL_PASS, /* type */
1486   "btl1", /* name */
1487   OPTGROUP_NONE, /* optinfo_flags */
1488   TV_NONE, /* tv_id */
1489   0, /* properties_required */
1490   0, /* properties_provided */
1491   0, /* properties_destroyed */
1492   0, /* todo_flags_start */
1493   0, /* todo_flags_finish */
1494 };
1495 
1496 class pass_branch_target_load_optimize1 : public rtl_opt_pass
1497 {
1498 public:
pass_branch_target_load_optimize1(gcc::context * ctxt)1499   pass_branch_target_load_optimize1 (gcc::context *ctxt)
1500     : rtl_opt_pass (pass_data_branch_target_load_optimize1, ctxt)
1501   {}
1502 
1503   /* opt_pass methods: */
gate(function *)1504   virtual bool gate (function *) { return flag_branch_target_load_optimize; }
execute(function *)1505   virtual unsigned int execute (function *)
1506     {
1507       branch_target_load_optimize (epilogue_completed);
1508       return 0;
1509     }
1510 
1511 }; // class pass_branch_target_load_optimize1
1512 
1513 } // anon namespace
1514 
1515 rtl_opt_pass *
make_pass_branch_target_load_optimize1(gcc::context * ctxt)1516 make_pass_branch_target_load_optimize1 (gcc::context *ctxt)
1517 {
1518   return new pass_branch_target_load_optimize1 (ctxt);
1519 }
1520 
1521 
1522 namespace {
1523 
1524 const pass_data pass_data_branch_target_load_optimize2 =
1525 {
1526   RTL_PASS, /* type */
1527   "btl2", /* name */
1528   OPTGROUP_NONE, /* optinfo_flags */
1529   TV_NONE, /* tv_id */
1530   0, /* properties_required */
1531   0, /* properties_provided */
1532   0, /* properties_destroyed */
1533   0, /* todo_flags_start */
1534   0, /* todo_flags_finish */
1535 };
1536 
1537 class pass_branch_target_load_optimize2 : public rtl_opt_pass
1538 {
1539 public:
pass_branch_target_load_optimize2(gcc::context * ctxt)1540   pass_branch_target_load_optimize2 (gcc::context *ctxt)
1541     : rtl_opt_pass (pass_data_branch_target_load_optimize2, ctxt)
1542   {}
1543 
1544   /* opt_pass methods: */
gate(function *)1545   virtual bool gate (function *)
1546     {
1547       return (optimize > 0 && flag_branch_target_load_optimize2);
1548     }
1549 
1550   virtual unsigned int execute (function *);
1551 
1552 }; // class pass_branch_target_load_optimize2
1553 
1554 unsigned int
execute(function *)1555 pass_branch_target_load_optimize2::execute (function *)
1556 {
1557   static int warned = 0;
1558 
1559   /* Leave this a warning for now so that it is possible to experiment
1560      with running this pass twice.  In 3.6, we should either make this
1561      an error, or use separate dump files.  */
1562   if (flag_branch_target_load_optimize
1563       && flag_branch_target_load_optimize2
1564       && !warned)
1565     {
1566       warning (0, "branch target register load optimization is not intended "
1567 	       "to be run twice");
1568 
1569       warned = 1;
1570     }
1571 
1572   branch_target_load_optimize (epilogue_completed);
1573   return 0;
1574 }
1575 
1576 } // anon namespace
1577 
1578 rtl_opt_pass *
make_pass_branch_target_load_optimize2(gcc::context * ctxt)1579 make_pass_branch_target_load_optimize2 (gcc::context *ctxt)
1580 {
1581   return new pass_branch_target_load_optimize2 (ctxt);
1582 }
1583