/* Perform branch target register load optimizations.
   Copyright (C) 2001-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "insn-config.h"
#include "regs.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "expr.h"
#include "insn-attr.h"
#include "tree-pass.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "cfgloop.h"
#include "rtl-iter.h"
#include "fibonacci_heap.h"

struct btr_def;

/* Target register optimizations - these are performed after reload.  */

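/* A group of branch target register definitions that all load the same
   constant value; the members of a group are candidates for combining.  */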
struct btr_def_group
{
  btr_def_group *next;
  rtx src;
  btr_def *members;
};

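/* A use of a branch target register.  */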
struct btr_user
{
  btr_user *next;
  basic_block bb;
  int luid;
  rtx_insn *insn;
  /* If INSN has a single use of a single branch register, then
     USE points to it within INSN.  If there is more than
     one branch register use, or the use is in some way ambiguous,
     then USE is NULL.  */
  rtx use;
  int n_reaching_defs;
  int first_reaching_def;
  char other_use_this_block;
};

/* btr_def structs appear on three lists:
     1. A list of all btr_def structures (head is
	ALL_BTR_DEFS, linked by the NEXT field).
     2. A list of branch reg definitions per basic block (head is
	BB_BTR_DEFS[i], linked by the NEXT_THIS_BB field).
     3. A list of all branch reg definitions belonging to the same
	group (head is in a BTR_DEF_GROUP struct, linked by
	NEXT_THIS_GROUP field).  */

struct btr_def
{
  btr_def *next_this_bb;
  btr_def *next_this_group;
  basic_block bb;
  int luid;
  rtx_insn *insn;
  int btr;
  int cost;
  /* For a branch register setting insn that has a constant
     source (i.e. a label), group links together all the
     insns with the same source.  For other branch register
     setting insns, group is NULL.  */
  btr_def_group *group;
  btr_user *uses;
  /* If this def has a reaching use which is not a simple use
     in a branch instruction, then has_ambiguous_use will be true,
     and we will not attempt to migrate this definition.  */
  char has_ambiguous_use;
  /* live_range is an approximation to the true live range for this
     def/use web, because it records the set of blocks that contain
     the live range.  There could be other live ranges for the same
     branch register in that set of blocks, either in the block
     containing the def (before the def), or in a block containing
     a use (after the use).  If there are such other live ranges, then
     other_btr_uses_before_def or other_btr_uses_after_use must be set true
     as appropriate.  */
  char other_btr_uses_before_def;
  char other_btr_uses_after_use;
  /* We set own_end when we have moved a definition into a dominator.
     Thus, when a later combination removes this definition, we know
     to clear out btrs_live_at_end again.  */
  char own_end;
  bitmap live_range;
};

typedef fibonacci_heap <long, btr_def> btr_heap_t;
typedef fibonacci_node <long, btr_def> btr_heap_node_t;

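/* The target's issue rate, used to convert insn latencies in cycles
   into distances measured in instructions.  */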
static int issue_rate;

static int basic_block_freq (const_basic_block);
static int insn_sets_btr_p (const rtx_insn *, int, int *);
static void find_btr_def_group (btr_def_group **, btr_def *);
static btr_def *add_btr_def (btr_heap_t *, basic_block, int, rtx_insn *,
			    unsigned int, int, btr_def_group **);
static btr_user *new_btr_user (basic_block, int, rtx_insn *);
static void dump_hard_reg_set (HARD_REG_SET);
static void dump_btrs_live (int);
static void note_other_use_this_block (unsigned int, btr_user *);
static void compute_defs_uses_and_gen (btr_heap_t *, btr_def **, btr_user **,
				       sbitmap *, sbitmap *, HARD_REG_SET *);
static void compute_kill (sbitmap *, sbitmap *, HARD_REG_SET *);
static void compute_out (sbitmap *bb_out, sbitmap *, sbitmap *, int);
static void link_btr_uses (btr_def **, btr_user **, sbitmap *, sbitmap *, int);
static void build_btr_def_use_webs (btr_heap_t *);
static int block_at_edge_of_live_range_p (int, btr_def *);
static void clear_btr_from_live_range (btr_def *def);
static void add_btr_to_live_range (btr_def *, int);
static void augment_live_range (bitmap, HARD_REG_SET *, basic_block,
				basic_block, int);
static int choose_btr (HARD_REG_SET);
static void combine_btr_defs (btr_def *, HARD_REG_SET *);
static void btr_def_live_range (btr_def *, HARD_REG_SET *);
static void move_btr_def (basic_block, int, btr_def *, bitmap, HARD_REG_SET *);
static int migrate_btr_def (btr_def *, int);
static void migrate_btr_defs (enum reg_class, int);
static int can_move_up (const_basic_block, const rtx_insn *, int);
static void note_btr_set (rtx, const_rtx, void *);

/* The following code performs code motion of target load instructions
   (instructions that set branch target registers), to move them
   forward away from the branch instructions and out of loops (or,
   more generally, from a more frequently executed place to a less
   frequently executed place).
   Moving target load instructions further in front of the branch
   instruction that uses the target register value means that the hardware
   has a better chance of preloading the instructions at the branch
   target by the time the branch is reached.  This avoids bubbles
   when a taken branch needs to flush out the pipeline.
   Moving target load instructions out of loops means they are executed
   less frequently.  */

/* An obstack to hold the def-use web data structures built up for
   migrating branch target load instructions.  */
static struct obstack migrate_btrl_obstack;

/* Array indexed by basic block number, giving the set of registers
   live in that block.  */
static HARD_REG_SET *btrs_live;

/* Array indexed by basic block number, giving the set of registers live
   at the end of that block, including any uses by a final jump insn.  */
static HARD_REG_SET *btrs_live_at_end;

/* Set of all target registers that we are willing to allocate.  */
static HARD_REG_SET all_btrs;

/* Provide lower and upper bounds for target register numbers, so that
   we don't need to search through all the hard registers all the time.  */
static int first_btr, last_btr;

/* Return an estimate of the frequency of execution of block BB.  */
static int
basic_block_freq (const_basic_block bb)
{
  return bb->count.to_frequency (cfun);
}

/* If the rtx at *XP references (sets or reads) any branch target
   register, return a pointer to one such reference.  If EXCLUDEP is set,
   disregard any references within that location.  */
static rtx *
find_btr_use (rtx *xp, rtx *excludep = 0)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, xp, NONCONST)
    {
      rtx *loc = *iter;
      if (loc == excludep)
	iter.skip_subrtxes ();
      else
	{
	  const_rtx x = *loc;
	  if (REG_P (x)
	      && overlaps_hard_reg_set_p (all_btrs, GET_MODE (x), REGNO (x)))
	    return loc;
	}
    }
  return 0;
}

/* Return true if INSN is an instruction that sets a target register.
   If CHECK_CONST is true, only return true if the source is constant.
   If such a set is found and REGNO is nonzero, assign the register number
   of the destination register to *REGNO.  */
static int
insn_sets_btr_p (const rtx_insn *insn, int check_const, int *regno)
{
  rtx set;

  if (NONJUMP_INSN_P (insn)
      && (set = single_set (insn)))
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);

      if (GET_CODE (dest) == SUBREG)
	dest = XEXP (dest, 0);

      if (REG_P (dest)
	  && TEST_HARD_REG_BIT (all_btrs, REGNO (dest)))
	{
	  gcc_assert (!find_btr_use (&src));

	  if (!check_const || CONSTANT_P (src))
	    {
	      if (regno)
		*regno = REGNO (dest);
	      return 1;
	    }
	}
    }
  return 0;
}

/* Find the group that the target register definition DEF belongs
   to in the list starting with *ALL_BTR_DEF_GROUPS.  If no such
   group exists, create one.  Add DEF to the group.  */
static void
find_btr_def_group (btr_def_group **all_btr_def_groups, btr_def *def)
{
  if (insn_sets_btr_p (def->insn, 1, NULL))
    {
      btr_def_group *this_group;
      rtx def_src = SET_SRC (single_set (def->insn));

      /* ?? This linear search is an efficiency concern, particularly
	 as the search will almost always fail to find a match.  */
      for (this_group = *all_btr_def_groups;
	   this_group != NULL;
	   this_group = this_group->next)
	if (rtx_equal_p (def_src, this_group->src))
	  break;

      if (!this_group)
	{
	  this_group = XOBNEW (&migrate_btrl_obstack, btr_def_group);
	  this_group->src = def_src;
	  this_group->members = NULL;
	  this_group->next = *all_btr_def_groups;
	  *all_btr_def_groups = this_group;
	}
      def->group = this_group;
      def->next_this_group = this_group->members;
      this_group->members = def;
    }
  else
    def->group = NULL;
}

/* Create a new target register definition structure, for a definition in
   block BB, instruction INSN, and insert it into ALL_BTR_DEFS.  Return
   the new definition.  */
static btr_def *
add_btr_def (btr_heap_t *all_btr_defs, basic_block bb, int insn_luid,
	     rtx_insn *insn,
	     unsigned int dest_reg, int other_btr_uses_before_def,
	     btr_def_group **all_btr_def_groups)
{
  btr_def *this_def = XOBNEW (&migrate_btrl_obstack, btr_def);
  this_def->bb = bb;
  this_def->luid = insn_luid;
  this_def->insn = insn;
  this_def->btr = dest_reg;
  this_def->cost = basic_block_freq (bb);
  this_def->has_ambiguous_use = 0;
  this_def->other_btr_uses_before_def = other_btr_uses_before_def;
  this_def->other_btr_uses_after_use = 0;
  this_def->next_this_bb = NULL;
  this_def->next_this_group = NULL;
  this_def->uses = NULL;
  this_def->live_range = NULL;
  find_btr_def_group (all_btr_def_groups, this_def);

  all_btr_defs->insert (-this_def->cost, this_def);

  if (dump_file)
    fprintf (dump_file,
      "Found target reg definition: sets %u { bb %d, insn %d }%s priority %d\n",
	     dest_reg, bb->index, INSN_UID (insn),
	     (this_def->group ? "" : ":not const"), this_def->cost);

  return this_def;
}

/* Create a new target register user structure, for a use in block BB,
   instruction INSN.  Return the new user.  */
static btr_user *
new_btr_user (basic_block bb, int insn_luid, rtx_insn *insn)
{
  /* This instruction reads target registers.  We need
     to decide whether we can replace all target register
     uses easily.  */
  rtx *usep = find_btr_use (&PATTERN (insn));
  rtx use;
  btr_user *user = NULL;

  if (usep)
    {
      int unambiguous_single_use;

      /* We want to ensure that USE is the only use of a target
	 register in INSN, so that we know that to rewrite INSN to use
	 a different target register, all we have to do is replace USE.  */
      unambiguous_single_use = !find_btr_use (&PATTERN (insn), usep);
      if (!unambiguous_single_use)
	usep = NULL;
    }
  use = usep ? *usep : NULL_RTX;
  user = XOBNEW (&migrate_btrl_obstack, btr_user);
  user->bb = bb;
  user->luid = insn_luid;
  user->insn = insn;
  user->use = use;
  user->other_use_this_block = 0;
  user->next = NULL;
  user->n_reaching_defs = 0;
  user->first_reaching_def = -1;

  if (dump_file)
    {
      fprintf (dump_file, "Uses target reg: { bb %d, insn %d }",
	       bb->index, INSN_UID (insn));

      if (user->use)
	fprintf (dump_file, ": unambiguous use of reg %d\n",
		 REGNO (user->use));
    }

  return user;
}

/* Write the contents of S to the dump file.  */
static void
dump_hard_reg_set (HARD_REG_SET s)
{
  int reg;
  for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
    if (TEST_HARD_REG_BIT (s, reg))
      fprintf (dump_file, " %d", reg);
}

/* Write the set of target regs live in block BB to the dump file.  */
static void
dump_btrs_live (int bb)
{
  fprintf (dump_file, "BB%d live:", bb);
  dump_hard_reg_set (btrs_live[bb]);
  fprintf (dump_file, "\n");
}

/* REGNO is the number of a branch target register that is being used or
   set.  USERS_THIS_BB is a list of preceding branch target register users;
   if any of them use the same register, set their other_use_this_block
   flag.  */
static void
note_other_use_this_block (unsigned int regno, btr_user *users_this_bb)
{
  btr_user *user;

  for (user = users_this_bb; user != NULL; user = user->next)
    if (user->use && REGNO (user->use) == regno)
      user->other_use_this_block = 1;
}

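/* Per-basic-block state passed to note_btr_set while scanning the insns
   of a block in compute_defs_uses_and_gen.  */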
struct defs_uses_info {
  btr_user *users_this_bb;
  HARD_REG_SET btrs_written_in_block;
  HARD_REG_SET btrs_live_in_block;
  sbitmap bb_gen;
  sbitmap *btr_defset;
};

/* Called via note_stores or directly, to record stores into or clobbers
   of a branch target register DEST that are not recognized as
   straightforward definitions.  DATA points to information about the
   current basic block that needs updating.  */
static void
note_btr_set (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
{
  defs_uses_info *info = (defs_uses_info *) data;
  int regno, end_regno;

  if (!REG_P (dest))
    return;
  regno = REGNO (dest);
  end_regno = END_REGNO (dest);
  for (; regno < end_regno; regno++)
    if (TEST_HARD_REG_BIT (all_btrs, regno))
      {
	note_other_use_this_block (regno, info->users_this_bb);
	SET_HARD_REG_BIT (info->btrs_written_in_block, regno);
	SET_HARD_REG_BIT (info->btrs_live_in_block, regno);
	bitmap_and_compl (info->bb_gen, info->bb_gen,
			  info->btr_defset[regno - first_btr]);
      }
}

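/* Scan the insns of the current function, building the sets used by the
   reaching-definitions problem: DEF_ARRAY and USE_ARRAY are indexed by
   insn uid, BTR_DEFSET collects the defs of each target register, BB_GEN
   the defs generated by each block, and BTRS_WRITTEN the registers
   written by each block.  New defs are also queued on the ALL_BTR_DEFS
   heap.  */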
static void
compute_defs_uses_and_gen (btr_heap_t *all_btr_defs, btr_def **def_array,
			   btr_user **use_array, sbitmap *btr_defset,
			   sbitmap *bb_gen, HARD_REG_SET *btrs_written)
{
  /* Scan the code building up the set of all defs and all uses.
     For each target register, build the set of defs of that register.
     For each block, calculate the set of target registers
     written in that block.
     Also calculate the set of btrs ever live in that block.  */
  int i;
  int insn_luid = 0;
  btr_def_group *all_btr_def_groups = NULL;
  defs_uses_info info;

  bitmap_vector_clear (bb_gen, last_basic_block_for_fn (cfun));
  for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      int reg;
      btr_def *defs_this_bb = NULL;
      rtx_insn *insn;
      rtx_insn *last;
      int can_throw = 0;

      info.users_this_bb = NULL;
      info.bb_gen = bb_gen[i];
      info.btr_defset = btr_defset;

      CLEAR_HARD_REG_SET (info.btrs_live_in_block);
      CLEAR_HARD_REG_SET (info.btrs_written_in_block);
      for (reg = first_btr; reg <= last_btr; reg++)
	if (TEST_HARD_REG_BIT (all_btrs, reg)
	    && REGNO_REG_SET_P (df_get_live_in (bb), reg))
	  SET_HARD_REG_BIT (info.btrs_live_in_block, reg);

      for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb));
	   insn != last;
	   insn = NEXT_INSN (insn), insn_luid++)
	{
	  if (INSN_P (insn))
	    {
	      int regno;
	      int insn_uid = INSN_UID (insn);

	      if (insn_sets_btr_p (insn, 0, &regno))
		{
		  btr_def *def = add_btr_def (
		      all_btr_defs, bb, insn_luid, insn, regno,
		      TEST_HARD_REG_BIT (info.btrs_live_in_block, regno),
		      &all_btr_def_groups);

		  def_array[insn_uid] = def;
		  SET_HARD_REG_BIT (info.btrs_written_in_block, regno);
		  SET_HARD_REG_BIT (info.btrs_live_in_block, regno);
		  bitmap_and_compl (bb_gen[i], bb_gen[i],
				    btr_defset[regno - first_btr]);
		  bitmap_set_bit (bb_gen[i], insn_uid);
		  def->next_this_bb = defs_this_bb;
		  defs_this_bb = def;
		  bitmap_set_bit (btr_defset[regno - first_btr], insn_uid);
		  note_other_use_this_block (regno, info.users_this_bb);
		}
	      /* Check for the blockage emitted by expand_nl_goto_receiver.  */
	      else if (cfun->has_nonlocal_label
		       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE)
		{
		  btr_user *user;

		  /* Do the equivalent of calling note_other_use_this_block
		     for every target register.  */
		  for (user = info.users_this_bb; user != NULL;
		       user = user->next)
		    if (user->use)
		      user->other_use_this_block = 1;
		  IOR_HARD_REG_SET (info.btrs_written_in_block, all_btrs);
		  IOR_HARD_REG_SET (info.btrs_live_in_block, all_btrs);
		  bitmap_clear (info.bb_gen);
		}
	      else
		{
		  if (find_btr_use (&PATTERN (insn)))
		    {
		      btr_user *user = new_btr_user (bb, insn_luid, insn);

		      use_array[insn_uid] = user;
		      if (user->use)
			SET_HARD_REG_BIT (info.btrs_live_in_block,
					  REGNO (user->use));
		      else
			{
			  int reg;
			  for (reg = first_btr; reg <= last_btr; reg++)
			    if (TEST_HARD_REG_BIT (all_btrs, reg)
				&& refers_to_regno_p (reg, user->insn))
			      {
				note_other_use_this_block (reg,
							   info.users_this_bb);
				SET_HARD_REG_BIT (info.btrs_live_in_block, reg);
			      }
			  note_stores (PATTERN (insn), note_btr_set, &info);
			}
		      user->next = info.users_this_bb;
		      info.users_this_bb = user;
		    }
		  if (CALL_P (insn))
		    {
		      HARD_REG_SET *clobbered = &call_used_reg_set;
		      HARD_REG_SET call_saved;
		      rtx pat = PATTERN (insn);
		      int i;

		      /* Check for sibcall.  */
		      if (GET_CODE (pat) == PARALLEL)
			for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
			  if (ANY_RETURN_P (XVECEXP (pat, 0, i)))
			    {
			      COMPL_HARD_REG_SET (call_saved,
						  call_used_reg_set);
			      clobbered = &call_saved;
			    }

		      for (regno = first_btr; regno <= last_btr; regno++)
			if (TEST_HARD_REG_BIT (*clobbered, regno))
			  note_btr_set (regno_reg_rtx[regno], NULL_RTX, &info);
		    }
		}
	    }
	}

      COPY_HARD_REG_SET (btrs_live[i], info.btrs_live_in_block);
      COPY_HARD_REG_SET (btrs_written[i], info.btrs_written_in_block);

      REG_SET_TO_HARD_REG_SET (btrs_live_at_end[i], df_get_live_out (bb));
      /* If this block ends in a jump insn, add any uses or even clobbers
	 of branch target registers that it might have.  */
      for (insn = BB_END (bb); insn != BB_HEAD (bb) && ! INSN_P (insn); )
	insn = PREV_INSN (insn);
      /* ??? For the fall-through edge, it would make sense to insert the
	 btr set on the edge, but that would require splitting the block
	 early on so that we can distinguish dominance via the fall-through
	 edge - which can use the call-clobbered registers - from dominance
	 via the throw edge.  */
      if (can_throw_internal (insn))
	{
	  HARD_REG_SET tmp;

	  COPY_HARD_REG_SET (tmp, call_used_reg_set);
	  AND_HARD_REG_SET (tmp, all_btrs);
	  IOR_HARD_REG_SET (btrs_live_at_end[i], tmp);
	  can_throw = 1;
	}
      if (can_throw || JUMP_P (insn))
	{
	  int regno;

	  for (regno = first_btr; regno <= last_btr; regno++)
	    if (refers_to_regno_p (regno, insn))
	      SET_HARD_REG_BIT (btrs_live_at_end[i], regno);
	}

      if (dump_file)
	dump_btrs_live (i);
    }
}

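/* Compute the kill sets for the reaching-definitions problem: a block
   kills all defs of every target register that it writes.  */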
static void
compute_kill (sbitmap *bb_kill, sbitmap *btr_defset,
	      HARD_REG_SET *btrs_written)
{
  int i;
  int regno;

  /* For each basic block, form the set BB_KILL - the set
     of definitions that the block kills.  */
  bitmap_vector_clear (bb_kill, last_basic_block_for_fn (cfun));
  for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
    {
      for (regno = first_btr; regno <= last_btr; regno++)
	if (TEST_HARD_REG_BIT (all_btrs, regno)
	    && TEST_HARD_REG_BIT (btrs_written[i], regno))
	  bitmap_ior (bb_kill[i], bb_kill[i],
		      btr_defset[regno - first_btr]);
    }
}

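/* Solve the reaching-definitions problem, leaving in BB_OUT the set of
   defs that reach the end of each basic block.  */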
static void
compute_out (sbitmap *bb_out, sbitmap *bb_gen, sbitmap *bb_kill, int max_uid)
{
  /* Perform iterative dataflow:
      Initially, for all blocks, BB_OUT = BB_GEN.
      For each block,
	BB_IN  = union over predecessors of BB_OUT(pred)
	BB_OUT = (BB_IN - BB_KILL) + BB_GEN
     Iterate until the bb_out sets stop growing.  */
  int i;
  int changed;
  auto_sbitmap bb_in (max_uid);

  for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
    bitmap_copy (bb_out[i], bb_gen[i]);

  changed = 1;
  while (changed)
    {
      changed = 0;
      for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
	{
	  bitmap_union_of_preds (bb_in, bb_out, BASIC_BLOCK_FOR_FN (cfun, i));
	  changed |= bitmap_ior_and_compl (bb_out[i], bb_gen[i],
					   bb_in, bb_kill[i]);
	}
    }
}

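/* Use the result of the reaching-definitions problem to link each use in
   USE_ARRAY to the defs in DEF_ARRAY that reach it, building the def/use
   webs on which the migration pass operates.  */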
static void
link_btr_uses (btr_def **def_array, btr_user **use_array, sbitmap *bb_out,
	       sbitmap *btr_defset, int max_uid)
{
  int i;
  auto_sbitmap reaching_defs (max_uid);

  /* Link uses to the uses lists of all of their reaching defs.
     Count up the number of reaching defs of each use.  */
  for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      rtx_insn *insn;
      rtx_insn *last;

      bitmap_union_of_preds (reaching_defs, bb_out,
			     BASIC_BLOCK_FOR_FN (cfun, i));
      for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb));
	   insn != last;
	   insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn))
	    {
	      int insn_uid = INSN_UID (insn);

	      btr_def *def   = def_array[insn_uid];
	      btr_user *user = use_array[insn_uid];
	      if (def != NULL)
		{
		  /* Remove all reaching defs of regno except
		     for this one.  */
		  bitmap_and_compl (reaching_defs, reaching_defs,
				    btr_defset[def->btr - first_btr]);
		  bitmap_set_bit (reaching_defs, insn_uid);
		}

	      if (user != NULL)
		{
		  /* Find all the reaching defs for this use.  */
		  auto_sbitmap reaching_defs_of_reg (max_uid);
		  unsigned int uid = 0;
		  sbitmap_iterator sbi;

		  if (user->use)
		    bitmap_and (
		      reaching_defs_of_reg,
		      reaching_defs,
		      btr_defset[REGNO (user->use) - first_btr]);
		  else
		    {
		      int reg;

		      bitmap_clear (reaching_defs_of_reg);
		      for (reg = first_btr; reg <= last_btr; reg++)
			if (TEST_HARD_REG_BIT (all_btrs, reg)
			    && refers_to_regno_p (reg, user->insn))
			  bitmap_or_and (reaching_defs_of_reg,
			    reaching_defs_of_reg,
			    reaching_defs,
			    btr_defset[reg - first_btr]);
		    }
		  EXECUTE_IF_SET_IN_BITMAP (reaching_defs_of_reg, 0, uid, sbi)
		    {
		      btr_def *def = def_array[uid];

		      /* We now know that def reaches user.  */

		      if (dump_file)
			fprintf (dump_file,
			  "Def in insn %d reaches use in insn %d\n",
			  uid, insn_uid);

		      user->n_reaching_defs++;
		      if (!user->use)
			def->has_ambiguous_use = 1;
		      if (user->first_reaching_def != -1)
			{ /* There is more than one reaching def.  This is
			     a rare case, so just give up on this def/use
			     web when it occurs.  */
			  def->has_ambiguous_use = 1;
			  def_array[user->first_reaching_def]
			    ->has_ambiguous_use = 1;
			  if (dump_file)
			    fprintf (dump_file,
				     "(use %d has multiple reaching defs)\n",
				     insn_uid);
			}
		      else
			user->first_reaching_def = uid;
		      if (user->other_use_this_block)
			def->other_btr_uses_after_use = 1;
		      user->next = def->uses;
		      def->uses = user;
		    }
		}

	      if (CALL_P (insn))
		{
		  int regno;

		  for (regno = first_btr; regno <= last_btr; regno++)
		    if (TEST_HARD_REG_BIT (all_btrs, regno)
			&& TEST_HARD_REG_BIT (call_used_reg_set, regno))
		      bitmap_and_compl (reaching_defs, reaching_defs,
					btr_defset[regno - first_btr]);
		}
	    }
	}
    }
}

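/* Build the def/use webs for all branch target registers, queuing the
   resulting defs on the heap ALL_BTR_DEFS, prioritized by cost.  */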
static void
build_btr_def_use_webs (btr_heap_t *all_btr_defs)
{
  const int max_uid = get_max_uid ();
  btr_def  **def_array   = XCNEWVEC (btr_def *, max_uid);
  btr_user **use_array   = XCNEWVEC (btr_user *, max_uid);
  sbitmap *btr_defset   = sbitmap_vector_alloc (
			   (last_btr - first_btr) + 1, max_uid);
  sbitmap *bb_gen = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					  max_uid);
  HARD_REG_SET *btrs_written = XCNEWVEC (HARD_REG_SET,
					 last_basic_block_for_fn (cfun));
  sbitmap *bb_kill;
  sbitmap *bb_out;

  bitmap_vector_clear (btr_defset, (last_btr - first_btr) + 1);

  compute_defs_uses_and_gen (all_btr_defs, def_array, use_array, btr_defset,
			     bb_gen, btrs_written);

  bb_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid);
  compute_kill (bb_kill, btr_defset, btrs_written);
  free (btrs_written);

  bb_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid);
  compute_out (bb_out, bb_gen, bb_kill, max_uid);

  sbitmap_vector_free (bb_gen);
  sbitmap_vector_free (bb_kill);

  link_btr_uses (def_array, use_array, bb_out, btr_defset, max_uid);

  sbitmap_vector_free (bb_out);
  sbitmap_vector_free (btr_defset);
  free (use_array);
  free (def_array);
}

/* Return true if basic block BB contains the start or end of the
   live range of the definition DEF, AND there are other live
   ranges of the same target register that include BB.  */
static int
block_at_edge_of_live_range_p (int bb, btr_def *def)
{
  if (def->other_btr_uses_before_def
      && BASIC_BLOCK_FOR_FN (cfun, bb) == def->bb)
    return 1;
  else if (def->other_btr_uses_after_use)
    {
      btr_user *user;
      for (user = def->uses; user != NULL; user = user->next)
	if (BASIC_BLOCK_FOR_FN (cfun, bb) == user->bb)
	  return 1;
    }
  return 0;
}

/* We are removing the def/use web DEF.  The target register
   used in this web is therefore no longer live in the live range
   of this web, so remove it from the live set of all basic blocks
   in the live range of the web.
   Blocks at the boundary of the live range may contain other live
   ranges for the same target register, so we have to be careful
   to remove the target register from the live set of these blocks
   only if they do not contain other live ranges for the same register.  */
static void
clear_btr_from_live_range (btr_def *def)
{
  unsigned bb;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
    {
      if ((!def->other_btr_uses_before_def
	   && !def->other_btr_uses_after_use)
	  || !block_at_edge_of_live_range_p (bb, def))
	{
	  CLEAR_HARD_REG_BIT (btrs_live[bb], def->btr);
	  CLEAR_HARD_REG_BIT (btrs_live_at_end[bb], def->btr);
	  if (dump_file)
	    dump_btrs_live (bb);
	}
    }
  if (def->own_end)
    CLEAR_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr);
}

/* We are adding the def/use web DEF.  Add the target register used
   in this web to the live set of all of the basic blocks that contain
   the live range of the web.
   If OWN_END is set, also show that the register is live from our
   definitions at the end of the basic block where it is defined.  */
static void
add_btr_to_live_range (btr_def *def, int own_end)
{
  unsigned bb;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
    {
      SET_HARD_REG_BIT (btrs_live[bb], def->btr);
      SET_HARD_REG_BIT (btrs_live_at_end[bb], def->btr);
      if (dump_file)
	dump_btrs_live (bb);
    }
  if (own_end)
    {
      SET_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr);
      def->own_end = 1;
    }
}

/* Update a live range to contain the basic block NEW_BB, and all
   blocks on paths between the existing live range and NEW_BB.
   HEAD_BB is a block contained in the existing live range that dominates
   all other blocks in the existing live range.
   Also add to the set BTRS_LIVE_IN_RANGE all target registers that
   are live in the blocks that we add to the live range.
   If FULL_RANGE is set, include the full live range of NEW_BB;
   otherwise, if NEW_BB dominates HEAD_BB, only add the registers that
   are live at the end of NEW_BB for NEW_BB itself.
   It is a precondition that either NEW_BB dominates HEAD_BB, or
   HEAD_BB dominates NEW_BB.  This is used to speed up the
   implementation of this function.  */
static void
augment_live_range (bitmap live_range, HARD_REG_SET *btrs_live_in_range,
		    basic_block head_bb, basic_block new_bb, int full_range)
{
  basic_block *worklist, *tos;

  tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1);

  if (dominated_by_p (CDI_DOMINATORS, new_bb, head_bb))
    {
      if (new_bb == head_bb)
	{
	  if (full_range)
	    IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_bb->index]);
	  free (tos);
	  return;
	}
      *tos++ = new_bb;
    }
  else
    {
      edge e;
      edge_iterator ei;
      int new_block = new_bb->index;

      gcc_assert (dominated_by_p (CDI_DOMINATORS, head_bb, new_bb));

      IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[head_bb->index]);
      bitmap_set_bit (live_range, new_block);
      /* A previous btr migration could have caused a register to be
	 live just at the end of new_block which we need in full, so
	 use btrs_live_at_end even if full_range is set.  */
      IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live_at_end[new_block]);
      if (full_range)
	IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_block]);
      if (dump_file)
	{
	  fprintf (dump_file,
		   "Adding end of block %d and rest of %d to live range\n",
		   new_block, head_bb->index);
	  fprintf (dump_file, "Now live btrs are ");
	  dump_hard_reg_set (*btrs_live_in_range);
	  fprintf (dump_file, "\n");
	}
      FOR_EACH_EDGE (e, ei, head_bb->preds)
	*tos++ = e->src;
    }

  while (tos != worklist)
    {
      basic_block bb = *--tos;
      if (!bitmap_bit_p (live_range, bb->index))
	{
	  edge e;
	  edge_iterator ei;

	  bitmap_set_bit (live_range, bb->index);
	  IOR_HARD_REG_SET (*btrs_live_in_range,
	    btrs_live[bb->index]);
	  /* A previous btr migration could have caused a register to be
	     live just at the end of a block which we need in full.  */
	  IOR_HARD_REG_SET (*btrs_live_in_range,
	    btrs_live_at_end[bb->index]);
	  if (dump_file)
	    {
	      fprintf (dump_file,
		"Adding block %d to live range\n", bb->index);
	      fprintf (dump_file, "Now live btrs are ");
	      dump_hard_reg_set (*btrs_live_in_range);
	      fprintf (dump_file, "\n");
	    }

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      basic_block pred = e->src;
	      if (!bitmap_bit_p (live_range, pred->index))
		*tos++ = pred;
	    }
	}
    }

  free (worklist);
}

/* Return the most desirable target register that is not in
   the set USED_BTRS.  */
static int
choose_btr (HARD_REG_SET used_btrs)
{
  int i;

  if (!hard_reg_set_subset_p (all_btrs, used_btrs))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      {
#ifdef REG_ALLOC_ORDER
	int regno = reg_alloc_order[i];
#else
	int regno = i;
#endif
	if (TEST_HARD_REG_BIT (all_btrs, regno)
	    && !TEST_HARD_REG_BIT (used_btrs, regno))
	  return regno;
      }
  return -1;
}

/* Calculate the set of basic blocks that contain the live range of
   the def/use web DEF.
   Also calculate the set of target registers that are live at any time
   in this live range, but ignore the live range represented by DEF
   when calculating this set.  */
static void
btr_def_live_range (btr_def *def, HARD_REG_SET *btrs_live_in_range)
{
  if (!def->live_range)
    {
      btr_user *user;

      def->live_range = BITMAP_ALLOC (NULL);

      bitmap_set_bit (def->live_range, def->bb->index);
      COPY_HARD_REG_SET (*btrs_live_in_range,
			 (flag_btr_bb_exclusive
			  ? btrs_live : btrs_live_at_end)[def->bb->index]);

      for (user = def->uses; user != NULL; user = user->next)
	augment_live_range (def->live_range, btrs_live_in_range,
			    def->bb, user->bb,
			    (flag_btr_bb_exclusive
			     || user->insn != BB_END (def->bb)
			     || !JUMP_P (user->insn)));
    }
  else
    {
      /* def->live_range is accurate, but we need to recompute
	 the set of target registers live over it, because migration
	 of other PT instructions may have affected it.  */
      unsigned bb;
      unsigned def_bb = flag_btr_bb_exclusive ? -1 : def->bb->index;
      bitmap_iterator bi;

      CLEAR_HARD_REG_SET (*btrs_live_in_range);
      EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
	{
	  IOR_HARD_REG_SET (*btrs_live_in_range,
			    (def_bb == bb
			     ? btrs_live_at_end : btrs_live) [bb]);
	}
    }
  if (!def->other_btr_uses_before_def
      && !def->other_btr_uses_after_use)
    CLEAR_HARD_REG_BIT (*btrs_live_in_range, def->btr);
}

/* Merge into the def/use web DEF any other def/use webs in the same
   group that are dominated by DEF, provided that there is a target
   register available to allocate to the merged web.  */
static void
combine_btr_defs (btr_def *def, HARD_REG_SET *btrs_live_in_range)
{
  btr_def *other_def;

  for (other_def = def->group->members;
       other_def != NULL;
       other_def = other_def->next_this_group)
    {
      if (other_def != def
	  && other_def->uses != NULL
	  && ! other_def->has_ambiguous_use
	  && dominated_by_p (CDI_DOMINATORS, other_def->bb, def->bb))
	{
	  /* def->bb dominates the other def, so def and other_def could
	     be combined.  */
	  /* Merge their live ranges, and get the set of
	     target registers live over the merged range.  */
	  int btr;
	  HARD_REG_SET combined_btrs_live;
	  auto_bitmap combined_live_range;
	  btr_user *user;

	  if (other_def->live_range == NULL)
	    {
	      HARD_REG_SET dummy_btrs_live_in_range;
	      btr_def_live_range (other_def, &dummy_btrs_live_in_range);
	    }
	  COPY_HARD_REG_SET (combined_btrs_live, *btrs_live_in_range);
	  bitmap_copy (combined_live_range, def->live_range);

	  for (user = other_def->uses; user != NULL; user = user->next)
	    augment_live_range (combined_live_range, &combined_btrs_live,
				def->bb, user->bb,
				(flag_btr_bb_exclusive
				 || user->insn != BB_END (def->bb)
				 || !JUMP_P (user->insn)));

	  btr = choose_btr (combined_btrs_live);
	  if (btr != -1)
	    {
	      /* We can combine them.  */
	      if (dump_file)
		fprintf (dump_file,
			 "Combining def in insn %d with def in insn %d\n",
			 INSN_UID (other_def->insn), INSN_UID (def->insn));

	      def->btr = btr;
	      user = other_def->uses;
	      while (user != NULL)
		{
		  btr_user *next = user->next;

		  user->next = def->uses;
		  def->uses = user;
		  user = next;
		}
	      /* Combining def/use webs can make target registers live
		 after uses where they previously were not.  This means
		 some REG_DEAD notes may no longer be correct.  We could
		 be more precise about this if we looked at the combined
		 live range, but here I just delete any REG_DEAD notes
		 in case they are no longer correct.  */
	      for (user = def->uses; user != NULL; user = user->next)
		remove_note (user->insn,
			     find_regno_note (user->insn, REG_DEAD,
					      REGNO (user->use)));
	      clear_btr_from_live_range (other_def);
	      other_def->uses = NULL;
	      bitmap_copy (def->live_range, combined_live_range);
	      if (other_def->btr == btr && other_def->other_btr_uses_after_use)
		def->other_btr_uses_after_use = 1;
	      COPY_HARD_REG_SET (*btrs_live_in_range, combined_btrs_live);

	      /* Delete the old target register initialization.  */
	      delete_insn (other_def->insn);
	    }
	}
    }
}

/* Move the definition DEF from its current position to basic
   block NEW_DEF_BB, and modify it to use branch target register BTR.
   Delete the old defining insn, and insert a new one in NEW_DEF_BB.
   Update all reaching uses of DEF in the RTL to use BTR.
   If this new position means that other defs in the
   same group can be combined with DEF then combine them.  */
static void
move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range,
	      HARD_REG_SET *btrs_live_in_range)
{
  /* We can move the instruction.
     Set a target register in block NEW_DEF_BB to the value
     needed for this target register definition.
     Replace all uses of the old target register definition by
     uses of the new definition.  Delete the old definition.  */
  basic_block b = new_def_bb;
  rtx_insn *insp = BB_HEAD (b);
  rtx_insn *old_insn = def->insn;
  rtx src;
  rtx btr_rtx;
  rtx_insn *new_insn;
  machine_mode btr_mode;
  btr_user *user;
  rtx set;

  if (dump_file)
    fprintf (dump_file, "migrating to basic block %d, using reg %d\n",
	     new_def_bb->index, btr);

  clear_btr_from_live_range (def);
  def->btr = btr;
  def->bb = new_def_bb;
  def->luid = 0;
  def->cost = basic_block_freq (new_def_bb);
  bitmap_copy (def->live_range, live_range);
  combine_btr_defs (def, btrs_live_in_range);
  btr = def->btr;
  def->other_btr_uses_before_def
    = TEST_HARD_REG_BIT (btrs_live[b->index], btr) ? 1 : 0;
  add_btr_to_live_range (def, 1);
  if (LABEL_P (insp))
    insp = NEXT_INSN (insp);
  /* N.B.: insp is expected to be NOTE_INSN_BASIC_BLOCK now.  Some
     optimizations can result in insp being both first and last insn of
     its basic block.  */
  /* ?? some assertions to check that insp is sensible? */

  if (def->other_btr_uses_before_def)
    {
      for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp))
	gcc_assert (insp != BB_HEAD (b));

      if (JUMP_P (insp) || can_throw_internal (insp))
	insp = PREV_INSN (insp);
    }

  set = single_set (old_insn);
  src = SET_SRC (set);
  btr_mode = GET_MODE (SET_DEST (set));
  btr_rtx = gen_rtx_REG (btr_mode, btr);

  new_insn = gen_move_insn (btr_rtx, src);

  /* Insert target register initialization at head of basic block.  */
  def->insn = emit_insn_after (new_insn, insp);

  df_set_regs_ever_live (btr, true);

  if (dump_file)
    fprintf (dump_file, "New pt is insn %d, inserted after insn %d\n",
	     INSN_UID (def->insn), INSN_UID (insp));

  /* Delete the old target register initialization.  */
  delete_insn (old_insn);

  /* Replace each use of the old target register by a use of the new target
     register.  */
  for (user = def->uses; user != NULL; user = user->next)
    {
      /* Some extra work here to ensure consistent modes, because
	 it seems that a target register REG rtx can be given a different
	 mode depending on the context (surely that should not be
	 the case?).  */
      rtx replacement_rtx;
      if (GET_MODE (user->use) == GET_MODE (btr_rtx)
	  || GET_MODE (user->use) == VOIDmode)
	replacement_rtx = btr_rtx;
      else
	replacement_rtx = gen_rtx_REG (GET_MODE (user->use), btr);
      validate_replace_rtx (user->use, replacement_rtx, user->insn);
      user->use = replacement_rtx;
    }
}

/* We anticipate intra-block scheduling to be done.  See if INSN could move
   up within BB by N_INSNS.  */
static int
can_move_up (const_basic_block bb, const rtx_insn *insn, int n_insns)
{
  while (insn != BB_HEAD (bb) && n_insns > 0)
    {
      insn = PREV_INSN (insn);
      /* ??? What if we have an anti-dependency that actually prevents the
	 scheduler from doing the move?  We'd like to re-allocate the register,
	 but not necessarily put the load into another basic block.  */
      if (INSN_P (insn))
	n_insns--;
    }
  return n_insns <= 0;
}

/* Attempt to migrate the target register definition DEF to an
   earlier point in the flowgraph.

   It is a precondition of this function that DEF is migratable:
   i.e. it has a constant source, and all uses are unambiguous.

   Only migrations that reduce the cost of DEF will be made.
   MIN_COST is the lower bound on the cost of the DEF after migration.
   If we migrate DEF so that its cost falls below MIN_COST,
   then we do not attempt to migrate further.  The idea is that
   we migrate definitions in a priority order based on their cost;
   when the cost of this definition falls below MIN_COST, there is
   another definition with cost == MIN_COST which now has a higher
   priority than this definition.

   Return nonzero if there may be benefit from attempting to
   migrate this DEF further (i.e. we have reduced the cost below
   MIN_COST, but we may be able to reduce it further).
   Return zero if no further migration is possible.  */
static int
migrate_btr_def (btr_def *def, int min_cost)
{
  HARD_REG_SET btrs_live_in_range;
  int btr_used_near_def = 0;
  int def_basic_block_freq;
  basic_block attempt;
  int give_up = 0;
  int def_moved = 0;
  btr_user *user;
  int def_latency;

  if (dump_file)
    fprintf (dump_file,
	     "Attempting to migrate pt from insn %d (cost = %d, min_cost = %d) ... ",
	     INSN_UID (def->insn), def->cost, min_cost);

  if (!def->group || def->has_ambiguous_use)
    /* These defs are not migratable.  */
    {
      if (dump_file)
	fprintf (dump_file, "it's not migratable\n");
      return 0;
    }

  if (!def->uses)
    /* We have combined this def with another in the same group, so
       no need to consider it further.  */
    {
      if (dump_file)
	fprintf (dump_file, "it's already combined with another pt\n");
      return 0;
    }

  btr_def_live_range (def, &btrs_live_in_range);
  auto_bitmap live_range;
  bitmap_copy (live_range, def->live_range);

#ifdef INSN_SCHEDULING
  def_latency = insn_default_latency (def->insn) * issue_rate;
#else
  def_latency = issue_rate;
#endif

  for (user = def->uses; user != NULL; user = user->next)
    {
      if (user->bb == def->bb
	  && user->luid > def->luid
	  && (def->luid + def_latency) > user->luid
	  && ! can_move_up (def->bb, def->insn,
			    (def->luid + def_latency) - user->luid))
	{
	  btr_used_near_def = 1;
	  break;
	}
    }

  def_basic_block_freq = basic_block_freq (def->bb);

  for (attempt = get_immediate_dominator (CDI_DOMINATORS, def->bb);
       !give_up && attempt && attempt != ENTRY_BLOCK_PTR_FOR_FN (cfun)
       && def->cost >= min_cost;
       attempt = get_immediate_dominator (CDI_DOMINATORS, attempt))
    {
      /* Try to move the instruction that sets the target register into
	 basic block ATTEMPT.  */
      int try_freq = basic_block_freq (attempt);
      edge_iterator ei;
      edge e;

      /* If ATTEMPT has abnormal edges, skip it.  */
      FOR_EACH_EDGE (e, ei, attempt->succs)
	if (e->flags & EDGE_COMPLEX)
	  break;
      if (e)
	continue;

      if (dump_file)
	fprintf (dump_file, "trying block %d ...", attempt->index);

      if (try_freq < def_basic_block_freq
	  || (try_freq == def_basic_block_freq && btr_used_near_def))
	{
	  int btr;
	  augment_live_range (live_range, &btrs_live_in_range, def->bb, attempt,
			      flag_btr_bb_exclusive);
	  if (dump_file)
	    {
	      fprintf (dump_file, "Now btrs live in range are: ");
	      dump_hard_reg_set (btrs_live_in_range);
	      fprintf (dump_file, "\n");
	    }
	  btr = choose_btr (btrs_live_in_range);
	  if (btr != -1)
	    {
	      move_btr_def (attempt, btr, def, live_range, &btrs_live_in_range);
	      bitmap_copy (live_range, def->live_range);
	      btr_used_near_def = 0;
	      def_moved = 1;
	      def_basic_block_freq = basic_block_freq (def->bb);
	    }
	  else
	    {
	      /* There are no free target registers available to move
		 this far forward, so give up.  */
	      give_up = 1;
	      if (dump_file)
		fprintf (dump_file,
			 "giving up because there are no free target registers\n");
	    }
	}
    }
  if (!def_moved)
    {
      give_up = 1;
      if (dump_file)
	fprintf (dump_file, "failed to move\n");
    }

  return !give_up;
}

/* Attempt to move instructions that set target registers earlier
   in the flowgraph, away from their corresponding uses.  */
static void
migrate_btr_defs (enum reg_class btr_class, int allow_callee_save)
{
  btr_heap_t all_btr_defs (LONG_MIN);
  int reg;

  gcc_obstack_init (&migrate_btrl_obstack);
  if (dump_file)
    {
      int i;

      for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
	{
	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  fprintf (dump_file, "Basic block %d: count = ", i);
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " loop-depth = %d idom = %d\n",
		   bb_loop_depth (bb),
		   get_immediate_dominator (CDI_DOMINATORS, bb)->index);
	}
    }

  CLEAR_HARD_REG_SET (all_btrs);
  for (first_btr = -1, reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
    if (TEST_HARD_REG_BIT (reg_class_contents[(int) btr_class], reg)
	&& (allow_callee_save || call_used_regs[reg]
	    || df_regs_ever_live_p (reg)))
      {
	SET_HARD_REG_BIT (all_btrs, reg);
	last_btr = reg;
	if (first_btr < 0)
	  first_btr = reg;
      }

  btrs_live = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun));
  btrs_live_at_end = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun));

  build_btr_def_use_webs (&all_btr_defs);

  while (!all_btr_defs.empty ())
    {
      int min_cost = -all_btr_defs.min_key ();
      btr_def *def = all_btr_defs.extract_min ();
      if (migrate_btr_def (def, min_cost))
	{
	  all_btr_defs.insert (-def->cost, def);
	  if (dump_file)
	    {
	      fprintf (dump_file,
		"Putting insn %d back on queue with priority %d\n",
		INSN_UID (def->insn), def->cost);
	    }
	}
      else
	BITMAP_FREE (def->live_range);
    }

  free (btrs_live);
  free (btrs_live_at_end);
  obstack_free (&migrate_btrl_obstack, NULL);
}

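/* Entry point for the branch target register load optimization.
   AFTER_PROLOGUE_EPILOGUE_GEN is true when the pass runs after the
   prologue and epilogue have been generated.  */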
static void
branch_target_load_optimize (bool after_prologue_epilogue_gen)
{
  enum reg_class klass
    = (enum reg_class) targetm.branch_target_register_class ();
  if (klass != NO_REGS)
    {
      /* Initialize issue_rate.  */
      if (targetm.sched.issue_rate)
	issue_rate = targetm.sched.issue_rate ();
      else
	issue_rate = 1;

      if (!after_prologue_epilogue_gen)
	{
	  /* Build the CFG for migrate_btr_defs.  */
#if 1
	  /* This may or may not be needed, depending on where we
	     run this phase.  */
	  cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
#endif
	}
      df_analyze ();

      /* Dominator info is also needed for migrate_btr_def.  */
      calculate_dominance_info (CDI_DOMINATORS);
      migrate_btr_defs (klass,
			(targetm.branch_target_register_callee_saved
			 (after_prologue_epilogue_gen)));

      free_dominance_info (CDI_DOMINATORS);
    }
}

namespace {

const pass_data pass_data_branch_target_load_optimize1 =
{
  RTL_PASS, /* type */
  "btl1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_branch_target_load_optimize1 : public rtl_opt_pass
{
public:
  pass_branch_target_load_optimize1 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_branch_target_load_optimize1, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_branch_target_load_optimize; }
  virtual unsigned int execute (function *)
    {
      branch_target_load_optimize (epilogue_completed);
      return 0;
    }

}; // class pass_branch_target_load_optimize1

} // anon namespace

rtl_opt_pass *
make_pass_branch_target_load_optimize1 (gcc::context *ctxt)
{
  return new pass_branch_target_load_optimize1 (ctxt);
}

namespace {

const pass_data pass_data_branch_target_load_optimize2 =
{
  RTL_PASS, /* type */
  "btl2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_branch_target_load_optimize2 : public rtl_opt_pass
{
public:
  pass_branch_target_load_optimize2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_branch_target_load_optimize2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && flag_branch_target_load_optimize2);
    }

  virtual unsigned int execute (function *);

}; // class pass_branch_target_load_optimize2

unsigned int
pass_branch_target_load_optimize2::execute (function *)
{
  static int warned = 0;

  /* Leave this a warning for now so that it is possible to experiment
     with running this pass twice.  In 3.6, we should either make this
     an error, or use separate dump files.  */
  if (flag_branch_target_load_optimize
      && flag_branch_target_load_optimize2
      && !warned)
    {
      warning (0, "branch target register load optimization is not intended "
	       "to be run twice");

      warned = 1;
    }

  branch_target_load_optimize (epilogue_completed);
  return 0;
}

} // anon namespace

rtl_opt_pass *
make_pass_branch_target_load_optimize2 (gcc::context *ctxt)
{
  return new pass_branch_target_load_optimize2 (ctxt);
}