1 /* Perform instruction reorganizations for delay slot filling.
2    Copyright (C) 1992-2018 Free Software Foundation, Inc.
3    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4    Hacked by Michael Tiemann (tiemann@cygnus.com).
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 /* Instruction reorganization pass.
23 
24    This pass runs after register allocation and final jump
25    optimization.  It should be the last pass to run before peephole.
26    It serves primarily to fill delay slots of insns, typically branch
27    and call insns.  Other insns typically involve more complicated
28    interactions of data dependencies and resource constraints, and
29    are better handled by scheduling before register allocation (by the
30    function `schedule_insns').
31 
32    The Branch Penalty is the number of extra cycles that are needed to
33    execute a branch insn.  On an ideal machine, branches take a single
34    cycle, and the Branch Penalty is 0.  Several RISC machines approach
35    branch delays differently:
36 
37    The MIPS has a single branch delay slot.  Most insns
38    (except other branches) can be used to fill this slot.  When the
39    slot is filled, two insns execute in two cycles, reducing the
40    branch penalty to zero.
41 
42    The SPARC always has a branch delay slot, but its effects can be
43    annulled when the branch is not taken.  This means that, failing to
44    find other sources of insns, we can hoist an insn from the branch
45    target that would only be safe to execute knowing that the branch
46    is taken.
47 
48    The HP-PA always has a branch delay slot.  For unconditional branches
49    its effects can be annulled when the branch is taken.  The effects
50    of the delay slot in a conditional branch can be nullified for forward
51    taken branches, or for untaken backward branches.  This means
52    we can hoist insns from the fall-through path for forward branches or
53    steal insns from the target of backward branches.
54 
55    The TMS320C3x and C4x have three branch delay slots.  When the three
56    slots are filled, the branch penalty is zero.  Most insns can fill the
57    delay slots except jump insns.
58 
59    Three techniques for filling delay slots have been implemented so far:
60 
61    (1) `fill_simple_delay_slots' is the simplest, most efficient way
62    to fill delay slots.  This pass first looks for insns which come
63    from before the branch and which are safe to execute after the
64    branch.  Then it searches after the insn requiring delay slots or,
65    in the case of a branch, for insns that are after the point at
66    which the branch merges into the fallthrough code, if such a point
67    exists.  When such insns are found, the branch penalty decreases
68    and no code expansion takes place.
69 
70    (2) `fill_eager_delay_slots' is more complicated: it is used for
71    scheduling conditional jumps, or for scheduling jumps which cannot
72    be filled using (1).  A machine need not have annulled jumps to use
73    this strategy, but it helps (by keeping more options open).
74    `fill_eager_delay_slots' tries to guess the direction the branch
75    will go; if it guesses right 100% of the time, it can reduce the
76    branch penalty as much as `fill_simple_delay_slots' does.  If it
77    guesses wrong 100% of the time, it might as well schedule nops.  When
78    `fill_eager_delay_slots' takes insns from the fall-through path of
79    the jump, usually there is no code expansion; when it takes insns
80    from the branch target, there is code expansion if it is not the
81    only way to reach that target.
82 
83    (3) `relax_delay_slots' uses a set of rules to simplify code that
84    has been reorganized by (1) and (2).  It finds cases where
85    conditional test can be eliminated, jumps can be threaded, extra
86    insns can be eliminated, etc.  It is the job of (1) and (2) to do a
87    good job of scheduling locally; `relax_delay_slots' takes care of
88    making the various individual schedules work well together.  It is
89    especially tuned to handle the control flow interactions of branch
90    insns.  It does nothing for insns with delay slots that do not
91    branch.
92 
93    On machines that use CC0, we are very conservative.  We will not make
94    a copy of an insn involving CC0 since we want to maintain a 1-1
95    correspondence between the insn that sets CC0 and its user.  The insns are
96    allowed to be separated by placing an insn that sets CC0 (but not an insn
97    that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98    delay slot.  In that case, we point each insn at the other with REG_CC_USER
99    and REG_CC_SETTER notes.  Note that these restrictions affect very few
100    machines because most RISC machines with delay slots will not use CC0
101    (the RT is the only known exception at this point).  */
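/* As a purely illustrative sketch (not taken from any particular target
   description), simple delay slot filling on a MIPS-like machine might
   turn

	addu  $t0,$t1,$t2
	beq   $a0,$zero,L1
	nop			# unfilled delay slot

   into

	beq   $a0,$zero,L1
	addu  $t0,$t1,$t2	# hoisted into the delay slot

   because the addition is safe to execute whether or not the branch is
   taken; this is the kind of transformation `fill_simple_delay_slots'
   performs.  */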
102 
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "memmodel.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "emit-rtl.h"
116 #include "recog.h"
117 #include "insn-attr.h"
118 #include "resource.h"
119 #include "params.h"
120 #include "tree-pass.h"
121 
122 
123 /* First, some functions that were used before GCC got a control flow graph.
124    These functions are now only used here in reorg.c, and have therefore
125    been moved here to avoid inadvertent misuse elsewhere in the compiler.  */
126 
127 /* Return the last label to mark the same position as LABEL.  Return LABEL
128    itself if it is null or any return rtx.  */
129 
130 static rtx
131 skip_consecutive_labels (rtx label_or_return)
132 {
133   rtx_insn *insn;
134 
135   if (label_or_return && ANY_RETURN_P (label_or_return))
136     return label_or_return;
137 
138   rtx_insn *label = as_a <rtx_insn *> (label_or_return);
139 
140   for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
141     if (LABEL_P (insn))
142       label = insn;
143 
144   return label;
145 }
146 
147 /* INSN uses CC0 and is being moved into a delay slot.  Set up REG_CC_SETTER
148    and REG_CC_USER notes so we can find it.  */
149 
150 static void
151 link_cc0_insns (rtx_insn *insn)
152 {
153   rtx user = next_nonnote_insn (insn);
154 
155   if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
156     user = XVECEXP (PATTERN (user), 0, 0);
157 
158   add_reg_note (user, REG_CC_SETTER, insn);
159   add_reg_note (insn, REG_CC_USER, user);
160 }
161 
162 /* Insns which have delay slots that have not yet been filled.  */
163 
164 static struct obstack unfilled_slots_obstack;
165 static rtx *unfilled_firstobj;
166 
167 /* Define macros to refer to the first and last slot containing unfilled
168    insns.  These are used because the list may move and its address
169    should be recomputed at each use.  */
170 
171 #define unfilled_slots_base	\
172   ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
173 
174 #define unfilled_slots_next	\
175   ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
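/* A hypothetical scan over the pending insns (illustration only, not a
   definition from this file) would index through the obstack region,
   e.g.

	for (int i = 0; unfilled_slots_base + i < unfilled_slots_next; i++)
	  process_insn (unfilled_slots_base[i]);

   re-reading the base on every use, since growing the obstack can move
   the whole block.  */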
176 
177 /* Points to the label before the end of the function, or before a
178    return insn.  */
179 static rtx_code_label *function_return_label;
180 /* Likewise for a simple_return.  */
181 static rtx_code_label *function_simple_return_label;
182 
183 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
184    not always monotonically increase.  */
185 static int *uid_to_ruid;
186 
187 /* Highest valid index in `uid_to_ruid'.  */
188 static int max_uid;
189 
190 static int stop_search_p (rtx_insn *, int);
191 static int resource_conflicts_p (struct resources *, struct resources *);
192 static int insn_references_resource_p (rtx, struct resources *, bool);
193 static int insn_sets_resource_p (rtx, struct resources *, bool);
194 static rtx_code_label *find_end_label (rtx);
195 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
196 				      int);
197 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
198 static rtx_insn *delete_from_delay_slot (rtx_insn *);
199 static void delete_scheduled_jump (rtx_insn *);
200 static void note_delay_statistics (int, int);
201 static int get_jump_flags (const rtx_insn *, rtx);
202 static int mostly_true_jump (rtx);
203 static rtx get_branch_condition (const rtx_insn *, rtx);
204 static int condition_dominates_p (rtx, const rtx_insn *);
205 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
206 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
207 					    const vec<rtx_insn *> &);
208 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
209 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
210 					  vec<rtx_insn *> *,
211 					  struct resources *,
212 					  struct resources *,
213 					  struct resources *,
214 					  int, int *, int *,
215 					  rtx *);
216 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
217 					       vec<rtx_insn *> *,
218 					       struct resources *,
219 					       struct resources *,
220 					       struct resources *,
221 					       int, int *, int *);
222 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
223 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
224 static int own_thread_p (rtx, rtx, int);
225 static void update_block (rtx_insn *, rtx_insn *);
226 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
227 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
228 static void fix_reg_dead_note (rtx_insn *, rtx);
229 static void update_reg_unused_notes (rtx_insn *, rtx);
230 static void fill_simple_delay_slots (int);
231 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
232 				    int, int, int, int,
233 				    int *, vec<rtx_insn *> *);
234 static void fill_eager_delay_slots (void);
235 static void relax_delay_slots (rtx_insn *);
236 static void make_return_insns (rtx_insn *);
237 
238 /* A wrapper around next_active_insn which takes care to return any
239    return rtx unchanged.  */
240 
241 static rtx
242 first_active_target_insn (rtx insn)
243 {
244   if (ANY_RETURN_P (insn))
245     return insn;
246   return next_active_insn (as_a <rtx_insn *> (insn));
247 }
248 
249 /* Return true iff INSN is a simplejump, or any kind of return insn.  */
250 
251 static bool
252 simplejump_or_return_p (rtx insn)
253 {
254   return (JUMP_P (insn)
255 	  && (simplejump_p (as_a <rtx_insn *> (insn))
256 	      || ANY_RETURN_P (PATTERN (insn))));
257 }
258 
259 /* Return TRUE if this insn should stop the search for insns to fill delay
260    slots.  LABELS_P indicates that labels should terminate the search.
261    In all cases, jumps terminate the search.  */
262 
263 static int
264 stop_search_p (rtx_insn *insn, int labels_p)
265 {
266   if (insn == 0)
267     return 1;
268 
269   /* If the insn can throw an exception that is caught within the function,
270      it may effectively perform a jump from the viewpoint of the function.
271      Therefore act like for a jump.  */
272   if (can_throw_internal (insn))
273     return 1;
274 
275   switch (GET_CODE (insn))
276     {
277     case NOTE:
278     case CALL_INSN:
279     case DEBUG_INSN:
280       return 0;
281 
282     case CODE_LABEL:
283       return labels_p;
284 
285     case JUMP_INSN:
286     case BARRIER:
287       return 1;
288 
289     case INSN:
290       /* OK unless it contains a delay slot or is an `asm' insn of some type.
291 	 We don't know anything about these.  */
292       return (GET_CODE (PATTERN (insn)) == SEQUENCE
293 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT
294 	      || asm_noperands (PATTERN (insn)) >= 0);
295 
296     default:
297       gcc_unreachable ();
298     }
299 }
300 
301 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
302    resource set contains a volatile memory reference.  Otherwise, return FALSE.  */
303 
304 static int
305 resource_conflicts_p (struct resources *res1, struct resources *res2)
306 {
307   if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
308       || res1->volatil || res2->volatil)
309     return 1;
310 
311   return hard_reg_set_intersect_p (res1->regs, res2->regs);
312 }
313 
314 /* Return TRUE if any resource marked in RES, a `struct resources', is
315    referenced by INSN.  If INCLUDE_DELAYED_EFFECTS is set, also return TRUE
316    if the routine called by a CALL_INSN uses those resources.
317 
318    We compute this by computing all the resources referenced by INSN and
319    seeing if this conflicts with RES.  It might be faster to directly check
320    ourselves, and this is the way it used to work, but it means duplicating
321    a large block of complex code.  */
322 
323 static int
324 insn_references_resource_p (rtx insn, struct resources *res,
325 			    bool include_delayed_effects)
326 {
327   struct resources insn_res;
328 
329   CLEAR_RESOURCE (&insn_res);
330   mark_referenced_resources (insn, &insn_res, include_delayed_effects);
331   return resource_conflicts_p (&insn_res, res);
332 }
333 
334 /* Return TRUE if INSN modifies resources that are marked in RES.
335    INCLUDE_DELAYED_EFFECTS is set if the effects of a called routine should
336    be included.  CC0 is only modified if it is explicitly set; see comments
337    in front of mark_set_resources for details.  */
338 
339 static int
340 insn_sets_resource_p (rtx insn, struct resources *res,
341 		      bool include_delayed_effects)
342 {
343   struct resources insn_sets;
344 
345   CLEAR_RESOURCE (&insn_sets);
346   mark_set_resources (insn, &insn_sets, 0,
347 		      (include_delayed_effects
348 		       ? MARK_SRC_DEST_CALL
349 		       : MARK_SRC_DEST));
350   return resource_conflicts_p (&insn_sets, res);
351 }
352 
353 /* Find a label at the end of the function or before a RETURN.  If there
354    is none, try to make one.  If that fails, returns 0.
355 
356    The property of such a label is that it is placed just before the
357    epilogue or a bare RETURN insn, so that another bare RETURN can be
358    turned into a jump to the label unconditionally.  In particular, the
359    label cannot be placed before a RETURN insn with a filled delay slot.
360 
361    ??? There may be a problem with the current implementation.  Suppose
362    we start with a bare RETURN insn and call find_end_label.  It may set
363    function_return_label just before the RETURN.  Suppose the machinery
364    is able to fill the delay slot of the RETURN insn afterwards.  Then
365    function_return_label is no longer valid according to the property
366    described above and find_end_label will still return it unmodified.
367    Note that this is probably mitigated by the following observation:
368    once function_return_label is made, it is very likely the target of
369    a jump, so filling the delay slot of the RETURN will be much more
370    difficult.
371    KIND is either simple_return_rtx or ret_rtx, indicating which type of
372    return we're looking for.  */
373 
374 static rtx_code_label *
375 find_end_label (rtx kind)
376 {
377   rtx_insn *insn;
378   rtx_code_label **plabel;
379 
380   if (kind == ret_rtx)
381     plabel = &function_return_label;
382   else
383     {
384       gcc_assert (kind == simple_return_rtx);
385       plabel = &function_simple_return_label;
386     }
387 
388   /* If we found one previously, return it.  */
389   if (*plabel)
390     return *plabel;
391 
392   /* Otherwise, see if there is a label at the end of the function.  If there
393      is, it must be that RETURN insns aren't needed, so that is our return
394      label and we don't have to do anything else.  */
395 
396   insn = get_last_insn ();
397   while (NOTE_P (insn)
398 	 || (NONJUMP_INSN_P (insn)
399 	     && (GET_CODE (PATTERN (insn)) == USE
400 		 || GET_CODE (PATTERN (insn)) == CLOBBER)))
401     insn = PREV_INSN (insn);
402 
403   /* When a target threads its epilogue we might already have a
404      suitable return insn.  If so put a label before it for the
405      function_return_label.  */
406   if (BARRIER_P (insn)
407       && JUMP_P (PREV_INSN (insn))
408       && PATTERN (PREV_INSN (insn)) == kind)
409     {
410       rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
411       rtx_code_label *label = gen_label_rtx ();
412       LABEL_NUSES (label) = 0;
413 
414       /* Put the label before any USE insns that may precede the RETURN
415 	 insn.  */
416       while (GET_CODE (temp) == USE)
417 	temp = PREV_INSN (temp);
418 
419       emit_label_after (label, temp);
420       *plabel = label;
421     }
422 
423   else if (LABEL_P (insn))
424     *plabel = as_a <rtx_code_label *> (insn);
425   else
426     {
427       rtx_code_label *label = gen_label_rtx ();
428       LABEL_NUSES (label) = 0;
429       /* If the basic block reorder pass moves the return insn to
430 	 some other place try to locate it again and put our
431 	 function_return_label there.  */
432       while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
433 	insn = PREV_INSN (insn);
434       if (insn)
435 	{
436 	  insn = PREV_INSN (insn);
437 
438 	  /* Put the label before any USE insns that may precede the
439 	     RETURN insn.  */
440 	  while (GET_CODE (insn) == USE)
441 	    insn = PREV_INSN (insn);
442 
443 	  emit_label_after (label, insn);
444 	}
445       else
446 	{
447 	  if (targetm.have_epilogue () && ! targetm.have_return ())
448 	    /* The RETURN insn has its delay slot filled so we cannot
449 	       emit the label just before it.  Since we already have
450 	       an epilogue and cannot emit a new RETURN, we cannot
451 	       emit the label at all.  */
452 	    return NULL;
453 
454 	  /* Otherwise, make a new label and emit a RETURN and BARRIER,
455 	     if needed.  */
456 	  emit_label (label);
457 	  if (targetm.have_return ())
458 	    {
459 	      /* The return we make may have delay slots too.  */
460 	      rtx_insn *pat = targetm.gen_return ();
461 	      rtx_insn *insn = emit_jump_insn (pat);
462 	      set_return_jump_label (insn);
463 	      emit_barrier ();
464 	      if (num_delay_slots (insn) > 0)
465 		obstack_ptr_grow (&unfilled_slots_obstack, insn);
466 	    }
467 	}
468       *plabel = label;
469     }
470 
471   /* Show one additional use for this label so it won't go away until
472      we are done.  */
473   ++LABEL_NUSES (*plabel);
474 
475   return *plabel;
476 }
477 
478 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
479    the pattern of INSN with the SEQUENCE.
480 
481    Returns the insn containing the SEQUENCE that replaces INSN.  */
482 
483 static rtx_insn *
484 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
485 {
486   /* Allocate the rtvec to hold the insns and the SEQUENCE.  */
487   rtvec seqv = rtvec_alloc (length + 1);
488   rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
489   rtx_insn *seq_insn = make_insn_raw (seq);
490 
491   /* If DELAY_INSN has a location, use it for SEQ_INSN.  If DELAY_INSN does
492      not have a location, but one of the delayed insns does, we pick up a
493      location from there later.  */
494   INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
495 
496   /* Unlink INSN from the insn chain, so that we can put it into
497      the SEQUENCE.   Remember where we want to emit SEQUENCE in AFTER.  */
498   rtx_insn *after = PREV_INSN (insn);
499   remove_insn (insn);
500   SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
501 
502   /* Build our SEQUENCE and rebuild the insn chain.  */
503   start_sequence ();
504   XVECEXP (seq, 0, 0) = emit_insn (insn);
505 
506   unsigned int delay_insns = list.length ();
507   gcc_assert (delay_insns == (unsigned int) length);
508   for (unsigned int i = 0; i < delay_insns; i++)
509     {
510       rtx_insn *tem = list[i];
511       rtx note, next;
512 
513       /* Show that this copy of the insn isn't deleted.  */
514       tem->set_undeleted ();
515 
516       /* Unlink insn from its original place, and re-emit it into
517 	 the sequence.  */
518       SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
519       XVECEXP (seq, 0, i + 1) = emit_insn (tem);
520 
521       /* The SPARC assembler, for instance, emits a warning when debug info
522          is output into the delay slot.  */
523       if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
524 	INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
525       INSN_LOCATION (tem) = 0;
526 
527       for (note = REG_NOTES (tem); note; note = next)
528 	{
529 	  next = XEXP (note, 1);
530 	  switch (REG_NOTE_KIND (note))
531 	    {
532 	    case REG_DEAD:
533 	      /* Remove any REG_DEAD notes because we can't rely on them now
534 		 that the insn has been moved.  */
535 	      remove_note (tem, note);
536 	      break;
537 
538 	    case REG_LABEL_OPERAND:
539 	    case REG_LABEL_TARGET:
540 	      /* Keep the label reference count up to date.  */
541 	      if (LABEL_P (XEXP (note, 0)))
542 		LABEL_NUSES (XEXP (note, 0)) ++;
543 	      break;
544 
545 	    default:
546 	      break;
547 	    }
548 	}
549     }
550   end_sequence ();
551 
552   /* Splice our SEQUENCE into the insn stream where INSN used to be.  */
553   add_insn_after (seq_insn, after, NULL);
554 
555   return seq_insn;
556 }
557 
558 /* Add INSN to the end of DELAY_LIST.  The list must be kept in the order
559    in which the insns are to be executed.  */
560 
561 static void
562 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
563 {
564   /* If INSN has its block number recorded, clear it since we may
565      be moving the insn to a new block.  */
566   clear_hashed_info_for_insn (insn);
567   delay_list->safe_push (insn);
568 }
569 
570 /* Delete INSN from the delay slot of the insn that it is in, which may
571    produce an insn with no delay slots.  Return the new insn.  */
572 
573 static rtx_insn *
574 delete_from_delay_slot (rtx_insn *insn)
575 {
576   rtx_insn *trial, *seq_insn, *prev;
577   rtx_sequence *seq;
578   int i;
579   int had_barrier = 0;
580 
581   /* We first must find the insn containing the SEQUENCE with INSN in its
582      delay slot.  Do this by finding an insn, TRIAL, where
583      PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL.  */
584 
585   for (trial = insn;
586        PREV_INSN (NEXT_INSN (trial)) == trial;
587        trial = NEXT_INSN (trial))
588     ;
589 
590   seq_insn = PREV_INSN (NEXT_INSN (trial));
591   seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
592 
593   if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
594     had_barrier = 1;
595 
596   /* Create a delay list consisting of all the insns other than the one
597      we are deleting (unless we were the only one).  */
598   auto_vec<rtx_insn *, 5> delay_list;
599   if (seq->len () > 2)
600     for (i = 1; i < seq->len (); i++)
601       if (seq->insn (i) != insn)
602 	add_to_delay_list (seq->insn (i), &delay_list);
603 
604   /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
605      list, and rebuild the delay list if non-empty.  */
606   prev = PREV_INSN (seq_insn);
607   trial = seq->insn (0);
608   delete_related_insns (seq_insn);
609   add_insn_after (trial, prev, NULL);
610 
611   /* If there was a barrier after the old SEQUENCE, re-emit it.  */
612   if (had_barrier)
613     emit_barrier_after (trial);
614 
615   /* If there are any delay insns, re-emit them.  Otherwise clear the
616      annul flag.  */
617   if (!delay_list.is_empty ())
618     trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
619   else if (JUMP_P (trial))
620     INSN_ANNULLED_BRANCH_P (trial) = 0;
621 
622   INSN_FROM_TARGET_P (insn) = 0;
623 
624   /* Show we need to fill this insn again.  */
625   obstack_ptr_grow (&unfilled_slots_obstack, trial);
626 
627   return trial;
628 }
629 
630 /* Delete INSN, a JUMP_INSN.  If it is a conditional jump, we must track down
631    the insn that sets CC0 for it and delete it too.  */
632 
633 static void
634 delete_scheduled_jump (rtx_insn *insn)
635 {
636   /* Delete the insn that sets cc0 for us.  On machines without cc0, we could
637      delete the insn that sets the condition code, but it is hard to find it.
638      Since this case is rare anyway, don't bother trying; there would likely
639      be other insns that became dead anyway, which we wouldn't know to
640      delete.  */
641 
642   if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
643     {
644       rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
645 
646       /* If a reg-note was found, it points to an insn to set CC0.  This
647 	 insn is in the delay list of some other insn.  So delete it from
648 	 the delay list it was in.  */
649       if (note)
650 	{
651 	  if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
652 	      && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
653 	    delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
654 	}
655       else
656 	{
657 	  /* The insn setting CC0 is our previous insn, but it may be in
658 	     a delay slot.  It will be the last insn in the delay slot, if
659 	     it is.  */
660 	  rtx_insn *trial = previous_insn (insn);
661 	  if (NOTE_P (trial))
662 	    trial = prev_nonnote_insn (trial);
663 	  if (sets_cc0_p (PATTERN (trial)) != 1
664 	      || FIND_REG_INC_NOTE (trial, NULL_RTX))
665 	    return;
666 	  if (PREV_INSN (NEXT_INSN (trial)) == trial)
667 	    delete_related_insns (trial);
668 	  else
669 	    delete_from_delay_slot (trial);
670 	}
671     }
672 
673   delete_related_insns (insn);
674 }
675 
676 /* Counters for delay-slot filling.  */
677 
678 #define NUM_REORG_FUNCTIONS 2
679 #define MAX_DELAY_HISTOGRAM 3
680 #define MAX_REORG_PASSES 2
681 
682 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
683 
684 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
685 
686 static int reorg_pass_number;
687 
688 static void
689 note_delay_statistics (int slots_filled, int index)
690 {
691   num_insns_needing_delays[index][reorg_pass_number]++;
692   if (slots_filled > MAX_DELAY_HISTOGRAM)
693     slots_filled = MAX_DELAY_HISTOGRAM;
694   num_filled_delays[index][slots_filled][reorg_pass_number]++;
695 }
696 
697 /* Optimize the following cases:
698 
699    1.  When a conditional branch skips over only one instruction,
700        use an annulling branch and put that insn in the delay slot.
701        Use either a branch that annuls when the condition is true or
702        invert the test with a branch that annuls when the condition is
703        false.  This saves insns, since otherwise we must copy an insn
704        from the L1 target.
705 
706         (orig)		 (skip)		(otherwise)
707 	Bcc.n L1	Bcc',a L1	Bcc,a L1'
708 	insn		insn		insn2
709       L1:	      L1:	      L1:
710 	insn2		insn2		insn2
711 	insn3		insn3	      L1':
712 					insn3
713 
714    2.  When a conditional branch skips over only one instruction,
715        and after that, it unconditionally branches somewhere else,
716        perform a similar optimization.  This saves executing the
717        second branch in the case where the inverted condition is true.
718 
719 	Bcc.n L1	Bcc',a L2
720 	insn		insn
721       L1:	      L1:
722 	Bra L2		Bra L2
723 
724    INSN is a JUMP_INSN.
725 
726    This should be expanded to skip over N insns, where N is the number
727    of delay slots required.  */
728 
729 static void
730 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
731 {
732   rtx_insn *trial = next_nonnote_insn (insn);
733   rtx_insn *next_trial = next_active_insn (trial);
734   int flags;
735 
736   flags = get_jump_flags (insn, JUMP_LABEL (insn));
737 
738   if (trial == 0
739       || !NONJUMP_INSN_P (trial)
740       || GET_CODE (PATTERN (trial)) == SEQUENCE
741       || recog_memoized (trial) < 0
742       || (! eligible_for_annul_false (insn, 0, trial, flags)
743 	  && ! eligible_for_annul_true (insn, 0, trial, flags))
744       || RTX_FRAME_RELATED_P (trial)
745       || can_throw_internal (trial))
746     return;
747 
748   /* There are two cases where we are just executing one insn (we assume
749      here that a branch requires only one insn; this should be generalized
750      at some point):  Where the branch goes around a single insn or where
751      we have one insn followed by a branch to the same label we branch to.
752      In both of these cases, inverting the jump and annulling the delay
753      slot give the same effect in fewer insns.  */
754   if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
755       || (next_trial != 0
756 	  && simplejump_or_return_p (next_trial)
757 	  && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
758     {
759       if (eligible_for_annul_false (insn, 0, trial, flags))
760 	{
761 	  if (invert_jump (insn, JUMP_LABEL (insn), 1))
762 	    INSN_FROM_TARGET_P (trial) = 1;
763 	  else if (! eligible_for_annul_true (insn, 0, trial, flags))
764 	    return;
765 	}
766 
767       add_to_delay_list (trial, delay_list);
768       next_trial = next_active_insn (trial);
769       update_block (trial, trial);
770       delete_related_insns (trial);
771 
772       /* Also, if we are targeting an unconditional
773 	 branch, thread our jump to the target of that branch.  Don't
774 	 change this into a RETURN here, because it may not accept what
775 	 we have in the delay slot.  We'll fix this up later.  */
776       if (next_trial && simplejump_or_return_p (next_trial))
777 	{
778 	  rtx target_label = JUMP_LABEL (next_trial);
779 	  if (ANY_RETURN_P (target_label))
780 	    target_label = find_end_label (target_label);
781 
782 	  if (target_label)
783 	    {
784 	      /* Recompute the flags based on TARGET_LABEL since threading
785 		 the jump to TARGET_LABEL may change the direction of the
786 		 jump (which may change the circumstances in which the
787 		 delay slot is nullified).  */
788 	      flags = get_jump_flags (insn, target_label);
789 	      if (eligible_for_annul_true (insn, 0, trial, flags))
790 		reorg_redirect_jump (insn, target_label);
791 	    }
792 	}
793 
794       INSN_ANNULLED_BRANCH_P (insn) = 1;
795     }
796 }
797 
798 /*  Encode and return branch direction and prediction information for
799     INSN assuming it will jump to LABEL.
800 
801     Non-conditional branches return no direction information and
802     are predicted as very likely taken.  */
803 
804 static int
805 get_jump_flags (const rtx_insn *insn, rtx label)
806 {
807   int flags;
808 
809   /* get_jump_flags can be passed any insn with delay slots; these may
810      be INSNs, CALL_INSNs, or JUMP_INSNs.  Only JUMP_INSNs have branch
811      direction information, and only if they are conditional jumps.
812 
813      If LABEL is a return, then there is no way to determine the branch
814      direction.  */
815   if (JUMP_P (insn)
816       && (condjump_p (insn) || condjump_in_parallel_p (insn))
817       && !ANY_RETURN_P (label)
818       && INSN_UID (insn) <= max_uid
819       && INSN_UID (label) <= max_uid)
820     flags
821       = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
822 	 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
823   /* No valid direction information.  */
824   else
825     flags = 0;
826 
827   return flags;
828 }
829 
830 /* Return truth value of the statement that this branch
831    is mostly taken.  If we think that the branch is extremely likely
832    to be taken, we return 2.  If the branch is slightly more likely to be
833    taken, return 1.  If the branch is slightly less likely to be taken,
834    return 0 and if the branch is highly unlikely to be taken, return -1.  */
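/* For instance, a recorded branch probability of 95% yields 2, 60%
   yields 1, 20% yields 0, and 5% yields -1 (the cutoffs below are 9/10,
   1/2 and 1/10 of REG_BR_PROB_BASE).  */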
835 
836 static int
837 mostly_true_jump (rtx jump_insn)
838 {
839   /* If branch probabilities are available, then use that number since it
840      always gives a correct answer.  */
841   rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
842   if (note)
843     {
844       int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
845 			.to_reg_br_prob_base ();
846 
847       if (prob >= REG_BR_PROB_BASE * 9 / 10)
848 	return 2;
849       else if (prob >= REG_BR_PROB_BASE / 2)
850 	return 1;
851       else if (prob >= REG_BR_PROB_BASE / 10)
852 	return 0;
853       else
854 	return -1;
855     }
856 
857   /* If there is no note, assume branches are not taken.
858      This should be rare.  */
859   return 0;
860 }
861 
862 /* Return the condition under which INSN will branch to TARGET.  If TARGET
863    is zero, return the condition under which INSN will return.  If INSN is
864    an unconditional branch, return const_true_rtx.  If INSN isn't a simple
865    type of jump, or it doesn't go to TARGET, return 0.  */
866 
867 static rtx
868 get_branch_condition (const rtx_insn *insn, rtx target)
869 {
870   rtx pat = PATTERN (insn);
871   rtx src;
872 
873   if (condjump_in_parallel_p (insn))
874     pat = XVECEXP (pat, 0, 0);
875 
876   if (ANY_RETURN_P (pat) && pat == target)
877     return const_true_rtx;
878 
879   if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
880     return 0;
881 
882   src = SET_SRC (pat);
883   if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
884     return const_true_rtx;
885 
886   else if (GET_CODE (src) == IF_THEN_ELSE
887 	   && XEXP (src, 2) == pc_rtx
888 	   && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
889 		&& label_ref_label (XEXP (src, 1)) == target)
890 	       || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
891     return XEXP (src, 0);
892 
893   else if (GET_CODE (src) == IF_THEN_ELSE
894 	   && XEXP (src, 1) == pc_rtx
895 	   && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
896 		&& label_ref_label (XEXP (src, 2)) == target)
897 	       || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
898     {
899       enum rtx_code rev;
900       rev = reversed_comparison_code (XEXP (src, 0), insn);
901       if (rev != UNKNOWN)
902 	return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
903 			       XEXP (XEXP (src, 0), 0),
904 			       XEXP (XEXP (src, 0), 1));
905     }
906 
907   return 0;
908 }
909 
910 /* Return nonzero if CONDITION is more strict than the condition of
911    INSN, i.e., if INSN will always branch if CONDITION is true.  */
912 
913 static int
914 condition_dominates_p (rtx condition, const rtx_insn *insn)
915 {
916   rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
917   enum rtx_code code = GET_CODE (condition);
918   enum rtx_code other_code;
919 
920   if (rtx_equal_p (condition, other_condition)
921       || other_condition == const_true_rtx)
922     return 1;
923 
924   else if (condition == const_true_rtx || other_condition == 0)
925     return 0;
926 
927   other_code = GET_CODE (other_condition);
928   if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
929       || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
930       || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
931     return 0;
932 
933   return comparison_dominates_p (code, other_code);
934 }
935 
936 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
937    any insns already in the delay slot of JUMP.  */
938 
939 static int
940 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
941 {
942   int flags, i;
943   rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
944 
945   /* Make sure all the delay slots of this jump would still
946      be valid after threading the jump.  If they are still
947      valid, then return nonzero.  */
948 
949   flags = get_jump_flags (jump, newlabel);
950   for (i = 1; i < pat->len (); i++)
951     if (! (
952 #if ANNUL_IFFALSE_SLOTS
953 	   (INSN_ANNULLED_BRANCH_P (jump)
954 	    && INSN_FROM_TARGET_P (pat->insn (i)))
955 	   ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
956 #endif
957 #if ANNUL_IFTRUE_SLOTS
958 	   (INSN_ANNULLED_BRANCH_P (jump)
959 	    && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
960 	   ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
961 #endif
962 	   eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
963       break;
964 
965   return (i == pat->len ());
966 }
967 
968 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
969    any insns we wish to place in the delay slot of JUMP.  */
970 
971 static int
972 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
973 				 const vec<rtx_insn *> &delay_list)
974 {
975   /* Make sure all the insns in DELAY_LIST would still be
976      valid after threading the jump.  If they are still
977      valid, then return nonzero.  */
978 
979   int flags = get_jump_flags (jump, newlabel);
980   unsigned int delay_insns = delay_list.length ();
981   unsigned int i = 0;
982   for (; i < delay_insns; i++)
983     if (! (
984 #if ANNUL_IFFALSE_SLOTS
985 	   (INSN_ANNULLED_BRANCH_P (jump)
986 	    && INSN_FROM_TARGET_P (delay_list[i]))
987 	   ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
988 #endif
989 #if ANNUL_IFTRUE_SLOTS
990 	   (INSN_ANNULLED_BRANCH_P (jump)
991 	    && ! INSN_FROM_TARGET_P (delay_list[i]))
992 	   ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
993 #endif
994 	   eligible_for_delay (jump, i, delay_list[i], flags)))
995       break;
996 
997   return i == delay_insns;
998 }
999 
1000 /* DELAY_LIST is a list of insns that have already been placed into delay
1001    slots.  See if all of them have the same annulling status as ANNUL_TRUE_P.
1002    If not, return 0; otherwise return 1.  */
1003 
1004 static int
1005 check_annul_list_true_false (int annul_true_p,
1006 			     const vec<rtx_insn *> &delay_list)
1007 {
1008   rtx_insn *trial;
1009   unsigned int i;
1010   FOR_EACH_VEC_ELT (delay_list, i, trial)
1011     if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1012 	|| (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1013       return 0;
1014 
1015   return 1;
1016 }
1017 
1018 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE.  Given that
1019    the condition tested by INSN is CONDITION and the resources shown in
1020    OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1021    from SEQ's delay list, in addition to whatever insns it may execute
1022    (in DELAY_LIST).  SETS and NEEDED denote resources already set and
1023    needed while searching for delay slot insns.  Return the concatenated
1024    delay list if possible, otherwise, return 0.
1025 
1026    SLOTS_TO_FILL is the total number of slots required by INSN, and
1027    PSLOTS_FILLED points to the number filled so far (also the number of
1028    insns in DELAY_LIST).  It is updated with the number that have been
1029    filled from the SEQUENCE, if any.
1030 
1031    PANNUL_P points to a nonzero value if we already know that we need
1032    to annul INSN.  If this routine determines that annulling is needed,
1033    it may set that value nonzero.
1034 
1035    PNEW_THREAD points to a location that is to receive the place at which
1036    execution should continue.  */
1037 
1038 static void
1039 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1040 			      vec<rtx_insn *> *delay_list,
1041 			      struct resources *sets,
1042 			      struct resources *needed,
1043 			      struct resources *other_needed,
1044 			      int slots_to_fill, int *pslots_filled,
1045 			      int *pannul_p, rtx *pnew_thread)
1046 {
1047   int slots_remaining = slots_to_fill - *pslots_filled;
1048   int total_slots_filled = *pslots_filled;
1049   auto_vec<rtx_insn *, 5> new_delay_list;
1050   int must_annul = *pannul_p;
1051   int used_annul = 0;
1052   int i;
1053   struct resources cc_set;
1054   rtx_insn **redundant;
1055 
1056   /* We can't do anything if there are more delay slots in SEQ than we
1057      can handle, or if we don't know that it will be a taken branch.
1058      We know that it will be a taken branch if it is either an unconditional
1059      branch or a conditional branch with a stricter branch condition.
1060 
1061      Also, exit if the branch has more than one set, since then it is computing
1062      other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1063      ??? It may be possible to move other sets into INSN in addition to
1064      moving the instructions in the delay slots.
1065 
1066      We cannot steal the delay list if one of the instructions in the
1067      current delay_list modifies the condition codes and the jump in the
1068      sequence is a conditional jump.  We cannot do this because we cannot
1069      change the direction of the jump, since the condition codes
1070      will affect the direction of the jump in the sequence.  */
1071 
1072   CLEAR_RESOURCE (&cc_set);
1073 
1074   rtx_insn *trial;
1075   FOR_EACH_VEC_ELT (*delay_list, i, trial)
1076     {
1077       mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1078       if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1079 	return;
1080     }
1081 
1082   if (XVECLEN (seq, 0) - 1 > slots_remaining
1083       || ! condition_dominates_p (condition, seq->insn (0))
1084       || ! single_set (seq->insn (0)))
1085     return;
1086 
1087   /* On some targets, branches with delay slots can have a limited
1088      displacement.  Give the back end a chance to tell us we can't do
1089      this.  */
1090   if (! targetm.can_follow_jump (insn, seq->insn (0)))
1091     return;
1092 
1093   redundant = XALLOCAVEC (rtx_insn *, XVECLEN (seq, 0));
1094   for (i = 1; i < seq->len (); i++)
1095     {
1096       rtx_insn *trial = seq->insn (i);
1097       int flags;
1098 
1099       if (insn_references_resource_p (trial, sets, false)
1100 	  || insn_sets_resource_p (trial, needed, false)
1101 	  || insn_sets_resource_p (trial, sets, false)
1102 	  /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1103 	     delay list.  */
1104 	  || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1105 	  /* If TRIAL is from the fallthrough code of an annulled branch insn
1106 	     in SEQ, we cannot use it.  */
1107 	  || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1108 	      && ! INSN_FROM_TARGET_P (trial)))
1109 	return;
1110 
1111       /* If this insn was already done (usually in a previous delay slot),
1112 	 pretend we put it in our delay slot.  */
1113       redundant[i] = redundant_insn (trial, insn, new_delay_list);
1114       if (redundant[i])
1115 	continue;
1116 
1117       /* We will end up re-vectoring this branch, so compute flags
1118 	 based on jumping to the new label.  */
1119       flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1120 
1121       if (! must_annul
1122 	  && ((condition == const_true_rtx
1123 	       || (! insn_sets_resource_p (trial, other_needed, false)
1124 		   && ! may_trap_or_fault_p (PATTERN (trial)))))
1125 	  ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1126 	  : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1127 	     && (must_annul = 1,
1128 		 check_annul_list_true_false (0, *delay_list)
1129 	         && check_annul_list_true_false (0, new_delay_list)
1130 	         && eligible_for_annul_false (insn, total_slots_filled,
1131 					      trial, flags)))
1132 	{
1133 	  if (must_annul)
1134 	    {
1135 	      /* Frame related instructions cannot go into annulled delay
1136 		 slots; it messes up the DWARF info.  */
1137 	      if (RTX_FRAME_RELATED_P (trial))
1138 		return;
1139 	      used_annul = 1;
1140 	    }
1141 	  rtx_insn *temp = copy_delay_slot_insn (trial);
1142 	  INSN_FROM_TARGET_P (temp) = 1;
1143 	  add_to_delay_list (temp, &new_delay_list);
1144 	  total_slots_filled++;
1145 
1146 	  if (--slots_remaining == 0)
1147 	    break;
1148 	}
1149       else
1150 	return;
1151     }
1152 
1153   /* Record the effect of the instructions that were redundant and which
1154      we therefore decided not to copy.  */
1155   for (i = 1; i < seq->len (); i++)
1156     if (redundant[i])
1157       {
1158 	fix_reg_dead_note (redundant[i], insn);
1159 	update_block (seq->insn (i), insn);
1160       }
1161 
1162   /* Show the place to which we will be branching.  */
1163   *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1164 
1165   /* Add any new insns to the delay list and update the count of the
1166      number of slots filled.  */
1167   *pslots_filled = total_slots_filled;
1168   if (used_annul)
1169     *pannul_p = 1;
1170 
1171   rtx_insn *temp;
1172   FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1173     add_to_delay_list (temp, delay_list);
1174 }
1175 
1176 /* Similar to steal_delay_list_from_target except that SEQ is on the
1177    fallthrough path of INSN.  Here we only do something if the delay insn
1178    of SEQ is an unconditional branch.  In that case we steal its delay slot
1179    for INSN since unconditional branches are much easier to fill.  */
1180 
1181 static void
1182 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1183 				   rtx_sequence *seq,
1184 				   vec<rtx_insn *> *delay_list,
1185 				   struct resources *sets,
1186 				   struct resources *needed,
1187 				   struct resources *other_needed,
1188 				   int slots_to_fill, int *pslots_filled,
1189 				   int *pannul_p)
1190 {
1191   int i;
1192   int flags;
1193   int must_annul = *pannul_p;
1194   int used_annul = 0;
1195 
1196   flags = get_jump_flags (insn, JUMP_LABEL (insn));
1197 
1198   /* We can't do anything if SEQ's delay insn isn't an
1199      unconditional branch.  */
1200 
1201   if (! simplejump_or_return_p (seq->insn (0)))
1202     return;
1203 
1204   for (i = 1; i < seq->len (); i++)
1205     {
1206       rtx_insn *trial = seq->insn (i);
1207       rtx_insn *prior_insn;
1208 
1209       /* If TRIAL sets CC0, stealing it will move it too far from the use
1210 	 of CC0.  */
1211       if (insn_references_resource_p (trial, sets, false)
1212 	  || insn_sets_resource_p (trial, needed, false)
1213 	  || insn_sets_resource_p (trial, sets, false)
1214 	  || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1215 
1216 	break;
1217 
1218       /* If this insn was already done, we don't need it.  */
1219       if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
1220 	{
1221 	  fix_reg_dead_note (prior_insn, insn);
1222 	  update_block (trial, insn);
1223 	  delete_from_delay_slot (trial);
1224 	  continue;
1225 	}
1226 
1227       if (! must_annul
1228 	  && ((condition == const_true_rtx
1229 	       || (! insn_sets_resource_p (trial, other_needed, false)
1230 		   && ! may_trap_or_fault_p (PATTERN (trial)))))
1231 	  ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1232 	  : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1233 	     check_annul_list_true_false (1, *delay_list)
1234 	     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1235 	{
1236 	  if (must_annul)
1237 	    used_annul = 1;
1238 	  delete_from_delay_slot (trial);
1239 	  add_to_delay_list (trial, delay_list);
1240 
1241 	  if (++(*pslots_filled) == slots_to_fill)
1242 	    break;
1243 	}
1244       else
1245 	break;
1246     }
1247 
1248   if (used_annul)
1249     *pannul_p = 1;
1250 }
1251 
1252 /* Try merging insns starting at THREAD which match exactly the insns in
1253    INSN's delay list.
1254 
1255    If all insns were matched and the insn was previously annulling, the
1256    annul bit will be cleared.
1257 
1258    For each insn that is merged, if the branch is or will be non-annulling,
1259    we delete the merged insn.  */
1260 
1261 static void
1262 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1263 {
1264   rtx_insn *trial, *next_trial;
1265   rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1266   int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1267   int slot_number = 1;
1268   int num_slots = XVECLEN (PATTERN (insn), 0);
1269   rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1270   struct resources set, needed, modified;
1271   auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1272   int flags;
1273 
1274   flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1275 
1276   CLEAR_RESOURCE (&needed);
1277   CLEAR_RESOURCE (&set);
1278 
1279   /* If this is not an annulling branch, take into account anything needed in
1280      INSN's delay slot.  This prevents two increments from being incorrectly
1281      folded into one.  If we are annulling, this would be the correct
1282      thing to do.  (The alternative, looking at things set in NEXT_TO_MATCH
1283      will essentially disable this optimization.  This method is somewhat of
1284      a kludge, but I don't see a better way.)  */
1285   if (! annul_p)
1286     for (int i = 1; i < num_slots; i++)
1287       if (XVECEXP (PATTERN (insn), 0, i))
1288 	mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1289 				   true);
1290 
1291   for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1292     {
1293       rtx pat = PATTERN (trial);
1294       rtx oldtrial = trial;
1295 
1296       next_trial = next_nonnote_insn (trial);
1297 
1298       /* TRIAL must be a CALL_INSN or INSN.  Skip USE and CLOBBER.  */
1299       if (NONJUMP_INSN_P (trial)
1300 	  && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1301 	continue;
1302 
1303       if (GET_CODE (next_to_match) == GET_CODE (trial)
1304 	  /* We can't share an insn that sets cc0.  */
1305 	  && (!HAVE_cc0 || ! sets_cc0_p (pat))
1306 	  && ! insn_references_resource_p (trial, &set, true)
1307 	  && ! insn_sets_resource_p (trial, &set, true)
1308 	  && ! insn_sets_resource_p (trial, &needed, true)
1309 	  && (trial = try_split (pat, trial, 0)) != 0
1310 	  /* Update next_trial, in case try_split succeeded.  */
1311 	  && (next_trial = next_nonnote_insn (trial))
1312 	  /* Likewise THREAD.  */
1313 	  && (thread = oldtrial == thread ? trial : thread)
1314 	  && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1315 	  /* Have to test this condition if annul condition is different
1316 	     from (and less restrictive than) non-annulling one.  */
1317 	  && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1318 	{
1319 
1320 	  if (! annul_p)
1321 	    {
1322 	      update_block (trial, thread);
1323 	      if (trial == thread)
1324 		thread = next_active_insn (thread);
1325 
1326 	      delete_related_insns (trial);
1327 	      INSN_FROM_TARGET_P (next_to_match) = 0;
1328 	    }
1329 	  else
1330 	    merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1331 
1332 	  if (++slot_number == num_slots)
1333 	    break;
1334 
1335 	  next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1336 	}
1337 
1338       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1339       mark_referenced_resources (trial, &needed, true);
1340     }
1341 
1342   /* See if we stopped on a filled insn.  If we did, try to see if its
1343      delay slots match.  */
1344   if (slot_number != num_slots
1345       && trial && NONJUMP_INSN_P (trial)
1346       && GET_CODE (PATTERN (trial)) == SEQUENCE
1347       && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1348            && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1349     {
1350       rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1351       rtx filled_insn = XVECEXP (pat, 0, 0);
1352 
1353       /* Account for resources set/needed by the filled insn.  */
1354       mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1355       mark_referenced_resources (filled_insn, &needed, true);
1356 
1357       for (int i = 1; i < pat->len (); i++)
1358 	{
1359 	  rtx_insn *dtrial = pat->insn (i);
1360 
1361 	  CLEAR_RESOURCE (&modified);
1362 	  /* Account for resources set by the insns following NEXT_TO_MATCH
1363 	     inside INSN's delay list.  */
1364 	  for (int j = 1; slot_number + j < num_slots; j++)
1365 	    mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1366 				&modified, 0, MARK_SRC_DEST_CALL);
1367 	  /* Account for resources set by the insns before DTRIAL inside
1368 	     TRIAL's delay list.  */
1369 	  for (int j = 1; j < i; j++)
1370 	    mark_set_resources (XVECEXP (pat, 0, j),
1371 				&modified, 0, MARK_SRC_DEST_CALL);
1372 	  if (! insn_references_resource_p (dtrial, &set, true)
1373 	      && ! insn_sets_resource_p (dtrial, &set, true)
1374 	      && ! insn_sets_resource_p (dtrial, &needed, true)
1375 	      && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1376 	      && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1377 	      /* Check that DTRIAL and NEXT_TO_MATCH do not reference a
1378 	         resource modified between them (only DTRIAL is checked because
1379 	         NEXT_TO_MATCH and DTRIAL have to be equal in order to reach
1380 	         this line).  */
1381 	      && ! insn_references_resource_p (dtrial, &modified, true)
1382 	      && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1383 	    {
1384 	      if (! annul_p)
1385 		{
1386 		  rtx_insn *new_rtx;
1387 
1388 		  update_block (dtrial, thread);
1389 		  new_rtx = delete_from_delay_slot (dtrial);
1390 	          if (thread->deleted ())
1391 		    thread = new_rtx;
1392 		  INSN_FROM_TARGET_P (next_to_match) = 0;
1393 		}
1394 	      else
1395 		merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1396 								     true));
1397 
1398 	      if (++slot_number == num_slots)
1399 		break;
1400 
1401 	      next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1402 	    }
1403 	  else
1404 	    {
1405 	      /* Keep track of the set/referenced resources for the delay
1406 		 slots of any trial insns we encounter.  */
1407 	      mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1408 	      mark_referenced_resources (dtrial, &needed, true);
1409 	    }
1410 	}
1411     }
1412 
1413   /* If all insns in the delay slot have been matched and we were previously
1414      annulling the branch, we need not do so any more.  In that case delete all the
1415      merged insns.  Also clear the INSN_FROM_TARGET_P bit of each insn in
1416      the delay list so that we know that it isn't only being used at the
1417      target.  */
1418   if (slot_number == num_slots && annul_p)
1419     {
1420       unsigned int len = merged_insns.length ();
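      /* Walk MERGED_INSNS from the last element to the first; I is
	 unsigned, so decrementing it past zero wraps around and makes
	 the I < LEN test fail, terminating the loop.  */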
1421       for (unsigned int i = len - 1; i < len; i--)
1422 	if (merged_insns[i].second)
1423 	  {
1424 	    update_block (merged_insns[i].first, thread);
1425 	    rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1426 	    if (thread->deleted ())
1427 	      thread = new_rtx;
1428 	  }
1429 	else
1430 	  {
1431 	    update_block (merged_insns[i].first, thread);
1432 	    delete_related_insns (merged_insns[i].first);
1433 	  }
1434 
1435       INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1436 
1437       for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1438 	INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1439     }
1440 }
1441 
1442 /* See if INSN is redundant with an insn in front of TARGET.  Often this
1443    is called when INSN is a candidate for a delay slot of TARGET.
1444    DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1445    of INSN.  Often INSN will be redundant with an insn in a delay slot of
1446    some previous insn.  This happens when we have a series of branches to the
1447    same label; in that case the first insn at the target might want to go
1448    into each of the delay slots.
1449 
1450    If we are not careful, this routine can take up a significant fraction
1451    of the total compilation time (4%), but only wins rarely.  Hence we
1452    speed this routine up by making two passes.  The first pass goes back
1453    until it hits a label and sees if it finds an insn with an identical
1454    pattern.  Only in this (relatively rare) event does it check for
1455    data conflicts.
1456 
1457    We do not split insns we encounter.  This could cause us not to find a
1458    redundant insn, but the cost of splitting seems greater than the possible
1459    gain in rare cases.  */
1460 
1461 static rtx_insn *
1462 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1463 {
1464   rtx target_main = target;
1465   rtx ipat = PATTERN (insn);
1466   rtx_insn *trial;
1467   rtx pat;
1468   struct resources needed, set;
1469   int i;
1470   unsigned insns_to_search;
1471 
1472   /* If INSN has any REG_UNUSED notes, it can't match anything since we
1473      are allowed to not actually assign to such a register.  */
1474   if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1475     return 0;
1476 
1477   /* Scan backwards looking for a match.  */
1478   for (trial = PREV_INSN (target),
1479 	 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1480        trial && insns_to_search > 0;
1481        trial = PREV_INSN (trial))
1482     {
1483       /* (use (insn))s can come immediately after a barrier if the
1484 	 label that used to precede them has been deleted as dead.
1485 	 See delete_related_insns.  */
1486       if (LABEL_P (trial) || BARRIER_P (trial))
1487 	return 0;
1488 
1489       if (!INSN_P (trial))
1490 	continue;
1491       --insns_to_search;
1492 
1493       pat = PATTERN (trial);
1494       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1495 	continue;
1496 
1497       if (GET_CODE (trial) == DEBUG_INSN)
1498 	continue;
1499 
1500       if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1501 	{
1502 	  /* Stop for a CALL and its delay slots because it is difficult to
1503 	     track its resource needs correctly.  */
1504 	  if (CALL_P (seq->element (0)))
1505 	    return 0;
1506 
1507 	  /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1508 	     slots because it is difficult to track its resource needs
1509 	     correctly.  */
1510 
1511 	  if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1512 	    return 0;
1513 
1514 	  if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1515 	    return 0;
1516 
1517 	  /* See if any of the insns in the delay slot match, updating
1518 	     resource requirements as we go.  */
1519 	  for (i = seq->len () - 1; i > 0; i--)
1520 	    if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1521 		&& rtx_equal_p (PATTERN (seq->element (i)), ipat)
1522 		&& ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1523 	      break;
1524 
1525 	  /* If found a match, exit this loop early.  */
1526 	  if (i > 0)
1527 	    break;
1528 	}
1529 
1530       else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1531 	       && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1532 	break;
1533     }
1534 
1535   /* If we didn't find an insn that matches, return 0.  */
1536   if (trial == 0)
1537     return 0;
1538 
1539   /* See what resources this insn sets and needs.  If they overlap, or
1540      if this insn references CC0, it can't be redundant.  */
1541 
1542   CLEAR_RESOURCE (&needed);
1543   CLEAR_RESOURCE (&set);
1544   mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1545   mark_referenced_resources (insn, &needed, true);
1546 
1547   /* If TARGET is a SEQUENCE, get the main insn.  */
1548   if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1549     target_main = XVECEXP (PATTERN (target), 0, 0);
1550 
1551   if (resource_conflicts_p (&needed, &set)
1552       || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1553       /* The insn requiring the delay may not set anything needed or set by
1554 	 INSN.  */
1555       || insn_sets_resource_p (target_main, &needed, true)
1556       || insn_sets_resource_p (target_main, &set, true))
1557     return 0;
1558 
1559   /* Insns we pass may not set either NEEDED or SET, so merge them for
1560      simpler tests.  */
1561   needed.memory |= set.memory;
1562   IOR_HARD_REG_SET (needed.regs, set.regs);
1563 
1564   /* This insn isn't redundant if it conflicts with an insn that either is
1565      or will be in a delay slot of TARGET.  */
1566 
1567   unsigned int j;
1568   rtx_insn *temp;
1569   FOR_EACH_VEC_ELT (delay_list, j, temp)
1570     if (insn_sets_resource_p (temp, &needed, true))
1571       return 0;
1572 
1573   if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1574     for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1575       if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1576 				true))
1577 	return 0;
1578 
1579   /* Scan backwards until we reach a label or an insn that uses something
1580      INSN sets or sets something INSN uses or sets.  */
1581 
1582   for (trial = PREV_INSN (target),
1583 	 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1584        trial && !LABEL_P (trial) && insns_to_search > 0;
1585        trial = PREV_INSN (trial))
1586     {
1587       if (!INSN_P (trial))
1588 	continue;
1589       --insns_to_search;
1590 
1591       pat = PATTERN (trial);
1592       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1593 	continue;
1594 
1595       if (GET_CODE (trial) == DEBUG_INSN)
1596 	continue;
1597 
1598       if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1599 	{
1600 	  bool annul_p = false;
1601           rtx_insn *control = seq->insn (0);
1602 
1603 	  /* If this is a CALL_INSN and its delay slots, it is hard to track
1604 	     the resource needs properly, so give up.  */
1605 	  if (CALL_P (control))
1606 	    return 0;
1607 
1608 	  /* If this is an INSN or JUMP_INSN with delayed effects, it
1609 	     is hard to track the resource needs properly, so give up.  */
1610 
1611 	  if (INSN_SETS_ARE_DELAYED (control))
1612 	    return 0;
1613 
1614 	  if (INSN_REFERENCES_ARE_DELAYED (control))
1615 	    return 0;
1616 
1617 	  if (JUMP_P (control))
1618 	    annul_p = INSN_ANNULLED_BRANCH_P (control);
1619 
1620 	  /* See if any of the insns in the delay slot match, updating
1621 	     resource requirements as we go.  */
1622 	  for (i = seq->len () - 1; i > 0; i--)
1623 	    {
1624 	      rtx_insn *candidate = seq->insn (i);
1625 
1626 	      /* If an insn will be annulled if the branch is false, it isn't
1627 		 considered as a possible duplicate insn.  */
1628 	      if (rtx_equal_p (PATTERN (candidate), ipat)
1629 		  && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1630 		{
1631 		  /* Show that this insn will be used in the sequel.  */
1632 		  INSN_FROM_TARGET_P (candidate) = 0;
1633 		  return candidate;
1634 		}
1635 
1636 	      /* Unless this is an annulled insn from the target of a branch,
1637 		 we must stop if it sets anything needed or set by INSN.  */
1638 	      if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1639 		  && insn_sets_resource_p (candidate, &needed, true))
1640 		return 0;
1641 	    }
1642 
1643 	  /* If the insn requiring the delay slot conflicts with INSN, we
1644 	     must stop.  */
1645 	  if (insn_sets_resource_p (control, &needed, true))
1646 	    return 0;
1647 	}
1648       else
1649 	{
1650 	  /* See if TRIAL is the same as INSN.  */
1651 	  pat = PATTERN (trial);
1652 	  if (rtx_equal_p (pat, ipat))
1653 	    return trial;
1654 
1655 	  /* Can't go any further if TRIAL conflicts with INSN.  */
1656 	  if (insn_sets_resource_p (trial, &needed, true))
1657 	    return 0;
1658 	}
1659     }
1660 
1661   return 0;
1662 }
1663 
1664 /* Return 1 if THREAD can only be executed in one way.  If LABEL is nonzero,
1665    it is the target of the branch insn being scanned.  If ALLOW_FALLTHROUGH
1666    is nonzero, we are allowed to fall into this thread; otherwise, we are
1667    not.
1668 
1669    If LABEL is used more than once or we pass a label other than LABEL before
1670    finding an active insn, we do not own this thread.  */
1671 
1672 static int
1673 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1674 {
1675   rtx_insn *active_insn;
1676   rtx_insn *insn;
1677 
1678   /* We don't own the function end.  */
1679   if (thread == 0 || ANY_RETURN_P (thread))
1680     return 0;
1681 
1682   /* We have a non-NULL insn.  */
1683   rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1684 
1685   /* Get the first active insn, or THREAD_INSN, if it is an active insn.  */
1686   active_insn = next_active_insn (PREV_INSN (thread_insn));
1687 
1688   for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1689     if (LABEL_P (insn)
1690 	&& (insn != label || LABEL_NUSES (insn) != 1))
1691       return 0;
1692 
1693   if (allow_fallthrough)
1694     return 1;
1695 
1696   /* Ensure that we reach a BARRIER before any insn or label.  */
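  /* Scanning backwards, a BARRIER guarantees that nothing can fall through
     into the thread.  If we instead reach the start of the function, a
     label, or a real insn first, control may enter the thread some other
     way, so we do not own it.  */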
1697   for (insn = prev_nonnote_insn (thread_insn);
1698        insn == 0 || !BARRIER_P (insn);
1699        insn = prev_nonnote_insn (insn))
1700     if (insn == 0
1701 	|| LABEL_P (insn)
1702 	|| (NONJUMP_INSN_P (insn)
1703 	    && GET_CODE (PATTERN (insn)) != USE
1704 	    && GET_CODE (PATTERN (insn)) != CLOBBER))
1705       return 0;
1706 
1707   return 1;
1708 }
1709 
1710 /* Called when INSN is being moved from a location near the target of a jump.
1711    We leave a marker of the form (use (INSN)) immediately in front of WHERE
1712    for mark_target_live_regs.  These markers will be deleted at the end.
1713 
1714    We used to try to update the live status of registers if WHERE is at
1715    the start of a basic block, but that can't work since we may remove a
1716    BARRIER in relax_delay_slots.  */
1717 
1718 static void
1719 update_block (rtx_insn *insn, rtx_insn *where)
1720 {
1721   emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1722 
1723   /* INSN might be making a value live in a block where it didn't use to
1724      be.  So recompute liveness information for this block.  */
1725   incr_ticks_for_insn (insn);
1726 }
1727 
1728 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1729    the basic block containing the jump.  */
1730 
1731 static int
1732 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1733 {
1734   incr_ticks_for_insn (jump);
1735   return redirect_jump (jump, nlabel, 1);
1736 }
1737 
1738 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1739    We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1740    that reference values used in INSN.  If we find one, then we move the
1741    REG_DEAD note to INSN.
1742 
1743    This is needed to handle the case where a later insn (after INSN) has a
1744    REG_DEAD note for a register used by INSN, and this later insn subsequently
1745    gets moved before a CODE_LABEL because it is a redundant insn.  In this
1746    case, mark_target_live_regs may be confused into thinking the register
1747    is dead because it sees a REG_DEAD note immediately before a CODE_LABEL.  */
1748 
1749 static void
1750 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1751 {
1752   rtx link, next;
1753   rtx_insn *p;
1754 
1755   for (p = next_nonnote_insn (insn); p != delayed_insn;
1756        p = next_nonnote_insn (p))
1757     for (link = REG_NOTES (p); link; link = next)
1758       {
1759 	next = XEXP (link, 1);
1760 
1761 	if (REG_NOTE_KIND (link) != REG_DEAD
1762 	    || !REG_P (XEXP (link, 0)))
1763 	  continue;
1764 
1765 	if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1766 	  {
1767 	    /* Move the REG_DEAD note from P to INSN.  */
1768 	    remove_note (p, link);
1769 	    XEXP (link, 1) = REG_NOTES (insn);
1770 	    REG_NOTES (insn) = link;
1771 	  }
1772       }
1773 }
1774 
1775 /* Called when an insn redundant with start_insn is deleted.  If there
1776    is a REG_DEAD note for the target of start_insn between start_insn
1777    and stop_insn, then the REG_DEAD note needs to be deleted since the
1778    value no longer dies there.
1779 
1780    If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1781    confused into thinking the register is dead.  */
1782 
1783 static void
1784 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1785 {
1786   rtx link, next;
1787   rtx_insn *p;
1788 
1789   for (p = next_nonnote_insn (start_insn); p != stop_insn;
1790        p = next_nonnote_insn (p))
1791     for (link = REG_NOTES (p); link; link = next)
1792       {
1793 	next = XEXP (link, 1);
1794 
1795 	if (REG_NOTE_KIND (link) != REG_DEAD
1796 	    || !REG_P (XEXP (link, 0)))
1797 	  continue;
1798 
1799 	if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1800 	  {
1801 	    remove_note (p, link);
1802 	    return;
1803 	  }
1804       }
1805 }
1806 
1807 /* Delete any REG_UNUSED notes that exist on INSN but not on OTHER_INSN.
1808 
1809    This handles the case of udivmodXi4 instructions which optimize their
1810    output depending on whether any REG_UNUSED notes are present.  We must
1811    make sure that INSN calculates as many results as OTHER_INSN does.  */
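/* For instance, if INSN carries a REG_UNUSED note for the remainder of a
   divmod while OTHER_INSN has no such note, the note is removed so that INSN
   still computes the remainder.  */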
1812 
1813 static void
1814 update_reg_unused_notes (rtx_insn *insn, rtx other_insn)
1815 {
1816   rtx link, next;
1817 
1818   for (link = REG_NOTES (insn); link; link = next)
1819     {
1820       next = XEXP (link, 1);
1821 
1822       if (REG_NOTE_KIND (link) != REG_UNUSED
1823 	  || !REG_P (XEXP (link, 0)))
1824 	continue;
1825 
1826       if (!find_regno_note (other_insn, REG_UNUSED, REGNO (XEXP (link, 0))))
1827 	remove_note (insn, link);
1828     }
1829 }
1830 
1831 static vec <rtx> sibling_labels;
1832 
1833 /* Return the label before INSN, or put a new label there.  If SIBLING is
1834    non-zero, it is another label associated with the new label (if any),
1835    typically the former target of the jump that will be redirected to
1836    the new label.  */
1837 
1838 static rtx_insn *
1839 get_label_before (rtx_insn *insn, rtx sibling)
1840 {
1841   rtx_insn *label;
1842 
1843   /* Find an existing label at this point
1844      or make a new one if there is none.  */
1845   label = prev_nonnote_insn (insn);
1846 
1847   if (label == 0 || !LABEL_P (label))
1848     {
1849       rtx_insn *prev = PREV_INSN (insn);
1850 
1851       label = gen_label_rtx ();
1852       emit_label_after (label, prev);
1853       LABEL_NUSES (label) = 0;
1854       if (sibling)
1855 	{
1856 	  sibling_labels.safe_push (label);
1857 	  sibling_labels.safe_push (sibling);
1858 	}
1859     }
1860   return label;
1861 }
1862 
1863 /* Scan a function looking for insns that need a delay slot and find insns to
1864    put into the delay slot.
1865 
1866    NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1867    as calls).  We do these first since we don't want jump insns (that are
1868    easier to fill) to get the only insns that could be used for non-jump insns.
1869    When it is zero, only try to fill JUMP_INSNs.
1870 
1871    When slots are filled in this manner, the insns (including the
1872    delay_insn) are put together in a SEQUENCE rtx.  In this fashion,
1873    it is possible to tell whether a delay slot has really been filled
1874    or not.  `final' knows how to deal with this, by communicating
1875    through FINAL_SEQUENCE.  */
1876 
1877 static void
1878 fill_simple_delay_slots (int non_jumps_p)
1879 {
1880   rtx_insn *insn, *trial, *next_trial;
1881   rtx pat;
1882   int i;
1883   int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1884   struct resources needed, set;
1885   int slots_to_fill, slots_filled;
1886   auto_vec<rtx_insn *, 5> delay_list;
1887 
1888   for (i = 0; i < num_unfilled_slots; i++)
1889     {
1890       int flags;
1891       /* Get the next insn to fill.  If it has already had any slots assigned,
1892 	 we can't do anything with it.  Maybe we'll improve this later.  */
1893 
1894       insn = unfilled_slots_base[i];
1895       if (insn == 0
1896 	  || insn->deleted ()
1897 	  || (NONJUMP_INSN_P (insn)
1898 	      && GET_CODE (PATTERN (insn)) == SEQUENCE)
1899 	  || (JUMP_P (insn) && non_jumps_p)
1900 	  || (!JUMP_P (insn) && ! non_jumps_p))
1901 	continue;
1902 
1903       /* It may have been that this insn used to need delay slots, but
1904 	 now doesn't; ignore in that case.  This can happen, for example,
1905 	 on the HP PA RISC, where the number of delay slots depends on
1906 	 what insns are nearby.  */
1907       slots_to_fill = num_delay_slots (insn);
1908 
1909       /* Some machine descriptions define instructions to have
1910 	 delay slots only in certain circumstances which may depend on
1911 	 nearby insns (which change due to reorg's actions).
1912 
1913 	 For example, the PA port normally has delay slots for unconditional
1914 	 jumps.
1915 
1916 	 However, the PA port claims such jumps do not have a delay slot
1917 	 if they are immediate successors of certain CALL_INSNs.  This
1918 	 allows the port to favor filling the delay slot of the call with
1919 	 the unconditional jump.  */
1920       if (slots_to_fill == 0)
1921 	continue;
1922 
1923       /* This insn needs, or can use, some delay slots.  SLOTS_TO_FILL
1924 	 says how many.  After initialization, first try optimizing
1925 
1926 	 call _foo		call _foo
1927 	 nop			add %o7,.-L1,%o7
1928 	 b,a L1
1929 	 nop
1930 
1931 	 If this case applies, the delay slot of the call is filled with
1932 	 the unconditional jump.  This is done first to avoid having the
1933 	 delay slot of the call filled in the backward scan.  Also, since
1934 	 the unconditional jump is likely to also have a delay slot, that
1935 	 insn must exist when it is subsequently scanned.
1936 
1937 	 This is tried on each insn with delay slots as some machines
1938 	 have insns which perform calls, but are not represented as
1939 	 CALL_INSNs.  */
1940 
1941       slots_filled = 0;
1942       delay_list.truncate (0);
1943 
1944       if (JUMP_P (insn))
1945 	flags = get_jump_flags (insn, JUMP_LABEL (insn));
1946       else
1947 	flags = get_jump_flags (insn, NULL_RTX);
1948 
1949       if ((trial = next_active_insn (insn))
1950 	  && JUMP_P (trial)
1951 	  && simplejump_p (trial)
1952 	  && eligible_for_delay (insn, slots_filled, trial, flags)
1953 	  && no_labels_between_p (insn, trial)
1954 	  && ! can_throw_internal (trial))
1955 	{
1956 	  rtx_insn **tmp;
1957 	  slots_filled++;
1958 	  add_to_delay_list (trial, &delay_list);
1959 
1960 	  /* TRIAL may have had its delay slot filled, then unfilled.  When
1961 	     the delay slot is unfilled, TRIAL is placed back on the unfilled
1962 	     slots obstack.  Unfortunately, it is placed on the end of the
1963 	     obstack, not in its original location.  Therefore, we must search
1964 	     from entry i + 1 to the end of the unfilled slots obstack to
1965 	     try and find TRIAL.  */
1966 	  tmp = &unfilled_slots_base[i + 1];
1967 	  while (*tmp != trial && tmp != unfilled_slots_next)
1968 	    tmp++;
1969 
1970 	  /* Remove the unconditional jump from consideration for delay slot
1971 	     filling and unthread it.  */
1972 	  if (*tmp == trial)
1973 	    *tmp = 0;
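	  /* Splice TRIAL out of the insn chain without deleting it: it now
	     lives in DELAY_LIST and will be re-emitted as part of the
	     SEQUENCE built by emit_delay_sequence.  */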
1974 	  {
1975 	    rtx_insn *next = NEXT_INSN (trial);
1976 	    rtx_insn *prev = PREV_INSN (trial);
1977 	    if (prev)
1978 	      SET_NEXT_INSN (prev) = next;
1979 	    if (next)
1980 	      SET_PREV_INSN (next) = prev;
1981 	  }
1982 	}
1983 
1984       /* Now, scan backwards from the insn to search for a potential
1985 	 delay-slot candidate.  Stop searching when a label or jump is hit.
1986 
1987 	 For each candidate, if it is to go into the delay slot (moved
1988 	 forward in execution sequence), it must not need or set any resources
1989 	 that were set by later insns and must not set any resources that
1990 	 are needed for those insns.
1991 
1992 	 The delay slot insn itself sets resources unless it is a call
1993 	 (in which case the called routine, not the insn itself, is doing
1994 	 the setting).  */
1995 
1996       if (slots_filled < slots_to_fill)
1997 	{
1998 	  /* If the flags register is dead after the insn, then we want to be
1999 	     able to accept a candidate that clobbers it.  For this purpose,
2000 	     we need to filter the flags register during life analysis, so
2001 	     that it doesn't create RAW and WAW dependencies, while still
2002 	     creating the necessary WAR dependencies.  */
2003 	  bool filter_flags
2004 	    = (slots_to_fill == 1
2005 	       && targetm.flags_regnum != INVALID_REGNUM
2006 	       && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2007 	  struct resources fset;
2008 	  CLEAR_RESOURCE (&needed);
2009 	  CLEAR_RESOURCE (&set);
2010 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2011 	  if (filter_flags)
2012 	    {
2013 	      CLEAR_RESOURCE (&fset);
2014 	      mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2015 	    }
2016 	  mark_referenced_resources (insn, &needed, false);
2017 
2018 	  for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2019 	       trial = next_trial)
2020 	    {
2021 	      next_trial = prev_nonnote_insn (trial);
2022 
2023 	      /* This must be an INSN or CALL_INSN.  */
2024 	      pat = PATTERN (trial);
2025 
2026 	      /* Stand-alone USE and CLOBBER are just for flow.  */
2027 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2028 		continue;
2029 
2030 	      /* And DEBUG_INSNs never go into delay slots.  */
2031 	      if (GET_CODE (trial) == DEBUG_INSN)
2032 		continue;
2033 
2034 	      /* Check for resource conflict first, to avoid unnecessary
2035 		 splitting.  */
2036 	      if (! insn_references_resource_p (trial, &set, true)
2037 		  && ! insn_sets_resource_p (trial,
2038 					     filter_flags ? &fset : &set,
2039 					     true)
2040 		  && ! insn_sets_resource_p (trial, &needed, true)
2041 		  /* Can't separate set of cc0 from its use.  */
2042 		  && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2043 		  && ! can_throw_internal (trial))
2044 		{
2045 		  trial = try_split (pat, trial, 1);
2046 		  next_trial = prev_nonnote_insn (trial);
2047 		  if (eligible_for_delay (insn, slots_filled, trial, flags))
2048 		    {
2049 		      /* In this case, we are searching backward, so if we
2050 			 find insns to put on the delay list, we want
2051 			 to put them at the head, rather than the
2052 			 tail, of the list.  */
2053 
2054 		      update_reg_dead_notes (trial, insn);
2055 		      delay_list.safe_insert (0, trial);
2056 		      update_block (trial, trial);
2057 		      delete_related_insns (trial);
2058 		      if (slots_to_fill == ++slots_filled)
2059 			break;
2060 		      continue;
2061 		    }
2062 		}
2063 
2064 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2065 	      if (filter_flags)
2066 		{
2067 		  mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2068 		  /* If the flags register is set, then it doesn't create RAW
2069 		     dependencies any longer and it also doesn't create WAW
2070 		     dependencies since it's dead after the original insn.  */
2071 		  if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2072 		    {
2073 		      CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2074 		      CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2075 		    }
2076 		}
2077 	      mark_referenced_resources (trial, &needed, true);
2078 	    }
2079 	}
2080 
2081       /* If not all needed slots have been filled, we come here.  */
2082 
2083       /* Try to optimize case of jumping around a single insn.  */
2084       if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2085 	  && slots_filled != slots_to_fill
2086 	  && delay_list.is_empty ()
2087 	  && JUMP_P (insn)
2088 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
2089 	  && !ANY_RETURN_P (JUMP_LABEL (insn)))
2090 	{
2091 	  optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2092 	  if (!delay_list.is_empty ())
2093 	    slots_filled += 1;
2094 	}
2095 
2096       /* Try to get insns from beyond the insn needing the delay slot.
2097 	 These insns can neither set nor reference resources set in insns being
2098 	 skipped, cannot set resources in the insn being skipped, and, if this
2099 	 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2100 	 call might not return).
2101 
2102 	 There used to be code which continued past the target label if
2103 	 we saw all uses of the target label.  This code did not work,
2104 	 because it failed to account for some instructions which were
2105 	 both annulled and marked as from the target.  This can happen as a
2106 	 result of optimize_skip.  Since this code was redundant with
2107 	 fill_eager_delay_slots anyway, it was just deleted.  */
2108 
2109       if (slots_filled != slots_to_fill
2110 	  /* If this instruction could throw an exception which is
2111 	     caught in the same function, then it's not safe to fill
2112 	     the delay slot with an instruction from beyond this
2113 	     point.  For example, consider:
2114 
2115                int i = 2;
2116 
2117 	       try {
2118                  f();
2119 	         i = 3;
2120                } catch (...) {}
2121 
2122                return i;
2123 
2124 	     Even though `i' is a local variable, we must be sure not
2125 	     to put `i = 3' in the delay slot if `f' might throw an
2126 	     exception.
2127 
2128 	     Presumably, we should also check to see if we could get
2129 	     back to this function via `setjmp'.  */
2130 	  && ! can_throw_internal (insn)
2131 	  && !JUMP_P (insn))
2132 	{
2133 	  int maybe_never = 0;
2134 	  rtx pat, trial_delay;
2135 
2136 	  CLEAR_RESOURCE (&needed);
2137 	  CLEAR_RESOURCE (&set);
2138 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2139 	  mark_referenced_resources (insn, &needed, true);
2140 
2141 	  if (CALL_P (insn))
2142 	    maybe_never = 1;
2143 
2144 	  for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2145 	       trial = next_trial)
2146 	    {
2147 	      next_trial = next_nonnote_insn (trial);
2148 
2149 	      /* This must be an INSN or CALL_INSN.  */
2150 	      pat = PATTERN (trial);
2151 
2152 	      /* Stand-alone USE and CLOBBER are just for flow.  */
2153 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2154 		continue;
2155 
2156 	      /* And DEBUG_INSNs do not go in delay slots.  */
2157 	      if (GET_CODE (trial) == DEBUG_INSN)
2158 		continue;
2159 
2160 	      /* If this already has filled delay slots, get the insn needing
2161 		 the delay slots.  */
2162 	      if (GET_CODE (pat) == SEQUENCE)
2163 		trial_delay = XVECEXP (pat, 0, 0);
2164 	      else
2165 		trial_delay = trial;
2166 
2167 	      /* Stop our search when seeing a jump.  */
2168 	      if (JUMP_P (trial_delay))
2169 		break;
2170 
2171 	      /* See if we have a resource problem before we try to split.  */
2172 	      if (GET_CODE (pat) != SEQUENCE
2173 		  && ! insn_references_resource_p (trial, &set, true)
2174 		  && ! insn_sets_resource_p (trial, &set, true)
2175 		  && ! insn_sets_resource_p (trial, &needed, true)
2176 		  && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2177 		  && ! (maybe_never && may_trap_or_fault_p (pat))
2178 		  && (trial = try_split (pat, trial, 0))
2179 		  && eligible_for_delay (insn, slots_filled, trial, flags)
2180 		  && ! can_throw_internal (trial))
2181 		{
2182 		  next_trial = next_nonnote_insn (trial);
2183 		  add_to_delay_list (trial, &delay_list);
2184 		  if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2185 		    link_cc0_insns (trial);
2186 
2187 		  delete_related_insns (trial);
2188 		  if (slots_to_fill == ++slots_filled)
2189 		    break;
2190 		  continue;
2191 		}
2192 
2193 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2194 	      mark_referenced_resources (trial, &needed, true);
2195 
2196 	      /* Ensure we don't put insns between the setting of cc and the
2197 		 comparison by moving a setting of cc into an earlier delay
2198 		 slot since these insns could clobber the condition code.  */
2199 	      set.cc = 1;
2200 
2201 	      /* If this is a call, we might not get here.  */
2202 	      if (CALL_P (trial_delay))
2203 		maybe_never = 1;
2204 	    }
2205 
2206 	  /* If there are slots left to fill and our search was stopped by an
2207 	     unconditional branch, try the insn at the branch target.  We can
2208 	     redirect the branch if it works.
2209 
2210 	     Don't do this if the insn at the branch target is a branch.  */
2211 	  if (slots_to_fill != slots_filled
2212 	      && trial
2213 	      && jump_to_label_p (trial)
2214 	      && simplejump_p (trial)
2215 	      && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2216 	      && ! (NONJUMP_INSN_P (next_trial)
2217 		    && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2218 	      && !JUMP_P (next_trial)
2219 	      && ! insn_references_resource_p (next_trial, &set, true)
2220 	      && ! insn_sets_resource_p (next_trial, &set, true)
2221 	      && ! insn_sets_resource_p (next_trial, &needed, true)
2222 	      && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2223 	      && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2224 	      && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2225 	      && eligible_for_delay (insn, slots_filled, next_trial, flags)
2226 	      && ! can_throw_internal (trial))
2227 	    {
2228 	      /* See comment in relax_delay_slots about necessity of using
2229 		 next_real_nondebug_insn here.  */
2230 	      rtx_insn *new_label = next_real_nondebug_insn (next_trial);
2231 
2232 	      if (new_label != 0)
2233 		new_label = get_label_before (new_label, JUMP_LABEL (trial));
2234 	      else
2235 		new_label = find_end_label (simple_return_rtx);
2236 
2237 	      if (new_label)
2238 	        {
2239 		  add_to_delay_list (copy_delay_slot_insn (next_trial),
2240 				     &delay_list);
2241 		  slots_filled++;
2242 		  reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2243 				       new_label);
2244 		}
2245 	    }
2246 	}
2247 
2248       /* If this is an unconditional jump, then try to get insns from the
2249 	 target of the jump.  */
2250       rtx_jump_insn *jump_insn;
2251       if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2252 	  && simplejump_p (jump_insn)
2253 	  && slots_filled != slots_to_fill)
2254 	fill_slots_from_thread (jump_insn, const_true_rtx,
2255 				next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2256 				NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2257 						 JUMP_LABEL (insn), 0),
2258 				slots_to_fill, &slots_filled, &delay_list);
2259 
2260       if (!delay_list.is_empty ())
2261 	unfilled_slots_base[i]
2262 	  = emit_delay_sequence (insn, delay_list, slots_filled);
2263 
2264       if (slots_to_fill == slots_filled)
2265 	unfilled_slots_base[i] = 0;
2266 
2267       note_delay_statistics (slots_filled, 0);
2268     }
2269 }
2270 
2271 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2272    return the ultimate label reached by any such chain of jumps.
2273    Return a suitable return rtx if the chain ultimately leads to a
2274    return instruction.
2275    If LABEL is not followed by a jump, return LABEL.
2276    If the chain loops or we can't find end, return LABEL,
2277    since that tells caller to avoid changing the insn.
2278    If the returned label is obtained by following a crossing jump,
2279    set *CROSSING to true, otherwise set it to false.  */
2280 
2281 static rtx
2282 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2283 {
2284   rtx_insn *insn;
2285   rtx_insn *next;
2286   int depth;
2287 
2288   *crossing = false;
2289   if (ANY_RETURN_P (label))
2290     return label;
2291 
2292   rtx_insn *value = as_a <rtx_insn *> (label);
2293 
2294   for (depth = 0;
2295        (depth < 10
2296 	&& (insn = next_active_insn (value)) != 0
2297 	&& JUMP_P (insn)
2298 	&& JUMP_LABEL (insn) != NULL_RTX
2299 	&& ((any_uncondjump_p (insn) && onlyjump_p (insn))
2300 	    || ANY_RETURN_P (PATTERN (insn)))
2301 	&& (next = NEXT_INSN (insn))
2302 	&& BARRIER_P (next));
2303        depth++)
2304     {
2305       rtx this_label_or_return = JUMP_LABEL (insn);
2306 
2307       /* If we have found a cycle, give up and return the original LABEL.  */
2308       if (this_label_or_return == label)
2309 	return label;
2310 
2311       /* Cannot follow returns and cannot look through tablejumps.  */
2312       if (ANY_RETURN_P (this_label_or_return))
2313 	return this_label_or_return;
2314 
2315       rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2316       if (NEXT_INSN (this_label)
2317 	  && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2318 	break;
2319 
2320       if (!targetm.can_follow_jump (jump, insn))
2321 	break;
2322       if (!*crossing)
2323 	*crossing = CROSSING_JUMP_P (jump);
2324       value = this_label;
2325     }
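  /* A depth of 10 means we stopped because of the chain-length limit, not
     because we reached the end of the chain, so keep the original label.  */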
2326   if (depth == 10)
2327     return label;
2328   return value;
2329 }
2330 
2331 /* Try to find insns to place in delay slots.
2332 
2333    INSN is the jump needing SLOTS_TO_FILL delay slots.  It tests CONDITION
2334    or is an unconditional branch if CONDITION is const_true_rtx.
2335    *PSLOTS_FILLED is updated with the number of slots that we have filled.
2336 
2337    THREAD is a flow-of-control, either the insns to be executed if the
2338    branch is true or if the branch is false, THREAD_IF_TRUE says which.
2339 
2340    OPPOSITE_THREAD is the thread in the opposite direction.  It is used
2341    to see if any potential delay slot insns set things needed there.
2342 
2343    LIKELY is nonzero if it is extremely likely that the branch will be
2344    taken and THREAD_IF_TRUE is set.  This is used for the branch at the
2345    end of a loop back up to the top.
2346 
2347    OWN_THREAD is true if we are the only user of the thread, i.e. it is
2348    the target of the jump when we are the only jump going there.
2349 
2350    If OWN_THREAD is false, it must be the "true" thread of a jump.  In that
2351    case, we can only take insns from the head of the thread for our delay
2352    slot.  We then adjust the jump to point after the insns we have taken.  */
2353 
2354 static void
2355 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2356 			rtx thread_or_return, rtx opposite_thread, int likely,
2357 			int thread_if_true, int own_thread, int slots_to_fill,
2358 			int *pslots_filled, vec<rtx_insn *> *delay_list)
2359 {
2360   rtx new_thread;
2361   struct resources opposite_needed, set, needed;
2362   rtx_insn *trial;
2363   int lose = 0;
2364   int must_annul = 0;
2365   int flags;
2366 
2367   /* Validate our arguments.  */
2368   gcc_assert (condition != const_true_rtx || thread_if_true);
2369   gcc_assert (own_thread || thread_if_true);
2370 
2371   flags = get_jump_flags (insn, JUMP_LABEL (insn));
2372 
2373   /* If our thread is the end of subroutine, we can't get any delay
2374      insns from that.  */
2375   if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2376     return;
2377 
2378   rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2379 
2380   /* If this is an unconditional branch, nothing is needed at the
2381      opposite thread.  Otherwise, compute what is needed there.  */
2382   if (condition == const_true_rtx)
2383     CLEAR_RESOURCE (&opposite_needed);
2384   else
2385     mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2386 
2387   /* If the insn at THREAD can be split, do it here to avoid having to
2388      update THREAD and NEW_THREAD if it is done in the loop below.  Also
2389      initialize NEW_THREAD.  */
2390 
2391   new_thread = thread = try_split (PATTERN (thread), thread, 0);
2392 
2393   /* Scan insns at THREAD.  We are looking for an insn that can be removed
2394      from THREAD (it neither sets nor references resources that were set
2395      ahead of it and it doesn't set anything needed by the insns ahead of
2396      it) and that either can be placed in an annulling insn or isn't
2397      needed at OPPOSITE_THREAD.  */
2398 
2399   CLEAR_RESOURCE (&needed);
2400   CLEAR_RESOURCE (&set);
2401 
2402   /* If we do not own this thread, we must stop as soon as we find
2403      something that we can't put in a delay slot, since all we can do
2404      is branch into THREAD at a later point.  Therefore, labels stop
2405      the search if this is not the `true' thread.  */
2406 
2407   for (trial = thread;
2408        ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2409        trial = next_nonnote_insn (trial))
2410     {
2411       rtx pat, old_trial;
2412 
2413       /* If we have passed a label, we no longer own this thread.  */
2414       if (LABEL_P (trial))
2415 	{
2416 	  own_thread = 0;
2417 	  continue;
2418 	}
2419 
2420       pat = PATTERN (trial);
2421       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2422 	continue;
2423 
2424       if (GET_CODE (trial) == DEBUG_INSN)
2425 	continue;
2426 
2427       /* If TRIAL conflicts with the insns ahead of it, we lose.  Also,
2428 	 don't separate or copy insns that set and use CC0.  */
2429       if (! insn_references_resource_p (trial, &set, true)
2430 	  && ! insn_sets_resource_p (trial, &set, true)
2431 	  && ! insn_sets_resource_p (trial, &needed, true)
2432 	  && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2433 			      && (! own_thread || ! sets_cc0_p (pat)))))
2434 	  && ! can_throw_internal (trial))
2435 	{
2436 	  rtx_insn *prior_insn;
2437 
2438 	  /* If TRIAL is redundant with some insn before INSN, we don't
2439 	     actually need to add it to the delay list; we can merely pretend
2440 	     we did.  */
2441 	  if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2442 	    {
2443 	      fix_reg_dead_note (prior_insn, insn);
2444 	      if (own_thread)
2445 		{
2446 		  update_block (trial, thread);
2447 		  if (trial == thread)
2448 		    {
2449 		      thread = next_active_insn (thread);
2450 		      if (new_thread == trial)
2451 			new_thread = thread;
2452 		    }
2453 
2454 		  delete_related_insns (trial);
2455 		}
2456 	      else
2457 		{
2458 		  update_reg_unused_notes (prior_insn, trial);
2459 		  new_thread = next_active_insn (trial);
2460 		}
2461 
2462 	      continue;
2463 	    }
2464 
2465 	  /* There are two ways we can win:  If TRIAL doesn't set anything
2466 	     needed at the opposite thread and can't trap, or if it can
2467 	     go into an annulled delay slot.  But we want neither to copy
2468 	     nor to speculate frame-related insns.  */
2469 	  if (!must_annul
2470 	      && ((condition == const_true_rtx
2471 		   && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2472 	          || (! insn_sets_resource_p (trial, &opposite_needed, true)
2473 		      && ! may_trap_or_fault_p (pat)
2474 		      && ! RTX_FRAME_RELATED_P (trial))))
2475 	    {
2476 	      old_trial = trial;
2477 	      trial = try_split (pat, trial, 0);
2478 	      if (new_thread == old_trial)
2479 		new_thread = trial;
2480 	      if (thread == old_trial)
2481 		thread = trial;
2482 	      pat = PATTERN (trial);
2483 	      if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2484 		goto winner;
2485 	    }
2486 	  else if (!RTX_FRAME_RELATED_P (trial)
2487 		   && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2488 		        || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2489 	    {
2490 	      old_trial = trial;
2491 	      trial = try_split (pat, trial, 0);
2492 	      if (new_thread == old_trial)
2493 		new_thread = trial;
2494 	      if (thread == old_trial)
2495 		thread = trial;
2496 	      pat = PATTERN (trial);
2497 	      if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2498 		   ? check_annul_list_true_false (0, *delay_list)
2499 		     && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2500 		   : check_annul_list_true_false (1, *delay_list)
2501 		     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2502 		{
2503 		  rtx_insn *temp;
2504 
2505 		  must_annul = 1;
2506 		winner:
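		  /* The non-annulling case above jumps here via "goto winner";
		     both paths join at this point to actually move TRIAL into
		     the delay slot.  */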
2507 
2508 		  if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2509 		    link_cc0_insns (trial);
2510 
2511 		  /* If we own this thread, delete the insn.  If this is the
2512 		     destination of a branch, show that a basic block status
2513 		     may have been updated.  In any case, mark the new
2514 		     starting point of this thread.  */
2515 		  if (own_thread)
2516 		    {
2517 		      rtx note;
2518 
2519 		      update_block (trial, thread);
2520 		      if (trial == thread)
2521 			{
2522 			  thread = next_active_insn (thread);
2523 			  if (new_thread == trial)
2524 			    new_thread = thread;
2525 			}
2526 
2527 		      /* We are moving this insn, not deleting it.  We must
2528 			 temporarily increment the use count on any referenced
2529 			 label lest it be deleted by delete_related_insns.  */
2530 		      for (note = REG_NOTES (trial);
2531 			   note != NULL_RTX;
2532 			   note = XEXP (note, 1))
2533 			if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2534 			    || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2535 			  {
2536 			    /* REG_LABEL_OPERAND could be
2537 			       NOTE_INSN_DELETED_LABEL too.  */
2538 			    if (LABEL_P (XEXP (note, 0)))
2539 			      LABEL_NUSES (XEXP (note, 0))++;
2540 			    else
2541 			      gcc_assert (REG_NOTE_KIND (note)
2542 					  == REG_LABEL_OPERAND);
2543 			  }
2544 		      if (jump_to_label_p (trial))
2545 			LABEL_NUSES (JUMP_LABEL (trial))++;
2546 
2547 		      delete_related_insns (trial);
2548 
2549 		      for (note = REG_NOTES (trial);
2550 			   note != NULL_RTX;
2551 			   note = XEXP (note, 1))
2552 			if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2553 			    || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2554 			  {
2555 			    /* REG_LABEL_OPERAND could be
2556 			       NOTE_INSN_DELETED_LABEL too.  */
2557 			    if (LABEL_P (XEXP (note, 0)))
2558 			      LABEL_NUSES (XEXP (note, 0))--;
2559 			    else
2560 			      gcc_assert (REG_NOTE_KIND (note)
2561 					  == REG_LABEL_OPERAND);
2562 			  }
2563 		      if (jump_to_label_p (trial))
2564 			LABEL_NUSES (JUMP_LABEL (trial))--;
2565 		    }
2566 		  else
2567 		    new_thread = next_active_insn (trial);
2568 
2569 		  temp = own_thread ? trial : copy_delay_slot_insn (trial);
2570 		  if (thread_if_true)
2571 		    INSN_FROM_TARGET_P (temp) = 1;
2572 
2573 		  add_to_delay_list (temp, delay_list);
2574 
2575 		  if (slots_to_fill == ++(*pslots_filled))
2576 		    {
2577 		      /* Even though we have filled all the slots, we
2578 			 may be branching to a location that has a
2579 			 redundant insn.  Skip any if so.  */
2580 		      while (new_thread && ! own_thread
2581 			     && ! insn_sets_resource_p (new_thread, &set, true)
2582 			     && ! insn_sets_resource_p (new_thread, &needed,
2583 							true)
2584 			     && ! insn_references_resource_p (new_thread,
2585 							      &set, true)
2586 			     && (prior_insn
2587 				 = redundant_insn (new_thread, insn,
2588 						   *delay_list)))
2589 			{
2590 			  /* We know we do not own the thread, so no need
2591 			     to call update_block and delete_insn.  */
2592 			  fix_reg_dead_note (prior_insn, insn);
2593 			  update_reg_unused_notes (prior_insn, new_thread);
2594 			  new_thread
2595 			    = next_active_insn (as_a<rtx_insn *> (new_thread));
2596 			}
2597 		      break;
2598 		    }
2599 
2600 		  continue;
2601 		}
2602 	    }
2603 	}
2604 
2605       /* This insn can't go into a delay slot.  */
2606       lose = 1;
2607       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2608       mark_referenced_resources (trial, &needed, true);
2609 
2610       /* Ensure we don't put insns between the setting of cc and the comparison
2611 	 by moving a setting of cc into an earlier delay slot since these insns
2612 	 could clobber the condition code.  */
2613       set.cc = 1;
2614 
2615       /* If this insn is a register-register copy and the next insn has
2616 	 a use of our destination, change it to use our source.  That way,
2617 	 it will become a candidate for our delay slot the next time
2618 	 through this loop.  This case occurs commonly in loops that
2619 	 scan a list.
2620 
2621 	 We could check for more complex cases than those tested below,
2622 	 but it doesn't seem worth it.  It might also be a good idea to try
2623 	 to swap the two insns.  That might do better.
2624 
2625 	 We can't do this if the next insn modifies our destination, because
2626 	 that would make the replacement into the insn invalid.  We also can't
2627 	 do this if it modifies our source, because it might be an earlyclobber
2628 	 operand.  This latter test also prevents updating the contents of
2629 	 a PRE_INC.  We also can't do this if there's overlap of source and
2630 	 destination.  Overlap may happen for larger-than-register-size modes.  */
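      /* As a purely illustrative example, given "r2 = r3" followed by
	 "r4 = r2 + 1", rewriting the second insn to use r3 lets the
	 register copy become a delay slot candidate on the next pass
	 through this loop.  */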
2631 
2632       if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2633 	  && REG_P (SET_SRC (pat))
2634 	  && REG_P (SET_DEST (pat))
2635 	  && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2636 	{
2637 	  rtx_insn *next = next_nonnote_insn (trial);
2638 
2639 	  if (next && NONJUMP_INSN_P (next)
2640 	      && GET_CODE (PATTERN (next)) != USE
2641 	      && ! reg_set_p (SET_DEST (pat), next)
2642 	      && ! reg_set_p (SET_SRC (pat), next)
2643 	      && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2644 	      && ! modified_in_p (SET_DEST (pat), next))
2645 	    validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2646 	}
2647     }
2648 
2649   /* If we stopped on a branch insn that has delay slots, see if we can
2650      steal some of the insns in those slots.  */
2651   if (trial && NONJUMP_INSN_P (trial)
2652       && GET_CODE (PATTERN (trial)) == SEQUENCE
2653       && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2654     {
2655       rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2656       /* If this is the `true' thread, we will want to follow the jump,
2657 	 so we can only do this if we have taken everything up to here.  */
2658       if (thread_if_true && trial == new_thread)
2659 	{
2660 	  steal_delay_list_from_target (insn, condition, sequence,
2661 					delay_list, &set, &needed,
2662 					&opposite_needed, slots_to_fill,
2663 					pslots_filled, &must_annul,
2664 					&new_thread);
2665 	  /* If we owned the thread and are told that it branched
2666 	     elsewhere, make sure we own the thread at the new location.  */
2667 	  if (own_thread && trial != new_thread)
2668 	    own_thread = own_thread_p (new_thread, new_thread, 0);
2669 	}
2670       else if (! thread_if_true)
2671 	steal_delay_list_from_fallthrough (insn, condition, sequence,
2672 					   delay_list, &set, &needed,
2673 					   &opposite_needed, slots_to_fill,
2674 					   pslots_filled, &must_annul);
2675     }
2676 
2677   /* If we haven't found anything for this delay slot and it is very
2678      likely that the branch will be taken, see if the insn at our target
2679      increments or decrements a register with an increment that does not
2680      depend on the destination register.  If so, try to place the opposite
2681      arithmetic insn after the jump insn and put the arithmetic insn in the
2682      delay slot.  If we can't do this, return.  */
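  /* As a hypothetical illustration: if the branch target starts with
     "r4 = r4 + 8", we can emit a compensating "r4 = r4 - 8" after the
     branch and put the increment in the delay slot; the taken path gets
     the increment it expected, while the fall-through path has it undone
     by the compensation insn.  */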
2683   if (delay_list->is_empty () && likely
2684       && new_thread && !ANY_RETURN_P (new_thread)
2685       && NONJUMP_INSN_P (new_thread)
2686       && !RTX_FRAME_RELATED_P (new_thread)
2687       && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2688       && asm_noperands (PATTERN (new_thread)) < 0)
2689     {
2690       rtx pat = PATTERN (new_thread);
2691       rtx dest;
2692       rtx src;
2693 
2694       /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2695 	 above.  */
2696       trial = as_a <rtx_insn *> (new_thread);
2697       pat = PATTERN (trial);
2698 
2699       if (!NONJUMP_INSN_P (trial)
2700 	  || GET_CODE (pat) != SET
2701 	  || ! eligible_for_delay (insn, 0, trial, flags)
2702 	  || can_throw_internal (trial))
2703 	return;
2704 
2705       dest = SET_DEST (pat), src = SET_SRC (pat);
2706       if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2707 	  && rtx_equal_p (XEXP (src, 0), dest)
2708 	  && (!FLOAT_MODE_P (GET_MODE (src))
2709 	      || flag_unsafe_math_optimizations)
2710 	  && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2711 	  && ! side_effects_p (pat))
2712 	{
2713 	  rtx other = XEXP (src, 1);
2714 	  rtx new_arith;
2715 	  rtx_insn *ninsn;
2716 
2717 	  /* If this is a constant adjustment, use the same code with
2718 	     the negated constant.  Otherwise, reverse the sense of the
2719 	     arithmetic.  */
2720 	  if (CONST_INT_P (other))
2721 	    new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2722 					negate_rtx (GET_MODE (src), other));
2723 	  else
2724 	    new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2725 					GET_MODE (src), dest, other);
2726 
2727 	  ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2728 
2729 	  if (recog_memoized (ninsn) < 0
2730 	      || (extract_insn (ninsn),
2731 		  !constrain_operands (1, get_preferred_alternatives (ninsn))))
2732 	    {
2733 	      delete_related_insns (ninsn);
2734 	      return;
2735 	    }
2736 
2737 	  if (own_thread)
2738 	    {
2739 	      update_block (trial, thread);
2740 	      if (trial == thread)
2741 		{
2742 		  thread = next_active_insn (thread);
2743 		  if (new_thread == trial)
2744 		    new_thread = thread;
2745 		}
2746 	      delete_related_insns (trial);
2747 	    }
2748 	  else
2749 	    new_thread = next_active_insn (trial);
2750 
2751 	  ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2752 	  if (thread_if_true)
2753 	    INSN_FROM_TARGET_P (ninsn) = 1;
2754 
2755 	  add_to_delay_list (ninsn, delay_list);
2756 	  (*pslots_filled)++;
2757 	}
2758     }
2759 
2760   if (!delay_list->is_empty () && must_annul)
2761     INSN_ANNULLED_BRANCH_P (insn) = 1;
2762 
2763   /* If we are to branch into the middle of this thread, find an appropriate
2764      label or make a new one if none, and redirect INSN to it.  If we hit the
2765      end of the function, use the end-of-function label.  */
2766   if (new_thread != thread)
2767     {
2768       rtx label;
2769       bool crossing = false;
2770 
2771       gcc_assert (thread_if_true);
2772 
2773       if (new_thread && simplejump_or_return_p (new_thread)
2774 	  && redirect_with_delay_list_safe_p (insn,
2775 					      JUMP_LABEL (new_thread),
2776 					      *delay_list))
2777 	new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2778 				   &crossing);
2779 
2780       if (ANY_RETURN_P (new_thread))
2781 	label = find_end_label (new_thread);
2782       else if (LABEL_P (new_thread))
2783 	label = new_thread;
2784       else
2785 	label = get_label_before (as_a <rtx_insn *> (new_thread),
2786 				  JUMP_LABEL (insn));
2787 
2788       if (label)
2789 	{
2790 	  reorg_redirect_jump (insn, label);
2791 	  if (crossing)
2792 	    CROSSING_JUMP_P (insn) = 1;
2793 	}
2794     }
2795 }
2796 
2797 /* Make another attempt to find insns to place in delay slots.
2798 
2799    We previously looked for insns located in front of the delay insn
2800    and, for non-jump delay insns, located behind the delay insn.
2801 
2802    Here only try to schedule jump insns and try to move insns from either
2803    the target or the following insns into the delay slot.  If annulling is
2804    supported, we will be likely to do this.  Otherwise, we can do this only
2805    if safe.  */
2806 
2807 static void
2808 fill_eager_delay_slots (void)
2809 {
2810   rtx_insn *insn;
2811   int i;
2812   int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2813 
2814   for (i = 0; i < num_unfilled_slots; i++)
2815     {
2816       rtx condition;
2817       rtx target_label, insn_at_target;
2818       rtx_insn *fallthrough_insn;
2819       auto_vec<rtx_insn *, 5> delay_list;
2820       rtx_jump_insn *jump_insn;
2821       int own_target;
2822       int own_fallthrough;
2823       int prediction, slots_to_fill, slots_filled;
2824 
2825       insn = unfilled_slots_base[i];
2826       if (insn == 0
2827 	  || insn->deleted ()
2828 	  || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2829 	  || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2830 	continue;
2831 
2832       slots_to_fill = num_delay_slots (jump_insn);
2833       /* Some machine descriptions define instructions to have
2834 	 delay slots only in certain circumstances which may depend on
2835 	 nearby insns (which change due to reorg's actions).
2836 
2837 	 For example, the PA port normally has delay slots for unconditional
2838 	 jumps.
2839 
2840 	 However, the PA port claims such jumps do not have a delay slot
2841 	 if they are immediate successors of certain CALL_INSNs.  This
2842 	 allows the port to favor filling the delay slot of the call with
2843 	 the unconditional jump.  */
2844       if (slots_to_fill == 0)
2845 	continue;
2846 
2847       slots_filled = 0;
2848       target_label = JUMP_LABEL (jump_insn);
2849       condition = get_branch_condition (jump_insn, target_label);
2850 
2851       if (condition == 0)
2852 	continue;
2853 
2854       /* Get the next active fallthrough and target insns and see if we own
2855 	 them.  Then see whether the branch is likely true.  We don't need
2856 	 to do a lot of this for unconditional branches.  */
2857 
2858       insn_at_target = first_active_target_insn (target_label);
2859       own_target = own_thread_p (target_label, target_label, 0);
2860 
2861       if (condition == const_true_rtx)
2862 	{
2863 	  own_fallthrough = 0;
2864 	  fallthrough_insn = 0;
2865 	  prediction = 2;
2866 	}
2867       else
2868 	{
2869 	  fallthrough_insn = next_active_insn (jump_insn);
2870 	  own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2871 	  prediction = mostly_true_jump (jump_insn);
2872 	}
2873 
2874       /* If this insn is expected to branch, first try to get insns from our
2875 	 target, then our fallthrough insns.  If it is not expected to branch,
2876 	 try the other order.  */
2877 
2878       if (prediction > 0)
2879 	{
2880 	  fill_slots_from_thread (jump_insn, condition, insn_at_target,
2881 				  fallthrough_insn, prediction == 2, 1,
2882 				  own_target,
2883 				  slots_to_fill, &slots_filled, &delay_list);
2884 
2885 	  if (delay_list.is_empty () && own_fallthrough)
2886 	    {
2887 	      /* Even though we didn't find anything for delay slots,
2888 		 we might have found a redundant insn which we deleted
2889 		 from the thread that was filled.  So we have to recompute
2890 		 the next insn at the target.  */
2891 	      target_label = JUMP_LABEL (jump_insn);
2892 	      insn_at_target = first_active_target_insn (target_label);
2893 
2894 	      fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2895 				      insn_at_target, 0, 0, own_fallthrough,
2896 				      slots_to_fill, &slots_filled,
2897 				      &delay_list);
2898 	    }
2899 	}
2900       else
2901 	{
2902 	  if (own_fallthrough)
2903 	    fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2904 				    insn_at_target, 0, 0, own_fallthrough,
2905 				    slots_to_fill, &slots_filled, &delay_list);
2906 
2907 	  if (delay_list.is_empty ())
2908 	    fill_slots_from_thread (jump_insn, condition, insn_at_target,
2909 				    next_active_insn (insn), 0, 1, own_target,
2910 				    slots_to_fill, &slots_filled, &delay_list);
2911 	}
2912 
2913       if (!delay_list.is_empty ())
2914 	unfilled_slots_base[i]
2915 	  = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2916 
2917       if (slots_to_fill == slots_filled)
2918 	unfilled_slots_base[i] = 0;
2919 
2920       note_delay_statistics (slots_filled, 1);
2921     }
2922 }
2923 
2924 static void delete_computation (rtx_insn *insn);
2925 
2926 /* Recursively delete prior insns that compute the value (used only by INSN
2927    which the caller is deleting) stored in the register mentioned by NOTE
2928    which is a REG_DEAD note associated with INSN.  */
2929 
2930 static void
delete_prior_computation (rtx note, rtx_insn *insn)
2932 {
2933   rtx_insn *our_prev;
2934   rtx reg = XEXP (note, 0);
2935 
2936   for (our_prev = prev_nonnote_insn (insn);
2937        our_prev && (NONJUMP_INSN_P (our_prev)
2938 		    || CALL_P (our_prev));
2939        our_prev = prev_nonnote_insn (our_prev))
2940     {
2941       rtx pat = PATTERN (our_prev);
2942 
2943       /* If we reach a CALL which is not calling a const function
2944 	 or the callee pops the arguments, then give up.  */
2945       if (CALL_P (our_prev)
2946 	  && (! RTL_CONST_CALL_P (our_prev)
2947 	      || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2948 	break;
2949 
2950       /* If we reach a SEQUENCE, it is too complex to try to
2951 	 do anything with it, so give up.  We can be run during
2952 	 and after reorg, so SEQUENCE rtl can legitimately show
2953 	 up here.  */
2954       if (GET_CODE (pat) == SEQUENCE)
2955 	break;
2956 
2957       if (GET_CODE (pat) == USE
2958 	  && NONJUMP_INSN_P (XEXP (pat, 0)))
2959 	/* reorg creates USEs that look like this.  We leave them
2960 	   alone because reorg needs them for its own purposes.  */
2961 	break;
2962 
2963       if (reg_set_p (reg, pat))
2964 	{
2965 	  if (side_effects_p (pat) && !CALL_P (our_prev))
2966 	    break;
2967 
2968 	  if (GET_CODE (pat) == PARALLEL)
2969 	    {
2970 	      /* If we find a SET of something else, we can't
2971 		 delete the insn.  */
2972 
2973 	      int i;
2974 
2975 	      for (i = 0; i < XVECLEN (pat, 0); i++)
2976 		{
2977 		  rtx part = XVECEXP (pat, 0, i);
2978 
2979 		  if (GET_CODE (part) == SET
2980 		      && SET_DEST (part) != reg)
2981 		    break;
2982 		}
2983 
2984 	      if (i == XVECLEN (pat, 0))
2985 		delete_computation (our_prev);
2986 	    }
2987 	  else if (GET_CODE (pat) == SET
2988 		   && REG_P (SET_DEST (pat)))
2989 	    {
2990 	      int dest_regno = REGNO (SET_DEST (pat));
2991 	      int dest_endregno = END_REGNO (SET_DEST (pat));
2992 	      int regno = REGNO (reg);
2993 	      int endregno = END_REGNO (reg);
2994 
2995 	      if (dest_regno >= regno
2996 		  && dest_endregno <= endregno)
2997 		delete_computation (our_prev);
2998 
2999 	      /* We may have a multi-word hard register and some, but not
3000 		 all, of the words of the register are needed in subsequent
3001 		 insns.  Write REG_UNUSED notes for those parts that were not
3002 		 needed.  */
3003 	      else if (dest_regno <= regno
3004 		       && dest_endregno >= endregno)
3005 		{
3006 		  int i;
3007 
3008 		  add_reg_note (our_prev, REG_UNUSED, reg);
3009 
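		  /* If every word of the destination now carries a
		     REG_UNUSED note, the whole computation is dead.  */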
3010 		  for (i = dest_regno; i < dest_endregno; i++)
3011 		    if (! find_regno_note (our_prev, REG_UNUSED, i))
3012 		      break;
3013 
3014 		  if (i == dest_endregno)
3015 		    delete_computation (our_prev);
3016 		}
3017 	    }
3018 
3019 	  break;
3020 	}
3021 
3022       /* If PAT references the register that dies here, it is an
3023 	 additional use.  Hence any prior SET isn't dead.  However, this
3024 	 insn becomes the new place for the REG_DEAD note.  */
3025       if (reg_overlap_mentioned_p (reg, pat))
3026 	{
3027 	  XEXP (note, 1) = REG_NOTES (our_prev);
3028 	  REG_NOTES (our_prev) = note;
3029 	  break;
3030 	}
3031     }
3032 }
3033 
3034 /* Delete INSN and recursively delete insns that compute values used only
3035    by INSN.  This uses the REG_DEAD notes computed during flow analysis.
3036 
3037    Look at all our REG_DEAD notes.  If a previous insn does nothing other
3038    than set a register that dies in this insn, we can delete that insn
3039    as well.
3040 
3041    On machines with CC0, if CC0 is used in this insn, we may be able to
3042    delete the insn that set it.  */
3043 
3044 static void
delete_computation (rtx_insn *insn)
3046 {
3047   rtx note, next;
3048 
3049   if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3050     {
3051       rtx_insn *prev = prev_nonnote_insn (insn);
      /* We assume that at this stage CC's are always set explicitly and
	 always immediately before the jump that will use them.  So if the
	 previous insn exists to set the CC's, delete it (unless it performs
	 auto-increments, etc.).  */
3058       if (prev && NONJUMP_INSN_P (prev)
3059 	  && sets_cc0_p (PATTERN (prev)))
3060 	{
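	  /* sets_cc0_p returns 1 only when cc0 is the only thing PREV sets;
	     only then can the whole insn be deleted.  */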
3061 	  if (sets_cc0_p (PATTERN (prev)) > 0
3062 	      && ! side_effects_p (PATTERN (prev)))
3063 	    delete_computation (prev);
3064 	  else
3065 	    /* Otherwise, show that cc0 won't be used.  */
3066 	    add_reg_note (prev, REG_UNUSED, cc0_rtx);
3067 	}
3068     }
3069 
3070   for (note = REG_NOTES (insn); note; note = next)
3071     {
3072       next = XEXP (note, 1);
3073 
3074       if (REG_NOTE_KIND (note) != REG_DEAD
3075 	  /* Verify that the REG_NOTE is legitimate.  */
3076 	  || !REG_P (XEXP (note, 0)))
3077 	continue;
3078 
3079       delete_prior_computation (note, insn);
3080     }
3081 
3082   delete_related_insns (insn);
3083 }
3084 
3085 /* If all INSN does is set the pc, delete it,
3086    and delete the insn that set the condition codes for it
3087    if that's what the previous thing was.  */
3088 
3089 static void
delete_jump (rtx_insn *insn)
3091 {
3092   rtx set = single_set (insn);
3093 
3094   if (set && GET_CODE (SET_DEST (set)) == PC)
3095     delete_computation (insn);
3096 }
3097 
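/* Return the nearest label before the first active insn after X, scanning
   backwards from that insn.  Return NULL if SCAN_LIMIT or the start of the
   insn chain is reached before a label is found.  */
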
3098 static rtx_insn *
label_before_next_insn (rtx_insn *x, rtx scan_limit)
3100 {
3101   rtx_insn *insn = next_active_insn (x);
3102   while (insn)
3103     {
3104       insn = PREV_INSN (insn);
3105       if (insn == scan_limit || insn == NULL_RTX)
3106 	return NULL;
3107       if (LABEL_P (insn))
3108 	break;
3109     }
3110   return insn;
3111 }
3112 
3113 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3114    BEG and END.  */
3115 
3116 static bool
switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3118 {
3119   const rtx_insn *p;
3120   for (p = beg; p != end; p = NEXT_INSN (p))
3121     if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3122       return true;
3123   return false;
3124 }
3125 
3126 
3127 /* Once we have tried two ways to fill a delay slot, make a pass over the
3128    code to try to improve the results and to do such things as more jump
3129    threading.  */
3130 
3131 static void
relax_delay_slots (rtx_insn *first)
3133 {
3134   rtx_insn *insn, *next;
3135   rtx_sequence *pat;
3136   rtx_insn *delay_insn;
3137   rtx target_label;
3138 
3139   /* Look at every JUMP_INSN and see if we can improve it.  */
3140   for (insn = first; insn; insn = next)
3141     {
3142       rtx_insn *other, *prior_insn;
3143       bool crossing;
3144 
3145       next = next_active_insn (insn);
3146 
3147       /* If this is a jump insn, see if it now jumps to a jump, jumps to
3148 	 the next insn, or jumps to a label that is not the last of a
3149 	 group of consecutive labels.  */
3150       if (is_a <rtx_jump_insn *> (insn)
3151 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
3152 	  && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3153 	{
3154 	  rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3155 	  target_label
3156 	    = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3157 						     &crossing));
3158 	  if (ANY_RETURN_P (target_label))
3159 	    target_label = find_end_label (target_label);
3160 
3161 	  if (target_label
3162 	      && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3163 	      && ! condjump_in_parallel_p (jump_insn)
3164 	      && ! (next && switch_text_sections_between_p (jump_insn, next)))
3165 	    {
3166 	      delete_jump (jump_insn);
3167 	      continue;
3168 	    }
3169 
3170 	  if (target_label && target_label != JUMP_LABEL (jump_insn))
3171 	    {
3172 	      reorg_redirect_jump (jump_insn, target_label);
3173 	      if (crossing)
3174 		CROSSING_JUMP_P (jump_insn) = 1;
3175 	    }
3176 
3177 	  /* See if this jump conditionally branches around an unconditional
3178 	     jump.  If so, invert this jump and point it to the target of the
3179 	     second jump.  Check if it's possible on the target.  */
3180 	  if (next && simplejump_or_return_p (next)
3181 	      && any_condjump_p (jump_insn)
3182 	      && target_label
3183 	      && (next_active_insn (as_a<rtx_insn *> (target_label))
3184 		  == next_active_insn (next))
3185 	      && no_labels_between_p (jump_insn, next)
3186 	      && targetm.can_follow_jump (jump_insn, next))
3187 	    {
3188 	      rtx label = JUMP_LABEL (next);
3189 
3190 	      /* Be careful how we do this to avoid deleting code or
3191 		 labels that are momentarily dead.  See similar optimization
3192 		 in jump.c.
3193 
3194 		 We also need to ensure we properly handle the case when
3195 		 invert_jump fails.  */
3196 
3197 	      ++LABEL_NUSES (target_label);
3198 	      if (!ANY_RETURN_P (label))
3199 		++LABEL_NUSES (label);
3200 
3201 	      if (invert_jump (jump_insn, label, 1))
3202 		{
3203 		  delete_related_insns (next);
3204 		  next = jump_insn;
3205 		}
3206 
3207 	      if (!ANY_RETURN_P (label))
3208 		--LABEL_NUSES (label);
3209 
3210 	      if (--LABEL_NUSES (target_label) == 0)
3211 		delete_related_insns (target_label);
3212 
3213 	      continue;
3214 	    }
3215 	}
3216 
3217       /* If this is an unconditional jump and the previous insn is a
3218 	 conditional jump, try reversing the condition of the previous
3219 	 insn and swapping our targets.  The next pass might be able to
3220 	 fill the slots.
3221 
3222 	 Don't do this if we expect the conditional branch to be true, because
3223 	 we would then be making the more common case longer.  */
3224 
3225       if (simplejump_or_return_p (insn)
3226 	  && (other = prev_active_insn (insn)) != 0
3227 	  && any_condjump_p (other)
3228 	  && no_labels_between_p (other, insn)
3229 	  && mostly_true_jump (other) < 0)
3230 	{
3231 	  rtx other_target = JUMP_LABEL (other);
3232 	  target_label = JUMP_LABEL (insn);
3233 
3234 	  if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3235 	    reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3236 	}
3237 
3238       /* Now look only at cases where we have a filled delay slot.  */
3239       if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3240 	continue;
3241 
3242       pat = as_a <rtx_sequence *> (PATTERN (insn));
3243       delay_insn = pat->insn (0);
3244 
3245       /* See if the first insn in the delay slot is redundant with some
3246 	 previous insn.  Remove it from the delay slot if so; then set up
3247 	 to reprocess this insn.  */
3248       if ((prior_insn = redundant_insn (pat->insn (1), delay_insn, vNULL)))
3249 	{
3250 	  fix_reg_dead_note (prior_insn, insn);
3251 	  update_block (pat->insn (1), insn);
3252 	  delete_from_delay_slot (pat->insn (1));
3253 	  next = prev_active_insn (next);
3254 	  continue;
3255 	}
3256 
3257       /* See if we have a RETURN insn with a filled delay slot followed
	 by a RETURN insn with an unfilled delay slot.  If so, we can delete
3259 	 the first RETURN (but not its delay insn).  This gives the same
3260 	 effect in fewer instructions.
3261 
3262 	 Only do so if optimizing for size since this results in slower, but
3263 	 smaller code.  */
3264       if (optimize_function_for_size_p (cfun)
3265 	  && ANY_RETURN_P (PATTERN (delay_insn))
3266 	  && next
3267 	  && JUMP_P (next)
3268 	  && PATTERN (next) == PATTERN (delay_insn))
3269 	{
3270 	  rtx_insn *after;
3271 	  int i;
3272 
3273 	  /* Delete the RETURN and just execute the delay list insns.
3274 
3275 	     We do this by deleting the INSN containing the SEQUENCE, then
3276 	     re-emitting the insns separately, and then deleting the RETURN.
3277 	     This allows the count of the jump target to be properly
3278 	     decremented.
3279 
3280 	     Note that we need to change the INSN_UID of the re-emitted insns
3281 	     since it is used to hash the insns for mark_target_live_regs and
3282 	     the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3283 
3284 	     Clear the from target bit, since these insns are no longer
3285 	     in delay slots.  */
3286 	  for (i = 0; i < XVECLEN (pat, 0); i++)
3287 	    INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3288 
3289 	  rtx_insn *prev = PREV_INSN (insn);
3290 	  delete_related_insns (insn);
3291 	  gcc_assert (GET_CODE (pat) == SEQUENCE);
3292 	  add_insn_after (delay_insn, prev, NULL);
3293 	  after = delay_insn;
3294 	  for (i = 1; i < pat->len (); i++)
3295 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3296 	  delete_scheduled_jump (delay_insn);
3297 	  continue;
3298 	}
3299 
3300       /* Now look only at the cases where we have a filled JUMP_INSN.  */
3301       rtx_jump_insn *delay_jump_insn =
3302 		dyn_cast <rtx_jump_insn *> (delay_insn);
3303       if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3304 	  || condjump_in_parallel_p (delay_jump_insn)))
3305 	continue;
3306 
3307       target_label = JUMP_LABEL (delay_jump_insn);
3308       if (target_label && ANY_RETURN_P (target_label))
3309 	continue;
3310 
3311       /* If this jump goes to another unconditional jump, thread it, but
3312 	 don't convert a jump into a RETURN here.  */
3313       rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3314 							 delay_jump_insn,
3315 							 &crossing));
3316       if (ANY_RETURN_P (trial))
3317 	trial = find_end_label (trial);
3318 
3319       if (trial && trial != target_label
3320 	  && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3321 	{
3322 	  reorg_redirect_jump (delay_jump_insn, trial);
3323 	  target_label = trial;
3324 	  if (crossing)
3325 	    CROSSING_JUMP_P (delay_jump_insn) = 1;
3326 	}
3327 
3328       /* If the first insn at TARGET_LABEL is redundant with a previous
3329 	 insn, redirect the jump to the following insn and process again.
3330 	 We use next_real_nondebug_insn instead of next_active_insn so we
3331 	 don't skip USE-markers, or we'll end up with incorrect
3332 	 liveness info.  */
3333       trial = next_real_nondebug_insn (target_label);
3334       if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3335 	  && redundant_insn (trial, insn, vNULL)
3336 	  && ! can_throw_internal (trial))
3337 	{
3338 	  /* Figure out where to emit the special USE insn so we don't
3339 	     later incorrectly compute register live/death info.  */
3340 	  rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3341 	  if (tmp == 0)
3342 	    tmp = find_end_label (simple_return_rtx);
3343 
3344 	  if (tmp)
3345 	    {
3346 	      /* Insert the special USE insn and update dataflow info.
3347 		 We know "trial" is an insn here as it is the output of
3348 		 next_real_nondebug_insn () above.  */
3349 	      update_block (as_a <rtx_insn *> (trial), tmp);
3350 
3351 	      /* Now emit a label before the special USE insn, and
3352 		 redirect our jump to the new label.  */
3353 	      target_label = get_label_before (PREV_INSN (tmp), target_label);
3354 	      reorg_redirect_jump (delay_jump_insn, target_label);
3355 	      next = insn;
3356 	      continue;
3357 	    }
3358 	}
3359 
3360       /* Similarly, if it is an unconditional jump with one insn in its
3361 	 delay list and that insn is redundant, thread the jump.  */
3362       rtx_sequence *trial_seq =
3363 	trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3364       if (trial_seq
3365 	  && trial_seq->len () == 2
3366 	  && JUMP_P (trial_seq->insn (0))
3367 	  && simplejump_or_return_p (trial_seq->insn (0))
3368 	  && redundant_insn (trial_seq->insn (1), insn, vNULL))
3369 	{
3370 	  rtx temp_label = JUMP_LABEL (trial_seq->insn (0));
3371 	  if (ANY_RETURN_P (temp_label))
3372 	    temp_label = find_end_label (temp_label);
3373 
3374 	  if (temp_label
3375 	      && redirect_with_delay_slots_safe_p (delay_jump_insn,
3376 						   temp_label, insn))
3377 	    {
3378 	      update_block (trial_seq->insn (1), insn);
3379 	      reorg_redirect_jump (delay_jump_insn, temp_label);
3380 	      next = insn;
3381 	      continue;
3382 	    }
3383 	}
3384 
3385       /* See if we have a simple (conditional) jump that is useless.  */
3386       if (!CROSSING_JUMP_P (delay_jump_insn)
3387 	  && !INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3388 	  && !condjump_in_parallel_p (delay_jump_insn)
3389 	  && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3390 	  && !BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label)))
3391 	  /* If the last insn in the delay slot sets CC0 for some insn,
3392 	     various code assumes that it is in a delay slot.  We could
3393 	     put it back where it belonged and delete the register notes,
3394 	     but it doesn't seem worthwhile in this uncommon case.  */
3395 	  && (!HAVE_cc0
3396 	      || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3397 				  REG_CC_USER, NULL_RTX)))
3398 	{
3399 	  rtx_insn *after;
3400 	  int i;
3401 
3402 	  /* All this insn does is execute its delay list and jump to the
3403 	     following insn.  So delete the jump and just execute the delay
3404 	     list insns.
3405 
3406 	     We do this by deleting the INSN containing the SEQUENCE, then
3407 	     re-emitting the insns separately, and then deleting the jump.
3408 	     This allows the count of the jump target to be properly
3409 	     decremented.
3410 
3411 	     Note that we need to change the INSN_UID of the re-emitted insns
3412 	     since it is used to hash the insns for mark_target_live_regs and
3413 	     the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3414 
3415 	     Clear the from target bit, since these insns are no longer
3416 	     in delay slots.  */
3417 	  for (i = 0; i < XVECLEN (pat, 0); i++)
3418 	    INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3419 
3420 	  rtx_insn *prev = PREV_INSN (insn);
3421 	  delete_related_insns (insn);
3422 	  gcc_assert (GET_CODE (pat) == SEQUENCE);
3423 	  add_insn_after (delay_jump_insn, prev, NULL);
3424 	  after = delay_jump_insn;
3425 	  for (i = 1; i < pat->len (); i++)
3426 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3427 	  delete_scheduled_jump (delay_jump_insn);
3428 	  continue;
3429 	}
3430 
3431       /* See if this is an unconditional jump around a single insn which is
3432 	 identical to the one in its delay slot.  In this case, we can just
3433 	 delete the branch and the insn in its delay slot.  */
3434       if (next && NONJUMP_INSN_P (next)
3435 	  && label_before_next_insn (next, insn) == target_label
3436 	  && simplejump_p (insn)
3437 	  && XVECLEN (pat, 0) == 2
3438 	  && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3439 	{
3440 	  delete_related_insns (insn);
3441 	  continue;
3442 	}
3443 
3444       /* See if this jump (with its delay slots) conditionally branches
3445 	 around an unconditional jump (without delay slots).  If so, invert
3446 	 this jump and point it to the target of the second jump.  We cannot
3447 	 do this for annulled jumps, though.  Again, don't convert a jump to
3448 	 a RETURN here.  */
3449       if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3450 	  && any_condjump_p (delay_jump_insn)
3451 	  && next && simplejump_or_return_p (next)
3452 	  && (next_active_insn (as_a<rtx_insn *> (target_label))
3453 	      == next_active_insn (next))
3454 	  && no_labels_between_p (insn, next))
3455 	{
3456 	  rtx label = JUMP_LABEL (next);
3457 	  rtx old_label = JUMP_LABEL (delay_jump_insn);
3458 
3459 	  if (ANY_RETURN_P (label))
3460 	    label = find_end_label (label);
3461 
3462 	  /* find_end_label can generate a new label. Check this first.  */
3463 	  if (label
3464 	      && no_labels_between_p (insn, next)
3465 	      && redirect_with_delay_slots_safe_p (delay_jump_insn,
3466 						   label, insn))
3467 	    {
3468 	      /* Be careful how we do this to avoid deleting code or labels
3469 		 that are momentarily dead.  See similar optimization in
3470 		 jump.c  */
3471 	      if (old_label)
3472 		++LABEL_NUSES (old_label);
3473 
3474 	      if (invert_jump (delay_jump_insn, label, 1))
3475 		{
3476 		  int i;
3477 
3478 		  /* Must update the INSN_FROM_TARGET_P bits now that
3479 		     the branch is reversed, so that mark_target_live_regs
3480 		     will handle the delay slot insn correctly.  */
3481 		  for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3482 		    {
3483 		      rtx slot = XVECEXP (PATTERN (insn), 0, i);
3484 		      INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3485 		    }
3486 
3487 		  delete_related_insns (next);
3488 		  next = insn;
3489 		}
3490 
3491 	      if (old_label && --LABEL_NUSES (old_label) == 0)
3492 		delete_related_insns (old_label);
3493 	      continue;
3494 	    }
3495 	}
3496 
3497       /* If we own the thread opposite the way this insn branches, see if we
3498 	 can merge its delay slots with following insns.  */
3499       if (INSN_FROM_TARGET_P (pat->insn (1))
3500 	  && own_thread_p (NEXT_INSN (insn), 0, 1))
3501 	try_merge_delay_insns (insn, next);
3502       else if (! INSN_FROM_TARGET_P (pat->insn (1))
3503 	       && own_thread_p (target_label, target_label, 0))
3504 	try_merge_delay_insns (insn,
3505 			       next_active_insn (as_a<rtx_insn *> (target_label)));
3506 
3507       /* If we get here, we haven't deleted INSN.  But we may have deleted
3508 	 NEXT, so recompute it.  */
3509       next = next_active_insn (insn);
3510     }
3511 }
3512 
3513 
3514 /* Look for filled jumps to the end of function label.  We can try to convert
3515    them into RETURN insns if the insns in the delay slot are valid for the
3516    RETURN as well.  */
3517 
3518 static void
make_return_insns (rtx_insn *first)
3520 {
3521   rtx_insn *insn;
3522   rtx_jump_insn *jump_insn;
3523   rtx real_return_label = function_return_label;
3524   rtx real_simple_return_label = function_simple_return_label;
3525   int slots, i;
3526 
3527   /* See if there is a RETURN insn in the function other than the one we
3528      made for END_OF_FUNCTION_LABEL.  If so, set up anything we can't change
3529      into a RETURN to jump to it.  */
3530   for (insn = first; insn; insn = NEXT_INSN (insn))
3531     if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3532       {
3533 	rtx t = get_label_before (insn, NULL_RTX);
3534 	if (PATTERN (insn) == ret_rtx)
3535 	  real_return_label = t;
3536 	else
3537 	  real_simple_return_label = t;
3538 	break;
3539       }
3540 
3541   /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3542      was equal to END_OF_FUNCTION_LABEL.  */
3543   if (real_return_label)
3544     LABEL_NUSES (real_return_label)++;
3545   if (real_simple_return_label)
3546     LABEL_NUSES (real_simple_return_label)++;
3547 
3548   /* Clear the list of insns to fill so we can use it.  */
3549   obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3550 
3551   for (insn = first; insn; insn = NEXT_INSN (insn))
3552     {
3553       int flags;
3554       rtx kind, real_label;
3555 
3556       /* Only look at filled JUMP_INSNs that go to the end of function
3557 	 label.  */
3558       if (!NONJUMP_INSN_P (insn))
3559 	continue;
3560 
3561       if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3562 	continue;
3563 
3564       rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3565 
3566       if (!jump_to_label_p (pat->insn (0)))
3567 	continue;
3568 
3569       if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3570 	{
3571 	  kind = ret_rtx;
3572 	  real_label = real_return_label;
3573 	}
3574       else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3575 	{
3576 	  kind = simple_return_rtx;
3577 	  real_label = real_simple_return_label;
3578 	}
3579       else
3580 	continue;
3581 
3582       jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3583 
3584       /* If we can't make the jump into a RETURN, try to redirect it to the best
3585 	 RETURN and go on to the next insn.  */
3586       if (!reorg_redirect_jump (jump_insn, kind))
3587 	{
3588 	  /* Make sure redirecting the jump will not invalidate the delay
3589 	     slot insns.  */
3590 	  if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3591 	    reorg_redirect_jump (jump_insn, real_label);
3592 	  continue;
3593 	}
3594 
      /* See if this RETURN can accept the insns currently in its delay slot.
	 It can if it has at least as many slots and the contents of each
	 are valid.  */
3598 
3599       flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3600       slots = num_delay_slots (jump_insn);
3601       if (slots >= XVECLEN (pat, 0) - 1)
3602 	{
3603 	  for (i = 1; i < XVECLEN (pat, 0); i++)
3604 	    if (! (
3605 #if ANNUL_IFFALSE_SLOTS
3606 		   (INSN_ANNULLED_BRANCH_P (jump_insn)
3607 		    && INSN_FROM_TARGET_P (pat->insn (i)))
3608 		   ? eligible_for_annul_false (jump_insn, i - 1,
3609 					       pat->insn (i), flags) :
3610 #endif
3611 #if ANNUL_IFTRUE_SLOTS
3612 		   (INSN_ANNULLED_BRANCH_P (jump_insn)
3613 		    && ! INSN_FROM_TARGET_P (pat->insn (i)))
3614 		   ? eligible_for_annul_true (jump_insn, i - 1,
3615 					      pat->insn (i), flags) :
3616 #endif
3617 		   eligible_for_delay (jump_insn, i - 1,
3618 				       pat->insn (i), flags)))
3619 	      break;
3620 	}
3621       else
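	/* The RETURN has too few delay slots to inherit the current delay
	   list; make sure the test below fails.  */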
3622 	i = 0;
3623 
3624       if (i == XVECLEN (pat, 0))
3625 	continue;
3626 
3627       /* We have to do something with this insn.  If it is an unconditional
3628 	 RETURN, delete the SEQUENCE and output the individual insns,
3629 	 followed by the RETURN.  Then set things up so we try to find
3630 	 insns for its delay slots, if it needs some.  */
3631       if (ANY_RETURN_P (PATTERN (jump_insn)))
3632 	{
3633 	  rtx_insn *prev = PREV_INSN (insn);
3634 
3635 	  delete_related_insns (insn);
3636 	  for (i = 1; i < XVECLEN (pat, 0); i++)
3637 	    {
3638 	      rtx_insn *in_seq_insn = as_a<rtx_insn *> (XVECEXP (pat, 0, i));
3639 	      prev = emit_insn_after_setloc (PATTERN (in_seq_insn), prev,
3640 					     INSN_LOCATION (in_seq_insn));
3641 	    }
3642 
3643 	  insn = emit_jump_insn_after_setloc (PATTERN (jump_insn), prev,
3644 					      INSN_LOCATION (jump_insn));
3645 	  emit_barrier_after (insn);
3646 
3647 	  if (slots)
3648 	    obstack_ptr_grow (&unfilled_slots_obstack, insn);
3649 	}
3650       else
3651 	/* It is probably more efficient to keep this with its current
3652 	   delay slot as a branch to a RETURN.  */
3653 	reorg_redirect_jump (jump_insn, real_label);
3654     }
3655 
3656   /* Now delete REAL_RETURN_LABEL if we never used it.  Then try to fill any
3657      new delay slots we have created.  */
3658   if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3659     delete_related_insns (real_return_label);
3660   if (real_simple_return_label != NULL_RTX
3661       && --LABEL_NUSES (real_simple_return_label) == 0)
3662     delete_related_insns (real_simple_return_label);
3663 
3664   fill_simple_delay_slots (1);
3665   fill_simple_delay_slots (0);
3666 }
3667 
3668 /* Try to find insns to place in delay slots.  */
3669 
3670 static void
dbr_schedule (rtx_insn *first)
3672 {
3673   rtx_insn *insn, *next, *epilogue_insn = 0;
3674   int i;
3675   bool need_return_insns;
3676 
3677   /* If the current function has no insns other than the prologue and
3678      epilogue, then do not try to fill any delay slots.  */
3679   if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3680     return;
3681 
3682   /* Find the highest INSN_UID and allocate and initialize our map from
3683      INSN_UID's to position in code.  */
3684   for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3685     {
3686       if (INSN_UID (insn) > max_uid)
3687 	max_uid = INSN_UID (insn);
3688       if (NOTE_P (insn)
3689 	  && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3690 	epilogue_insn = insn;
3691     }
3692 
3693   uid_to_ruid = XNEWVEC (int, max_uid + 1);
3694   for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3695     uid_to_ruid[INSN_UID (insn)] = i;
3696 
3697   /* Initialize the list of insns that need filling.  */
3698   if (unfilled_firstobj == 0)
3699     {
3700       gcc_obstack_init (&unfilled_slots_obstack);
3701       unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3702     }
3703 
3704   for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3705     {
3706       rtx target;
3707 
3708       /* Skip vector tables.  We can't get attributes for them.  */
3709       if (JUMP_TABLE_DATA_P (insn))
3710 	continue;
3711 
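      /* Start every insn with clean annulled-branch and from-target bits;
	 this pass sets them as it fills delay slots.  */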
3712       if (JUMP_P (insn))
3713         INSN_ANNULLED_BRANCH_P (insn) = 0;
3714       INSN_FROM_TARGET_P (insn) = 0;
3715 
3716       if (num_delay_slots (insn) > 0)
3717 	obstack_ptr_grow (&unfilled_slots_obstack, insn);
3718 
3719       /* Ensure all jumps go to the last of a set of consecutive labels.  */
3720       if (JUMP_P (insn)
3721 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
3722 	  && !ANY_RETURN_P (JUMP_LABEL (insn))
3723 	  && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3724 	      != JUMP_LABEL (insn)))
3725 	redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3726     }
3727 
3728   init_resource_info (epilogue_insn);
3729 
3730   /* Show we haven't computed an end-of-function label yet.  */
3731   function_return_label = function_simple_return_label = NULL;
3732 
3733   /* Initialize the statistics for this function.  */
3734   memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3735   memset (num_filled_delays, 0, sizeof num_filled_delays);
3736 
3737   /* Now do the delay slot filling.  Try everything twice in case earlier
3738      changes make more slots fillable.  */
3739 
3740   for (reorg_pass_number = 0;
3741        reorg_pass_number < MAX_REORG_PASSES;
3742        reorg_pass_number++)
3743     {
3744       fill_simple_delay_slots (1);
3745       fill_simple_delay_slots (0);
3746       if (!targetm.no_speculation_in_delay_slots_p ())
3747 	fill_eager_delay_slots ();
3748       relax_delay_slots (first);
3749     }
3750 
3751   /* If we made an end of function label, indicate that it is now
3752      safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3753      If it is now unused, delete it.  */
3754   if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3755     delete_related_insns (function_return_label);
3756   if (function_simple_return_label
3757       && --LABEL_NUSES (function_simple_return_label) == 0)
3758     delete_related_insns (function_simple_return_label);
3759 
3760   need_return_insns = false;
3761   need_return_insns |= targetm.have_return () && function_return_label != 0;
3762   need_return_insns |= (targetm.have_simple_return ()
3763 			&& function_simple_return_label != 0);
3764   if (need_return_insns)
3765     make_return_insns (first);
3766 
3767   /* Delete any USE insns made by update_block; subsequent passes don't need
3768      them or know how to deal with them.  */
3769   for (insn = first; insn; insn = next)
3770     {
3771       next = NEXT_INSN (insn);
3772 
3773       if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3774 	  && INSN_P (XEXP (PATTERN (insn), 0)))
3775 	next = delete_related_insns (insn);
3776     }
3777 
3778   obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3779 
3780   /* It is not clear why the line below is needed, but it does seem to be.  */
3781   unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3782 
3783   if (dump_file)
3784     {
3785       int i, j, need_comma;
3786       int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3787       int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3788 
3789       for (reorg_pass_number = 0;
3790 	   reorg_pass_number < MAX_REORG_PASSES;
3791 	   reorg_pass_number++)
3792 	{
3793 	  fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3794 	  for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3795 	    {
3796 	      need_comma = 0;
3797 	      fprintf (dump_file, ";; Reorg function #%d\n", i);
3798 
3799 	      fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3800 		       num_insns_needing_delays[i][reorg_pass_number]);
3801 
3802 	      for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3803 		if (num_filled_delays[i][j][reorg_pass_number])
3804 		  {
3805 		    if (need_comma)
3806 		      fprintf (dump_file, ", ");
3807 		    need_comma = 1;
3808 		    fprintf (dump_file, "%d got %d delays",
3809 			     num_filled_delays[i][j][reorg_pass_number], j);
3810 		  }
3811 	      fprintf (dump_file, "\n");
3812 	    }
3813 	}
3814       memset (total_delay_slots, 0, sizeof total_delay_slots);
3815       memset (total_annul_slots, 0, sizeof total_annul_slots);
3816       for (insn = first; insn; insn = NEXT_INSN (insn))
3817 	{
3818 	  if (! insn->deleted ()
3819 	      && NONJUMP_INSN_P (insn)
3820 	      && GET_CODE (PATTERN (insn)) != USE
3821 	      && GET_CODE (PATTERN (insn)) != CLOBBER)
3822 	    {
3823 	      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3824 		{
3825                   rtx control;
3826 		  j = XVECLEN (PATTERN (insn), 0) - 1;
3827 		  if (j > MAX_DELAY_HISTOGRAM)
3828 		    j = MAX_DELAY_HISTOGRAM;
3829                   control = XVECEXP (PATTERN (insn), 0, 0);
3830 		  if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3831 		    total_annul_slots[j]++;
3832 		  else
3833 		    total_delay_slots[j]++;
3834 		}
3835 	      else if (num_delay_slots (insn) > 0)
3836 		total_delay_slots[0]++;
3837 	    }
3838 	}
3839       fprintf (dump_file, ";; Reorg totals: ");
3840       need_comma = 0;
3841       for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3842 	{
3843 	  if (total_delay_slots[j])
3844 	    {
3845 	      if (need_comma)
3846 		fprintf (dump_file, ", ");
3847 	      need_comma = 1;
3848 	      fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3849 	    }
3850 	}
3851       fprintf (dump_file, "\n");
3852 
3853       if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3854 	{
3855 	  fprintf (dump_file, ";; Reorg annuls: ");
3856 	  need_comma = 0;
3857 	  for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3858 	    {
3859 	      if (total_annul_slots[j])
3860 		{
3861 		  if (need_comma)
3862 		    fprintf (dump_file, ", ");
3863 		  need_comma = 1;
3864 		  fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3865 		}
3866 	    }
3867 	  fprintf (dump_file, "\n");
3868 	}
3869 
3870       fprintf (dump_file, "\n");
3871     }
3872 
3873   if (!sibling_labels.is_empty ())
3874     {
3875       update_alignments (sibling_labels);
3876       sibling_labels.release ();
3877     }
3878 
3879   free_resource_info ();
3880   free (uid_to_ruid);
3881   crtl->dbr_scheduled_p = true;
3882 }
3883 
3884 /* Run delay slot optimization.  */
3885 static unsigned int
rest_of_handle_delay_slots (void)
3887 {
3888   if (DELAY_SLOTS)
3889     dbr_schedule (get_insns ());
3890 
3891   return 0;
3892 }
3893 
3894 namespace {
3895 
3896 const pass_data pass_data_delay_slots =
3897 {
3898   RTL_PASS, /* type */
3899   "dbr", /* name */
3900   OPTGROUP_NONE, /* optinfo_flags */
3901   TV_DBR_SCHED, /* tv_id */
3902   0, /* properties_required */
3903   0, /* properties_provided */
3904   0, /* properties_destroyed */
3905   0, /* todo_flags_start */
3906   0, /* todo_flags_finish */
3907 };
3908 
3909 class pass_delay_slots : public rtl_opt_pass
3910 {
3911 public:
  pass_delay_slots (gcc::context *ctxt)
3913     : rtl_opt_pass (pass_data_delay_slots, ctxt)
3914   {}
3915 
3916   /* opt_pass methods: */
3917   virtual bool gate (function *);
  virtual unsigned int execute (function *)
3919     {
3920       return rest_of_handle_delay_slots ();
3921     }
3922 
3923 }; // class pass_delay_slots
3924 
3925 bool
pass_delay_slots::gate (function *)
3927 {
3928   /* At -O0 dataflow info isn't updated after RA.  */
3929   if (DELAY_SLOTS)
3930     return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3931 
3932   return false;
3933 }
3934 
3935 } // anon namespace
3936 
3937 rtl_opt_pass *
make_pass_delay_slots (gcc::context *ctxt)
3939 {
3940   return new pass_delay_slots (ctxt);
3941 }
3942 
3943 /* Machine dependent reorg pass.  */
3944 
3945 namespace {
3946 
3947 const pass_data pass_data_machine_reorg =
3948 {
3949   RTL_PASS, /* type */
3950   "mach", /* name */
3951   OPTGROUP_NONE, /* optinfo_flags */
3952   TV_MACH_DEP, /* tv_id */
3953   0, /* properties_required */
3954   0, /* properties_provided */
3955   0, /* properties_destroyed */
3956   0, /* todo_flags_start */
3957   0, /* todo_flags_finish */
3958 };
3959 
3960 class pass_machine_reorg : public rtl_opt_pass
3961 {
3962 public:
  pass_machine_reorg (gcc::context *ctxt)
3964     : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3965   {}
3966 
3967   /* opt_pass methods: */
  virtual bool gate (function *)
3969     {
3970       return targetm.machine_dependent_reorg != 0;
3971     }
3972 
  virtual unsigned int execute (function *)
3974     {
3975       targetm.machine_dependent_reorg ();
3976       return 0;
3977     }
3978 
3979 }; // class pass_machine_reorg
3980 
3981 } // anon namespace
3982 
3983 rtl_opt_pass *
make_pass_machine_reorg (gcc::context *ctxt)
3985 {
3986   return new pass_machine_reorg (ctxt);
3987 }
3988