1 /* Perform instruction reorganizations for delay slot filling.
2    Copyright (C) 1992-2020 Free Software Foundation, Inc.
3    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4    Hacked by Michael Tiemann (tiemann@cygnus.com).
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 /* Instruction reorganization pass.
23 
24    This pass runs after register allocation and final jump
25    optimization.  It should be the last pass to run before peephole.
26    It serves primarily to fill delay slots of insns, typically branch
27    and call insns.  Other insns typically involve more complicated
28    interactions of data dependencies and resource constraints, and
29    are better handled by scheduling before register allocation (by the
30    function `schedule_insns').
31 
32    The Branch Penalty is the number of extra cycles that are needed to
33    execute a branch insn.  On an ideal machine, branches take a single
34    cycle, and the Branch Penalty is 0.  Several RISC machines approach
35    branch delays differently:
36 
37    The MIPS has a single branch delay slot.  Most insns
38    (except other branches) can be used to fill this slot.  When the
39    slot is filled, two insns execute in two cycles, reducing the
40    branch penalty to zero.
41 
42    The SPARC always has a branch delay slot, but its effects can be
43    annulled when the branch is not taken.  This means that, failing to
44    find other sources of insns, we can hoist an insn from the branch
45    target that would only be safe to execute knowing that the branch
46    is taken.
47 
48    The HP-PA always has a branch delay slot.  For unconditional branches
49    its effects can be annulled when the branch is taken.  The effects
50    of the delay slot in a conditional branch can be nullified for forward
51    taken branches, or for untaken backward branches.  This means
52    we can hoist insns from the fall-through path for forward branches or
53    steal insns from the target of backward branches.
54 
55    The TMS320C3x and C4x have three branch delay slots.  When the three
56    slots are filled, the branch penalty is zero.  Most insns can fill the
57    delay slots except jump insns.
58 
59    Three techniques for filling delay slots have been implemented so far:
60 
61    (1) `fill_simple_delay_slots' is the simplest, most efficient way
62    to fill delay slots.  This pass first looks for insns which come
63    from before the branch and which are safe to execute after the
64    branch.  Then it searches after the insn requiring delay slots or,
65    in the case of a branch, for insns that are after the point at
66    which the branch merges into the fallthrough code, if such a point
67    exists.  When such insns are found, the branch penalty decreases
68    and no code expansion takes place.
69 
70    (2) `fill_eager_delay_slots' is more complicated: it is used for
71    scheduling conditional jumps, or for scheduling jumps which cannot
72    be filled using (1).  A machine need not have annulled jumps to use
73    this strategy, but it helps (by keeping more options open).
74    `fill_eager_delay_slots' tries to guess the direction the branch
75    will go; if it guesses right 100% of the time, it can reduce the
76    branch penalty as much as `fill_simple_delay_slots' does.  If it
77    guesses wrong 100% of the time, it might as well schedule nops.  When
78    `fill_eager_delay_slots' takes insns from the fall-through path of
79    the jump, usually there is no code expansion; when it takes insns
80    from the branch target, there is code expansion if it is not the
81    only way to reach that target.
82 
83    (3) `relax_delay_slots' uses a set of rules to simplify code that
84    has been reorganized by (1) and (2).  It finds cases where
85    a conditional test can be eliminated, jumps can be threaded, extra
86    insns can be eliminated, etc.  It is the job of (1) and (2) to do a
87    good job of scheduling locally; `relax_delay_slots' takes care of
88    making the various individual schedules work well together.  It is
89    especially tuned to handle the control flow interactions of branch
90    insns.  It does nothing for insns with delay slots that do not
91    branch.
92 
93    On machines that use CC0, we are very conservative.  We will not make
94    a copy of an insn involving CC0 since we want to maintain a 1-1
95    correspondence between the insn that sets CC0 and its user.  The insns are
96    allowed to be separated by placing an insn that sets CC0 (but not an insn
97    that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98    delay slot.  In that case, we point each insn at the other with REG_CC_USER
99    and REG_CC_SETTER notes.  Note that these restrictions affect very few
100    machines because most RISC machines with delay slots will not use CC0
101    (the RT is the only known exception at this point).  */
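
/* For illustration only -- a hand-written sketch, not compiler output.
   On a MIPS-like target with a single delay slot, `fill_simple_delay_slots'
   can rewrite

     add  $t0,$t1,$t2
     beq  $a0,$zero,L2
     nop                      # unfilled delay slot

   as

     beq  $a0,$zero,L2
     add  $t0,$t1,$t2         # hoisted into the delay slot

   because the add neither feeds the comparison nor conflicts with anything
   on either path out of the branch.  */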
102 
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "memmodel.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "emit-rtl.h"
116 #include "recog.h"
117 #include "insn-attr.h"
118 #include "resource.h"
119 #include "tree-pass.h"
120 
121 
122 /* First, some functions that were used before GCC got a control flow graph.
123    These functions are now only used here in reorg.c, and have therefore
124    been moved here to avoid inadvertent misuse elsewhere in the compiler.  */
125 
126 /* Return the last label to mark the same position as LABEL.  Return LABEL
127    itself if it is null or any return rtx.  */
128 
129 static rtx
130 skip_consecutive_labels (rtx label_or_return)
131 {
132   rtx_insn *insn;
133 
134   if (label_or_return && ANY_RETURN_P (label_or_return))
135     return label_or_return;
136 
137   rtx_insn *label = as_a <rtx_insn *> (label_or_return);
138 
139   /* __builtin_unreachable can create a CODE_LABEL followed by a BARRIER.
140 
141      Since reaching the CODE_LABEL is undefined behavior, we can return
142      any code label and we're OK at run time.
143 
144      However, if we return a CODE_LABEL which leads to a shrink-wrapped
145      epilogue, but the path does not have a prologue, then we will trip
146      a sanity check in the dwarf2 cfi code which wants to verify that
147      the CFIs are all the same on the traces leading to the epilogue.
148 
149      So we explicitly disallow looking through BARRIERS here.  */
150   for (insn = label;
151        insn != 0 && !INSN_P (insn) && !BARRIER_P (insn);
152        insn = NEXT_INSN (insn))
153     if (LABEL_P (insn))
154       label = insn;
155 
156   return label;
157 }
158 
159 /* INSN uses CC0 and is being moved into a delay slot.  Set up REG_CC_SETTER
160    and REG_CC_USER notes so we can find it.  */
161 
162 static void
163 link_cc0_insns (rtx_insn *insn)
164 {
165   rtx user = next_nonnote_insn (insn);
166 
167   if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
168     user = XVECEXP (PATTERN (user), 0, 0);
169 
170   add_reg_note (user, REG_CC_SETTER, insn);
171   add_reg_note (insn, REG_CC_USER, user);
172 }
173 
174 /* Insns which have delay slots that have not yet been filled.  */
175 
176 static struct obstack unfilled_slots_obstack;
177 static rtx *unfilled_firstobj;
178 
179 /* Define macros to refer to the first and last slot containing unfilled
180    insns.  These are used because the list may move and its address
181    should be recomputed at each use.  */
182 
183 #define unfilled_slots_base	\
184   ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
185 
186 #define unfilled_slots_next	\
187   ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
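
/* Illustrative sketch (not a separate API): an insn that still needs its
   delay slots filled is recorded with

     obstack_ptr_grow (&unfilled_slots_obstack, insn);

   and the pending entries are then scanned between the two macros above,
   e.g.

     for (rtx_insn **slot = unfilled_slots_base;
          slot < unfilled_slots_next; slot++)
       ...

   The macros are re-evaluated at every use because growing the obstack
   may move its base.  */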
188 
189 /* Points to the label before the end of the function, or before a
190    return insn.  */
191 static rtx_code_label *function_return_label;
192 /* Likewise for a simple_return.  */
193 static rtx_code_label *function_simple_return_label;
194 
195 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
196    not always monotonically increase.  */
197 static int *uid_to_ruid;
198 
199 /* Highest valid index in `uid_to_ruid'.  */
200 static int max_uid;
201 
202 static int stop_search_p (rtx_insn *, int);
203 static int resource_conflicts_p (struct resources *, struct resources *);
204 static int insn_references_resource_p (rtx, struct resources *, bool);
205 static int insn_sets_resource_p (rtx, struct resources *, bool);
206 static rtx_code_label *find_end_label (rtx);
207 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
208 				      int);
209 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
210 static rtx_insn *delete_from_delay_slot (rtx_insn *);
211 static void delete_scheduled_jump (rtx_insn *);
212 static void note_delay_statistics (int, int);
213 static int get_jump_flags (const rtx_insn *, rtx);
214 static int mostly_true_jump (rtx);
215 static rtx get_branch_condition (const rtx_insn *, rtx);
216 static int condition_dominates_p (rtx, const rtx_insn *);
217 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
218 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
219 					    const vec<rtx_insn *> &);
220 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
221 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
222 					  vec<rtx_insn *> *,
223 					  struct resources *,
224 					  struct resources *,
225 					  struct resources *,
226 					  int, int *, int *,
227 					  rtx *);
228 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
229 					       vec<rtx_insn *> *,
230 					       struct resources *,
231 					       struct resources *,
232 					       struct resources *,
233 					       int, int *, int *);
234 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
235 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
236 static int own_thread_p (rtx, rtx, int);
237 static void update_block (rtx_insn *, rtx_insn *);
238 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
239 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
240 static void fix_reg_dead_note (rtx_insn *, rtx);
241 static void update_reg_unused_notes (rtx_insn *, rtx);
242 static void fill_simple_delay_slots (int);
243 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
244 				    int, int, int, int,
245 				    int *, vec<rtx_insn *> *);
246 static void fill_eager_delay_slots (void);
247 static void relax_delay_slots (rtx_insn *);
248 static void make_return_insns (rtx_insn *);
249 
250 /* A wrapper around next_active_insn which takes care to return any kind of
251    return rtx unchanged.  */
252 
253 static rtx
254 first_active_target_insn (rtx insn)
255 {
256   if (ANY_RETURN_P (insn))
257     return insn;
258   return next_active_insn (as_a <rtx_insn *> (insn));
259 }
260 
261 /* Return true iff INSN is a simplejump, or any kind of return insn.  */
262 
263 static bool
264 simplejump_or_return_p (rtx insn)
265 {
266   return (JUMP_P (insn)
267 	  && (simplejump_p (as_a <rtx_insn *> (insn))
268 	      || ANY_RETURN_P (PATTERN (insn))));
269 }
270 
271 /* Return TRUE if this insn should stop the search for insns to fill delay
272    slots.  LABELS_P indicates that labels should terminate the search.
273    In all cases, jumps terminate the search.  */
274 
275 static int
276 stop_search_p (rtx_insn *insn, int labels_p)
277 {
278   if (insn == 0)
279     return 1;
280 
281   /* If the insn can throw an exception that is caught within the function,
282      it may effectively perform a jump from the viewpoint of the function.
283      Therefore act like for a jump.  */
284   if (can_throw_internal (insn))
285     return 1;
286 
287   switch (GET_CODE (insn))
288     {
289     case NOTE:
290     case CALL_INSN:
291     case DEBUG_INSN:
292       return 0;
293 
294     case CODE_LABEL:
295       return labels_p;
296 
297     case JUMP_INSN:
298     case BARRIER:
299       return 1;
300 
301     case INSN:
302       /* OK unless it contains a delay slot or is an `asm' insn of some type.
303 	 We don't know anything about these.  */
304       return (GET_CODE (PATTERN (insn)) == SEQUENCE
305 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT
306 	      || asm_noperands (PATTERN (insn)) >= 0);
307 
308     default:
309       gcc_unreachable ();
310     }
311 }
312 
313 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
314    resource set contains a volatile memory reference.  Otherwise, return FALSE.  */
315 
316 static int
317 resource_conflicts_p (struct resources *res1, struct resources *res2)
318 {
319   if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
320       || res1->volatil || res2->volatil)
321     return 1;
322 
323   return hard_reg_set_intersect_p (res1->regs, res2->regs);
324 }
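
/* Example, for illustration: if RES1 marks only hard register 2 while RES2
   marks hard registers 2 and 3, the register sets intersect and a conflict
   is reported.  A volatile memory reference recorded in either set is
   treated as conflicting with everything, hence the `volatil' test before
   the register intersection.  */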
325 
326 /* Return TRUE if any resource marked in RES, a `struct resources', is
327    referenced by INSN.  If INCLUDE_DELAYED_EFFECTS is set, also count the
328    resources used by any routine that INSN calls.
329 
330    We compute this by computing all the resources referenced by INSN and
331    seeing if this conflicts with RES.  It might be faster to directly check
332    ourselves, and this is the way it used to work, but it means duplicating
333    a large block of complex code.  */
334 
335 static int
336 insn_references_resource_p (rtx insn, struct resources *res,
337 			    bool include_delayed_effects)
338 {
339   struct resources insn_res;
340 
341   CLEAR_RESOURCE (&insn_res);
342   mark_referenced_resources (insn, &insn_res, include_delayed_effects);
343   return resource_conflicts_p (&insn_res, res);
344 }
345 
346 /* Return TRUE if INSN modifies resources that are marked in RES.
347    INCLUDE_DELAYED_EFFECTS is set if the effects of any routine called by INSN
348    should be included.  CC0 is only modified if it is explicitly set; see comments
349    in front of mark_set_resources for details.  */
350 
351 static int
352 insn_sets_resource_p (rtx insn, struct resources *res,
353 		      bool include_delayed_effects)
354 {
355   struct resources insn_sets;
356 
357   CLEAR_RESOURCE (&insn_sets);
358   mark_set_resources (insn, &insn_sets, 0,
359 		      (include_delayed_effects
360 		       ? MARK_SRC_DEST_CALL
361 		       : MARK_SRC_DEST));
362   return resource_conflicts_p (&insn_sets, res);
363 }
364 
365 /* Find a label at the end of the function or before a RETURN.  If there
366    is none, try to make one.  If that fails, return 0.
367 
368    The property of such a label is that it is placed just before the
369    epilogue or a bare RETURN insn, so that another bare RETURN can be
370    turned into a jump to the label unconditionally.  In particular, the
371    label cannot be placed before a RETURN insn with a filled delay slot.
372 
373    ??? There may be a problem with the current implementation.  Suppose
374    we start with a bare RETURN insn and call find_end_label.  It may set
375    function_return_label just before the RETURN.  Suppose the machinery
376    is able to fill the delay slot of the RETURN insn afterwards.  Then
377    function_return_label is no longer valid according to the property
378    described above and find_end_label will still return it unmodified.
379    Note that this is probably mitigated by the following observation:
380    once function_return_label is made, it is very likely the target of
381    a jump, so filling the delay slot of the RETURN will be much more
382    difficult.
383    KIND is either simple_return_rtx or ret_rtx, indicating which type of
384    return we're looking for.  */
385 
386 static rtx_code_label *
387 find_end_label (rtx kind)
388 {
389   rtx_insn *insn;
390   rtx_code_label **plabel;
391 
392   if (kind == ret_rtx)
393     plabel = &function_return_label;
394   else
395     {
396       gcc_assert (kind == simple_return_rtx);
397       plabel = &function_simple_return_label;
398     }
399 
400   /* If we found one previously, return it.  */
401   if (*plabel)
402     return *plabel;
403 
404   /* Otherwise, see if there is a label at the end of the function.  If there
405      is, it must be that RETURN insns aren't needed, so that is our return
406      label and we don't have to do anything else.  */
407 
408   insn = get_last_insn ();
409   while (NOTE_P (insn)
410 	 || (NONJUMP_INSN_P (insn)
411 	     && (GET_CODE (PATTERN (insn)) == USE
412 		 || GET_CODE (PATTERN (insn)) == CLOBBER)))
413     insn = PREV_INSN (insn);
414 
415   /* When a target threads its epilogue we might already have a
416      suitable return insn.  If so, put a label before it for the
417      function_return_label.  */
418   if (BARRIER_P (insn)
419       && JUMP_P (PREV_INSN (insn))
420       && PATTERN (PREV_INSN (insn)) == kind)
421     {
422       rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
423       rtx_code_label *label = gen_label_rtx ();
424       LABEL_NUSES (label) = 0;
425 
426       /* Put the label before any USE insns that may precede the RETURN
427 	 insn.  */
428       while (GET_CODE (temp) == USE)
429 	temp = PREV_INSN (temp);
430 
431       emit_label_after (label, temp);
432       *plabel = label;
433     }
434 
435   else if (LABEL_P (insn))
436     *plabel = as_a <rtx_code_label *> (insn);
437   else
438     {
439       rtx_code_label *label = gen_label_rtx ();
440       LABEL_NUSES (label) = 0;
441       /* If the basic block reorder pass moves the return insn to
442 	 some other place try to locate it again and put our
443 	 function_return_label there.  */
444       while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
445 	insn = PREV_INSN (insn);
446       if (insn)
447 	{
448 	  insn = PREV_INSN (insn);
449 
450 	  /* Put the label before any USE insns that may precede the
451 	     RETURN insn.  */
452 	  while (GET_CODE (insn) == USE)
453 	    insn = PREV_INSN (insn);
454 
455 	  emit_label_after (label, insn);
456 	}
457       else
458 	{
459 	  if (targetm.have_epilogue () && ! targetm.have_return ())
460 	    /* The RETURN insn has its delay slot filled so we cannot
461 	       emit the label just before it.  Since we already have
462 	       an epilogue and cannot emit a new RETURN, we cannot
463 	       emit the label at all.  */
464 	    return NULL;
465 
466 	  /* Otherwise, make a new label and emit a RETURN and BARRIER,
467 	     if needed.  */
468 	  emit_label (label);
469 	  if (targetm.have_return ())
470 	    {
471 	      /* The return we make may have delay slots too.  */
472 	      rtx_insn *pat = targetm.gen_return ();
473 	      rtx_insn *insn = emit_jump_insn (pat);
474 	      set_return_jump_label (insn);
475 	      emit_barrier ();
476 	      if (num_delay_slots (insn) > 0)
477 		obstack_ptr_grow (&unfilled_slots_obstack, insn);
478 	    }
479 	}
480       *plabel = label;
481     }
482 
483   /* Show one additional use for this label so it won't go away until
484      we are done.  */
485   ++LABEL_NUSES (*plabel);
486 
487   return *plabel;
488 }
489 
490 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
491    the pattern of INSN with the SEQUENCE.
492 
493    Returns the insn containing the SEQUENCE that replaces INSN.  */
494 
495 static rtx_insn *
496 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
497 {
498   /* Allocate the rtvec to hold the insns and the SEQUENCE.  */
499   rtvec seqv = rtvec_alloc (length + 1);
500   rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
501   rtx_insn *seq_insn = make_insn_raw (seq);
502 
503   /* If INSN has a location, use it for SEQ_INSN.  If INSN does
504      not have a location, but one of the delayed insns does, we pick up a
505      location from there later.  */
506   INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
507 
508   /* Unlink INSN from the insn chain, so that we can put it into
509      the SEQUENCE.   Remember where we want to emit SEQUENCE in AFTER.  */
510   rtx_insn *after = PREV_INSN (insn);
511   remove_insn (insn);
512   SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
513 
514   /* Build our SEQUENCE and rebuild the insn chain.  */
515   start_sequence ();
516   XVECEXP (seq, 0, 0) = emit_insn (insn);
517 
518   unsigned int delay_insns = list.length ();
519   gcc_assert (delay_insns == (unsigned int) length);
520   for (unsigned int i = 0; i < delay_insns; i++)
521     {
522       rtx_insn *tem = list[i];
523       rtx note, next;
524 
525       /* Show that this copy of the insn isn't deleted.  */
526       tem->set_undeleted ();
527 
528       /* Unlink insn from its original place, and re-emit it into
529 	 the sequence.  */
530       SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
531       XVECEXP (seq, 0, i + 1) = emit_insn (tem);
532 
533       /* The SPARC assembler, for instance, emits a warning when debug info
534          is output into the delay slot.  */
535       if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
536 	INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
537       INSN_LOCATION (tem) = 0;
538 
539       for (note = REG_NOTES (tem); note; note = next)
540 	{
541 	  next = XEXP (note, 1);
542 	  switch (REG_NOTE_KIND (note))
543 	    {
544 	    case REG_DEAD:
545 	      /* Remove any REG_DEAD notes because we can't rely on them now
546 		 that the insn has been moved.  */
547 	      remove_note (tem, note);
548 	      break;
549 
550 	    case REG_LABEL_OPERAND:
551 	    case REG_LABEL_TARGET:
552 	      /* Keep the label reference count up to date.  */
553 	      if (LABEL_P (XEXP (note, 0)))
554 		LABEL_NUSES (XEXP (note, 0)) ++;
555 	      break;
556 
557 	    default:
558 	      break;
559 	    }
560 	}
561     }
562   end_sequence ();
563 
564   /* Splice our SEQUENCE into the insn stream where INSN used to be.  */
565   add_insn_after (seq_insn, after, NULL);
566 
567   return seq_insn;
568 }
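
/* For illustration, after filling one delay slot of a branch the stream
   contains roughly (a sketch, not dumped RTL):

     (insn (sequence [(jump_insn ... the branch ...)
                      (insn ... the delay-slot insn ...)]))

   i.e. slot 0 of the SEQUENCE holds the original insn and slots 1..N hold
   the delay-list insns, in execution order.  */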
569 
570 /* Add INSN to DELAY_LIST.  The list must be kept in the order in which
571    the insns are to be executed.  */
572 
573 static void
574 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
575 {
576   /* If INSN has its block number recorded, clear it since we may
577      be moving the insn to a new block.  */
578   clear_hashed_info_for_insn (insn);
579 
580   delay_list->safe_push (insn);
581 }
582 
583 /* Delete INSN from the delay slot of the insn that it is in, which may
584    produce an insn with no delay slots.  Return the new insn.  */
585 
586 static rtx_insn *
587 delete_from_delay_slot (rtx_insn *insn)
588 {
589   rtx_insn *trial, *seq_insn, *prev;
590   rtx_sequence *seq;
591   int i;
592   int had_barrier = 0;
593 
594   /* We first must find the insn containing the SEQUENCE with INSN in its
595      delay slot.  Do this by finding an insn, TRIAL, where
596      PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL.  */
597 
598   for (trial = insn;
599        PREV_INSN (NEXT_INSN (trial)) == trial;
600        trial = NEXT_INSN (trial))
601     ;
602 
603   seq_insn = PREV_INSN (NEXT_INSN (trial));
604   seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
605 
606   if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
607     had_barrier = 1;
608 
609   /* Create a delay list consisting of all the insns other than the one
610      we are deleting (unless we were the only one).  */
611   auto_vec<rtx_insn *, 5> delay_list;
612   if (seq->len () > 2)
613     for (i = 1; i < seq->len (); i++)
614       if (seq->insn (i) != insn)
615 	add_to_delay_list (seq->insn (i), &delay_list);
616 
617   /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
618      list, and rebuild the delay list if non-empty.  */
619   prev = PREV_INSN (seq_insn);
620   trial = seq->insn (0);
621   delete_related_insns (seq_insn);
622   add_insn_after (trial, prev, NULL);
623 
624   /* If there was a barrier after the old SEQUENCE, re-emit it.  */
625   if (had_barrier)
626     emit_barrier_after (trial);
627 
628   /* If there are any delay insns, re-emit them.  Otherwise clear the
629      annul flag.  */
630   if (!delay_list.is_empty ())
631     trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
632   else if (JUMP_P (trial))
633     INSN_ANNULLED_BRANCH_P (trial) = 0;
634 
635   INSN_FROM_TARGET_P (insn) = 0;
636 
637   /* Show we need to fill this insn again.  */
638   obstack_ptr_grow (&unfilled_slots_obstack, trial);
639 
640   return trial;
641 }
642 
643 /* Delete INSN, a JUMP_INSN.  If it is a conditional jump, we must track down
644    the insn that sets CC0 for it and delete it too.  */
645 
646 static void
647 delete_scheduled_jump (rtx_insn *insn)
648 {
649   /* Delete the insn that sets cc0 for us.  On machines without cc0, we could
650      delete the insn that sets the condition code, but it is hard to find it.
651      Since this case is rare anyway, don't bother trying; there would likely
652      be other insns that became dead anyway, which we wouldn't know to
653      delete.  */
654 
655   if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
656     {
657       rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
658 
659       /* If a reg-note was found, it points to the insn that sets CC0.  This
660 	 insn is in the delay list of some other insn.  So delete it from
661 	 the delay list it was in.  */
662       if (note)
663 	{
664 	  if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
665 	      && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
666 	    delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
667 	}
668       else
669 	{
670 	  /* The insn setting CC0 is our previous insn, but it may be in
671 	     a delay slot.  It will be the last insn in the delay slot, if
672 	     it is.  */
673 	  rtx_insn *trial = previous_insn (insn);
674 	  if (NOTE_P (trial))
675 	    trial = prev_nonnote_insn (trial);
676 	  if (sets_cc0_p (PATTERN (trial)) != 1
677 	      || FIND_REG_INC_NOTE (trial, NULL_RTX))
678 	    return;
679 	  if (PREV_INSN (NEXT_INSN (trial)) == trial)
680 	    delete_related_insns (trial);
681 	  else
682 	    delete_from_delay_slot (trial);
683 	}
684     }
685 
686   delete_related_insns (insn);
687 }
688 
689 /* Counters for delay-slot filling.  */
690 
691 #define NUM_REORG_FUNCTIONS 2
692 #define MAX_DELAY_HISTOGRAM 3
693 #define MAX_REORG_PASSES 2
694 
695 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
696 
697 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
698 
699 static int reorg_pass_number;
700 
701 static void
702 note_delay_statistics (int slots_filled, int index)
703 {
704   num_insns_needing_delays[index][reorg_pass_number]++;
705   if (slots_filled > MAX_DELAY_HISTOGRAM)
706     slots_filled = MAX_DELAY_HISTOGRAM;
707   num_filled_delays[index][slots_filled][reorg_pass_number]++;
708 }
709 
710 /* Optimize the following cases:
711 
712    1.  When a conditional branch skips over only one instruction,
713        use an annulling branch and put that insn in the delay slot.
714        Either use a branch that annuls when the condition is true, or
715        invert the test and use a branch that annuls when the condition is
716        false.  This saves insns, since otherwise we must copy an insn
717        from the L1 target.
718 
719         (orig)		 (skip)		(otherwise)
720 	Bcc.n L1	Bcc',a L1	Bcc,a L1'
721 	insn		insn		insn2
722       L1:	      L1:	      L1:
723 	insn2		insn2		insn2
724 	insn3		insn3	      L1':
725 					insn3
726 
727    2.  When a conditional branch skips over only one instruction,
728        and after that, it unconditionally branches somewhere else,
729        perform a similar optimization.  This saves executing the
730        second branch in the case where the inverted condition is true.
731 
732 	Bcc.n L1	Bcc',a L2
733 	insn		insn
734       L1:	      L1:
735 	Bra L2		Bra L2
736 
737    INSN is a JUMP_INSN.
738 
739    This should be expanded to skip over N insns, where N is the number
740    of delay slots required.  */
741 
742 static void
743 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
744 {
745   rtx_insn *trial = next_nonnote_insn (insn);
746   rtx_insn *next_trial = next_active_insn (trial);
747   int flags;
748 
749   flags = get_jump_flags (insn, JUMP_LABEL (insn));
750 
751   if (trial == 0
752       || !NONJUMP_INSN_P (trial)
753       || GET_CODE (PATTERN (trial)) == SEQUENCE
754       || recog_memoized (trial) < 0
755       || (! eligible_for_annul_false (insn, 0, trial, flags)
756 	  && ! eligible_for_annul_true (insn, 0, trial, flags))
757       || RTX_FRAME_RELATED_P (trial)
758       || can_throw_internal (trial))
759     return;
760 
761   /* There are two cases where we are just executing one insn (we assume
762      here that a branch requires only one insn; this should be generalized
763      at some point):  Where the branch goes around a single insn or where
764      we have one insn followed by a branch to the same label we branch to.
765      In both of these cases, inverting the jump and annulling the delay
766      slot give the same effect in fewer insns.  */
767   if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
768       || (next_trial != 0
769 	  && simplejump_or_return_p (next_trial)
770 	  && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
771     {
772       if (eligible_for_annul_false (insn, 0, trial, flags))
773 	{
774 	  if (invert_jump (insn, JUMP_LABEL (insn), 1))
775 	    INSN_FROM_TARGET_P (trial) = 1;
776 	  else if (! eligible_for_annul_true (insn, 0, trial, flags))
777 	    return;
778 	}
779 
780       add_to_delay_list (trial, delay_list);
781       next_trial = next_active_insn (trial);
782       update_block (trial, trial);
783       delete_related_insns (trial);
784 
785       /* Also, if we are targeting an unconditional
786 	 branch, thread our jump to the target of that branch.  Don't
787 	 change this into a RETURN here, because it may not accept what
788 	 we have in the delay slot.  We'll fix this up later.  */
789       if (next_trial && simplejump_or_return_p (next_trial))
790 	{
791 	  rtx target_label = JUMP_LABEL (next_trial);
792 	  if (ANY_RETURN_P (target_label))
793 	    target_label = find_end_label (target_label);
794 
795 	  if (target_label)
796 	    {
797 	      /* Recompute the flags based on TARGET_LABEL since threading
798 		 the jump to TARGET_LABEL may change the direction of the
799 		 jump (which may change the circumstances in which the
800 		 delay slot is nullified).  */
801 	      flags = get_jump_flags (insn, target_label);
802 	      if (eligible_for_annul_true (insn, 0, trial, flags))
803 		reorg_redirect_jump (insn, target_label);
804 	    }
805 	}
806 
807       INSN_ANNULLED_BRANCH_P (insn) = 1;
808     }
809 }
810 
811 /*  Encode and return branch direction and prediction information for
812     INSN assuming it will jump to LABEL.
813 
814     Unconditional branches return no direction information and
815     are predicted as very likely taken.  */
816 
817 static int
818 get_jump_flags (const rtx_insn *insn, rtx label)
819 {
820   int flags;
821 
822   /* get_jump_flags can be passed any insn with delay slots; these may
823      be INSNs, CALL_INSNs, or JUMP_INSNs.  Only JUMP_INSNs have branch
824      direction information, and only if they are conditional jumps.
825 
826      If LABEL is a return, then there is no way to determine the branch
827      direction.  */
828   if (JUMP_P (insn)
829       && (condjump_p (insn) || condjump_in_parallel_p (insn))
830       && !ANY_RETURN_P (label)
831       && INSN_UID (insn) <= max_uid
832       && INSN_UID (label) <= max_uid)
833     flags
834       = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
835 	 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
836   /* No valid direction information.  */
837   else
838     flags = 0;
839 
840   return flags;
841 }
842 
843 /* Return an estimate of how likely this branch is to be taken.
844    If we think that the branch is extremely likely
845    to be taken, we return 2.  If the branch is slightly more likely to be
846    taken, return 1.  If the branch is slightly less likely to be taken,
847    return 0 and if the branch is highly unlikely to be taken, return -1.  */
848 
849 static int
850 mostly_true_jump (rtx jump_insn)
851 {
852   /* If branch probabilities are available, then use that number since it
853      always gives a correct answer.  */
854   rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
855   if (note)
856     {
857       int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
858 			.to_reg_br_prob_base ();
859 
860       if (prob >= REG_BR_PROB_BASE * 9 / 10)
861 	return 2;
862       else if (prob >= REG_BR_PROB_BASE / 2)
863 	return 1;
864       else if (prob >= REG_BR_PROB_BASE / 10)
865 	return 0;
866       else
867 	return -1;
868     }
869 
870   /* If there is no note, assume branches are not taken.
871      This should be rare.  */
872   return 0;
873 }
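
/* Worked example of the thresholds above: a REG_BR_PROB note encoding a
   95% taken probability is at least REG_BR_PROB_BASE * 9 / 10, so we
   return 2 (very likely taken); 60% gives 1, 20% gives 0, and 5% gives -1
   (very unlikely taken).  */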
874 
875 /* Return the condition under which INSN will branch to TARGET.  If TARGET
876    is zero, return the condition under which INSN will return.  If INSN is
877    an unconditional branch, return const_true_rtx.  If INSN isn't a simple
878    type of jump, or it doesn't go to TARGET, return 0.  */
879 
880 static rtx
881 get_branch_condition (const rtx_insn *insn, rtx target)
882 {
883   rtx pat = PATTERN (insn);
884   rtx src;
885 
886   if (condjump_in_parallel_p (insn))
887     pat = XVECEXP (pat, 0, 0);
888 
889   if (ANY_RETURN_P (pat) && pat == target)
890     return const_true_rtx;
891 
892   if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
893     return 0;
894 
895   src = SET_SRC (pat);
896   if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
897     return const_true_rtx;
898 
899   else if (GET_CODE (src) == IF_THEN_ELSE
900 	   && XEXP (src, 2) == pc_rtx
901 	   && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
902 		&& label_ref_label (XEXP (src, 1)) == target)
903 	       || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
904     return XEXP (src, 0);
905 
906   else if (GET_CODE (src) == IF_THEN_ELSE
907 	   && XEXP (src, 1) == pc_rtx
908 	   && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
909 		&& label_ref_label (XEXP (src, 2)) == target)
910 	       || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
911     {
912       enum rtx_code rev;
913       rev = reversed_comparison_code (XEXP (src, 0), insn);
914       if (rev != UNKNOWN)
915 	return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
916 			       XEXP (XEXP (src, 0), 0),
917 			       XEXP (XEXP (src, 0), 1));
918     }
919 
920   return 0;
921 }
922 
923 /* Return nonzero if CONDITION is more strict than the condition of
924    INSN, i.e., if INSN will always branch if CONDITION is true.  */
925 
926 static int
927 condition_dominates_p (rtx condition, const rtx_insn *insn)
928 {
929   rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
930   enum rtx_code code = GET_CODE (condition);
931   enum rtx_code other_code;
932 
933   if (rtx_equal_p (condition, other_condition)
934       || other_condition == const_true_rtx)
935     return 1;
936 
937   else if (condition == const_true_rtx || other_condition == 0)
938     return 0;
939 
940   other_code = GET_CODE (other_condition);
941   if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
942       || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
943       || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
944     return 0;
945 
946   return comparison_dominates_p (code, other_code);
947 }
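
/* Example: if INSN branches on (ge (reg X) (reg Y)) and CONDITION is
   (gt (reg X) (reg Y)), then CONDITION dominates, because GT being true
   implies GE is true for the same operands; comparison_dominates_p
   supplies that final step.  */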
948 
949 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
950    any insns already in the delay slot of JUMP.  */
951 
952 static int
953 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
954 {
955   int flags, i;
956   rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
957 
958   /* Make sure all the delay slots of this jump would still
959      be valid after threading the jump.  If they are still
960      valid, then return nonzero.  */
961 
962   flags = get_jump_flags (jump, newlabel);
963   for (i = 1; i < pat->len (); i++)
964     if (! (
965 #if ANNUL_IFFALSE_SLOTS
966 	   (INSN_ANNULLED_BRANCH_P (jump)
967 	    && INSN_FROM_TARGET_P (pat->insn (i)))
968 	   ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
969 #endif
970 #if ANNUL_IFTRUE_SLOTS
971 	   (INSN_ANNULLED_BRANCH_P (jump)
972 	    && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
973 	   ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
974 #endif
975 	   eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
976       break;
977 
978   return (i == pat->len ());
979 }
980 
981 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
982    any insns we wish to place in the delay slot of JUMP.  */
983 
984 static int
985 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
986 				 const vec<rtx_insn *> &delay_list)
987 {
988   /* Make sure all the insns in DELAY_LIST would still be
989      valid after threading the jump.  If they are still
990      valid, then return nonzero.  */
991 
992   int flags = get_jump_flags (jump, newlabel);
993   unsigned int delay_insns = delay_list.length ();
994   unsigned int i = 0;
995   for (; i < delay_insns; i++)
996     if (! (
997 #if ANNUL_IFFALSE_SLOTS
998 	   (INSN_ANNULLED_BRANCH_P (jump)
999 	    && INSN_FROM_TARGET_P (delay_list[i]))
1000 	   ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
1001 #endif
1002 #if ANNUL_IFTRUE_SLOTS
1003 	   (INSN_ANNULLED_BRANCH_P (jump)
1004 	    && ! INSN_FROM_TARGET_P (delay_list[i]))
1005 	   ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
1006 #endif
1007 	   eligible_for_delay (jump, i, delay_list[i], flags)))
1008       break;
1009 
1010   return i == delay_insns;
1011 }
1012 
1013 /* DELAY_LIST is a list of insns that have already been placed into delay
1014    slots.  See if all of them have the same annulling status as ANNUL_TRUE_P.
1015    If not, return 0; otherwise return 1.  */
1016 
1017 static int
1018 check_annul_list_true_false (int annul_true_p,
1019 			     const vec<rtx_insn *> &delay_list)
1020 {
1021   rtx_insn *trial;
1022   unsigned int i;
1023   FOR_EACH_VEC_ELT (delay_list, i, trial)
1024     if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1025 	|| (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1026       return 0;
1027 
1028   return 1;
1029 }
1030 
1031 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE.  Given that
1032    the condition tested by INSN is CONDITION and the resources shown in
1033    OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1034    from SEQ's delay list, in addition to whatever insns it may execute
1035    (in DELAY_LIST).   SETS and NEEDED denote resources already set and
1036    needed while searching for delay slot insns.  On success the stolen
1037    insns are appended to DELAY_LIST; otherwise it is left unchanged.
1038 
1039    SLOTS_TO_FILL is the total number of slots required by INSN, and
1040    PSLOTS_FILLED points to the number filled so far (also the number of
1041    insns in DELAY_LIST).  It is updated with the number that have been
1042    filled from the SEQUENCE, if any.
1043 
1044    PANNUL_P points to a nonzero value if we already know that we need
1045    to annul INSN.  If this routine determines that annulling is needed,
1046    it may set that value nonzero.
1047 
1048    PNEW_THREAD points to a location that is to receive the place at which
1049    execution should continue.  */
1050 
1051 static void
1052 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1053 			      vec<rtx_insn *> *delay_list,
1054 			      struct resources *sets,
1055 			      struct resources *needed,
1056 			      struct resources *other_needed,
1057 			      int slots_to_fill, int *pslots_filled,
1058 			      int *pannul_p, rtx *pnew_thread)
1059 {
1060   int slots_remaining = slots_to_fill - *pslots_filled;
1061   int total_slots_filled = *pslots_filled;
1062   auto_vec<rtx_insn *, 5> new_delay_list;
1063   int must_annul = *pannul_p;
1064   int used_annul = 0;
1065   int i;
1066   struct resources cc_set;
1067   rtx_insn **redundant;
1068 
1069   /* We can't do anything if there are more delay slots in SEQ than we
1070      can handle, or if we don't know that it will be a taken branch.
1071      We know that it will be a taken branch if it is either an unconditional
1072      branch or a conditional branch with a stricter branch condition.
1073 
1074      Also, exit if the branch has more than one set, since then it is computing
1075      other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1076      ??? It may be possible to move other sets into INSN in addition to
1077      moving the instructions in the delay slots.
1078 
1079      We cannot steal the delay list if one of the instructions in the
1080      current delay_list modifies the condition codes and the jump in the
1081      sequence is a conditional jump.  In that case the condition codes
1082      would affect the direction of the jump in the sequence, so we cannot
1083      safely redirect it.  */
1084 
1085   CLEAR_RESOURCE (&cc_set);
1086 
1087   rtx_insn *trial;
1088   FOR_EACH_VEC_ELT (*delay_list, i, trial)
1089     {
1090       mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1091       if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1092 	return;
1093     }
1094 
1095   if (XVECLEN (seq, 0) - 1 > slots_remaining
1096       || ! condition_dominates_p (condition, seq->insn (0))
1097       || ! single_set (seq->insn (0)))
1098     return;
1099 
1100   /* On some targets, branches with delay slots can have a limited
1101      displacement.  Give the back end a chance to tell us we can't do
1102      this.  */
1103   if (! targetm.can_follow_jump (insn, seq->insn (0)))
1104     return;
1105 
1106   redundant = XALLOCAVEC (rtx_insn *, XVECLEN (seq, 0));
1107   for (i = 1; i < seq->len (); i++)
1108     {
1109       rtx_insn *trial = seq->insn (i);
1110       int flags;
1111 
1112       if (insn_references_resource_p (trial, sets, false)
1113 	  || insn_sets_resource_p (trial, needed, false)
1114 	  || insn_sets_resource_p (trial, sets, false)
1115 	  /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1116 	     delay list.  */
1117 	  || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1118 	  /* If TRIAL is from the fallthrough code of an annulled branch insn
1119 	     in SEQ, we cannot use it.  */
1120 	  || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1121 	      && ! INSN_FROM_TARGET_P (trial)))
1122 	return;
1123 
1124       /* If this insn was already done (usually in a previous delay slot),
1125 	 pretend we put it in our delay slot.  */
1126       redundant[i] = redundant_insn (trial, insn, new_delay_list);
1127       if (redundant[i])
1128 	continue;
1129 
1130       /* We will end up re-vectoring this branch, so compute flags
1131 	 based on jumping to the new label.  */
1132       flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1133 
1134       if (! must_annul
1135 	  && ((condition == const_true_rtx
1136 	       || (! insn_sets_resource_p (trial, other_needed, false)
1137 		   && ! may_trap_or_fault_p (PATTERN (trial)))))
1138 	  ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1139 	  : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1140 	     && (must_annul = 1,
1141 		 check_annul_list_true_false (0, *delay_list)
1142 	         && check_annul_list_true_false (0, new_delay_list)
1143 	         && eligible_for_annul_false (insn, total_slots_filled,
1144 					      trial, flags)))
1145 	{
1146 	  if (must_annul)
1147 	    {
1148 	      /* Frame related instructions cannot go into annulled delay
1149 		 slots; it messes up the dwarf info.  */
1150 	      if (RTX_FRAME_RELATED_P (trial))
1151 		return;
1152 	      used_annul = 1;
1153 	    }
1154 	  rtx_insn *temp = copy_delay_slot_insn (trial);
1155 	  INSN_FROM_TARGET_P (temp) = 1;
1156 	  add_to_delay_list (temp, &new_delay_list);
1157 	  total_slots_filled++;
1158 
1159 	  if (--slots_remaining == 0)
1160 	    break;
1161 	}
1162       else
1163 	return;
1164     }
1165 
1166   /* Record the effect of the instructions that were redundant and which
1167      we therefore decided not to copy.  */
1168   for (i = 1; i < seq->len (); i++)
1169     if (redundant[i])
1170       {
1171 	fix_reg_dead_note (redundant[i], insn);
1172 	update_block (seq->insn (i), insn);
1173       }
1174 
1175   /* Show the place to which we will be branching.  */
1176   *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1177 
1178   /* Add any new insns to the delay list and update the count of the
1179      number of slots filled.  */
1180   *pslots_filled = total_slots_filled;
1181   if (used_annul)
1182     *pannul_p = 1;
1183 
1184   rtx_insn *temp;
1185   FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1186     add_to_delay_list (temp, delay_list);
1187 }
1188 
1189 /* Similar to steal_delay_list_from_target except that SEQ is on the
1190    fallthrough path of INSN.  Here we only do something if the delay insn
1191    of SEQ is an unconditional branch.  In that case we steal its delay slot
1192    for INSN since unconditional branches are much easier to fill.  */
1193 
1194 static void
1195 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1196 				   rtx_sequence *seq,
1197 				   vec<rtx_insn *> *delay_list,
1198 				   struct resources *sets,
1199 				   struct resources *needed,
1200 				   struct resources *other_needed,
1201 				   int slots_to_fill, int *pslots_filled,
1202 				   int *pannul_p)
1203 {
1204   int i;
1205   int flags;
1206   int must_annul = *pannul_p;
1207   int used_annul = 0;
1208 
1209   flags = get_jump_flags (insn, JUMP_LABEL (insn));
1210 
1211   /* We can't do anything if SEQ's delay insn isn't an
1212      unconditional branch.  */
1213 
1214   if (! simplejump_or_return_p (seq->insn (0)))
1215     return;
1216 
1217   for (i = 1; i < seq->len (); i++)
1218     {
1219       rtx_insn *trial = seq->insn (i);
1220       rtx_insn *prior_insn;
1221 
1222       /* If TRIAL sets CC0, stealing it will move it too far from the use
1223 	 of CC0.  */
1224       if (insn_references_resource_p (trial, sets, false)
1225 	  || insn_sets_resource_p (trial, needed, false)
1226 	  || insn_sets_resource_p (trial, sets, false)
1227 	  || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1228 
1229 	break;
1230 
1231       /* If this insn was already done, we don't need it.  */
1232       if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
1233 	{
1234 	  fix_reg_dead_note (prior_insn, insn);
1235 	  update_block (trial, insn);
1236 	  delete_from_delay_slot (trial);
1237 	  continue;
1238 	}
1239 
1240       if (! must_annul
1241 	  && ((condition == const_true_rtx
1242 	       || (! insn_sets_resource_p (trial, other_needed, false)
1243 		   && ! may_trap_or_fault_p (PATTERN (trial)))))
1244 	  ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1245 	  : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1246 	     check_annul_list_true_false (1, *delay_list)
1247 	     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1248 	{
1249 	  if (must_annul)
1250 	    used_annul = 1;
1251 	  delete_from_delay_slot (trial);
1252 	  add_to_delay_list (trial, delay_list);
1253 
1254 	  if (++(*pslots_filled) == slots_to_fill)
1255 	    break;
1256 	}
1257       else
1258 	break;
1259     }
1260 
1261   if (used_annul)
1262     *pannul_p = 1;
1263 }
1264 
1265 /* Try merging insns starting at THREAD which match exactly the insns in
1266    INSN's delay list.
1267 
1268    If all insns were matched and the insn was previously annulling, the
1269    annul bit will be cleared.
1270 
1271    For each insn that is merged, if the branch is or will be non-annulling,
1272    we delete the merged insn.  */
1273 
1274 static void
1275 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1276 {
1277   rtx_insn *trial, *next_trial;
1278   rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1279   int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1280   int slot_number = 1;
1281   int num_slots = XVECLEN (PATTERN (insn), 0);
1282   rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1283   struct resources set, needed, modified;
1284   auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1285   int flags;
1286 
1287   flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1288 
1289   CLEAR_RESOURCE (&needed);
1290   CLEAR_RESOURCE (&set);
1291 
1292   /* If this is not an annulling branch, take into account anything needed in
1293      INSN's delay slot.  This prevents two increments from being incorrectly
1294      folded into one.  If we are annulling, this would be the correct
1295      thing to do.  (The alternative, looking at things set in NEXT_TO_MATCH,
1296      will essentially disable this optimization.  This method is somewhat of
1297      a kludge, but I don't see a better way.)  */
1298   if (! annul_p)
1299     for (int i = 1; i < num_slots; i++)
1300       if (XVECEXP (PATTERN (insn), 0, i))
1301 	mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1302 				   true);
1303 
1304   for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1305     {
1306       rtx pat = PATTERN (trial);
1307       rtx oldtrial = trial;
1308 
1309       next_trial = next_nonnote_insn (trial);
1310 
1311       /* TRIAL must be a CALL_INSN or INSN.  Skip USE and CLOBBER.  */
1312       if (NONJUMP_INSN_P (trial)
1313 	  && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1314 	continue;
1315 
1316       if (GET_CODE (next_to_match) == GET_CODE (trial)
1317 	  /* We can't share an insn that sets cc0.  */
1318 	  && (!HAVE_cc0 || ! sets_cc0_p (pat))
1319 	  && ! insn_references_resource_p (trial, &set, true)
1320 	  && ! insn_sets_resource_p (trial, &set, true)
1321 	  && ! insn_sets_resource_p (trial, &needed, true)
1322 	  && (trial = try_split (pat, trial, 0)) != 0
1323 	  /* Update next_trial, in case try_split succeeded.  */
1324 	  && (next_trial = next_nonnote_insn (trial))
1325 	  /* Likewise THREAD.  */
1326 	  && (thread = oldtrial == thread ? trial : thread)
1327 	  && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1328 	  /* Have to test this condition if the annul condition is different
1329 	     from (and less restrictive than) the non-annulling one.  */
1330 	  && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1331 	{
1332 
1333 	  if (! annul_p)
1334 	    {
1335 	      update_block (trial, thread);
1336 	      if (trial == thread)
1337 		thread = next_active_insn (thread);
1338 
1339 	      delete_related_insns (trial);
1340 	      INSN_FROM_TARGET_P (next_to_match) = 0;
1341 	    }
1342 	  else
1343 	    merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1344 
1345 	  if (++slot_number == num_slots)
1346 	    break;
1347 
1348 	  next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1349 	}
1350 
1351       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1352       mark_referenced_resources (trial, &needed, true);
1353     }
1354 
1355   /* See if we stopped on a filled insn.  If we did, try to see if its
1356      delay slots match.  */
1357   if (slot_number != num_slots
1358       && trial && NONJUMP_INSN_P (trial)
1359       && GET_CODE (PATTERN (trial)) == SEQUENCE
1360       && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1361            && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1362     {
1363       rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1364       rtx filled_insn = XVECEXP (pat, 0, 0);
1365 
1366       /* Account for resources set/needed by the filled insn.  */
1367       mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1368       mark_referenced_resources (filled_insn, &needed, true);
1369 
1370       for (int i = 1; i < pat->len (); i++)
1371 	{
1372 	  rtx_insn *dtrial = pat->insn (i);
1373 
1374 	  CLEAR_RESOURCE (&modified);
1375 	  /* Account for resources set by the insns following NEXT_TO_MATCH
1376 	     inside INSN's delay list.  */
1377 	  for (int j = 1; slot_number + j < num_slots; j++)
1378 	    mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1379 				&modified, 0, MARK_SRC_DEST_CALL);
1380 	  /* Account for resources set by the insns before DTRIAL and inside
1381 	     TRIAL's delay list.  */
1382 	  for (int j = 1; j < i; j++)
1383 	    mark_set_resources (XVECEXP (pat, 0, j),
1384 				&modified, 0, MARK_SRC_DEST_CALL);
1385 	  if (! insn_references_resource_p (dtrial, &set, true)
1386 	      && ! insn_sets_resource_p (dtrial, &set, true)
1387 	      && ! insn_sets_resource_p (dtrial, &needed, true)
1388 	      && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1389 	      && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1390 	      /* Check that DTRIAL and NEXT_TO_MATCH do not reference a
1391 	         resource modified between them (only dtrial is checked because
1392 	         next_to_match and dtrial have to be equal in order to reach
1393 	         this point).  */
1394 	      && ! insn_references_resource_p (dtrial, &modified, true)
1395 	      && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1396 	    {
1397 	      if (! annul_p)
1398 		{
1399 		  rtx_insn *new_rtx;
1400 
1401 		  update_block (dtrial, thread);
1402 		  new_rtx = delete_from_delay_slot (dtrial);
1403 	          if (thread->deleted ())
1404 		    thread = new_rtx;
1405 		  INSN_FROM_TARGET_P (next_to_match) = 0;
1406 		}
1407 	      else
1408 		merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1409 								     true));
1410 
1411 	      if (++slot_number == num_slots)
1412 		break;
1413 
1414 	      next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1415 	    }
1416 	  else
1417 	    {
1418 	      /* Keep track of the set/referenced resources for the delay
1419 		 slots of any trial insns we encounter.  */
1420 	      mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1421 	      mark_referenced_resources (dtrial, &needed, true);
1422 	    }
1423 	}
1424     }
1425 
1426   /* If all insns in the delay slots have been matched and we were previously
1427      annulling the branch, we no longer need to annul it.  In that case, delete
1428      all the merged insns.  Also clear the INSN_FROM_TARGET_P bit of each insn
1429      in the delay list so that we know it is no longer used only at the
1430      target.  */
1431   if (slot_number == num_slots && annul_p)
1432     {
1433       unsigned int len = merged_insns.length ();
1434       for (unsigned int i = len - 1; i < len; i--)
1435 	if (merged_insns[i].second)
1436 	  {
1437 	    update_block (merged_insns[i].first, thread);
1438 	    rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1439 	    if (thread->deleted ())
1440 	      thread = new_rtx;
1441 	  }
1442 	else
1443 	  {
1444 	    update_block (merged_insns[i].first, thread);
1445 	    delete_related_insns (merged_insns[i].first);
1446 	  }
1447 
1448       INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1449 
1450       for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1451 	INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1452     }
1453 }
1454 
1455 /* See if INSN is redundant with an insn in front of TARGET.  Often this
1456    is called when INSN is a candidate for a delay slot of TARGET.
1457    DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1458    of INSN.  Often INSN will be redundant with an insn in a delay slot of
1459    some previous insn.  This happens when we have a series of branches to the
1460    same label; in that case the first insn at the target might want to go
1461    into each of the delay slots.
1462 
1463    If we are not careful, this routine can take up a significant fraction
1464    of the total compilation time (4%), but only wins rarely.  Hence we
1465    speed this routine up by making two passes.  The first pass goes back
1466    until it hits a label and sees if it finds an insn with an identical
1467    pattern.  Only in this (relatively rare) event does it check for
1468    data conflicts.
1469 
1470    We do not split insns we encounter.  This could cause us not to find a
1471    redundant insn, but the cost of splitting seems greater than the possible
1472    gain in rare cases.  */
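
/* An illustrative sketch (hypothetical registers and labels, not taken from
   any particular port):

	r2 = r3 + 1	! same computation just before the branch,
	b .L1		!   perhaps left there by filling an earlier branch
	...
     .L1:
	r2 = r3 + 1	! candidate for the delay slot of "b .L1"

   When the insn at .L1 is considered for the delay slot of "b .L1",
   scanning backwards from the branch finds an identical insn with no
   conflicting sets or uses in between, so the candidate is redundant:
   the slot can be treated as filled without emitting another copy.  */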
1473 
1474 static rtx_insn *
1475 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1476 {
1477   rtx target_main = target;
1478   rtx ipat = PATTERN (insn);
1479   rtx_insn *trial;
1480   rtx pat;
1481   struct resources needed, set;
1482   int i;
1483   unsigned insns_to_search;
1484 
1485   /* If INSN has any REG_UNUSED notes, it can't match anything since we
1486      are allowed to not actually assign to such a register.  */
1487   if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1488     return 0;
1489 
1490   /* Scan backwards looking for a match.  */
1491   for (trial = PREV_INSN (target),
1492 	 insns_to_search = param_max_delay_slot_insn_search;
1493        trial && insns_to_search > 0;
1494        trial = PREV_INSN (trial))
1495     {
1496       /* (use (insn))s can come immediately after a barrier if the
1497 	 label that used to precede them has been deleted as dead.
1498 	 See delete_related_insns.  */
1499       if (LABEL_P (trial) || BARRIER_P (trial))
1500 	return 0;
1501 
1502       if (!INSN_P (trial))
1503 	continue;
1504       --insns_to_search;
1505 
1506       pat = PATTERN (trial);
1507       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1508 	continue;
1509 
1510       if (GET_CODE (trial) == DEBUG_INSN)
1511 	continue;
1512 
1513       if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1514 	{
1515 	  /* Stop for a CALL and its delay slots because it is difficult to
1516 	     track its resource needs correctly.  */
1517 	  if (CALL_P (seq->element (0)))
1518 	    return 0;
1519 
1520 	  /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1521 	     slots because it is difficult to track its resource needs
1522 	     correctly.  */
1523 
1524 	  if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1525 	    return 0;
1526 
1527 	  if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1528 	    return 0;
1529 
1530 	  /* See if any of the insns in the delay slot match, updating
1531 	     resource requirements as we go.  */
1532 	  for (i = seq->len () - 1; i > 0; i--)
1533 	    if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1534 		&& rtx_equal_p (PATTERN (seq->element (i)), ipat)
1535 		&& ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1536 	      break;
1537 
1538 	  /* If we found a match, exit this loop early.  */
1539 	  if (i > 0)
1540 	    break;
1541 	}
1542 
1543       else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1544 	       && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1545 	break;
1546     }
1547 
1548   /* If we didn't find an insn that matches, return 0.  */
1549   if (trial == 0)
1550     return 0;
1551 
1552   /* See what resources this insn sets and needs.  If they overlap, or
1553      if this insn references CC0, it can't be redundant.  */
1554 
1555   CLEAR_RESOURCE (&needed);
1556   CLEAR_RESOURCE (&set);
1557   mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1558   mark_referenced_resources (insn, &needed, true);
1559 
1560   /* If TARGET is a SEQUENCE, get the main insn.  */
1561   if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1562     target_main = XVECEXP (PATTERN (target), 0, 0);
1563 
1564   if (resource_conflicts_p (&needed, &set)
1565       || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1566       /* The insn requiring the delay may not set anything needed or set by
1567 	 INSN.  */
1568       || insn_sets_resource_p (target_main, &needed, true)
1569       || insn_sets_resource_p (target_main, &set, true))
1570     return 0;
1571 
1572   /* Insns we pass may not set either NEEDED or SET, so merge them for
1573      simpler tests.  */
1574   needed.memory |= set.memory;
1575   needed.regs |= set.regs;
1576 
1577   /* This insn isn't redundant if it conflicts with an insn that either is
1578      or will be in a delay slot of TARGET.  */
1579 
1580   unsigned int j;
1581   rtx_insn *temp;
1582   FOR_EACH_VEC_ELT (delay_list, j, temp)
1583     if (insn_sets_resource_p (temp, &needed, true))
1584       return 0;
1585 
1586   if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1587     for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1588       if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1589 				true))
1590 	return 0;
1591 
1592   /* Scan backwards until we reach a label or an insn that uses something
1593      INSN sets, or sets something INSN uses or sets.  */
1594 
1595   for (trial = PREV_INSN (target),
1596 	 insns_to_search = param_max_delay_slot_insn_search;
1597        trial && !LABEL_P (trial) && insns_to_search > 0;
1598        trial = PREV_INSN (trial))
1599     {
1600       if (!INSN_P (trial))
1601 	continue;
1602       --insns_to_search;
1603 
1604       pat = PATTERN (trial);
1605       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1606 	continue;
1607 
1608       if (GET_CODE (trial) == DEBUG_INSN)
1609 	continue;
1610 
1611       if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1612 	{
1613 	  bool annul_p = false;
1614           rtx_insn *control = seq->insn (0);
1615 
1616 	  /* If this is a CALL_INSN and its delay slots, it is hard to track
1617 	     the resource needs properly, so give up.  */
1618 	  if (CALL_P (control))
1619 	    return 0;
1620 
1621 	  /* If this is an INSN or JUMP_INSN with delayed effects, it
1622 	     is hard to track the resource needs properly, so give up.  */
1623 
1624 	  if (INSN_SETS_ARE_DELAYED (control))
1625 	    return 0;
1626 
1627 	  if (INSN_REFERENCES_ARE_DELAYED (control))
1628 	    return 0;
1629 
1630 	  if (JUMP_P (control))
1631 	    annul_p = INSN_ANNULLED_BRANCH_P (control);
1632 
1633 	  /* See if any of the insns in the delay slot match, updating
1634 	     resource requirements as we go.  */
1635 	  for (i = seq->len () - 1; i > 0; i--)
1636 	    {
1637 	      rtx_insn *candidate = seq->insn (i);
1638 
1639 	      /* If an insn will be annulled if the branch is false, it isn't
1640 		 considered as a possible duplicate insn.  */
1641 	      if (rtx_equal_p (PATTERN (candidate), ipat)
1642 		  && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1643 		{
1644 		  /* Show that this insn will be used in the sequel.  */
1645 		  INSN_FROM_TARGET_P (candidate) = 0;
1646 		  return candidate;
1647 		}
1648 
1649 	      /* Unless this is an annulled insn from the target of a branch,
1650 		 we must stop if it sets anything needed or set by INSN.  */
1651 	      if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1652 		  && insn_sets_resource_p (candidate, &needed, true))
1653 		return 0;
1654 	    }
1655 
1656 	  /* If the insn requiring the delay slot conflicts with INSN, we
1657 	     must stop.  */
1658 	  if (insn_sets_resource_p (control, &needed, true))
1659 	    return 0;
1660 	}
1661       else
1662 	{
1663 	  /* See if TRIAL is the same as INSN.  */
1664 	  pat = PATTERN (trial);
1665 	  if (rtx_equal_p (pat, ipat))
1666 	    return trial;
1667 
1668 	  /* Can't go any further if TRIAL conflicts with INSN.  */
1669 	  if (insn_sets_resource_p (trial, &needed, true))
1670 	    return 0;
1671 	}
1672     }
1673 
1674   return 0;
1675 }
1676 
1677 /* Return 1 if THREAD can only be executed in one way.  If LABEL is nonzero,
1678    it is the target of the branch insn being scanned.  If ALLOW_FALLTHROUGH
1679    is nonzero, we are allowed to fall into this thread; otherwise, we are
1680    not.
1681 
1682    If LABEL is used more than once, or if we pass a label other than LABEL
1683    before finding an active insn, we do not own this thread.  */
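
/* An illustrative sketch (hypothetical label and registers): in

	beq r2, .L2	! conditional branch whose slot is being filled
	...
	barrier
     .L2:		! LABEL_NUSES (.L2) == 1
	r4 = r5		! first active insn of the thread

   the thread starting at .L2 is owned: the label has a single use, and a
   BARRIER keeps control from falling into it, so insns may be removed from
   the thread once they are moved into the delay slot.  If .L2 had other
   users, or if control could fall into it, the caller could only copy
   insns from the head of the thread.  */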
1684 
1685 static int
1686 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1687 {
1688   rtx_insn *active_insn;
1689   rtx_insn *insn;
1690 
1691   /* We don't own the function end.  */
1692   if (thread == 0 || ANY_RETURN_P (thread))
1693     return 0;
1694 
1695   /* We have a non-NULL insn.  */
1696   rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1697 
1698   /* Get the first active insn, or THREAD_INSN, if it is an active insn.  */
1699   active_insn = next_active_insn (PREV_INSN (thread_insn));
1700 
1701   for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1702     if (LABEL_P (insn)
1703 	&& (insn != label || LABEL_NUSES (insn) != 1))
1704       return 0;
1705 
1706   if (allow_fallthrough)
1707     return 1;
1708 
1709   /* Ensure that we reach a BARRIER before any insn or label.  */
1710   for (insn = prev_nonnote_insn (thread_insn);
1711        insn == 0 || !BARRIER_P (insn);
1712        insn = prev_nonnote_insn (insn))
1713     if (insn == 0
1714 	|| LABEL_P (insn)
1715 	|| (NONJUMP_INSN_P (insn)
1716 	    && GET_CODE (PATTERN (insn)) != USE
1717 	    && GET_CODE (PATTERN (insn)) != CLOBBER))
1718       return 0;
1719 
1720   return 1;
1721 }
1722 
1723 /* Called when INSN is being moved from a location near the target of a jump.
1724    We leave a marker of the form (use (INSN)) immediately in front of WHERE
1725    for mark_target_live_regs.  These markers will be deleted at the end.
1726 
1727    We used to try to update the live status of registers if WHERE is at
1728    the start of a basic block, but that can't work since we may remove a
1729    BARRIER in relax_delay_slots.  */
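
/* An illustrative sketch (hypothetical registers): if the insn
   "(set (reg r4) (reg r5))" is moved away from the start of the thread at
   WHERE, we emit

	(use (insn ... (set (reg r4) (reg r5)) ...))

   immediately before WHERE, so mark_target_live_regs can still account for
   the moved insn's resources at this point; the markers are removed again
   at the end of the pass.  */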
1730 
1731 static void
1732 update_block (rtx_insn *insn, rtx_insn *where)
1733 {
1734   emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1735 
1736   /* INSN might be making a value live in a block where it wasn't live
1737      before.  So recompute liveness information for this block.  */
1738   incr_ticks_for_insn (insn);
1739 }
1740 
1741 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1742    the basic block containing the jump.  */
1743 
1744 static int
1745 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1746 {
1747   incr_ticks_for_insn (jump);
1748   return redirect_jump (jump, nlabel, 1);
1749 }
1750 
1751 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1752    We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1753    that reference values used in INSN.  If we find one, then we move the
1754    REG_DEAD note to INSN.
1755 
1756    This is needed to handle the case where a later insn (after INSN) has a
1757    REG_DEAD note for a register used by INSN, and this later insn subsequently
1758    gets moved before a CODE_LABEL because it is a redundant insn.  In this
1759    case, mark_target_live_regs may be confused into thinking the register
1760    is dead because it sees a REG_DEAD note immediately before a CODE_LABEL.  */
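
/* An illustrative sketch (hypothetical registers): INSN uses r3 and is
   being moved into the delay slot of DELAYED_INSN, while an insn P in
   between carries a REG_DEAD note for r3:

	INSN:		r4 = r3 + 1
	P:		...		(REG_DEAD r3)
	DELAYED_INSN:	b .L1

   The note is moved from P to INSN so that, if P is later placed just
   before a CODE_LABEL (e.g. as a redundant insn), mark_target_live_regs
   is not misled into treating r3 as dead at that label.  */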
1761 
1762 static void
1763 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1764 {
1765   rtx link, next;
1766   rtx_insn *p;
1767 
1768   for (p = next_nonnote_insn (insn); p != delayed_insn;
1769        p = next_nonnote_insn (p))
1770     for (link = REG_NOTES (p); link; link = next)
1771       {
1772 	next = XEXP (link, 1);
1773 
1774 	if (REG_NOTE_KIND (link) != REG_DEAD
1775 	    || !REG_P (XEXP (link, 0)))
1776 	  continue;
1777 
1778 	if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1779 	  {
1780 	    /* Move the REG_DEAD note from P to INSN.  */
1781 	    remove_note (p, link);
1782 	    XEXP (link, 1) = REG_NOTES (insn);
1783 	    REG_NOTES (insn) = link;
1784 	  }
1785       }
1786 }
1787 
1788 /* Called when an insn redundant with start_insn is deleted.  If there
1789    is a REG_DEAD note for the target of start_insn between start_insn
1790    and stop_insn, then the REG_DEAD note needs to be deleted since the
1791    value no longer dies there.
1792 
1793    If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1794    confused into thinking the register is dead.  */
1795 
1796 static void
1797 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1798 {
1799   rtx link, next;
1800   rtx_insn *p;
1801 
1802   for (p = next_nonnote_insn (start_insn); p != stop_insn;
1803        p = next_nonnote_insn (p))
1804     for (link = REG_NOTES (p); link; link = next)
1805       {
1806 	next = XEXP (link, 1);
1807 
1808 	if (REG_NOTE_KIND (link) != REG_DEAD
1809 	    || !REG_P (XEXP (link, 0)))
1810 	  continue;
1811 
1812 	if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1813 	  {
1814 	    remove_note (p, link);
1815 	    return;
1816 	  }
1817       }
1818 }
1819 
1820 /* Delete any REG_UNUSED notes that exist on INSN but not on OTHER_INSN.
1821 
1822    This handles the case of udivmodXi4 instructions which optimize their
1823    output depending on whether any REG_UNUSED notes are present.  We must
1824    make sure that INSN calculates as many results as OTHER_INSN does.  */
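
/* An illustrative sketch (hypothetical registers): INSN is redundant with
   OTHER_INSN, but only INSN carries a REG_UNUSED note for the remainder
   register of a divmod-style pattern:

	INSN:		r4 = r2 / r3	(REG_UNUSED r5)
	OTHER_INSN:	r4 = r2 / r3	! r5 (the remainder) is live here

   Leaving the note on INSN could let the pattern skip computing r5, so any
   REG_UNUSED note present on INSN but not on OTHER_INSN is removed.  */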
1825 
1826 static void
1827 update_reg_unused_notes (rtx_insn *insn, rtx other_insn)
1828 {
1829   rtx link, next;
1830 
1831   for (link = REG_NOTES (insn); link; link = next)
1832     {
1833       next = XEXP (link, 1);
1834 
1835       if (REG_NOTE_KIND (link) != REG_UNUSED
1836 	  || !REG_P (XEXP (link, 0)))
1837 	continue;
1838 
1839       if (!find_regno_note (other_insn, REG_UNUSED, REGNO (XEXP (link, 0))))
1840 	remove_note (insn, link);
1841     }
1842 }
1843 
1844 static vec <rtx> sibling_labels;
1845 
1846 /* Return the label before INSN, or put a new label there.  If SIBLING is
1847    non-zero, it is another label associated with the new label (if any),
1848    typically the former target of the jump that will be redirected to
1849    the new label.  */
1850 
1851 static rtx_insn *
1852 get_label_before (rtx_insn *insn, rtx sibling)
1853 {
1854   rtx_insn *label;
1855 
1856   /* Find an existing label at this point
1857      or make a new one if there is none.  */
1858   label = prev_nonnote_insn (insn);
1859 
1860   if (label == 0 || !LABEL_P (label))
1861     {
1862       rtx_insn *prev = PREV_INSN (insn);
1863 
1864       label = gen_label_rtx ();
1865       emit_label_after (label, prev);
1866       LABEL_NUSES (label) = 0;
1867       if (sibling)
1868 	{
1869 	  sibling_labels.safe_push (label);
1870 	  sibling_labels.safe_push (sibling);
1871 	}
1872     }
1873   return label;
1874 }
1875 
1876 /* Scan a function looking for insns that need a delay slot and find insns to
1877    put into the delay slot.
1878 
1879    NON_JUMPS_P is nonzero if we should try to fill only non-jump insns (such
1880    as calls).  We do these first since we don't want jump insns (which are
1881    easier to fill) to get the only insns that could be used for non-jump insns.
1882    When it is zero, only try to fill JUMP_INSNs.
1883 
1884    When slots are filled in this manner, the insns (including the
1885    delay_insn) are put together in a SEQUENCE rtx.  In this fashion,
1886    it is possible to tell whether a delay slot has really been filled
1887    or not.  `final' knows how to deal with this, by communicating
1888    through FINAL_SEQUENCE.  */
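
/* An illustrative sketch of the resulting rtl (hypothetical insn numbers
   and operands), for a target with a single delay slot: filling the slot
   of a branch with the preceding add turns

	(insn 10 ... (set (reg r4) (plus (reg r4) (const_int 1))))
	(jump_insn 11 ... (set (pc) (label_ref .L1)))

   into, roughly, a single insn whose pattern is a SEQUENCE:

	(insn 12 ... (sequence [
	    (jump_insn 11 ... (set (pc) (label_ref .L1)))
	    (insn 10 ... (set (reg r4) (plus (reg r4) (const_int 1))))
	]))

   Element 0 is the insn needing the delay slot and the slot insns follow,
   which is how `final' (via FINAL_SEQUENCE) tells a filled slot from an
   empty one.  */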
1889 
1890 static void
1891 fill_simple_delay_slots (int non_jumps_p)
1892 {
1893   rtx_insn *insn, *trial, *next_trial;
1894   rtx pat;
1895   int i;
1896   int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1897   struct resources needed, set;
1898   int slots_to_fill, slots_filled;
1899   auto_vec<rtx_insn *, 5> delay_list;
1900 
1901   for (i = 0; i < num_unfilled_slots; i++)
1902     {
1903       int flags;
1904       /* Get the next insn to fill.  If it has already had any slots assigned,
1905 	 we can't do anything with it.  Maybe we'll improve this later.  */
1906 
1907       insn = unfilled_slots_base[i];
1908       if (insn == 0
1909 	  || insn->deleted ()
1910 	  || (NONJUMP_INSN_P (insn)
1911 	      && GET_CODE (PATTERN (insn)) == SEQUENCE)
1912 	  || (JUMP_P (insn) && non_jumps_p)
1913 	  || (!JUMP_P (insn) && ! non_jumps_p))
1914 	continue;
1915 
1916       /* It may have been that this insn used to need delay slots, but
1917 	 now doesn't; ignore in that case.  This can happen, for example,
1918 	 on the HP PA RISC, where the number of delay slots depends on
1919 	 what insns are nearby.  */
1920       slots_to_fill = num_delay_slots (insn);
1921 
1922       /* Some machine descriptions define instructions as having delay slots
1923 	 only in certain circumstances, which may depend on nearby insns
1924 	 (and these change due to reorg's actions).
1925 
1926 	 For example, the PA port normally has delay slots for unconditional
1927 	 jumps.
1928 
1929 	 However, the PA port claims such jumps do not have a delay slot
1930 	 if they are immediate successors of certain CALL_INSNs.  This
1931 	 allows the port to favor filling the delay slot of the call with
1932 	 the unconditional jump.  */
1933       if (slots_to_fill == 0)
1934 	continue;
1935 
1936       /* This insn needs, or can use, some delay slots.  SLOTS_TO_FILL
1937 	 says how many.  After initialization, first try optimizing
1938 
1939 	 call _foo		call _foo
1940 	 nop			add %o7,.-L1,%o7
1941 	 b,a L1
1942 	 nop
1943 
1944 	 If this case applies, the delay slot of the call is filled with
1945 	 the unconditional jump.  This is done first to avoid having the
1946 	 delay slot of the call filled in the backward scan.  Also, since
1947 	 the unconditional jump is likely to also have a delay slot, that
1948 	 insn must exist when it is subsequently scanned.
1949 
1950 	 This is tried on each insn with delay slots as some machines
1951 	 have insns which perform calls, but are not represented as
1952 	 CALL_INSNs.  */
1953 
1954       slots_filled = 0;
1955       delay_list.truncate (0);
1956 
1957       if (JUMP_P (insn))
1958 	flags = get_jump_flags (insn, JUMP_LABEL (insn));
1959       else
1960 	flags = get_jump_flags (insn, NULL_RTX);
1961 
1962       if ((trial = next_active_insn (insn))
1963 	  && JUMP_P (trial)
1964 	  && simplejump_p (trial)
1965 	  && eligible_for_delay (insn, slots_filled, trial, flags)
1966 	  && no_labels_between_p (insn, trial)
1967 	  && ! can_throw_internal (trial))
1968 	{
1969 	  rtx_insn **tmp;
1970 	  slots_filled++;
1971 	  add_to_delay_list (trial, &delay_list);
1972 
1973 	  /* TRIAL may have had its delay slot filled, then unfilled.  When
1974 	     the delay slot is unfilled, TRIAL is placed back on the unfilled
1975 	     slots obstack.  Unfortunately, it is placed on the end of the
1976 	     obstack, not in its original location.  Therefore, we must search
1977 	     from entry i + 1 to the end of the unfilled slots obstack to
1978 	     try and find TRIAL.  */
1979 	  tmp = &unfilled_slots_base[i + 1];
1980 	  while (*tmp != trial && tmp != unfilled_slots_next)
1981 	    tmp++;
1982 
1983 	  /* Remove the unconditional jump from consideration for delay slot
1984 	     filling and unthread it.  */
1985 	  if (*tmp == trial)
1986 	    *tmp = 0;
1987 	  {
1988 	    rtx_insn *next = NEXT_INSN (trial);
1989 	    rtx_insn *prev = PREV_INSN (trial);
1990 	    if (prev)
1991 	      SET_NEXT_INSN (prev) = next;
1992 	    if (next)
1993 	      SET_PREV_INSN (next) = prev;
1994 	  }
1995 	}
1996 
1997       /* Now, scan backwards from the insn to search for a potential
1998 	 delay-slot candidate.  Stop searching when a label or jump is hit.
1999 
2000 	 For each candidate, if it is to go into the delay slot (moved
2001 	 forward in execution sequence), it must not need or set any resources
2002 	 that were set by later insns and must not set any resources that
2003 	 are needed for those insns.
2004 
2005 	 The delay slot insn itself sets resources unless it is a call
2006 	 (in which case the called routine, not the insn itself, is doing
2007 	 the setting).  */
2008 
2009       if (slots_filled < slots_to_fill)
2010 	{
2011 	  /* If the flags register is dead after the insn, then we want to be
2012 	     able to accept a candidate that clobbers it.  For this purpose,
2013 	     we need to filter the flags register during life analysis, so
2014 	     that it doesn't create RAW and WAW dependencies, while still
2015 	     creating the necessary WAR dependencies.  */
2016 	  bool filter_flags
2017 	    = (slots_to_fill == 1
2018 	       && targetm.flags_regnum != INVALID_REGNUM
2019 	       && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2020 	  struct resources fset;
2021 	  CLEAR_RESOURCE (&needed);
2022 	  CLEAR_RESOURCE (&set);
2023 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2024 	  if (filter_flags)
2025 	    {
2026 	      CLEAR_RESOURCE (&fset);
2027 	      mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2028 	    }
2029 	  mark_referenced_resources (insn, &needed, false);
2030 
2031 	  for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2032 	       trial = next_trial)
2033 	    {
2034 	      next_trial = prev_nonnote_insn (trial);
2035 
2036 	      /* This must be an INSN or CALL_INSN.  */
2037 	      pat = PATTERN (trial);
2038 
2039 	      /* Stand-alone USE and CLOBBER are just for flow.  */
2040 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2041 		continue;
2042 
2043 	      /* And DEBUG_INSNs never go into delay slots.  */
2044 	      if (GET_CODE (trial) == DEBUG_INSN)
2045 		continue;
2046 
2047 	      /* Check for resource conflict first, to avoid unnecessary
2048 		 splitting.  */
2049 	      if (! insn_references_resource_p (trial, &set, true)
2050 		  && ! insn_sets_resource_p (trial,
2051 					     filter_flags ? &fset : &set,
2052 					     true)
2053 		  && ! insn_sets_resource_p (trial, &needed, true)
2054 		  /* Can't separate set of cc0 from its use.  */
2055 		  && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2056 		  && ! can_throw_internal (trial))
2057 		{
2058 		  trial = try_split (pat, trial, 1);
2059 		  next_trial = prev_nonnote_insn (trial);
2060 		  if (eligible_for_delay (insn, slots_filled, trial, flags))
2061 		    {
2062 		      /* In this case, we are searching backward, so if we
2063 			 find insns to put on the delay list, we want
2064 			 to put them at the head, rather than the
2065 			 tail, of the list.  */
2066 
2067 		      update_reg_dead_notes (trial, insn);
2068 		      delay_list.safe_insert (0, trial);
2069 		      update_block (trial, trial);
2070 		      delete_related_insns (trial);
2071 		      if (slots_to_fill == ++slots_filled)
2072 			break;
2073 		      continue;
2074 		    }
2075 		}
2076 
2077 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2078 	      if (filter_flags)
2079 		{
2080 		  mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2081 		  /* If the flags register is set, then it doesn't create RAW
2082 		     dependencies any longer and it also doesn't create WAW
2083 		     dependencies since it's dead after the original insn.  */
2084 		  if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2085 		    {
2086 		      CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2087 		      CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2088 		    }
2089 		}
2090 	      mark_referenced_resources (trial, &needed, true);
2091 	    }
2092 	}
2093 
2094       /* If we haven't filled all the needed slots, we come here.  */
2095 
2096       /* Try to optimize case of jumping around a single insn.  */
2097       if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2098 	  && slots_filled != slots_to_fill
2099 	  && delay_list.is_empty ()
2100 	  && JUMP_P (insn)
2101 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
2102 	  && !ANY_RETURN_P (JUMP_LABEL (insn)))
2103 	{
2104 	  optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2105 	  if (!delay_list.is_empty ())
2106 	    slots_filled += 1;
2107 	}
2108 
2109       /* Try to get insns from beyond the insn needing the delay slot.
2110 	 These insns can neither set nor reference resources set by the insns
2111 	 being skipped, cannot set resources needed by them, and, if this
2112 	 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2113 	 call might not return).
2114 
2115 	 There used to be code which continued past the target label if
2116 	 we saw all uses of the target label.  This code did not work,
2117 	 because it failed to account for some instructions which were
2118 	 both annulled and marked as from the target.  This can happen as a
2119 	 result of optimize_skip.  Since this code was redundant with
2120 	 fill_eager_delay_slots anyways, it was just deleted.  */
2121 
2122       if (slots_filled != slots_to_fill
2123 	  /* If this instruction could throw an exception which is
2124 	     caught in the same function, then it's not safe to fill
2125 	     the delay slot with an instruction from beyond this
2126 	     point.  For example, consider:
2127 
2128                int i = 2;
2129 
2130 	       try {
2131                  f();
2132 	         i = 3;
2133                } catch (...) {}
2134 
2135                return i;
2136 
2137 	     Even though `i' is a local variable, we must be sure not
2138 	     to put `i = 3' in the delay slot if `f' might throw an
2139 	     exception.
2140 
2141 	     Presumably, we should also check to see if we could get
2142 	     back to this function via `setjmp'.  */
2143 	  && ! can_throw_internal (insn)
2144 	  && !JUMP_P (insn))
2145 	{
2146 	  int maybe_never = 0;
2147 	  rtx pat, trial_delay;
2148 
2149 	  CLEAR_RESOURCE (&needed);
2150 	  CLEAR_RESOURCE (&set);
2151 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2152 	  mark_referenced_resources (insn, &needed, true);
2153 
2154 	  if (CALL_P (insn))
2155 	    maybe_never = 1;
2156 
2157 	  for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2158 	       trial = next_trial)
2159 	    {
2160 	      next_trial = next_nonnote_insn (trial);
2161 
2162 	      /* This must be an INSN or CALL_INSN.  */
2163 	      pat = PATTERN (trial);
2164 
2165 	      /* Stand-alone USE and CLOBBER are just for flow.  */
2166 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2167 		continue;
2168 
2169 	      /* And DEBUG_INSNs do not go in delay slots.  */
2170 	      if (GET_CODE (trial) == DEBUG_INSN)
2171 		continue;
2172 
2173 	      /* If this already has filled delay slots, get the insn needing
2174 		 the delay slots.  */
2175 	      if (GET_CODE (pat) == SEQUENCE)
2176 		trial_delay = XVECEXP (pat, 0, 0);
2177 	      else
2178 		trial_delay = trial;
2179 
2180 	      /* Stop our search when seeing a jump.  */
2181 	      if (JUMP_P (trial_delay))
2182 		break;
2183 
2184 	      /* See if we have a resource problem before we try to split.  */
2185 	      if (GET_CODE (pat) != SEQUENCE
2186 		  && ! insn_references_resource_p (trial, &set, true)
2187 		  && ! insn_sets_resource_p (trial, &set, true)
2188 		  && ! insn_sets_resource_p (trial, &needed, true)
2189 		  && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2190 		  && ! (maybe_never && may_trap_or_fault_p (pat))
2191 		  && (trial = try_split (pat, trial, 0))
2192 		  && eligible_for_delay (insn, slots_filled, trial, flags)
2193 		  && ! can_throw_internal (trial))
2194 		{
2195 		  next_trial = next_nonnote_insn (trial);
2196 		  add_to_delay_list (trial, &delay_list);
2197 		  if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2198 		    link_cc0_insns (trial);
2199 
2200 		  delete_related_insns (trial);
2201 		  if (slots_to_fill == ++slots_filled)
2202 		    break;
2203 		  continue;
2204 		}
2205 
2206 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2207 	      mark_referenced_resources (trial, &needed, true);
2208 
2209 	      /* Ensure we don't put insns between the setting of cc and the
2210 		 comparison by moving a setting of cc into an earlier delay
2211 		 slot since these insns could clobber the condition code.  */
2212 	      set.cc = 1;
2213 
2214 	      /* If this is a call, we might not get here.  */
2215 	      if (CALL_P (trial_delay))
2216 		maybe_never = 1;
2217 	    }
2218 
2219 	  /* If there are slots left to fill and our search was stopped by an
2220 	     unconditional branch, try the insn at the branch target.  We can
2221 	     redirect the branch if it works.
2222 
2223 	     Don't do this if the insn at the branch target is a branch.  */
2224 	  if (slots_to_fill != slots_filled
2225 	      && trial
2226 	      && jump_to_label_p (trial)
2227 	      && simplejump_p (trial)
2228 	      && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2229 	      && ! (NONJUMP_INSN_P (next_trial)
2230 		    && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2231 	      && !JUMP_P (next_trial)
2232 	      && ! insn_references_resource_p (next_trial, &set, true)
2233 	      && ! insn_sets_resource_p (next_trial, &set, true)
2234 	      && ! insn_sets_resource_p (next_trial, &needed, true)
2235 	      && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2236 	      && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2237 	      && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2238 	      && eligible_for_delay (insn, slots_filled, next_trial, flags)
2239 	      && ! can_throw_internal (trial))
2240 	    {
2241 	      /* See comment in relax_delay_slots about necessity of using
2242 		 next_real_nondebug_insn here.  */
2243 	      rtx_insn *new_label = next_real_nondebug_insn (next_trial);
2244 
2245 	      if (new_label != 0)
2246 		new_label = get_label_before (new_label, JUMP_LABEL (trial));
2247 	      else
2248 		new_label = find_end_label (simple_return_rtx);
2249 
2250 	      if (new_label)
2251 	        {
2252 		  add_to_delay_list (copy_delay_slot_insn (next_trial),
2253 				     &delay_list);
2254 		  slots_filled++;
2255 		  reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2256 				       new_label);
2257 		}
2258 	    }
2259 	}
2260 
2261       /* If this is an unconditional jump, then try to get insns from the
2262 	 target of the jump.  */
2263       rtx_jump_insn *jump_insn;
2264       if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2265 	  && simplejump_p (jump_insn)
2266 	  && slots_filled != slots_to_fill)
2267 	fill_slots_from_thread (jump_insn, const_true_rtx,
2268 				next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2269 				NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2270 						 JUMP_LABEL (insn), 0),
2271 				slots_to_fill, &slots_filled, &delay_list);
2272 
2273       if (!delay_list.is_empty ())
2274 	unfilled_slots_base[i]
2275 	  = emit_delay_sequence (insn, delay_list, slots_filled);
2276 
2277       if (slots_to_fill == slots_filled)
2278 	unfilled_slots_base[i] = 0;
2279 
2280       note_delay_statistics (slots_filled, 0);
2281     }
2282 }
2283 
2284 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2285    return the ultimate label reached by any such chain of jumps.
2286    Return a suitable return rtx if the chain ultimately leads to a
2287    return instruction.
2288    If LABEL is not followed by a jump, return LABEL.
2289    If the chain loops or we can't find the end, return LABEL,
2290    since that tells the caller to avoid changing the insn.
2291    If the returned label is obtained by following a crossing jump,
2292    set *CROSSING to true, otherwise set it to false.  */
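
/* An illustrative sketch (hypothetical labels):

     .L1:	b .L2
		barrier
     .L2:	b .L3
		barrier
     .L3:	r4 = r5

   Here following the chain from .L1 yields .L3, letting the caller
   redirect JUMP straight to the final destination, provided the target
   allows following each jump.  The chain is followed at most 10 levels
   deep, stops at tablejumps and returns, and gives back the original
   LABEL if it loops.  */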
2293 
2294 static rtx
2295 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2296 {
2297   rtx_insn *insn;
2298   rtx_insn *next;
2299   int depth;
2300 
2301   *crossing = false;
2302   if (ANY_RETURN_P (label))
2303     return label;
2304 
2305   rtx_insn *value = as_a <rtx_insn *> (label);
2306 
2307   for (depth = 0;
2308        (depth < 10
2309 	&& (insn = next_active_insn (value)) != 0
2310 	&& JUMP_P (insn)
2311 	&& JUMP_LABEL (insn) != NULL_RTX
2312 	&& ((any_uncondjump_p (insn) && onlyjump_p (insn))
2313 	    || ANY_RETURN_P (PATTERN (insn)))
2314 	&& (next = NEXT_INSN (insn))
2315 	&& BARRIER_P (next));
2316        depth++)
2317     {
2318       rtx this_label_or_return = JUMP_LABEL (insn);
2319 
2320       /* If we have found a cycle, make the insn jump to itself.  */
2321       if (this_label_or_return == label)
2322 	return label;
2323 
2324       /* Cannot follow returns and cannot look through tablejumps.  */
2325       if (ANY_RETURN_P (this_label_or_return))
2326 	return this_label_or_return;
2327 
2328       rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2329       if (NEXT_INSN (this_label)
2330 	  && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2331 	break;
2332 
2333       if (!targetm.can_follow_jump (jump, insn))
2334 	break;
2335       if (!*crossing)
2336 	*crossing = CROSSING_JUMP_P (jump);
2337       value = this_label;
2338     }
2339   if (depth == 10)
2340     return label;
2341   return value;
2342 }
2343 
2344 /* Try to find insns to place in delay slots.
2345 
2346    INSN is the jump needing SLOTS_TO_FILL delay slots.  It tests CONDITION
2347    or is an unconditional branch if CONDITION is const_true_rtx.
2348    *PSLOTS_FILLED is updated with the number of slots that we have filled.
2349 
2350    THREAD is a flow-of-control, either the insns to be executed if the
2351    branch is true or if the branch is false, THREAD_IF_TRUE says which.
2352 
2353    OPPOSITE_THREAD is the thread in the opposite direction.  It is used
2354    to see if any potential delay slot insns set things needed there.
2355 
2356    LIKELY is nonzero if it is extremely likely that the branch will be
2357    taken and THREAD_IF_TRUE is set.  This is used for the branch at the
2358    end of a loop back up to the top.
2359 
2360    OWN_THREAD is true if we are the only user of the thread, i.e. it is
2361    the target of the jump when we are the only jump going there.
2362 
2363    If OWN_THREAD is false, it must be the "true" thread of a jump.  In that
2364    case, we can only take insns from the head of the thread for our delay
2365    slot.  We then adjust the jump to point after the insns we have taken.  */
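
/* An illustrative sketch of the two threads for a conditional branch
   (hypothetical registers and label):

	beq r2, .L1	! insn whose slots are being filled
	r4 = r4 + 1	! fall-through thread (THREAD_IF_TRUE == 0)
	...
     .L1:
	r5 = r5 - 1	! target thread (THREAD_IF_TRUE != 0)

   When filling from the target thread, the fall-through insns form
   OPPOSITE_THREAD: a candidate such as "r5 = r5 - 1" may be taken only if
   it does not set anything OPPOSITE_THREAD needs and cannot trap, or if
   it can be placed in an annulling delay slot so it has no effect when
   the branch falls through.  */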
2366 
2367 static void
2368 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2369 			rtx thread_or_return, rtx opposite_thread, int likely,
2370 			int thread_if_true, int own_thread, int slots_to_fill,
2371 			int *pslots_filled, vec<rtx_insn *> *delay_list)
2372 {
2373   rtx new_thread;
2374   struct resources opposite_needed, set, needed;
2375   rtx_insn *trial;
2376   int lose = 0;
2377   int must_annul = 0;
2378   int flags;
2379 
2380   /* Validate our arguments.  */
2381   gcc_assert (condition != const_true_rtx || thread_if_true);
2382   gcc_assert (own_thread || thread_if_true);
2383 
2384   flags = get_jump_flags (insn, JUMP_LABEL (insn));
2385 
2386   /* If our thread is the end of the subroutine, we can't get any delay
2387      insns from that.  */
2388   if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2389     return;
2390 
2391   rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2392 
2393   /* If this is an unconditional branch, nothing is needed at the
2394      opposite thread.  Otherwise, compute what is needed there.  */
2395   if (condition == const_true_rtx)
2396     CLEAR_RESOURCE (&opposite_needed);
2397   else
2398     mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2399 
2400   /* If the insn at THREAD can be split, do it here to avoid having to
2401      update THREAD and NEW_THREAD if it is done in the loop below.  Also
2402      initialize NEW_THREAD.  */
2403 
2404   new_thread = thread = try_split (PATTERN (thread), thread, 0);
2405 
2406   /* Scan insns at THREAD.  We are looking for an insn that can be removed
2407      from THREAD (it neither sets nor references resources that were set
2408      ahead of it and it doesn't set anything needed by the insns ahead of
2409      it) and that can either be placed in an annulling insn or isn't
2410      needed at OPPOSITE_THREAD.  */
2411 
2412   CLEAR_RESOURCE (&needed);
2413   CLEAR_RESOURCE (&set);
2414 
2415   /* If we do not own this thread, we must stop as soon as we find
2416      something that we can't put in a delay slot, since all we can do
2417      is branch into THREAD at a later point.  Therefore, labels stop
2418      the search if this is not the `true' thread.  */
2419 
2420   for (trial = thread;
2421        ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2422        trial = next_nonnote_insn (trial))
2423     {
2424       rtx pat, old_trial;
2425 
2426       /* If we have passed a label, we no longer own this thread.  */
2427       if (LABEL_P (trial))
2428 	{
2429 	  own_thread = 0;
2430 	  continue;
2431 	}
2432 
2433       pat = PATTERN (trial);
2434       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2435 	continue;
2436 
2437       if (GET_CODE (trial) == DEBUG_INSN)
2438 	continue;
2439 
2440       /* If TRIAL conflicts with the insns ahead of it, we lose.  Also,
2441 	 don't separate or copy insns that set and use CC0.  */
2442       if (! insn_references_resource_p (trial, &set, true)
2443 	  && ! insn_sets_resource_p (trial, &set, true)
2444 	  && ! insn_sets_resource_p (trial, &needed, true)
2445 	  && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2446 			      && (! own_thread || ! sets_cc0_p (pat)))))
2447 	  && ! can_throw_internal (trial))
2448 	{
2449 	  rtx_insn *prior_insn;
2450 
2451 	  /* If TRIAL is redundant with some insn before INSN, we don't
2452 	     actually need to add it to the delay list; we can merely pretend
2453 	     we did.  */
2454 	  if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2455 	    {
2456 	      fix_reg_dead_note (prior_insn, insn);
2457 	      if (own_thread)
2458 		{
2459 		  update_block (trial, thread);
2460 		  if (trial == thread)
2461 		    {
2462 		      thread = next_active_insn (thread);
2463 		      if (new_thread == trial)
2464 			new_thread = thread;
2465 		    }
2466 
2467 		  delete_related_insns (trial);
2468 		}
2469 	      else
2470 		{
2471 		  update_reg_unused_notes (prior_insn, trial);
2472 		  new_thread = next_active_insn (trial);
2473 		}
2474 
2475 	      continue;
2476 	    }
2477 
2478 	  /* There are two ways we can win:  If TRIAL doesn't set anything
2479 	     needed at the opposite thread and can't trap, or if it can
2480 	     go into an annulled delay slot.  But we want neither to copy
2481 	     nor to speculate frame-related insns.  */
2482 	  if (!must_annul
2483 	      && ((condition == const_true_rtx
2484 		   && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2485 	          || (! insn_sets_resource_p (trial, &opposite_needed, true)
2486 		      && ! may_trap_or_fault_p (pat)
2487 		      && ! RTX_FRAME_RELATED_P (trial))))
2488 	    {
2489 	      old_trial = trial;
2490 	      trial = try_split (pat, trial, 0);
2491 	      if (new_thread == old_trial)
2492 		new_thread = trial;
2493 	      if (thread == old_trial)
2494 		thread = trial;
2495 	      pat = PATTERN (trial);
2496 	      if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2497 		goto winner;
2498 	    }
2499 	  else if (!RTX_FRAME_RELATED_P (trial)
2500 		   && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2501 		        || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2502 	    {
2503 	      old_trial = trial;
2504 	      trial = try_split (pat, trial, 0);
2505 	      if (new_thread == old_trial)
2506 		new_thread = trial;
2507 	      if (thread == old_trial)
2508 		thread = trial;
2509 	      pat = PATTERN (trial);
2510 	      if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2511 		   ? check_annul_list_true_false (0, *delay_list)
2512 		     && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2513 		   : check_annul_list_true_false (1, *delay_list)
2514 		     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2515 		{
2516 		  rtx_insn *temp;
2517 
2518 		  must_annul = 1;
2519 		winner:
2520 
2521 		  if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2522 		    link_cc0_insns (trial);
2523 
2524 		  /* If we own this thread, delete the insn.  If this is the
2525 		     destination of a branch, show that a basic block status
2526 		     may have been updated.  In any case, mark the new
2527 		     starting point of this thread.  */
2528 		  if (own_thread)
2529 		    {
2530 		      rtx note;
2531 
2532 		      update_block (trial, thread);
2533 		      if (trial == thread)
2534 			{
2535 			  thread = next_active_insn (thread);
2536 			  if (new_thread == trial)
2537 			    new_thread = thread;
2538 			}
2539 
2540 		      /* We are moving this insn, not deleting it.  We must
2541 			 temporarily increment the use count on any referenced
2542 			 label lest it be deleted by delete_related_insns.  */
2543 		      for (note = REG_NOTES (trial);
2544 			   note != NULL_RTX;
2545 			   note = XEXP (note, 1))
2546 			if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2547 			    || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2548 			  {
2549 			    /* REG_LABEL_OPERAND could be
2550 			       NOTE_INSN_DELETED_LABEL too.  */
2551 			    if (LABEL_P (XEXP (note, 0)))
2552 			      LABEL_NUSES (XEXP (note, 0))++;
2553 			    else
2554 			      gcc_assert (REG_NOTE_KIND (note)
2555 					  == REG_LABEL_OPERAND);
2556 			  }
2557 		      if (jump_to_label_p (trial))
2558 			LABEL_NUSES (JUMP_LABEL (trial))++;
2559 
2560 		      delete_related_insns (trial);
2561 
2562 		      for (note = REG_NOTES (trial);
2563 			   note != NULL_RTX;
2564 			   note = XEXP (note, 1))
2565 			if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2566 			    || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2567 			  {
2568 			    /* REG_LABEL_OPERAND could be
2569 			       NOTE_INSN_DELETED_LABEL too.  */
2570 			    if (LABEL_P (XEXP (note, 0)))
2571 			      LABEL_NUSES (XEXP (note, 0))--;
2572 			    else
2573 			      gcc_assert (REG_NOTE_KIND (note)
2574 					  == REG_LABEL_OPERAND);
2575 			  }
2576 		      if (jump_to_label_p (trial))
2577 			LABEL_NUSES (JUMP_LABEL (trial))--;
2578 		    }
2579 		  else
2580 		    new_thread = next_active_insn (trial);
2581 
2582 		  temp = own_thread ? trial : copy_delay_slot_insn (trial);
2583 		  if (thread_if_true)
2584 		    INSN_FROM_TARGET_P (temp) = 1;
2585 
2586 		  add_to_delay_list (temp, delay_list);
2587 
2588 		  if (slots_to_fill == ++(*pslots_filled))
2589 		    {
2590 		      /* Even though we have filled all the slots, we
2591 			 may be branching to a location that has a
2592 			 redundant insn.  Skip any if so.  */
2593 		      while (new_thread && ! own_thread
2594 			     && ! insn_sets_resource_p (new_thread, &set, true)
2595 			     && ! insn_sets_resource_p (new_thread, &needed,
2596 							true)
2597 			     && ! insn_references_resource_p (new_thread,
2598 							      &set, true)
2599 			     && (prior_insn
2600 				 = redundant_insn (new_thread, insn,
2601 						   *delay_list)))
2602 			{
2603 			  /* We know we do not own the thread, so no need
2604 			     to call update_block and delete_insn.  */
2605 			  fix_reg_dead_note (prior_insn, insn);
2606 			  update_reg_unused_notes (prior_insn, new_thread);
2607 			  new_thread
2608 			    = next_active_insn (as_a<rtx_insn *> (new_thread));
2609 			}
2610 		      break;
2611 		    }
2612 
2613 		  continue;
2614 		}
2615 	    }
2616 	}
2617 
2618       /* This insn can't go into a delay slot.  */
2619       lose = 1;
2620       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2621       mark_referenced_resources (trial, &needed, true);
2622 
2623       /* Ensure we don't put insns between the setting of cc and the comparison
2624 	 by moving a setting of cc into an earlier delay slot since these insns
2625 	 could clobber the condition code.  */
2626       set.cc = 1;
2627 
2628       /* If this insn is a register-register copy and the next insn has
2629 	 a use of our destination, change it to use our source.  That way,
2630 	 it will become a candidate for our delay slot the next time
2631 	 through this loop.  This case occurs commonly in loops that
2632 	 scan a list.
2633 
2634 	 We could check for more complex cases than those tested below,
2635 	 but it doesn't seem worth it.  It might also be a good idea to try
2636 	 to swap the two insns.  That might do better.
2637 
2638 	 We can't do this if the next insn modifies our destination, because
2639 	 that would make the replacement into the insn invalid.  We also can't
2640 	 do this if it modifies our source, because it might be an earlyclobber
2641 	 operand.  This latter test also prevents updating the contents of
2642 	 a PRE_INC.  We also can't do this if there's overlap of source and
2643 	 destination.  Overlap may happen for larger-than-register-size modes.  */
2644 
2645       if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2646 	  && REG_P (SET_SRC (pat))
2647 	  && REG_P (SET_DEST (pat))
2648 	  && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2649 	{
2650 	  rtx_insn *next = next_nonnote_insn (trial);
2651 
2652 	  if (next && NONJUMP_INSN_P (next)
2653 	      && GET_CODE (PATTERN (next)) != USE
2654 	      && ! reg_set_p (SET_DEST (pat), next)
2655 	      && ! reg_set_p (SET_SRC (pat), next)
2656 	      && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2657 	      && ! modified_in_p (SET_DEST (pat), next))
2658 	    validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2659 	}
2660     }
2661 
2662   /* If we stopped on a branch insn that has delay slots, see if we can
2663      steal some of the insns in those slots.  */
2664   if (trial && NONJUMP_INSN_P (trial)
2665       && GET_CODE (PATTERN (trial)) == SEQUENCE
2666       && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2667     {
2668       rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2669       /* If this is the `true' thread, we will want to follow the jump,
2670 	 so we can only do this if we have taken everything up to here.  */
2671       if (thread_if_true && trial == new_thread)
2672 	{
2673 	  steal_delay_list_from_target (insn, condition, sequence,
2674 					delay_list, &set, &needed,
2675 					&opposite_needed, slots_to_fill,
2676 					pslots_filled, &must_annul,
2677 					&new_thread);
2678 	  /* If we owned the thread and are told that it branched
2679 	     elsewhere, make sure we own the thread at the new location.  */
2680 	  if (own_thread && trial != new_thread)
2681 	    own_thread = own_thread_p (new_thread, new_thread, 0);
2682 	}
2683       else if (! thread_if_true)
2684 	steal_delay_list_from_fallthrough (insn, condition, sequence,
2685 					   delay_list, &set, &needed,
2686 					   &opposite_needed, slots_to_fill,
2687 					   pslots_filled, &must_annul);
2688     }
2689 
2690   /* If we haven't found anything for this delay slot and it is very
2691      likely that the branch will be taken, see if the insn at our target
2692      increments or decrements a register with an increment that does not
2693      depend on the destination register.  If so, try to place the opposite
2694      arithmetic insn after the jump insn and put the arithmetic insn in the
2695      delay slot.  If we can't do this, return.  */
2696   if (delay_list->is_empty () && likely
2697       && new_thread && !ANY_RETURN_P (new_thread)
2698       && NONJUMP_INSN_P (new_thread)
2699       && !RTX_FRAME_RELATED_P (new_thread)
2700       && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2701       && asm_noperands (PATTERN (new_thread)) < 0)
2702     {
2703       rtx dest;
2704       rtx src;
2705 
2706       /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2707 	 above.  */
2708       trial = as_a <rtx_insn *> (new_thread);
2709       rtx pat = PATTERN (trial);
2710 
2711       if (!NONJUMP_INSN_P (trial)
2712 	  || GET_CODE (pat) != SET
2713 	  || ! eligible_for_delay (insn, 0, trial, flags)
2714 	  || can_throw_internal (trial))
2715 	return;
2716 
2717       dest = SET_DEST (pat), src = SET_SRC (pat);
2718       if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2719 	  && rtx_equal_p (XEXP (src, 0), dest)
2720 	  && (!FLOAT_MODE_P (GET_MODE (src))
2721 	      || flag_unsafe_math_optimizations)
2722 	  && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2723 	  && ! side_effects_p (pat))
2724 	{
2725 	  rtx other = XEXP (src, 1);
2726 	  rtx new_arith;
2727 	  rtx_insn *ninsn;
2728 
2729 	  /* If this is a constant adjustment, use the same code with
2730 	     the negated constant.  Otherwise, reverse the sense of the
2731 	     arithmetic.  */
2732 	  if (CONST_INT_P (other))
2733 	    new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2734 					negate_rtx (GET_MODE (src), other));
2735 	  else
2736 	    new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2737 					GET_MODE (src), dest, other);
2738 
2739 	  ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2740 
2741 	  if (recog_memoized (ninsn) < 0
2742 	      || (extract_insn (ninsn),
2743 		  !constrain_operands (1, get_preferred_alternatives (ninsn))))
2744 	    {
2745 	      delete_related_insns (ninsn);
2746 	      return;
2747 	    }
2748 
2749 	  if (own_thread)
2750 	    {
2751 	      update_block (trial, thread);
2752 	      if (trial == thread)
2753 		{
2754 		  thread = next_active_insn (thread);
2755 		  if (new_thread == trial)
2756 		    new_thread = thread;
2757 		}
2758 	      delete_related_insns (trial);
2759 	    }
2760 	  else
2761 	    new_thread = next_active_insn (trial);
2762 
2763 	  ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2764 	  if (thread_if_true)
2765 	    INSN_FROM_TARGET_P (ninsn) = 1;
2766 
2767 	  add_to_delay_list (ninsn, delay_list);
2768 	  (*pslots_filled)++;
2769 	}
2770     }
2771 
2772   if (!delay_list->is_empty () && must_annul)
2773     INSN_ANNULLED_BRANCH_P (insn) = 1;
2774 
2775   /* If we are to branch into the middle of this thread, find an appropriate
2776      label or make a new one if none, and redirect INSN to it.  If we hit the
2777      end of the function, use the end-of-function label.  */
2778   if (new_thread != thread)
2779     {
2780       rtx label;
2781       bool crossing = false;
2782 
2783       gcc_assert (thread_if_true);
2784 
2785       if (new_thread && simplejump_or_return_p (new_thread)
2786 	  && redirect_with_delay_list_safe_p (insn,
2787 					      JUMP_LABEL (new_thread),
2788 					      *delay_list))
2789 	new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2790 				   &crossing);
2791 
2792       if (ANY_RETURN_P (new_thread))
2793 	label = find_end_label (new_thread);
2794       else if (LABEL_P (new_thread))
2795 	label = new_thread;
2796       else
2797 	label = get_label_before (as_a <rtx_insn *> (new_thread),
2798 				  JUMP_LABEL (insn));
2799 
2800       if (label)
2801 	{
2802 	  reorg_redirect_jump (insn, label);
2803 	  if (crossing)
2804 	    CROSSING_JUMP_P (insn) = 1;
2805 	}
2806     }
2807 }
2808 
2809 /* Make another attempt to find insns to place in delay slots.
2810 
2811    We previously looked for insns located in front of the delay insn
2812    and, for non-jump delay insns, located behind the delay insn.
2813 
2814    Here we only try to schedule jump insns and to move insns from either
2815    the target or the following insns into the delay slot.  If annulling is
2816    supported, we are likely to succeed; otherwise, we can do this only
2817    when it is safe.  */
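
/* An illustrative sketch (hypothetical registers and label): for a
   conditional branch predicted taken,

	beq r2, .L1
	nop		! slot to fill
	r4 = r4 + 1	! fall-through code
	...
     .L1:
	r5 = r5 - 1	! target code

   we first try to hoist "r5 = r5 - 1" from the target into the slot,
   marking the branch as annulling if the insn must be nullified on the
   fall-through path; only if that yields nothing do we try the
   fall-through thread.  For a branch predicted not taken, the two
   attempts are made in the opposite order.  */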
2818 
2819 static void
2820 fill_eager_delay_slots (void)
2821 {
2822   rtx_insn *insn;
2823   int i;
2824   int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2825 
2826   for (i = 0; i < num_unfilled_slots; i++)
2827     {
2828       rtx condition;
2829       rtx target_label, insn_at_target;
2830       rtx_insn *fallthrough_insn;
2831       auto_vec<rtx_insn *, 5> delay_list;
2832       rtx_jump_insn *jump_insn;
2833       int own_target;
2834       int own_fallthrough;
2835       int prediction, slots_to_fill, slots_filled;
2836 
2837       insn = unfilled_slots_base[i];
2838       if (insn == 0
2839 	  || insn->deleted ()
2840 	  || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2841 	  || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2842 	continue;
2843 
2844       slots_to_fill = num_delay_slots (jump_insn);
2845       /* Some machine descriptions define instructions as having delay slots
2846 	 only in certain circumstances, which may depend on nearby insns
2847 	 (and these change due to reorg's actions).
2848 
2849 	 For example, the PA port normally has delay slots for unconditional
2850 	 jumps.
2851 
2852 	 However, the PA port claims such jumps do not have a delay slot
2853 	 if they are immediate successors of certain CALL_INSNs.  This
2854 	 allows the port to favor filling the delay slot of the call with
2855 	 the unconditional jump.  */
2856       if (slots_to_fill == 0)
2857 	continue;
2858 
2859       slots_filled = 0;
2860       target_label = JUMP_LABEL (jump_insn);
2861       condition = get_branch_condition (jump_insn, target_label);
2862 
2863       if (condition == 0)
2864 	continue;
2865 
2866       /* Get the next active fallthrough and target insns and see if we own
2867 	 them.  Then see whether the branch is likely true.  We don't need
2868 	 to do a lot of this for unconditional branches.  */
2869 
2870       insn_at_target = first_active_target_insn (target_label);
2871       own_target = own_thread_p (target_label, target_label, 0);
2872 
2873       if (condition == const_true_rtx)
2874 	{
2875 	  own_fallthrough = 0;
2876 	  fallthrough_insn = 0;
2877 	  prediction = 2;
2878 	}
2879       else
2880 	{
2881 	  fallthrough_insn = next_active_insn (jump_insn);
2882 	  own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2883 	  prediction = mostly_true_jump (jump_insn);
2884 	}
2885 
2886       /* If this insn is expected to branch, first try to get insns from our
2887 	 target, then our fallthrough insns.  If it is not expected to branch,
2888 	 try the other order.  */
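      /* Note: PREDICTION is set to 2 above when the jump is unconditional
	 (CONDITION == const_true_rtx); any positive value means the branch
	 is expected to be taken, so the target thread is tried first and
	 the fall-through thread second.  */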
2889 
2890       if (prediction > 0)
2891 	{
2892 	  fill_slots_from_thread (jump_insn, condition, insn_at_target,
2893 				  fallthrough_insn, prediction == 2, 1,
2894 				  own_target,
2895 				  slots_to_fill, &slots_filled, &delay_list);
2896 
2897 	  if (delay_list.is_empty () && own_fallthrough)
2898 	    {
2899 	      /* Even though we didn't find anything for delay slots,
2900 		 we might have found a redundant insn which we deleted
2901 		 from the thread that was filled.  So we have to recompute
2902 		 the next insn at the target.  */
2903 	      target_label = JUMP_LABEL (jump_insn);
2904 	      insn_at_target = first_active_target_insn (target_label);
2905 
2906 	      fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2907 				      insn_at_target, 0, 0, own_fallthrough,
2908 				      slots_to_fill, &slots_filled,
2909 				      &delay_list);
2910 	    }
2911 	}
2912       else
2913 	{
2914 	  if (own_fallthrough)
2915 	    fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2916 				    insn_at_target, 0, 0, own_fallthrough,
2917 				    slots_to_fill, &slots_filled, &delay_list);
2918 
2919 	  if (delay_list.is_empty ())
2920 	    fill_slots_from_thread (jump_insn, condition, insn_at_target,
2921 				    next_active_insn (insn), 0, 1, own_target,
2922 				    slots_to_fill, &slots_filled, &delay_list);
2923 	}
2924 
2925       if (!delay_list.is_empty ())
2926 	unfilled_slots_base[i]
2927 	  = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2928 
2929       if (slots_to_fill == slots_filled)
2930 	unfilled_slots_base[i] = 0;
2931 
2932       note_delay_statistics (slots_filled, 1);
2933     }
2934 }
2935 
2936 static void delete_computation (rtx_insn *insn);
2937 
2938 /* Recursively delete prior insns that compute the value (used only by INSN
2939    which the caller is deleting) stored in the register mentioned by NOTE
2940    which is a REG_DEAD note associated with INSN.  */
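/* Illustrative case only: if the caller is deleting

       (set (reg 2) (plus (reg 1) (reg 3)))

   and (reg 1) dies there, then a preceding insn whose only effect is to
   set (reg 1) computes a value with no remaining use, so it can be
   deleted here as well (and so on, recursively).  */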
2941 
2942 static void
delete_prior_computation (rtx note, rtx_insn *insn)
2944 {
2945   rtx_insn *our_prev;
2946   rtx reg = XEXP (note, 0);
2947 
2948   for (our_prev = prev_nonnote_insn (insn);
2949        our_prev && (NONJUMP_INSN_P (our_prev)
2950 		    || CALL_P (our_prev));
2951        our_prev = prev_nonnote_insn (our_prev))
2952     {
2953       rtx pat = PATTERN (our_prev);
2954 
2955       /* If we reach a CALL which is not calling a const function
2956 	 or the callee pops the arguments, then give up.  */
2957       if (CALL_P (our_prev)
2958 	  && (! RTL_CONST_CALL_P (our_prev)
2959 	      || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2960 	break;
2961 
2962       /* If we reach a SEQUENCE, it is too complex to try to
2963 	 do anything with it, so give up.  We can be run during
2964 	 and after reorg, so SEQUENCE rtl can legitimately show
2965 	 up here.  */
2966       if (GET_CODE (pat) == SEQUENCE)
2967 	break;
2968 
2969       if (GET_CODE (pat) == USE
2970 	  && NONJUMP_INSN_P (XEXP (pat, 0)))
2971 	/* reorg creates USEs that look like this.  We leave them
2972 	   alone because reorg needs them for its own purposes.  */
2973 	break;
2974 
2975       if (reg_set_p (reg, pat))
2976 	{
2977 	  if (side_effects_p (pat) && !CALL_P (our_prev))
2978 	    break;
2979 
2980 	  if (GET_CODE (pat) == PARALLEL)
2981 	    {
2982 	      /* If we find a SET of something else, we can't
2983 		 delete the insn.  */
2984 
2985 	      int i;
2986 
2987 	      for (i = 0; i < XVECLEN (pat, 0); i++)
2988 		{
2989 		  rtx part = XVECEXP (pat, 0, i);
2990 
2991 		  if (GET_CODE (part) == SET
2992 		      && SET_DEST (part) != reg)
2993 		    break;
2994 		}
2995 
2996 	      if (i == XVECLEN (pat, 0))
2997 		delete_computation (our_prev);
2998 	    }
2999 	  else if (GET_CODE (pat) == SET
3000 		   && REG_P (SET_DEST (pat)))
3001 	    {
3002 	      int dest_regno = REGNO (SET_DEST (pat));
3003 	      int dest_endregno = END_REGNO (SET_DEST (pat));
3004 	      int regno = REGNO (reg);
3005 	      int endregno = END_REGNO (reg);
3006 
3007 	      if (dest_regno >= regno
3008 		  && dest_endregno <= endregno)
3009 		delete_computation (our_prev);
3010 
3011 	      /* We may have a multi-word hard register and some, but not
3012 		 all, of the words of the register are needed in subsequent
3013 		 insns.  Write REG_UNUSED notes for those parts that were not
3014 		 needed.  */
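	      /* E.g. (hypothetical hard regnos): REG occupies regs 10-11
		 but OUR_PREV sets regs 10-13; we note regs 10-11 as unused
		 on OUR_PREV and delete it only once every reg it sets has
		 been marked REG_UNUSED.  */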
3015 	      else if (dest_regno <= regno
3016 		       && dest_endregno >= endregno)
3017 		{
3018 		  int i;
3019 
3020 		  add_reg_note (our_prev, REG_UNUSED, reg);
3021 
3022 		  for (i = dest_regno; i < dest_endregno; i++)
3023 		    if (! find_regno_note (our_prev, REG_UNUSED, i))
3024 		      break;
3025 
3026 		  if (i == dest_endregno)
3027 		    delete_computation (our_prev);
3028 		}
3029 	    }
3030 
3031 	  break;
3032 	}
3033 
3034       /* If PAT references the register that dies here, it is an
3035 	 additional use.  Hence any prior SET isn't dead.  However, this
3036 	 insn becomes the new place for the REG_DEAD note.  */
3037       if (reg_overlap_mentioned_p (reg, pat))
3038 	{
3039 	  XEXP (note, 1) = REG_NOTES (our_prev);
3040 	  REG_NOTES (our_prev) = note;
3041 	  break;
3042 	}
3043     }
3044 }
3045 
3046 /* Delete INSN and recursively delete insns that compute values used only
3047    by INSN.  This uses the REG_DEAD notes computed during flow analysis.
3048 
3049    Look at all our REG_DEAD notes.  If a previous insn does nothing other
3050    than set a register that dies in this insn, we can delete that insn
3051    as well.
3052 
3053    On machines with CC0, if CC0 is used in this insn, we may be able to
3054    delete the insn that set it.  */
3055 
3056 static void
delete_computation (rtx_insn *insn)
3058 {
3059   rtx note, next;
3060 
3061   if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3062     {
3063       rtx_insn *prev = prev_nonnote_insn (insn);
3064       /* We assume that at this stage
3065 	 CC's are always set explicitly
3066 	 and always immediately before the jump that
3067 	 will use them.  So if the previous insn
3068 	 exists to set the CC's, delete it
3069 	 (unless it performs auto-increments, etc.).  */
3070       if (prev && NONJUMP_INSN_P (prev)
3071 	  && sets_cc0_p (PATTERN (prev)))
3072 	{
3073 	  if (sets_cc0_p (PATTERN (prev)) > 0
3074 	      && ! side_effects_p (PATTERN (prev)))
3075 	    delete_computation (prev);
3076 	  else
3077 	    /* Otherwise, show that cc0 won't be used.  */
3078 	    add_reg_note (prev, REG_UNUSED, cc0_rtx);
3079 	}
3080     }
3081 
3082   for (note = REG_NOTES (insn); note; note = next)
3083     {
3084       next = XEXP (note, 1);
3085 
3086       if (REG_NOTE_KIND (note) != REG_DEAD
3087 	  /* Verify that the REG_NOTE is legitimate.  */
3088 	  || !REG_P (XEXP (note, 0)))
3089 	continue;
3090 
3091       delete_prior_computation (note, insn);
3092     }
3093 
3094   delete_related_insns (insn);
3095 }
3096 
/* If all INSN does is set the pc, delete it, and also delete the insn
   that set the condition codes for it, if that is what the previous
   insn did.  */
3100 
3101 static void
delete_jump (rtx_insn *insn)
3103 {
3104   rtx set = single_set (insn);
3105 
3106   if (set && GET_CODE (SET_DEST (set)) == PC)
3107     delete_computation (insn);
3108 }
3109 
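/* Scan backwards from the next active insn after X and return the first
   label found, or NULL if SCAN_LIMIT or the start of the insn chain is
   reached first.  */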
3110 static rtx_insn *
label_before_next_insn (rtx_insn *x, rtx scan_limit)
3112 {
3113   rtx_insn *insn = next_active_insn (x);
3114   while (insn)
3115     {
3116       insn = PREV_INSN (insn);
3117       if (insn == scan_limit || insn == NULL_RTX)
3118 	return NULL;
3119       if (LABEL_P (insn))
3120 	break;
3121     }
3122   return insn;
3123 }
3124 
3125 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3126    BEG and END.  */
3127 
3128 static bool
switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3130 {
3131   const rtx_insn *p;
3132   for (p = beg; p != end; p = NEXT_INSN (p))
3133     if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3134       return true;
3135   return false;
3136 }
3137 
3138 
3139 /* Once we have tried two ways to fill a delay slot, make a pass over the
3140    code to try to improve the results and to do such things as more jump
3141    threading.  */
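/* Roughly, the clean-ups attempted below are: threading jumps whose
   target is itself a jump, deleting jumps to the immediately following
   insn, inverting a conditional jump that branches around an
   unconditional one, removing delay-slot insns that turn out to be
   redundant with earlier insns, and merging delay slots with the
   following code when we own that thread.  */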
3142 
3143 static void
relax_delay_slots (rtx_insn *first)
3145 {
3146   rtx_insn *insn, *next;
3147   rtx_sequence *pat;
3148   rtx_insn *delay_insn;
3149   rtx target_label;
3150 
3151   /* Look at every JUMP_INSN and see if we can improve it.  */
3152   for (insn = first; insn; insn = next)
3153     {
3154       rtx_insn *other, *prior_insn;
3155       bool crossing;
3156 
3157       next = next_active_insn (insn);
3158 
3159       /* If this is a jump insn, see if it now jumps to a jump, jumps to
3160 	 the next insn, or jumps to a label that is not the last of a
3161 	 group of consecutive labels.  */
3162       if (is_a <rtx_jump_insn *> (insn)
3163 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
3164 	  && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3165 	{
3166 	  rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3167 	  target_label
3168 	    = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3169 						     &crossing));
3170 	  if (ANY_RETURN_P (target_label))
3171 	    target_label = find_end_label (target_label);
3172 
3173 	  if (target_label
3174 	      && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3175 	      && ! condjump_in_parallel_p (jump_insn)
3176 	      && ! (next && switch_text_sections_between_p (jump_insn, next)))
3177 	    {
3178 	      rtx_insn *direct_label = as_a<rtx_insn *> (JUMP_LABEL (insn));
3179 	      rtx_insn *prev = prev_nonnote_insn (direct_label);
3180 
3181 	      /* If the insn jumps over a BARRIER and is the only way to reach
3182 		 its target, then we need to delete the BARRIER before the jump
3183 		 because, otherwise, the target may end up being considered as
3184 		 unreachable and thus also deleted.  */
3185 	      if (BARRIER_P (prev) && LABEL_NUSES (direct_label) == 1)
3186 		{
3187 		  delete_related_insns (prev);
3188 
3189 		  /* We have just removed a BARRIER, which means that the block
3190 		     number of the next insns has effectively been changed (see
3191 		     find_basic_block in resource.c), so clear it.  */
3192 		  clear_hashed_info_until_next_barrier (direct_label);
3193 		}
3194 
3195 	      delete_jump (jump_insn);
3196 	      continue;
3197 	    }
3198 
3199 	  if (target_label && target_label != JUMP_LABEL (jump_insn))
3200 	    {
3201 	      reorg_redirect_jump (jump_insn, target_label);
3202 	      if (crossing)
3203 		CROSSING_JUMP_P (jump_insn) = 1;
3204 	    }
3205 
	  /* See if this jump conditionally branches around an unconditional
	     jump.  If so, invert this jump and point it to the target of the
	     second jump, provided the target machine allows us to follow
	     the jump (targetm.can_follow_jump).  */
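	  /* Illustratively, rewrite

		if (cc) goto L1
		goto L2
	     L1: ...

	     as

		if (! cc) goto L2
	     L1: ...

	     deleting the unconditional jump (and L1 once it becomes
	     unused).  */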
3209 	  if (next && simplejump_or_return_p (next)
3210 	      && any_condjump_p (jump_insn)
3211 	      && target_label
3212 	      && (next_active_insn (as_a<rtx_insn *> (target_label))
3213 		  == next_active_insn (next))
3214 	      && no_labels_between_p (jump_insn, next)
3215 	      && targetm.can_follow_jump (jump_insn, next))
3216 	    {
3217 	      rtx label = JUMP_LABEL (next);
3218 
3219 	      /* Be careful how we do this to avoid deleting code or
3220 		 labels that are momentarily dead.  See similar optimization
3221 		 in jump.c.
3222 
3223 		 We also need to ensure we properly handle the case when
3224 		 invert_jump fails.  */
3225 
3226 	      ++LABEL_NUSES (target_label);
3227 	      if (!ANY_RETURN_P (label))
3228 		++LABEL_NUSES (label);
3229 
3230 	      if (invert_jump (jump_insn, label, 1))
3231 		{
3232 		  rtx_insn *from = delete_related_insns (next);
3233 
3234 		  /* We have just removed a BARRIER, which means that the block
3235 		     number of the next insns has effectively been changed (see
3236 		     find_basic_block in resource.c), so clear it.  */
3237 		  if (from)
3238 		    clear_hashed_info_until_next_barrier (from);
3239 
3240 		  next = jump_insn;
3241 		}
3242 
3243 	      if (!ANY_RETURN_P (label))
3244 		--LABEL_NUSES (label);
3245 
3246 	      if (--LABEL_NUSES (target_label) == 0)
3247 		delete_related_insns (target_label);
3248 
3249 	      continue;
3250 	    }
3251 	}
3252 
3253       /* If this is an unconditional jump and the previous insn is a
3254 	 conditional jump, try reversing the condition of the previous
3255 	 insn and swapping our targets.  The next pass might be able to
3256 	 fill the slots.
3257 
3258 	 Don't do this if we expect the conditional branch to be true, because
3259 	 we would then be making the more common case longer.  */
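      /* Illustratively, rewrite

	    if (cc) goto A
	    goto B

	 as

	    if (! cc) goto B
	    goto A

	 so that the following pass may find insns to fill the slots of
	 the rewritten jumps.  */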
3260 
3261       if (simplejump_or_return_p (insn)
3262 	  && (other = prev_active_insn (insn)) != 0
3263 	  && any_condjump_p (other)
3264 	  && no_labels_between_p (other, insn)
3265 	  && mostly_true_jump (other) < 0)
3266 	{
3267 	  rtx other_target = JUMP_LABEL (other);
3268 	  target_label = JUMP_LABEL (insn);
3269 
3270 	  if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3271 	    reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3272 	}
3273 
3274       /* Now look only at cases where we have a filled delay slot.  */
3275       if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3276 	continue;
3277 
3278       pat = as_a <rtx_sequence *> (PATTERN (insn));
3279       delay_insn = pat->insn (0);
3280 
3281       /* See if the first insn in the delay slot is redundant with some
3282 	 previous insn.  Remove it from the delay slot if so; then set up
3283 	 to reprocess this insn.  */
3284       if ((prior_insn = redundant_insn (pat->insn (1), delay_insn, vNULL)))
3285 	{
3286 	  fix_reg_dead_note (prior_insn, insn);
3287 	  update_block (pat->insn (1), insn);
3288 	  delete_from_delay_slot (pat->insn (1));
3289 	  next = prev_active_insn (next);
3290 	  continue;
3291 	}
3292 
      /* See if we have a RETURN insn with a filled delay slot followed
	 by a RETURN insn with an unfilled delay slot.  If so, we can delete
	 the first RETURN (but not its delay insn).  This gives the same
	 effect in fewer instructions.
3297 
3298 	 Only do so if optimizing for size since this results in slower, but
3299 	 smaller code.  */
3300       if (optimize_function_for_size_p (cfun)
3301 	  && ANY_RETURN_P (PATTERN (delay_insn))
3302 	  && next
3303 	  && JUMP_P (next)
3304 	  && PATTERN (next) == PATTERN (delay_insn))
3305 	{
3306 	  rtx_insn *after;
3307 	  int i;
3308 
3309 	  /* Delete the RETURN and just execute the delay list insns.
3310 
3311 	     We do this by deleting the INSN containing the SEQUENCE, then
3312 	     re-emitting the insns separately, and then deleting the RETURN.
3313 	     This allows the count of the jump target to be properly
3314 	     decremented.
3315 
3316 	     Note that we need to change the INSN_UID of the re-emitted insns
3317 	     since it is used to hash the insns for mark_target_live_regs and
3318 	     the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3319 
3320 	     Clear the from target bit, since these insns are no longer
3321 	     in delay slots.  */
3322 	  for (i = 0; i < XVECLEN (pat, 0); i++)
3323 	    INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3324 
3325 	  rtx_insn *prev = PREV_INSN (insn);
3326 	  delete_related_insns (insn);
3327 	  gcc_assert (GET_CODE (pat) == SEQUENCE);
3328 	  add_insn_after (delay_insn, prev, NULL);
3329 	  after = delay_insn;
3330 	  for (i = 1; i < pat->len (); i++)
3331 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3332 	  delete_scheduled_jump (delay_insn);
3333 	  continue;
3334 	}
3335 
3336       /* Now look only at the cases where we have a filled JUMP_INSN.  */
3337       rtx_jump_insn *delay_jump_insn =
3338 		dyn_cast <rtx_jump_insn *> (delay_insn);
3339       if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3340 	  || condjump_in_parallel_p (delay_jump_insn)))
3341 	continue;
3342 
3343       target_label = JUMP_LABEL (delay_jump_insn);
3344       if (target_label && ANY_RETURN_P (target_label))
3345 	continue;
3346 
3347       /* If this jump goes to another unconditional jump, thread it, but
3348 	 don't convert a jump into a RETURN here.  */
3349       rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3350 							 delay_jump_insn,
3351 							 &crossing));
3352       if (ANY_RETURN_P (trial))
3353 	trial = find_end_label (trial);
3354 
3355       if (trial && trial != target_label
3356 	  && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3357 	{
3358 	  reorg_redirect_jump (delay_jump_insn, trial);
3359 	  target_label = trial;
3360 	  if (crossing)
3361 	    CROSSING_JUMP_P (delay_jump_insn) = 1;
3362 	}
3363 
3364       /* If the first insn at TARGET_LABEL is redundant with a previous
3365 	 insn, redirect the jump to the following insn and process again.
3366 	 We use next_real_nondebug_insn instead of next_active_insn so we
3367 	 don't skip USE-markers, or we'll end up with incorrect
3368 	 liveness info.  */
3369       trial = next_real_nondebug_insn (target_label);
3370       if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3371 	  && redundant_insn (trial, insn, vNULL)
3372 	  && ! can_throw_internal (trial))
3373 	{
3374 	  /* Figure out where to emit the special USE insn so we don't
3375 	     later incorrectly compute register live/death info.  */
3376 	  rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3377 	  if (tmp == 0)
3378 	    tmp = find_end_label (simple_return_rtx);
3379 
3380 	  if (tmp)
3381 	    {
3382 	      /* Insert the special USE insn and update dataflow info.
3383 		 We know "trial" is an insn here as it is the output of
3384 		 next_real_nondebug_insn () above.  */
3385 	      update_block (as_a <rtx_insn *> (trial), tmp);
3386 
3387 	      /* Now emit a label before the special USE insn, and
3388 		 redirect our jump to the new label.  */
3389 	      target_label = get_label_before (PREV_INSN (tmp), target_label);
3390 	      reorg_redirect_jump (delay_jump_insn, target_label);
3391 	      next = insn;
3392 	      continue;
3393 	    }
3394 	}
3395 
3396       /* Similarly, if it is an unconditional jump with one insn in its
3397 	 delay list and that insn is redundant, thread the jump.  */
3398       rtx_sequence *trial_seq =
3399 	trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3400       if (trial_seq
3401 	  && trial_seq->len () == 2
3402 	  && JUMP_P (trial_seq->insn (0))
3403 	  && simplejump_or_return_p (trial_seq->insn (0))
3404 	  && redundant_insn (trial_seq->insn (1), insn, vNULL))
3405 	{
3406 	  rtx temp_label = JUMP_LABEL (trial_seq->insn (0));
3407 	  if (ANY_RETURN_P (temp_label))
3408 	    temp_label = find_end_label (temp_label);
3409 
3410 	  if (temp_label
3411 	      && redirect_with_delay_slots_safe_p (delay_jump_insn,
3412 						   temp_label, insn))
3413 	    {
3414 	      update_block (trial_seq->insn (1), insn);
3415 	      reorg_redirect_jump (delay_jump_insn, temp_label);
3416 	      next = insn;
3417 	      continue;
3418 	    }
3419 	}
3420 
3421       /* See if we have a simple (conditional) jump that is useless.  */
3422       if (!CROSSING_JUMP_P (delay_jump_insn)
3423 	  && !INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3424 	  && !condjump_in_parallel_p (delay_jump_insn)
3425 	  && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3426 	  && !BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label)))
3427 	  /* If the last insn in the delay slot sets CC0 for some insn,
3428 	     various code assumes that it is in a delay slot.  We could
3429 	     put it back where it belonged and delete the register notes,
3430 	     but it doesn't seem worthwhile in this uncommon case.  */
3431 	  && (!HAVE_cc0
3432 	      || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3433 				  REG_CC_USER, NULL_RTX)))
3434 	{
3435 	  rtx_insn *after;
3436 	  int i;
3437 
3438 	  /* All this insn does is execute its delay list and jump to the
3439 	     following insn.  So delete the jump and just execute the delay
3440 	     list insns.
3441 
3442 	     We do this by deleting the INSN containing the SEQUENCE, then
3443 	     re-emitting the insns separately, and then deleting the jump.
3444 	     This allows the count of the jump target to be properly
3445 	     decremented.
3446 
3447 	     Note that we need to change the INSN_UID of the re-emitted insns
3448 	     since it is used to hash the insns for mark_target_live_regs and
3449 	     the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3450 
3451 	     Clear the from target bit, since these insns are no longer
3452 	     in delay slots.  */
3453 	  for (i = 0; i < XVECLEN (pat, 0); i++)
3454 	    INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3455 
3456 	  rtx_insn *prev = PREV_INSN (insn);
3457 	  delete_related_insns (insn);
3458 	  gcc_assert (GET_CODE (pat) == SEQUENCE);
3459 	  add_insn_after (delay_jump_insn, prev, NULL);
3460 	  after = delay_jump_insn;
3461 	  for (i = 1; i < pat->len (); i++)
3462 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3463 	  delete_scheduled_jump (delay_jump_insn);
3464 	  continue;
3465 	}
3466 
3467       /* See if this is an unconditional jump around a single insn which is
3468 	 identical to the one in its delay slot.  In this case, we can just
3469 	 delete the branch and the insn in its delay slot.  */
3470       if (next && NONJUMP_INSN_P (next)
3471 	  && label_before_next_insn (next, insn) == target_label
3472 	  && simplejump_p (insn)
3473 	  && XVECLEN (pat, 0) == 2
3474 	  && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3475 	{
3476 	  delete_related_insns (insn);
3477 	  continue;
3478 	}
3479 
3480       /* See if this jump (with its delay slots) conditionally branches
3481 	 around an unconditional jump (without delay slots).  If so, invert
3482 	 this jump and point it to the target of the second jump.  We cannot
3483 	 do this for annulled jumps, though.  Again, don't convert a jump to
3484 	 a RETURN here.  */
3485       if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3486 	  && any_condjump_p (delay_jump_insn)
3487 	  && next && simplejump_or_return_p (next)
3488 	  && (next_active_insn (as_a<rtx_insn *> (target_label))
3489 	      == next_active_insn (next))
3490 	  && no_labels_between_p (insn, next))
3491 	{
3492 	  rtx label = JUMP_LABEL (next);
3493 	  rtx old_label = JUMP_LABEL (delay_jump_insn);
3494 
3495 	  if (ANY_RETURN_P (label))
3496 	    label = find_end_label (label);
3497 
3498 	  /* find_end_label can generate a new label. Check this first.  */
3499 	  if (label
3500 	      && no_labels_between_p (insn, next)
3501 	      && redirect_with_delay_slots_safe_p (delay_jump_insn,
3502 						   label, insn))
3503 	    {
3504 	      /* Be careful how we do this to avoid deleting code or labels
3505 		 that are momentarily dead.  See similar optimization in
3506 		 jump.c  */
3507 	      if (old_label)
3508 		++LABEL_NUSES (old_label);
3509 
3510 	      if (invert_jump (delay_jump_insn, label, 1))
3511 		{
3512 		  /* Must update the INSN_FROM_TARGET_P bits now that
3513 		     the branch is reversed, so that mark_target_live_regs
3514 		     will handle the delay slot insn correctly.  */
3515 		  for (int i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3516 		    {
3517 		      rtx slot = XVECEXP (PATTERN (insn), 0, i);
3518 		      INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3519 		    }
3520 
3521 		  /* We have just removed a BARRIER, which means that the block
3522 		     number of the next insns has effectively been changed (see
3523 		     find_basic_block in resource.c), so clear it.  */
3524 		  rtx_insn *from = delete_related_insns (next);
3525 		  if (from)
3526 		    clear_hashed_info_until_next_barrier (from);
3527 
3528 		  next = insn;
3529 		}
3530 
3531 	      if (old_label && --LABEL_NUSES (old_label) == 0)
3532 		delete_related_insns (old_label);
3533 	      continue;
3534 	    }
3535 	}
3536 
3537       /* If we own the thread opposite the way this insn branches, see if we
3538 	 can merge its delay slots with following insns.  */
3539       if (INSN_FROM_TARGET_P (pat->insn (1))
3540 	  && own_thread_p (NEXT_INSN (insn), 0, 1))
3541 	try_merge_delay_insns (insn, next);
3542       else if (! INSN_FROM_TARGET_P (pat->insn (1))
3543 	       && own_thread_p (target_label, target_label, 0))
3544 	try_merge_delay_insns (insn,
3545 			       next_active_insn (as_a<rtx_insn *> (target_label)));
3546 
3547       /* If we get here, we haven't deleted INSN.  But we may have deleted
3548 	 NEXT, so recompute it.  */
3549       next = next_active_insn (insn);
3550     }
3551 }
3552 
3553 
/* Look for filled jumps to the end-of-function label.  We can try to convert
   them into RETURN insns if the insns in the delay slot are valid for the
   RETURN as well.  */
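/* That is, a SEQUENCE whose control insn is a jump to the end-of-function
   label can be rewritten so that the control insn is a RETURN carrying the
   same delay-slot insns, provided each of those insns is also eligible for
   the RETURN's slots (checked below with eligible_for_delay and, on
   annulling targets, eligible_for_annul_true/false).  */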
3557 
3558 static void
make_return_insns (rtx_insn *first)
3560 {
3561   rtx_insn *insn;
3562   rtx_jump_insn *jump_insn;
3563   rtx real_return_label = function_return_label;
3564   rtx real_simple_return_label = function_simple_return_label;
3565   int slots, i;
3566 
3567   /* See if there is a RETURN insn in the function other than the one we
3568      made for END_OF_FUNCTION_LABEL.  If so, set up anything we can't change
3569      into a RETURN to jump to it.  */
3570   for (insn = first; insn; insn = NEXT_INSN (insn))
3571     if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3572       {
3573 	rtx t = get_label_before (insn, NULL_RTX);
3574 	if (PATTERN (insn) == ret_rtx)
3575 	  real_return_label = t;
3576 	else
3577 	  real_simple_return_label = t;
3578 	break;
3579       }
3580 
3581   /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3582      was equal to END_OF_FUNCTION_LABEL.  */
3583   if (real_return_label)
3584     LABEL_NUSES (real_return_label)++;
3585   if (real_simple_return_label)
3586     LABEL_NUSES (real_simple_return_label)++;
3587 
3588   /* Clear the list of insns to fill so we can use it.  */
3589   obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3590 
3591   for (insn = first; insn; insn = NEXT_INSN (insn))
3592     {
3593       int flags;
3594       rtx kind, real_label;
3595 
3596       /* Only look at filled JUMP_INSNs that go to the end of function
3597 	 label.  */
3598       if (!NONJUMP_INSN_P (insn))
3599 	continue;
3600 
3601       if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3602 	continue;
3603 
3604       rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3605 
3606       if (!jump_to_label_p (pat->insn (0)))
3607 	continue;
3608 
3609       if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3610 	{
3611 	  kind = ret_rtx;
3612 	  real_label = real_return_label;
3613 	}
3614       else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3615 	{
3616 	  kind = simple_return_rtx;
3617 	  real_label = real_simple_return_label;
3618 	}
3619       else
3620 	continue;
3621 
3622       jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3623 
3624       /* If we can't make the jump into a RETURN, try to redirect it to the best
3625 	 RETURN and go on to the next insn.  */
3626       if (!reorg_redirect_jump (jump_insn, kind))
3627 	{
3628 	  /* Make sure redirecting the jump will not invalidate the delay
3629 	     slot insns.  */
3630 	  if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3631 	    reorg_redirect_jump (jump_insn, real_label);
3632 	  continue;
3633 	}
3634 
      /* See if this RETURN can accept the insns currently in its delay
	 slot.  It can if it has at least as many slots and the contents
	 of each slot are valid.  */
3638 
3639       flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3640       slots = num_delay_slots (jump_insn);
3641       if (slots >= XVECLEN (pat, 0) - 1)
3642 	{
3643 	  for (i = 1; i < XVECLEN (pat, 0); i++)
3644 	    if (! (
3645 #if ANNUL_IFFALSE_SLOTS
3646 		   (INSN_ANNULLED_BRANCH_P (jump_insn)
3647 		    && INSN_FROM_TARGET_P (pat->insn (i)))
3648 		   ? eligible_for_annul_false (jump_insn, i - 1,
3649 					       pat->insn (i), flags) :
3650 #endif
3651 #if ANNUL_IFTRUE_SLOTS
3652 		   (INSN_ANNULLED_BRANCH_P (jump_insn)
3653 		    && ! INSN_FROM_TARGET_P (pat->insn (i)))
3654 		   ? eligible_for_annul_true (jump_insn, i - 1,
3655 					      pat->insn (i), flags) :
3656 #endif
3657 		   eligible_for_delay (jump_insn, i - 1,
3658 				       pat->insn (i), flags)))
3659 	      break;
3660 	}
3661       else
3662 	i = 0;
3663 
3664       if (i == XVECLEN (pat, 0))
3665 	continue;
3666 
3667       /* We have to do something with this insn.  If it is an unconditional
3668 	 RETURN, delete the SEQUENCE and output the individual insns,
3669 	 followed by the RETURN.  Then set things up so we try to find
3670 	 insns for its delay slots, if it needs some.  */
3671       if (ANY_RETURN_P (PATTERN (jump_insn)))
3672 	{
3673 	  rtx_insn *after = PREV_INSN (insn);
3674 
3675 	  delete_related_insns (insn);
3676 	  insn = jump_insn;
3677 	  for (i = 1; i < pat->len (); i++)
3678 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3679 	  add_insn_after (insn, after, NULL);
3680 	  emit_barrier_after (insn);
3681 
3682 	  if (slots)
3683 	    obstack_ptr_grow (&unfilled_slots_obstack, insn);
3684 	}
3685       else
3686 	/* It is probably more efficient to keep this with its current
3687 	   delay slot as a branch to a RETURN.  */
3688 	reorg_redirect_jump (jump_insn, real_label);
3689     }
3690 
3691   /* Now delete REAL_RETURN_LABEL if we never used it.  Then try to fill any
3692      new delay slots we have created.  */
3693   if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3694     delete_related_insns (real_return_label);
3695   if (real_simple_return_label != NULL_RTX
3696       && --LABEL_NUSES (real_simple_return_label) == 0)
3697     delete_related_insns (real_simple_return_label);
3698 
3699   fill_simple_delay_slots (1);
3700   fill_simple_delay_slots (0);
3701 }
3702 
3703 /* Try to find insns to place in delay slots.  */
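/* The driver below roughly does the following: build the map from
   INSN_UID to position in the insn stream, collect every insn that wants
   delay slots into unfilled_slots_obstack, run fill_simple_delay_slots,
   fill_eager_delay_slots (unless the target forbids speculation in delay
   slots) and relax_delay_slots for MAX_REORG_PASSES passes, optionally
   turn filled jumps to the end-of-function label into RETURNs, delete the
   temporary USE insns created by update_block, and finally emit the
   delay-slot statistics to the dump file.  */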
3704 
3705 static void
dbr_schedule (rtx_insn *first)
3707 {
3708   rtx_insn *insn, *next, *epilogue_insn = 0;
3709   int i;
3710   bool need_return_insns;
3711 
3712   /* If the current function has no insns other than the prologue and
3713      epilogue, then do not try to fill any delay slots.  */
3714   if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3715     return;
3716 
3717   /* Find the highest INSN_UID and allocate and initialize our map from
3718      INSN_UID's to position in code.  */
3719   for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3720     {
3721       if (INSN_UID (insn) > max_uid)
3722 	max_uid = INSN_UID (insn);
3723       if (NOTE_P (insn)
3724 	  && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3725 	epilogue_insn = insn;
3726     }
3727 
3728   uid_to_ruid = XNEWVEC (int, max_uid + 1);
3729   for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3730     uid_to_ruid[INSN_UID (insn)] = i;
3731 
3732   /* Initialize the list of insns that need filling.  */
3733   if (unfilled_firstobj == 0)
3734     {
3735       gcc_obstack_init (&unfilled_slots_obstack);
3736       unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3737     }
3738 
3739   for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3740     {
3741       rtx target;
3742 
3743       /* Skip vector tables.  We can't get attributes for them.  */
3744       if (JUMP_TABLE_DATA_P (insn))
3745 	continue;
3746 
3747       if (JUMP_P (insn))
3748         INSN_ANNULLED_BRANCH_P (insn) = 0;
3749       INSN_FROM_TARGET_P (insn) = 0;
3750 
3751       if (num_delay_slots (insn) > 0)
3752 	obstack_ptr_grow (&unfilled_slots_obstack, insn);
3753 
3754       /* Ensure all jumps go to the last of a set of consecutive labels.  */
3755       if (JUMP_P (insn)
3756 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
3757 	  && !ANY_RETURN_P (JUMP_LABEL (insn))
3758 	  && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3759 	      != JUMP_LABEL (insn)))
3760 	redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3761     }
3762 
3763   init_resource_info (epilogue_insn);
3764 
3765   /* Show we haven't computed an end-of-function label yet.  */
3766   function_return_label = function_simple_return_label = NULL;
3767 
3768   /* Initialize the statistics for this function.  */
3769   memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3770   memset (num_filled_delays, 0, sizeof num_filled_delays);
3771 
3772   /* Now do the delay slot filling.  Try everything twice in case earlier
3773      changes make more slots fillable.  */
3774 
3775   for (reorg_pass_number = 0;
3776        reorg_pass_number < MAX_REORG_PASSES;
3777        reorg_pass_number++)
3778     {
3779       fill_simple_delay_slots (1);
3780       fill_simple_delay_slots (0);
3781       if (!targetm.no_speculation_in_delay_slots_p ())
3782 	fill_eager_delay_slots ();
3783       relax_delay_slots (first);
3784     }
3785 
3786   /* If we made an end of function label, indicate that it is now
3787      safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3788      If it is now unused, delete it.  */
3789   if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3790     delete_related_insns (function_return_label);
3791   if (function_simple_return_label
3792       && --LABEL_NUSES (function_simple_return_label) == 0)
3793     delete_related_insns (function_simple_return_label);
3794 
3795   need_return_insns = false;
3796   need_return_insns |= targetm.have_return () && function_return_label != 0;
3797   need_return_insns |= (targetm.have_simple_return ()
3798 			&& function_simple_return_label != 0);
3799   if (need_return_insns)
3800     make_return_insns (first);
3801 
3802   /* Delete any USE insns made by update_block; subsequent passes don't need
3803      them or know how to deal with them.  */
3804   for (insn = first; insn; insn = next)
3805     {
3806       next = NEXT_INSN (insn);
3807 
3808       if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3809 	  && INSN_P (XEXP (PATTERN (insn), 0)))
3810 	next = delete_related_insns (insn);
3811     }
3812 
3813   obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3814 
3815   /* It is not clear why the line below is needed, but it does seem to be.  */
3816   unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3817 
3818   if (dump_file)
3819     {
3820       int i, j, need_comma;
3821       int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3822       int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3823 
3824       for (reorg_pass_number = 0;
3825 	   reorg_pass_number < MAX_REORG_PASSES;
3826 	   reorg_pass_number++)
3827 	{
3828 	  fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3829 	  for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3830 	    {
3831 	      need_comma = 0;
3832 	      fprintf (dump_file, ";; Reorg function #%d\n", i);
3833 
3834 	      fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3835 		       num_insns_needing_delays[i][reorg_pass_number]);
3836 
3837 	      for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3838 		if (num_filled_delays[i][j][reorg_pass_number])
3839 		  {
3840 		    if (need_comma)
3841 		      fprintf (dump_file, ", ");
3842 		    need_comma = 1;
3843 		    fprintf (dump_file, "%d got %d delays",
3844 			     num_filled_delays[i][j][reorg_pass_number], j);
3845 		  }
3846 	      fprintf (dump_file, "\n");
3847 	    }
3848 	}
3849       memset (total_delay_slots, 0, sizeof total_delay_slots);
3850       memset (total_annul_slots, 0, sizeof total_annul_slots);
3851       for (insn = first; insn; insn = NEXT_INSN (insn))
3852 	{
3853 	  if (! insn->deleted ()
3854 	      && NONJUMP_INSN_P (insn)
3855 	      && GET_CODE (PATTERN (insn)) != USE
3856 	      && GET_CODE (PATTERN (insn)) != CLOBBER)
3857 	    {
3858 	      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3859 		{
3860                   rtx control;
3861 		  j = XVECLEN (PATTERN (insn), 0) - 1;
3862 		  if (j > MAX_DELAY_HISTOGRAM)
3863 		    j = MAX_DELAY_HISTOGRAM;
3864                   control = XVECEXP (PATTERN (insn), 0, 0);
3865 		  if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3866 		    total_annul_slots[j]++;
3867 		  else
3868 		    total_delay_slots[j]++;
3869 		}
3870 	      else if (num_delay_slots (insn) > 0)
3871 		total_delay_slots[0]++;
3872 	    }
3873 	}
3874       fprintf (dump_file, ";; Reorg totals: ");
3875       need_comma = 0;
3876       for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3877 	{
3878 	  if (total_delay_slots[j])
3879 	    {
3880 	      if (need_comma)
3881 		fprintf (dump_file, ", ");
3882 	      need_comma = 1;
3883 	      fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3884 	    }
3885 	}
3886       fprintf (dump_file, "\n");
3887 
3888       if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3889 	{
3890 	  fprintf (dump_file, ";; Reorg annuls: ");
3891 	  need_comma = 0;
3892 	  for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3893 	    {
3894 	      if (total_annul_slots[j])
3895 		{
3896 		  if (need_comma)
3897 		    fprintf (dump_file, ", ");
3898 		  need_comma = 1;
3899 		  fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3900 		}
3901 	    }
3902 	  fprintf (dump_file, "\n");
3903 	}
3904 
3905       fprintf (dump_file, "\n");
3906     }
3907 
3908   if (!sibling_labels.is_empty ())
3909     {
3910       update_alignments (sibling_labels);
3911       sibling_labels.release ();
3912     }
3913 
3914   free_resource_info ();
3915   free (uid_to_ruid);
3916   crtl->dbr_scheduled_p = true;
3917 }
3918 
3919 /* Run delay slot optimization.  */
3920 static unsigned int
rest_of_handle_delay_slots (void)
3922 {
3923   if (DELAY_SLOTS)
3924     dbr_schedule (get_insns ());
3925 
3926   return 0;
3927 }
3928 
3929 namespace {
3930 
3931 const pass_data pass_data_delay_slots =
3932 {
3933   RTL_PASS, /* type */
3934   "dbr", /* name */
3935   OPTGROUP_NONE, /* optinfo_flags */
3936   TV_DBR_SCHED, /* tv_id */
3937   0, /* properties_required */
3938   0, /* properties_provided */
3939   0, /* properties_destroyed */
3940   0, /* todo_flags_start */
3941   0, /* todo_flags_finish */
3942 };
3943 
3944 class pass_delay_slots : public rtl_opt_pass
3945 {
3946 public:
  pass_delay_slots (gcc::context *ctxt)
3948     : rtl_opt_pass (pass_data_delay_slots, ctxt)
3949   {}
3950 
3951   /* opt_pass methods: */
3952   virtual bool gate (function *);
  virtual unsigned int execute (function *)
3954     {
3955       return rest_of_handle_delay_slots ();
3956     }
3957 
3958 }; // class pass_delay_slots
3959 
3960 bool
pass_delay_slots::gate (function *)
3962 {
3963   /* At -O0 dataflow info isn't updated after RA.  */
3964   if (DELAY_SLOTS)
3965     return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3966 
3967   return false;
3968 }
3969 
3970 } // anon namespace
3971 
3972 rtl_opt_pass *
make_pass_delay_slots (gcc::context *ctxt)
3974 {
3975   return new pass_delay_slots (ctxt);
3976 }
3977 
3978 /* Machine dependent reorg pass.  */
3979 
3980 namespace {
3981 
3982 const pass_data pass_data_machine_reorg =
3983 {
3984   RTL_PASS, /* type */
3985   "mach", /* name */
3986   OPTGROUP_NONE, /* optinfo_flags */
3987   TV_MACH_DEP, /* tv_id */
3988   0, /* properties_required */
3989   0, /* properties_provided */
3990   0, /* properties_destroyed */
3991   0, /* todo_flags_start */
3992   0, /* todo_flags_finish */
3993 };
3994 
3995 class pass_machine_reorg : public rtl_opt_pass
3996 {
3997 public:
  pass_machine_reorg (gcc::context *ctxt)
3999     : rtl_opt_pass (pass_data_machine_reorg, ctxt)
4000   {}
4001 
4002   /* opt_pass methods: */
  virtual bool gate (function *)
4004     {
4005       return targetm.machine_dependent_reorg != 0;
4006     }
4007 
  virtual unsigned int execute (function *)
4009     {
4010       targetm.machine_dependent_reorg ();
4011       return 0;
4012     }
4013 
4014 }; // class pass_machine_reorg
4015 
4016 } // anon namespace
4017 
4018 rtl_opt_pass *
make_pass_machine_reorg (gcc::context *ctxt)
4020 {
4021   return new pass_machine_reorg (ctxt);
4022 }
4023