1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* Instruction reorganization pass.
23
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
31
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
36
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
41
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that, failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
47
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
54
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
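
   As a schematic illustration (the mnemonics and registers here are
   made up, not taken from any particular port), a single-delay-slot
   machine might turn

	add	r3,r1,r2
	beq	r4,L2
	nop			; unfilled delay slot

   into

	beq	r4,L2
	add	r3,r1,r2	; slot filled from before the branch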
58
59 Three techniques for filling delay slots have been implemented so far:
60
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
69
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
82
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where a
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
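
   As a simplified illustration of (3), a conditional branch whose
   target is itself an unconditional jump can be "threaded" past it
   (mnemonics again made up):

	bne	r1,L1		becomes		bne	r1,L2
	...					...
     L1: b	L2			     L1: b	L2

   after which the jump at L1 may become unreachable and be removed by
   later cleanups.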
92
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets CC0 and the one that uses it. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
102
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "memmodel.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "emit-rtl.h"
116 #include "recog.h"
117 #include "insn-attr.h"
118 #include "resource.h"
119 #include "params.h"
120 #include "tree-pass.h"
121
122
123 /* First, some functions that were used before GCC got a control flow graph.
124 These functions are now only used here in reorg.c, and have therefore
125 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
126
127 /* Return the last label to mark the same position as LABEL. Return LABEL
128 itself if it is null or any return rtx. */
129
130 static rtx
131 skip_consecutive_labels (rtx label_or_return)
132 {
133 rtx_insn *insn;
134
135 if (label_or_return && ANY_RETURN_P (label_or_return))
136 return label_or_return;
137
138 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
139
140 /* __builtin_unreachable can create a CODE_LABEL followed by a BARRIER.
141
142 Since reaching the CODE_LABEL is undefined behavior, we can return
143 any code label and we're OK at runtime.
144
145 However, if we return a CODE_LABEL which leads to a shrink-wrapped
146 epilogue, but the path does not have a prologue, then we will trip
147 a sanity check in the dwarf2 cfi code which wants to verify that
148 the CFIs are all the same on the traces leading to the epilogue.
149
150 So we explicitly disallow looking through BARRIERS here. */
151 for (insn = label;
152 insn != 0 && !INSN_P (insn) && !BARRIER_P (insn);
153 insn = NEXT_INSN (insn))
154 if (LABEL_P (insn))
155 label = insn;
156
157 return label;
158 }
159
160 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
161 and REG_CC_USER notes so we can find it. */
162
163 static void
164 link_cc0_insns (rtx_insn *insn)
165 {
166 rtx user = next_nonnote_insn (insn);
167
168 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
169 user = XVECEXP (PATTERN (user), 0, 0);
170
171 add_reg_note (user, REG_CC_SETTER, insn);
172 add_reg_note (insn, REG_CC_USER, user);
173 }
174
175 /* Insns which have delay slots that have not yet been filled. */
176
177 static struct obstack unfilled_slots_obstack;
178 static rtx *unfilled_firstobj;
179
180 /* Define macros to refer to the first and last slot containing unfilled
181 insns. These are used because the list may move and its address
182 should be recomputed at each use. */
183
184 #define unfilled_slots_base \
185 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
186
187 #define unfilled_slots_next \
188 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
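
/* A minimal sketch of how the filling passes below walk this list; null
   entries and the need to re-read the base after the obstack may have
   grown are ignored here:

     rtx_insn **slot;
     for (slot = unfilled_slots_base; slot < unfilled_slots_next; slot++)
       if (*slot != 0)
	 ... try to fill the delay slots of *slot ...  */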
189
190 /* Points to the label before the end of the function, or before a
191 return insn. */
192 static rtx_code_label *function_return_label;
193 /* Likewise for a simple_return. */
194 static rtx_code_label *function_simple_return_label;
195
196 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
197 not always monotonically increase. */
198 static int *uid_to_ruid;
199
200 /* Highest valid index in `uid_to_ruid'. */
201 static int max_uid;
202
203 static int stop_search_p (rtx_insn *, int);
204 static int resource_conflicts_p (struct resources *, struct resources *);
205 static int insn_references_resource_p (rtx, struct resources *, bool);
206 static int insn_sets_resource_p (rtx, struct resources *, bool);
207 static rtx_code_label *find_end_label (rtx);
208 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
209 int);
210 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
211 static rtx_insn *delete_from_delay_slot (rtx_insn *);
212 static void delete_scheduled_jump (rtx_insn *);
213 static void note_delay_statistics (int, int);
214 static int get_jump_flags (const rtx_insn *, rtx);
215 static int mostly_true_jump (rtx);
216 static rtx get_branch_condition (const rtx_insn *, rtx);
217 static int condition_dominates_p (rtx, const rtx_insn *);
218 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
219 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
220 const vec<rtx_insn *> &);
221 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
222 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
223 vec<rtx_insn *> *,
224 struct resources *,
225 struct resources *,
226 struct resources *,
227 int, int *, int *,
228 rtx *);
229 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
230 vec<rtx_insn *> *,
231 struct resources *,
232 struct resources *,
233 struct resources *,
234 int, int *, int *);
235 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
236 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
237 static int own_thread_p (rtx, rtx, int);
238 static void update_block (rtx_insn *, rtx_insn *);
239 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
240 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
241 static void fix_reg_dead_note (rtx_insn *, rtx);
242 static void update_reg_unused_notes (rtx_insn *, rtx);
243 static void fill_simple_delay_slots (int);
244 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
245 int, int, int, int,
246 int *, vec<rtx_insn *> *);
247 static void fill_eager_delay_slots (void);
248 static void relax_delay_slots (rtx_insn *);
249 static void make_return_insns (rtx_insn *);
250
251 /* A wrapper around next_active_insn which takes care to return ret_rtx
252 unchanged. */
253
254 static rtx
255 first_active_target_insn (rtx insn)
256 {
257 if (ANY_RETURN_P (insn))
258 return insn;
259 return next_active_insn (as_a <rtx_insn *> (insn));
260 }
261
262 /* Return true iff INSN is a simplejump, or any kind of return insn. */
263
264 static bool
265 simplejump_or_return_p (rtx insn)
266 {
267 return (JUMP_P (insn)
268 && (simplejump_p (as_a <rtx_insn *> (insn))
269 || ANY_RETURN_P (PATTERN (insn))));
270 }
271
272 /* Return TRUE if this insn should stop the search for insns to fill delay
273 slots. LABELS_P indicates that labels should terminate the search.
274 In all cases, jumps terminate the search. */
275
276 static int
277 stop_search_p (rtx_insn *insn, int labels_p)
278 {
279 if (insn == 0)
280 return 1;
281
282 /* If the insn can throw an exception that is caught within the function,
283 it may effectively perform a jump from the viewpoint of the function.
284 Therefore act like for a jump. */
285 if (can_throw_internal (insn))
286 return 1;
287
288 switch (GET_CODE (insn))
289 {
290 case NOTE:
291 case CALL_INSN:
292 case DEBUG_INSN:
293 return 0;
294
295 case CODE_LABEL:
296 return labels_p;
297
298 case JUMP_INSN:
299 case BARRIER:
300 return 1;
301
302 case INSN:
303 /* OK unless it contains a delay slot or is an `asm' insn of some type.
304 We don't know anything about these. */
305 return (GET_CODE (PATTERN (insn)) == SEQUENCE
306 || GET_CODE (PATTERN (insn)) == ASM_INPUT
307 || asm_noperands (PATTERN (insn)) >= 0);
308
309 default:
310 gcc_unreachable ();
311 }
312 }
313
314 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
315 resource set contains a volatile memory reference. Otherwise, return FALSE. */
316
317 static int
318 resource_conflicts_p (struct resources *res1, struct resources *res2)
319 {
320 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
321 || res1->volatil || res2->volatil)
322 return 1;
323
324 return hard_reg_set_intersect_p (res1->regs, res2->regs);
325 }
326
327 /* Return TRUE if any resource marked in RES, a `struct resources', is
328 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, also return TRUE if
329 a routine called by INSN uses those resources.
330
331 We compute this by computing all the resources referenced by INSN and
332 seeing if this conflicts with RES. It might be faster to directly check
333 ourselves, and this is the way it used to work, but it means duplicating
334 a large block of complex code. */
335
336 static int
337 insn_references_resource_p (rtx insn, struct resources *res,
338 bool include_delayed_effects)
339 {
340 struct resources insn_res;
341
342 CLEAR_RESOURCE (&insn_res);
343 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
344 return resource_conflicts_p (&insn_res, res);
345 }
346
347 /* Return TRUE if INSN modifies resources that are marked in RES.
348 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
349 included. CC0 is only modified if it is explicitly set; see comments
350 in front of mark_set_resources for details. */
351
352 static int
353 insn_sets_resource_p (rtx insn, struct resources *res,
354 bool include_delayed_effects)
355 {
356 struct resources insn_sets;
357
358 CLEAR_RESOURCE (&insn_sets);
359 mark_set_resources (insn, &insn_sets, 0,
360 (include_delayed_effects
361 ? MARK_SRC_DEST_CALL
362 : MARK_SRC_DEST));
363 return resource_conflicts_p (&insn_sets, res);
364 }
365
366 /* Find a label at the end of the function or before a RETURN. If there
367 is none, try to make one. If that fails, returns 0.
368
369 The property of such a label is that it is placed just before the
370 epilogue or a bare RETURN insn, so that another bare RETURN can be
371 turned into a jump to the label unconditionally. In particular, the
372 label cannot be placed before a RETURN insn with a filled delay slot.
373
374 ??? There may be a problem with the current implementation. Suppose
375 we start with a bare RETURN insn and call find_end_label. It may set
376 function_return_label just before the RETURN. Suppose the machinery
377 is able to fill the delay slot of the RETURN insn afterwards. Then
378 function_return_label is no longer valid according to the property
379 described above and find_end_label will still return it unmodified.
380 Note that this is probably mitigated by the following observation:
381 once function_return_label is made, it is very likely the target of
382 a jump, so filling the delay slot of the RETURN will be much more
383 difficult.
384 KIND is either simple_return_rtx or ret_rtx, indicating which type of
385 return we're looking for. */
386
387 static rtx_code_label *
388 find_end_label (rtx kind)
389 {
390 rtx_insn *insn;
391 rtx_code_label **plabel;
392
393 if (kind == ret_rtx)
394 plabel = &function_return_label;
395 else
396 {
397 gcc_assert (kind == simple_return_rtx);
398 plabel = &function_simple_return_label;
399 }
400
401 /* If we found one previously, return it. */
402 if (*plabel)
403 return *plabel;
404
405 /* Otherwise, see if there is a label at the end of the function. If there
406 is, it must be that RETURN insns aren't needed, so that is our return
407 label and we don't have to do anything else. */
408
409 insn = get_last_insn ();
410 while (NOTE_P (insn)
411 || (NONJUMP_INSN_P (insn)
412 && (GET_CODE (PATTERN (insn)) == USE
413 || GET_CODE (PATTERN (insn)) == CLOBBER
414 || GET_CODE (PATTERN (insn)) == CLOBBER_HIGH)))
415 insn = PREV_INSN (insn);
416
417 /* When a target threads its epilogue we might already have a
418 suitable return insn. If so put a label before it for the
419 function_return_label. */
420 if (BARRIER_P (insn)
421 && JUMP_P (PREV_INSN (insn))
422 && PATTERN (PREV_INSN (insn)) == kind)
423 {
424 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
425 rtx_code_label *label = gen_label_rtx ();
426 LABEL_NUSES (label) = 0;
427
428 /* Put the label before any USE insns that may precede the RETURN
429 insn. */
430 while (GET_CODE (temp) == USE)
431 temp = PREV_INSN (temp);
432
433 emit_label_after (label, temp);
434 *plabel = label;
435 }
436
437 else if (LABEL_P (insn))
438 *plabel = as_a <rtx_code_label *> (insn);
439 else
440 {
441 rtx_code_label *label = gen_label_rtx ();
442 LABEL_NUSES (label) = 0;
443 /* If the basic block reorder pass moves the return insn to
444 some other place try to locate it again and put our
445 function_return_label there. */
446 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
447 insn = PREV_INSN (insn);
448 if (insn)
449 {
450 insn = PREV_INSN (insn);
451
452 /* Put the label before any USE insns that may precede the
453 RETURN insn. */
454 while (GET_CODE (insn) == USE)
455 insn = PREV_INSN (insn);
456
457 emit_label_after (label, insn);
458 }
459 else
460 {
461 if (targetm.have_epilogue () && ! targetm.have_return ())
462 /* The RETURN insn has its delay slot filled so we cannot
463 emit the label just before it. Since we already have
464 an epilogue and cannot emit a new RETURN, we cannot
465 emit the label at all. */
466 return NULL;
467
468 /* Otherwise, make a new label and emit a RETURN and BARRIER,
469 if needed. */
470 emit_label (label);
471 if (targetm.have_return ())
472 {
473 /* The return we make may have delay slots too. */
474 rtx_insn *pat = targetm.gen_return ();
475 rtx_insn *insn = emit_jump_insn (pat);
476 set_return_jump_label (insn);
477 emit_barrier ();
478 if (num_delay_slots (insn) > 0)
479 obstack_ptr_grow (&unfilled_slots_obstack, insn);
480 }
481 }
482 *plabel = label;
483 }
484
485 /* Show one additional use for this label so it won't go away until
486 we are done. */
487 ++LABEL_NUSES (*plabel);
488
489 return *plabel;
490 }
491
492 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
493 the pattern of INSN with the SEQUENCE.
494
495 Returns the insn containing the SEQUENCE that replaces INSN. */
496
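/* Schematically, if INSN needs two delay slots and LIST holds D1 and D2,
   the result is a new insn on the chain whose pattern is

     (sequence [INSN D1 D2])

   with INSN and the delay insns now living inside the SEQUENCE rather
   than on the main insn chain.  */
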
497 static rtx_insn *
498 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
499 {
500 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
501 rtvec seqv = rtvec_alloc (length + 1);
502 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
503 rtx_insn *seq_insn = make_insn_raw (seq);
504
505 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
506 not have a location, but one of the delayed insns does, we pick up a
507 location from there later. */
508 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
509
510 /* Unlink INSN from the insn chain, so that we can put it into
511 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
512 rtx_insn *after = PREV_INSN (insn);
513 remove_insn (insn);
514 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
515
516 /* Build our SEQUENCE and rebuild the insn chain. */
517 start_sequence ();
518 XVECEXP (seq, 0, 0) = emit_insn (insn);
519
520 unsigned int delay_insns = list.length ();
521 gcc_assert (delay_insns == (unsigned int) length);
522 for (unsigned int i = 0; i < delay_insns; i++)
523 {
524 rtx_insn *tem = list[i];
525 rtx note, next;
526
527 /* Show that this copy of the insn isn't deleted. */
528 tem->set_undeleted ();
529
530 /* Unlink insn from its original place, and re-emit it into
531 the sequence. */
532 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
533 XVECEXP (seq, 0, i + 1) = emit_insn (tem);
534
535 /* The SPARC assembler, for instance, emits a warning when debug info is
536 output into the delay slot. */
537 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
538 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
539 INSN_LOCATION (tem) = 0;
540
541 for (note = REG_NOTES (tem); note; note = next)
542 {
543 next = XEXP (note, 1);
544 switch (REG_NOTE_KIND (note))
545 {
546 case REG_DEAD:
547 /* Remove any REG_DEAD notes because we can't rely on them now
548 that the insn has been moved. */
549 remove_note (tem, note);
550 break;
551
552 case REG_LABEL_OPERAND:
553 case REG_LABEL_TARGET:
554 /* Keep the label reference count up to date. */
555 if (LABEL_P (XEXP (note, 0)))
556 LABEL_NUSES (XEXP (note, 0)) ++;
557 break;
558
559 default:
560 break;
561 }
562 }
563 }
564 end_sequence ();
565
566 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
567 add_insn_after (seq_insn, after, NULL);
568
569 return seq_insn;
570 }
571
572 /* Add INSN to the end of DELAY_LIST. The list must be kept in the order
573 in which the insns are to be executed. */
574
575 static void
576 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
577 {
578 /* If INSN has its block number recorded, clear it since we may
579 be moving the insn to a new block. */
580 clear_hashed_info_for_insn (insn);
581 delay_list->safe_push (insn);
582 }
583
584 /* Delete INSN from the delay slot of the insn that it is in, which may
585 produce an insn with no delay slots. Return the new insn. */
586
587 static rtx_insn *
588 delete_from_delay_slot (rtx_insn *insn)
589 {
590 rtx_insn *trial, *seq_insn, *prev;
591 rtx_sequence *seq;
592 int i;
593 int had_barrier = 0;
594
595 /* We first must find the insn containing the SEQUENCE with INSN in its
596 delay slot. Do this by finding an insn, TRIAL, where
597 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
598
599 for (trial = insn;
600 PREV_INSN (NEXT_INSN (trial)) == trial;
601 trial = NEXT_INSN (trial))
602 ;
603
604 seq_insn = PREV_INSN (NEXT_INSN (trial));
605 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
606
607 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
608 had_barrier = 1;
609
610 /* Create a delay list consisting of all the insns other than the one
611 we are deleting (unless we were the only one). */
612 auto_vec<rtx_insn *, 5> delay_list;
613 if (seq->len () > 2)
614 for (i = 1; i < seq->len (); i++)
615 if (seq->insn (i) != insn)
616 add_to_delay_list (seq->insn (i), &delay_list);
617
618 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
619 list, and rebuild the delay list if non-empty. */
620 prev = PREV_INSN (seq_insn);
621 trial = seq->insn (0);
622 delete_related_insns (seq_insn);
623 add_insn_after (trial, prev, NULL);
624
625 /* If there was a barrier after the old SEQUENCE, re-emit it. */
626 if (had_barrier)
627 emit_barrier_after (trial);
628
629 /* If there are any delay insns, re-emit them. Otherwise clear the
630 annul flag. */
631 if (!delay_list.is_empty ())
632 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
633 else if (JUMP_P (trial))
634 INSN_ANNULLED_BRANCH_P (trial) = 0;
635
636 INSN_FROM_TARGET_P (insn) = 0;
637
638 /* Show we need to fill this insn again. */
639 obstack_ptr_grow (&unfilled_slots_obstack, trial);
640
641 return trial;
642 }
643
644 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
645 the insn that sets CC0 for it and delete it too. */
646
647 static void
648 delete_scheduled_jump (rtx_insn *insn)
649 {
650 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
651 delete the insn that sets the condition code, but it is hard to find it.
652 Since this case is rare anyway, don't bother trying; there would likely
653 be other insns that became dead anyway, which we wouldn't know to
654 delete. */
655
656 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
657 {
658 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
659
660 /* If a reg-note was found, it points to the insn that sets CC0. This
661 insn is in the delay list of some other insn. So delete it from
662 the delay list it was in. */
663 if (note)
664 {
665 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
666 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
667 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
668 }
669 else
670 {
671 /* The insn setting CC0 is our previous insn, but it may be in
672 a delay slot. It will be the last insn in the delay slot, if
673 it is. */
674 rtx_insn *trial = previous_insn (insn);
675 if (NOTE_P (trial))
676 trial = prev_nonnote_insn (trial);
677 if (sets_cc0_p (PATTERN (trial)) != 1
678 || FIND_REG_INC_NOTE (trial, NULL_RTX))
679 return;
680 if (PREV_INSN (NEXT_INSN (trial)) == trial)
681 delete_related_insns (trial);
682 else
683 delete_from_delay_slot (trial);
684 }
685 }
686
687 delete_related_insns (insn);
688 }
689
690 /* Counters for delay-slot filling. */
691
692 #define NUM_REORG_FUNCTIONS 2
693 #define MAX_DELAY_HISTOGRAM 3
694 #define MAX_REORG_PASSES 2
695
696 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
697
698 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
699
700 static int reorg_pass_number;
701
702 static void
703 note_delay_statistics (int slots_filled, int index)
704 {
705 num_insns_needing_delays[index][reorg_pass_number]++;
706 if (slots_filled > MAX_DELAY_HISTOGRAM)
707 slots_filled = MAX_DELAY_HISTOGRAM;
708 num_filled_delays[index][slots_filled][reorg_pass_number]++;
709 }
710
711 /* Optimize the following cases:
712
713 1. When a conditional branch skips over only one instruction,
714 use an annulling branch and put that insn in the delay slot.
715 Use either a branch that annuls when the condition is true or
716 invert the test with a branch that annuls when the condition is
717 false. This saves insns, since otherwise we must copy an insn
718 from the L1 target.
719
720 (orig) (skip) (otherwise)
721 Bcc.n L1 Bcc',a L1 Bcc,a L1'
722 insn insn insn2
723 L1: L1: L1:
724 insn2 insn2 insn2
725 insn3 insn3 L1':
726 insn3
727
728 2. When a conditional branch skips over only one instruction,
729 and after that, it unconditionally branches somewhere else,
730 perform a similar optimization. This saves executing the
731 second branch in the case where the inverted condition is true.
732
733 Bcc.n L1 Bcc',a L2
734 insn insn
735 L1: L1:
736 Bra L2 Bra L2
737
738 INSN is a JUMP_INSN.
739
740 This should be expanded to skip over N insns, where N is the number
741 of delay slots required. */
742
743 static void
744 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
745 {
746 rtx_insn *trial = next_nonnote_insn (insn);
747 rtx_insn *next_trial = next_active_insn (trial);
748 int flags;
749
750 flags = get_jump_flags (insn, JUMP_LABEL (insn));
751
752 if (trial == 0
753 || !NONJUMP_INSN_P (trial)
754 || GET_CODE (PATTERN (trial)) == SEQUENCE
755 || recog_memoized (trial) < 0
756 || (! eligible_for_annul_false (insn, 0, trial, flags)
757 && ! eligible_for_annul_true (insn, 0, trial, flags))
758 || RTX_FRAME_RELATED_P (trial)
759 || can_throw_internal (trial))
760 return;
761
762 /* There are two cases where we are just executing one insn (we assume
763 here that a branch requires only one insn; this should be generalized
764 at some point): Where the branch goes around a single insn or where
765 we have one insn followed by a branch to the same label we branch to.
766 In both of these cases, inverting the jump and annulling the delay
767 slot give the same effect in fewer insns. */
768 if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
769 || (next_trial != 0
770 && simplejump_or_return_p (next_trial)
771 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
772 {
773 if (eligible_for_annul_false (insn, 0, trial, flags))
774 {
775 if (invert_jump (insn, JUMP_LABEL (insn), 1))
776 INSN_FROM_TARGET_P (trial) = 1;
777 else if (! eligible_for_annul_true (insn, 0, trial, flags))
778 return;
779 }
780
781 add_to_delay_list (trial, delay_list);
782 next_trial = next_active_insn (trial);
783 update_block (trial, trial);
784 delete_related_insns (trial);
785
786 /* Also, if we are targeting an unconditional
787 branch, thread our jump to the target of that branch. Don't
788 change this into a RETURN here, because it may not accept what
789 we have in the delay slot. We'll fix this up later. */
790 if (next_trial && simplejump_or_return_p (next_trial))
791 {
792 rtx target_label = JUMP_LABEL (next_trial);
793 if (ANY_RETURN_P (target_label))
794 target_label = find_end_label (target_label);
795
796 if (target_label)
797 {
798 /* Recompute the flags based on TARGET_LABEL since threading
799 the jump to TARGET_LABEL may change the direction of the
800 jump (which may change the circumstances in which the
801 delay slot is nullified). */
802 flags = get_jump_flags (insn, target_label);
803 if (eligible_for_annul_true (insn, 0, trial, flags))
804 reorg_redirect_jump (insn, target_label);
805 }
806 }
807
808 INSN_ANNULLED_BRANCH_P (insn) = 1;
809 }
810 }
811
812 /* Encode and return branch direction and prediction information for
813 INSN assuming it will jump to LABEL.
814
815 Non-conditional branches return no direction information and
816 are predicted as very likely taken. */
817
818 static int
819 get_jump_flags (const rtx_insn *insn, rtx label)
820 {
821 int flags;
822
823 /* get_jump_flags can be passed any insn with delay slots, these may
824 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
825 direction information, and only if they are conditional jumps.
826
827 If LABEL is a return, then there is no way to determine the branch
828 direction. */
829 if (JUMP_P (insn)
830 && (condjump_p (insn) || condjump_in_parallel_p (insn))
831 && !ANY_RETURN_P (label)
832 && INSN_UID (insn) <= max_uid
833 && INSN_UID (label) <= max_uid)
834 flags
835 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
836 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
837 /* No valid direction information. */
838 else
839 flags = 0;
840
841 return flags;
842 }
843
844 /* Return a value indicating how likely this branch is to be taken.
845 If we think the branch is extremely likely to be taken, return 2.
846 If the branch is slightly more likely to be taken, return 1. If the
847 branch is slightly less likely to be taken, return 0, and if the
848 branch is highly unlikely to be taken, return -1. */
849
850 static int
851 mostly_true_jump (rtx jump_insn)
852 {
853 /* If branch probabilities are available, then use that number since it
854 always gives a correct answer. */
855 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
856 if (note)
857 {
858 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
859 .to_reg_br_prob_base ();
860
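      /* With the usual REG_BR_PROB_BASE of 10000, these cut-offs
	 correspond to roughly 90%, 50% and 10% probabilities of the
	 branch being taken.  */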
861 if (prob >= REG_BR_PROB_BASE * 9 / 10)
862 return 2;
863 else if (prob >= REG_BR_PROB_BASE / 2)
864 return 1;
865 else if (prob >= REG_BR_PROB_BASE / 10)
866 return 0;
867 else
868 return -1;
869 }
870
871 /* If there is no note, assume branches are not taken.
872 This should be rare. */
873 return 0;
874 }
875
876 /* Return the condition under which INSN will branch to TARGET. If TARGET
877 is zero, return the condition under which INSN will return. If INSN is
878 an unconditional branch, return const_true_rtx. If INSN isn't a simple
879 type of jump, or it doesn't go to TARGET, return 0. */
880
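/* The two conditional forms recognized below are, schematically,

     (set (pc) (if_then_else COND (label_ref TARGET) (pc)))

   and the reversed

     (set (pc) (if_then_else COND (pc) (label_ref TARGET)))

   where in the second case the condition must be inverted before being
   returned.  */
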
881 static rtx
882 get_branch_condition (const rtx_insn *insn, rtx target)
883 {
884 rtx pat = PATTERN (insn);
885 rtx src;
886
887 if (condjump_in_parallel_p (insn))
888 pat = XVECEXP (pat, 0, 0);
889
890 if (ANY_RETURN_P (pat) && pat == target)
891 return const_true_rtx;
892
893 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
894 return 0;
895
896 src = SET_SRC (pat);
897 if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
898 return const_true_rtx;
899
900 else if (GET_CODE (src) == IF_THEN_ELSE
901 && XEXP (src, 2) == pc_rtx
902 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
903 && label_ref_label (XEXP (src, 1)) == target)
904 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
905 return XEXP (src, 0);
906
907 else if (GET_CODE (src) == IF_THEN_ELSE
908 && XEXP (src, 1) == pc_rtx
909 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
910 && label_ref_label (XEXP (src, 2)) == target)
911 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
912 {
913 enum rtx_code rev;
914 rev = reversed_comparison_code (XEXP (src, 0), insn);
915 if (rev != UNKNOWN)
916 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
917 XEXP (XEXP (src, 0), 0),
918 XEXP (XEXP (src, 0), 1));
919 }
920
921 return 0;
922 }
923
924 /* Return nonzero if CONDITION is more strict than the condition of
925 INSN, i.e., if INSN will always branch if CONDITION is true. */
926
927 static int
928 condition_dominates_p (rtx condition, const rtx_insn *insn)
929 {
930 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
931 enum rtx_code code = GET_CODE (condition);
932 enum rtx_code other_code;
933
934 if (rtx_equal_p (condition, other_condition)
935 || other_condition == const_true_rtx)
936 return 1;
937
938 else if (condition == const_true_rtx || other_condition == 0)
939 return 0;
940
941 other_code = GET_CODE (other_condition);
942 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
943 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
944 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
945 return 0;
946
947 return comparison_dominates_p (code, other_code);
948 }
949
950 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
951 any insns already in the delay slot of JUMP. */
952
953 static int
954 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
955 {
956 int flags, i;
957 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
958
959 /* Make sure all the delay slots of this jump would still
960 be valid after threading the jump. If they are still
961 valid, then return nonzero. */
962
963 flags = get_jump_flags (jump, newlabel);
964 for (i = 1; i < pat->len (); i++)
965 if (! (
966 #if ANNUL_IFFALSE_SLOTS
967 (INSN_ANNULLED_BRANCH_P (jump)
968 && INSN_FROM_TARGET_P (pat->insn (i)))
969 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
970 #endif
971 #if ANNUL_IFTRUE_SLOTS
972 (INSN_ANNULLED_BRANCH_P (jump)
973 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
974 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
975 #endif
976 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
977 break;
978
979 return (i == pat->len ());
980 }
981
982 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
983 any insns we wish to place in the delay slot of JUMP. */
984
985 static int
986 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
987 const vec<rtx_insn *> &delay_list)
988 {
989 /* Make sure all the insns in DELAY_LIST would still be
990 valid after threading the jump. If they are still
991 valid, then return nonzero. */
992
993 int flags = get_jump_flags (jump, newlabel);
994 unsigned int delay_insns = delay_list.length ();
995 unsigned int i = 0;
996 for (; i < delay_insns; i++)
997 if (! (
998 #if ANNUL_IFFALSE_SLOTS
999 (INSN_ANNULLED_BRANCH_P (jump)
1000 && INSN_FROM_TARGET_P (delay_list[i]))
1001 ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
1002 #endif
1003 #if ANNUL_IFTRUE_SLOTS
1004 (INSN_ANNULLED_BRANCH_P (jump)
1005 && ! INSN_FROM_TARGET_P (delay_list[i]))
1006 ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
1007 #endif
1008 eligible_for_delay (jump, i, delay_list[i], flags)))
1009 break;
1010
1011 return i == delay_insns;
1012 }
1013
1014 /* DELAY_LIST is a list of insns that have already been placed into delay
1015 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1016 If not, return 0; otherwise return 1. */
1017
1018 static int
1019 check_annul_list_true_false (int annul_true_p,
1020 const vec<rtx_insn *> &delay_list)
1021 {
1022 rtx_insn *trial;
1023 unsigned int i;
1024 FOR_EACH_VEC_ELT (delay_list, i, trial)
1025 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1026 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1027 return 0;
1028
1029 return 1;
1030 }
1031
1032 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1033 the condition tested by INSN is CONDITION and the resources shown in
1034 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1035 from SEQ's delay list, in addition to whatever insns it may execute
1036 (in DELAY_LIST). SETS and NEEDED denote resources already set and
1037 needed while searching for delay slot insns. If the insns can be taken,
1038 they are appended to DELAY_LIST; otherwise it is left unchanged.
1039
1040 SLOTS_TO_FILL is the total number of slots required by INSN, and
1041 PSLOTS_FILLED points to the number filled so far (also the number of
1042 insns in DELAY_LIST). It is updated with the number that have been
1043 filled from the SEQUENCE, if any.
1044
1045 PANNUL_P points to a nonzero value if we already know that we need
1046 to annul INSN. If this routine determines that annulling is needed,
1047 it may set that value nonzero.
1048
1049 PNEW_THREAD points to a location that is to receive the place at which
1050 execution should continue. */
1051
1052 static void
1053 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1054 vec<rtx_insn *> *delay_list,
1055 struct resources *sets,
1056 struct resources *needed,
1057 struct resources *other_needed,
1058 int slots_to_fill, int *pslots_filled,
1059 int *pannul_p, rtx *pnew_thread)
1060 {
1061 int slots_remaining = slots_to_fill - *pslots_filled;
1062 int total_slots_filled = *pslots_filled;
1063 auto_vec<rtx_insn *, 5> new_delay_list;
1064 int must_annul = *pannul_p;
1065 int used_annul = 0;
1066 int i;
1067 struct resources cc_set;
1068 rtx_insn **redundant;
1069
1070 /* We can't do anything if there are more delay slots in SEQ than we
1071 can handle, or if we don't know that it will be a taken branch.
1072 We know that it will be a taken branch if it is either an unconditional
1073 branch or a conditional branch with a stricter branch condition.
1074
1075 Also, exit if the branch has more than one set, since then it is computing
1076 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1077 ??? It may be possible to move other sets into INSN in addition to
1078 moving the instructions in the delay slots.
1079
1080 We cannot steal the delay list if one of the instructions in the
1081 current delay_list modifies the condition codes and the jump in the
1082 sequence is a conditional jump. We cannot do this because modifying
1083 the condition codes would change which way the jump in the
1084 sequence goes. */
1085
1086 CLEAR_RESOURCE (&cc_set);
1087
1088 rtx_insn *trial;
1089 FOR_EACH_VEC_ELT (*delay_list, i, trial)
1090 {
1091 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1092 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1093 return;
1094 }
1095
1096 if (XVECLEN (seq, 0) - 1 > slots_remaining
1097 || ! condition_dominates_p (condition, seq->insn (0))
1098 || ! single_set (seq->insn (0)))
1099 return;
1100
1101 /* On some targets, branches with delay slots can have a limited
1102 displacement. Give the back end a chance to tell us we can't do
1103 this. */
1104 if (! targetm.can_follow_jump (insn, seq->insn (0)))
1105 return;
1106
1107 redundant = XALLOCAVEC (rtx_insn *, XVECLEN (seq, 0));
1108 for (i = 1; i < seq->len (); i++)
1109 {
1110 rtx_insn *trial = seq->insn (i);
1111 int flags;
1112
1113 if (insn_references_resource_p (trial, sets, false)
1114 || insn_sets_resource_p (trial, needed, false)
1115 || insn_sets_resource_p (trial, sets, false)
1116 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1117 delay list. */
1118 || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1119 /* If TRIAL is from the fallthrough code of an annulled branch insn
1120 in SEQ, we cannot use it. */
1121 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1122 && ! INSN_FROM_TARGET_P (trial)))
1123 return;
1124
1125 /* If this insn was already done (usually in a previous delay slot),
1126 pretend we put it in our delay slot. */
1127 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1128 if (redundant[i])
1129 continue;
1130
1131 /* We will end up re-vectoring this branch, so compute flags
1132 based on jumping to the new label. */
1133 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1134
1135 if (! must_annul
1136 && ((condition == const_true_rtx
1137 || (! insn_sets_resource_p (trial, other_needed, false)
1138 && ! may_trap_or_fault_p (PATTERN (trial)))))
1139 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1140 : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1141 && (must_annul = 1,
1142 check_annul_list_true_false (0, *delay_list)
1143 && check_annul_list_true_false (0, new_delay_list)
1144 && eligible_for_annul_false (insn, total_slots_filled,
1145 trial, flags)))
1146 {
1147 if (must_annul)
1148 {
1149 /* Frame related instructions cannot go into annulled delay
1150 slots, it messes up the dwarf info. */
1151 if (RTX_FRAME_RELATED_P (trial))
1152 return;
1153 used_annul = 1;
1154 }
1155 rtx_insn *temp = copy_delay_slot_insn (trial);
1156 INSN_FROM_TARGET_P (temp) = 1;
1157 add_to_delay_list (temp, &new_delay_list);
1158 total_slots_filled++;
1159
1160 if (--slots_remaining == 0)
1161 break;
1162 }
1163 else
1164 return;
1165 }
1166
1167 /* Record the effect of the instructions that were redundant and which
1168 we therefore decided not to copy. */
1169 for (i = 1; i < seq->len (); i++)
1170 if (redundant[i])
1171 {
1172 fix_reg_dead_note (redundant[i], insn);
1173 update_block (seq->insn (i), insn);
1174 }
1175
1176 /* Show the place to which we will be branching. */
1177 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1178
1179 /* Add any new insns to the delay list and update the count of the
1180 number of slots filled. */
1181 *pslots_filled = total_slots_filled;
1182 if (used_annul)
1183 *pannul_p = 1;
1184
1185 rtx_insn *temp;
1186 FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1187 add_to_delay_list (temp, delay_list);
1188 }
1189
1190 /* Similar to steal_delay_list_from_target except that SEQ is on the
1191 fallthrough path of INSN. Here we only do something if the delay insn
1192 of SEQ is an unconditional branch. In that case we steal its delay slot
1193 for INSN since unconditional branches are much easier to fill. */
1194
1195 static void
1196 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1197 rtx_sequence *seq,
1198 vec<rtx_insn *> *delay_list,
1199 struct resources *sets,
1200 struct resources *needed,
1201 struct resources *other_needed,
1202 int slots_to_fill, int *pslots_filled,
1203 int *pannul_p)
1204 {
1205 int i;
1206 int flags;
1207 int must_annul = *pannul_p;
1208 int used_annul = 0;
1209
1210 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1211
1212 /* We can't do anything if SEQ's delay insn isn't an
1213 unconditional branch. */
1214
1215 if (! simplejump_or_return_p (seq->insn (0)))
1216 return;
1217
1218 for (i = 1; i < seq->len (); i++)
1219 {
1220 rtx_insn *trial = seq->insn (i);
1221 rtx_insn *prior_insn;
1222
1223 /* If TRIAL sets CC0, stealing it will move it too far from the use
1224 of CC0. */
1225 if (insn_references_resource_p (trial, sets, false)
1226 || insn_sets_resource_p (trial, needed, false)
1227 || insn_sets_resource_p (trial, sets, false)
1228 || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1229
1230 break;
1231
1232 /* If this insn was already done, we don't need it. */
1233 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
1234 {
1235 fix_reg_dead_note (prior_insn, insn);
1236 update_block (trial, insn);
1237 delete_from_delay_slot (trial);
1238 continue;
1239 }
1240
1241 if (! must_annul
1242 && ((condition == const_true_rtx
1243 || (! insn_sets_resource_p (trial, other_needed, false)
1244 && ! may_trap_or_fault_p (PATTERN (trial)))))
1245 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1246 : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1247 check_annul_list_true_false (1, *delay_list)
1248 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1249 {
1250 if (must_annul)
1251 used_annul = 1;
1252 delete_from_delay_slot (trial);
1253 add_to_delay_list (trial, delay_list);
1254
1255 if (++(*pslots_filled) == slots_to_fill)
1256 break;
1257 }
1258 else
1259 break;
1260 }
1261
1262 if (used_annul)
1263 *pannul_p = 1;
1264 }
1265
1266 /* Try merging insns starting at THREAD which match exactly the insns in
1267 INSN's delay list.
1268
1269 If all insns were matched and the insn was previously annulling, the
1270 annul bit will be cleared.
1271
1272 For each insn that is merged, if the branch is or will be non-annulling,
1273 we delete the merged insn. */
1274
1275 static void
1276 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1277 {
1278 rtx_insn *trial, *next_trial;
1279 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1280 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1281 int slot_number = 1;
1282 int num_slots = XVECLEN (PATTERN (insn), 0);
1283 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1284 struct resources set, needed, modified;
1285 auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1286 int flags;
1287
1288 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1289
1290 CLEAR_RESOURCE (&needed);
1291 CLEAR_RESOURCE (&set);
1292
1293 /* If this is not an annulling branch, take into account anything needed in
1294 INSN's delay slot. This prevents two increments from being incorrectly
1295 folded into one. If we are annulling, this would be the correct
1296 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1297 will essentially disable this optimization. This method is somewhat of
1298 a kludge, but I don't see a better way.) */
1299 if (! annul_p)
1300 for (int i = 1; i < num_slots; i++)
1301 if (XVECEXP (PATTERN (insn), 0, i))
1302 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1303 true);
1304
1305 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1306 {
1307 rtx pat = PATTERN (trial);
1308 rtx oldtrial = trial;
1309
1310 next_trial = next_nonnote_insn (trial);
1311
1312 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1313 if (NONJUMP_INSN_P (trial)
1314 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
1315 || GET_CODE (pat) == CLOBBER_HIGH))
1316 continue;
1317
1318 if (GET_CODE (next_to_match) == GET_CODE (trial)
1319 /* We can't share an insn that sets cc0. */
1320 && (!HAVE_cc0 || ! sets_cc0_p (pat))
1321 && ! insn_references_resource_p (trial, &set, true)
1322 && ! insn_sets_resource_p (trial, &set, true)
1323 && ! insn_sets_resource_p (trial, &needed, true)
1324 && (trial = try_split (pat, trial, 0)) != 0
1325 /* Update next_trial, in case try_split succeeded. */
1326 && (next_trial = next_nonnote_insn (trial))
1327 /* Likewise THREAD. */
1328 && (thread = oldtrial == thread ? trial : thread)
1329 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1330 /* We have to test this condition because the annul condition may
1331 differ from (and be less restrictive than) the non-annulling one. */
1332 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1333 {
1334
1335 if (! annul_p)
1336 {
1337 update_block (trial, thread);
1338 if (trial == thread)
1339 thread = next_active_insn (thread);
1340
1341 delete_related_insns (trial);
1342 INSN_FROM_TARGET_P (next_to_match) = 0;
1343 }
1344 else
1345 merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1346
1347 if (++slot_number == num_slots)
1348 break;
1349
1350 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1351 }
1352
1353 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1354 mark_referenced_resources (trial, &needed, true);
1355 }
1356
1357 /* See if we stopped on a filled insn. If we did, try to see if its
1358 delay slots match. */
1359 if (slot_number != num_slots
1360 && trial && NONJUMP_INSN_P (trial)
1361 && GET_CODE (PATTERN (trial)) == SEQUENCE
1362 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1363 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1364 {
1365 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1366 rtx filled_insn = XVECEXP (pat, 0, 0);
1367
1368 /* Account for resources set/needed by the filled insn. */
1369 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1370 mark_referenced_resources (filled_insn, &needed, true);
1371
1372 for (int i = 1; i < pat->len (); i++)
1373 {
1374 rtx_insn *dtrial = pat->insn (i);
1375
1376 CLEAR_RESOURCE (&modified);
1377 /* Account for resources set by the insn following NEXT_TO_MATCH
1378 inside INSN's delay list. */
1379 for (int j = 1; slot_number + j < num_slots; j++)
1380 mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1381 &modified, 0, MARK_SRC_DEST_CALL);
1382 /* Account for resources set by the insn before DTRIAL and inside
1383 TRIAL's delay list. */
1384 for (int j = 1; j < i; j++)
1385 mark_set_resources (XVECEXP (pat, 0, j),
1386 &modified, 0, MARK_SRC_DEST_CALL);
1387 if (! insn_references_resource_p (dtrial, &set, true)
1388 && ! insn_sets_resource_p (dtrial, &set, true)
1389 && ! insn_sets_resource_p (dtrial, &needed, true)
1390 && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1391 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1392 /* Check that DTRIAL and NEXT_TO_MATCH do not reference a
1393 resource modified between them (only DTRIAL is checked because
1394 NEXT_TO_MATCH and DTRIAL have to be equal in order to reach
1395 this point). */
1396 && ! insn_references_resource_p (dtrial, &modified, true)
1397 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1398 {
1399 if (! annul_p)
1400 {
1401 rtx_insn *new_rtx;
1402
1403 update_block (dtrial, thread);
1404 new_rtx = delete_from_delay_slot (dtrial);
1405 if (thread->deleted ())
1406 thread = new_rtx;
1407 INSN_FROM_TARGET_P (next_to_match) = 0;
1408 }
1409 else
1410 merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1411 true));
1412
1413 if (++slot_number == num_slots)
1414 break;
1415
1416 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1417 }
1418 else
1419 {
1420 /* Keep track of the set/referenced resources for the delay
1421 slots of any trial insns we encounter. */
1422 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1423 mark_referenced_resources (dtrial, &needed, true);
1424 }
1425 }
1426 }
1427
1428 /* If all insns in the delay slot have been matched and we were previously
1429 annulling the branch, we need not do so any more. In that case delete all
1430 the merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1431 the delay list so that we know that it isn't only being used at the
1432 target. */
1433 if (slot_number == num_slots && annul_p)
1434 {
1435 unsigned int len = merged_insns.length ();
1436 for (unsigned int i = len - 1; i < len; i--)
1437 if (merged_insns[i].second)
1438 {
1439 update_block (merged_insns[i].first, thread);
1440 rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1441 if (thread->deleted ())
1442 thread = new_rtx;
1443 }
1444 else
1445 {
1446 update_block (merged_insns[i].first, thread);
1447 delete_related_insns (merged_insns[i].first);
1448 }
1449
1450 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1451
1452 for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1453 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1454 }
1455 }
1456
1457 /* See if INSN is redundant with an insn in front of TARGET. Often this
1458 is called when INSN is a candidate for a delay slot of TARGET.
1459 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1460 of INSN. Often INSN will be redundant with an insn in a delay slot of
1461 some previous insn. This happens when we have a series of branches to the
1462 same label; in that case the first insn at the target might want to go
1463 into each of the delay slots.
1464
1465 If we are not careful, this routine can take up a significant fraction
1466 of the total compilation time (4%), but only wins rarely. Hence we
1467 speed this routine up by making two passes. The first pass goes back
1468 until it hits a label and sees if it finds an insn with an identical
1469 pattern. Only in this (relatively rare) event does it check for
1470 data conflicts.
1471
1472 We do not split insns we encounter. This could cause us not to find a
1473 redundant insn, but the cost of splitting seems greater than the possible
1474 gain in rare cases. */
1475
1476 static rtx_insn *
1477 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1478 {
1479 rtx target_main = target;
1480 rtx ipat = PATTERN (insn);
1481 rtx_insn *trial;
1482 rtx pat;
1483 struct resources needed, set;
1484 int i;
1485 unsigned insns_to_search;
1486
1487 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1488 are allowed to not actually assign to such a register. */
1489 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1490 return 0;
1491
1492 /* Scan backwards looking for a match. */
1493 for (trial = PREV_INSN (target),
1494 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1495 trial && insns_to_search > 0;
1496 trial = PREV_INSN (trial))
1497 {
1498 /* (use (insn))s can come immediately after a barrier if the
1499 label that used to precede them has been deleted as dead.
1500 See delete_related_insns. */
1501 if (LABEL_P (trial) || BARRIER_P (trial))
1502 return 0;
1503
1504 if (!INSN_P (trial))
1505 continue;
1506 --insns_to_search;
1507
1508 pat = PATTERN (trial);
1509 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
1510 || GET_CODE (pat) == CLOBBER_HIGH)
1511 continue;
1512
1513 if (GET_CODE (trial) == DEBUG_INSN)
1514 continue;
1515
1516 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1517 {
1518 /* Stop for a CALL and its delay slots because it is difficult to
1519 track its resource needs correctly. */
1520 if (CALL_P (seq->element (0)))
1521 return 0;
1522
1523 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1524 slots because it is difficult to track its resource needs
1525 correctly. */
1526
1527 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1528 return 0;
1529
1530 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1531 return 0;
1532
1533 /* See if any of the insns in the delay slot match, updating
1534 resource requirements as we go. */
1535 for (i = seq->len () - 1; i > 0; i--)
1536 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1537 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1538 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1539 break;
1540
1541 /* If found a match, exit this loop early. */
1542 if (i > 0)
1543 break;
1544 }
1545
1546 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1547 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1548 break;
1549 }
1550
1551 /* If we didn't find an insn that matches, return 0. */
1552 if (trial == 0)
1553 return 0;
1554
1555 /* See what resources this insn sets and needs. If they overlap, or
1556 if this insn references CC0, it can't be redundant. */
1557
1558 CLEAR_RESOURCE (&needed);
1559 CLEAR_RESOURCE (&set);
1560 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1561 mark_referenced_resources (insn, &needed, true);
1562
1563 /* If TARGET is a SEQUENCE, get the main insn. */
1564 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1565 target_main = XVECEXP (PATTERN (target), 0, 0);
1566
1567 if (resource_conflicts_p (&needed, &set)
1568 || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1569 /* The insn requiring the delay may not set anything needed or set by
1570 INSN. */
1571 || insn_sets_resource_p (target_main, &needed, true)
1572 || insn_sets_resource_p (target_main, &set, true))
1573 return 0;
1574
1575 /* Insns we pass may not set either NEEDED or SET, so merge them for
1576 simpler tests. */
1577 needed.memory |= set.memory;
1578 IOR_HARD_REG_SET (needed.regs, set.regs);
1579
1580 /* This insn isn't redundant if it conflicts with an insn that either is
1581 or will be in a delay slot of TARGET. */
1582
1583 unsigned int j;
1584 rtx_insn *temp;
1585 FOR_EACH_VEC_ELT (delay_list, j, temp)
1586 if (insn_sets_resource_p (temp, &needed, true))
1587 return 0;
1588
1589 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1590 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1591 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1592 true))
1593 return 0;
1594
1595 /* Scan backwards until we reach a label or an insn that uses something
1596 INSN sets or sets something INSN uses or sets. */
1597
1598 for (trial = PREV_INSN (target),
1599 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1600 trial && !LABEL_P (trial) && insns_to_search > 0;
1601 trial = PREV_INSN (trial))
1602 {
1603 if (!INSN_P (trial))
1604 continue;
1605 --insns_to_search;
1606
1607 pat = PATTERN (trial);
1608 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
1609 || GET_CODE (pat) == CLOBBER_HIGH)
1610 continue;
1611
1612 if (GET_CODE (trial) == DEBUG_INSN)
1613 continue;
1614
1615 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1616 {
1617 bool annul_p = false;
1618 rtx_insn *control = seq->insn (0);
1619
1620 /* If this is a CALL_INSN and its delay slots, it is hard to track
1621 the resource needs properly, so give up. */
1622 if (CALL_P (control))
1623 return 0;
1624
1625 /* If this is an INSN or JUMP_INSN with delayed effects, it
1626 is hard to track the resource needs properly, so give up. */
1627
1628 if (INSN_SETS_ARE_DELAYED (control))
1629 return 0;
1630
1631 if (INSN_REFERENCES_ARE_DELAYED (control))
1632 return 0;
1633
1634 if (JUMP_P (control))
1635 annul_p = INSN_ANNULLED_BRANCH_P (control);
1636
1637 /* See if any of the insns in the delay slot match, updating
1638 resource requirements as we go. */
1639 for (i = seq->len () - 1; i > 0; i--)
1640 {
1641 rtx_insn *candidate = seq->insn (i);
1642
1643 /* If an insn will be annulled if the branch is false, it isn't
1644 considered as a possible duplicate insn. */
1645 if (rtx_equal_p (PATTERN (candidate), ipat)
1646 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1647 {
1648 /* Show that this insn will be used in the sequel. */
1649 INSN_FROM_TARGET_P (candidate) = 0;
1650 return candidate;
1651 }
1652
1653 /* Unless this is an annulled insn from the target of a branch,
1654 we must stop if it sets anything needed or set by INSN. */
1655 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1656 && insn_sets_resource_p (candidate, &needed, true))
1657 return 0;
1658 }
1659
1660 /* If the insn requiring the delay slot conflicts with INSN, we
1661 must stop. */
1662 if (insn_sets_resource_p (control, &needed, true))
1663 return 0;
1664 }
1665 else
1666 {
1667 /* See if TRIAL is the same as INSN. */
1668 pat = PATTERN (trial);
1669 if (rtx_equal_p (pat, ipat))
1670 return trial;
1671
1672 /* Can't go any further if TRIAL conflicts with INSN. */
1673 if (insn_sets_resource_p (trial, &needed, true))
1674 return 0;
1675 }
1676 }
1677
1678 return 0;
1679 }
1680
1681 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1682 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1683 is nonzero, we are allowed to fall into this thread; otherwise, we are
1684 not.
1685
1686 If LABEL is used more than once or we pass a label other than LABEL before
1687 finding an active insn, we do not own this thread. */
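/* For illustration only (schematic, not taken from any port): we own the
   thread starting at L1 below, because L1 is the branch target, is used by
   exactly one jump, and is preceded by a BARRIER, so the only way to reach
   the insns after L1 is through our branch:

	b     L1
	...
	(barrier)
   L1:	insn-a			; may be stolen for our delay slot
	insn-b

   If another jump also used L1, or if control could fall into L1, stealing
   insn-a would corrupt those other paths, so we would not own the thread.  */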
1688
1689 static int
1690 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1691 {
1692 rtx_insn *active_insn;
1693 rtx_insn *insn;
1694
1695 /* We don't own the function end. */
1696 if (thread == 0 || ANY_RETURN_P (thread))
1697 return 0;
1698
1699 /* We have a non-NULL insn. */
1700 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1701
1702 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1703 active_insn = next_active_insn (PREV_INSN (thread_insn));
1704
1705 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1706 if (LABEL_P (insn)
1707 && (insn != label || LABEL_NUSES (insn) != 1))
1708 return 0;
1709
1710 if (allow_fallthrough)
1711 return 1;
1712
1713 /* Ensure that we reach a BARRIER before any insn or label. */
1714 for (insn = prev_nonnote_insn (thread_insn);
1715 insn == 0 || !BARRIER_P (insn);
1716 insn = prev_nonnote_insn (insn))
1717 if (insn == 0
1718 || LABEL_P (insn)
1719 || (NONJUMP_INSN_P (insn)
1720 && GET_CODE (PATTERN (insn)) != USE
1721 && GET_CODE (PATTERN (insn)) != CLOBBER
1722 && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH))
1723 return 0;
1724
1725 return 1;
1726 }
1727
1728 /* Called when INSN is being moved from a location near the target of a jump.
1729 We leave a marker of the form (use (INSN)) immediately in front of WHERE
1730 for mark_target_live_regs. These markers will be deleted at the end.
1731
1732 We used to try to update the live status of registers if WHERE is at
1733 the start of a basic block, but that can't work since we may remove a
1734 BARRIER in relax_delay_slots. */
1735
1736 static void
1737 update_block (rtx_insn *insn, rtx_insn *where)
1738 {
1739 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1740
1741 /* INSN might be making a value live in a block where it didn't use to
1742 be. So recompute liveness information for this block. */
1743 incr_ticks_for_insn (insn);
1744 }
1745
1746 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1747 the basic block containing the jump. */
1748
1749 static int
1750 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1751 {
1752 incr_ticks_for_insn (jump);
1753 return redirect_jump (jump, nlabel, 1);
1754 }
1755
1756 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1757 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1758 that reference values used in INSN. If we find one, then we move the
1759 REG_DEAD note to INSN.
1760
1761 This is needed to handle the case where a later insn (after INSN) has a
1762 REG_DEAD note for a register used by INSN, and this later insn subsequently
1763 gets moved before a CODE_LABEL because it is a redundant insn. In this
1764 case, mark_target_live_regs may be confused into thinking the register
1765 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
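/* Concrete sketch (hypothetical registers):

	INSN:		r3 = r5 + 1	; being moved into the delay slot
	...
	P:		r7 = r5		; carries REG_DEAD (r5)
	DELAYED_INSN:	branch ...

   The REG_DEAD (r5) note migrates from P to INSN: after the move, INSN is
   the last user of r5, so the death must be recorded there rather than at a
   point that may later end up just before a CODE_LABEL.  */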
1766
1767 static void
1768 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1769 {
1770 rtx link, next;
1771 rtx_insn *p;
1772
1773 for (p = next_nonnote_insn (insn); p != delayed_insn;
1774 p = next_nonnote_insn (p))
1775 for (link = REG_NOTES (p); link; link = next)
1776 {
1777 next = XEXP (link, 1);
1778
1779 if (REG_NOTE_KIND (link) != REG_DEAD
1780 || !REG_P (XEXP (link, 0)))
1781 continue;
1782
1783 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1784 {
1785 /* Move the REG_DEAD note from P to INSN. */
1786 remove_note (p, link);
1787 XEXP (link, 1) = REG_NOTES (insn);
1788 REG_NOTES (insn) = link;
1789 }
1790 }
1791 }
1792
1793 /* Called when an insn redundant with start_insn is deleted. If there
1794 is a REG_DEAD note for the target of start_insn between start_insn
1795 and stop_insn, then the REG_DEAD note needs to be deleted since the
1796 value no longer dies there.
1797
1798 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1799 confused into thinking the register is dead. */
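/* Concrete sketch (hypothetical registers):

	START_INSN:	r4 = r2 + r3
	P:		...		; carries REG_DEAD (r4)
	STOP_INSN:	insn needing the delay slot

   If a later copy of "r4 = r2 + r3" is deleted as redundant with
   START_INSN, the value computed by START_INSN must now survive past P, so
   the REG_DEAD (r4) note on P has become stale and is removed.  */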
1800
1801 static void
1802 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1803 {
1804 rtx link, next;
1805 rtx_insn *p;
1806
1807 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1808 p = next_nonnote_insn (p))
1809 for (link = REG_NOTES (p); link; link = next)
1810 {
1811 next = XEXP (link, 1);
1812
1813 if (REG_NOTE_KIND (link) != REG_DEAD
1814 || !REG_P (XEXP (link, 0)))
1815 continue;
1816
1817 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1818 {
1819 remove_note (p, link);
1820 return;
1821 }
1822 }
1823 }
1824
1825 /* Delete any REG_UNUSED notes that exist on INSN but not on OTHER_INSN.
1826
1827 This handles the case of udivmodXi4 instructions which optimize their
1828 output depending on whether any REG_UNUSED notes are present. We must
1829 make sure that INSN calculates as many results as OTHER_INSN does. */
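/* Concrete sketch: suppose OTHER_INSN computes both a quotient in r2 and a
   remainder in r3 and carries no REG_UNUSED notes, while INSN is a copy of
   it carrying REG_UNUSED (r3).  Keeping that note could let INSN be emitted
   in a quotient-only form, even though the code after OTHER_INSN (which now
   relies on INSN's results) still expects the remainder in r3; dropping the
   note keeps both results computed.  */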
1830
1831 static void
1832 update_reg_unused_notes (rtx_insn *insn, rtx other_insn)
1833 {
1834 rtx link, next;
1835
1836 for (link = REG_NOTES (insn); link; link = next)
1837 {
1838 next = XEXP (link, 1);
1839
1840 if (REG_NOTE_KIND (link) != REG_UNUSED
1841 || !REG_P (XEXP (link, 0)))
1842 continue;
1843
1844 if (!find_regno_note (other_insn, REG_UNUSED, REGNO (XEXP (link, 0))))
1845 remove_note (insn, link);
1846 }
1847 }
1848
1849 static vec <rtx> sibling_labels;
1850
1851 /* Return the label before INSN, or put a new label there. If SIBLING is
1852 non-zero, it is another label associated with the new label (if any),
1853 typically the former target of the jump that will be redirected to
1854 the new label. */
1855
1856 static rtx_insn *
1857 get_label_before (rtx_insn *insn, rtx sibling)
1858 {
1859 rtx_insn *label;
1860
1861 /* Find an existing label at this point
1862 or make a new one if there is none. */
1863 label = prev_nonnote_insn (insn);
1864
1865 if (label == 0 || !LABEL_P (label))
1866 {
1867 rtx_insn *prev = PREV_INSN (insn);
1868
1869 label = gen_label_rtx ();
1870 emit_label_after (label, prev);
1871 LABEL_NUSES (label) = 0;
1872 if (sibling)
1873 {
1874 sibling_labels.safe_push (label);
1875 sibling_labels.safe_push (sibling);
1876 }
1877 }
1878 return label;
1879 }
1880
1881 /* Scan a function looking for insns that need a delay slot and find insns to
1882 put into the delay slot.
1883
1884 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1885 as calls). We do these first since we don't want jump insns (that are
1886 easier to fill) to get the only insns that could be used for non-jump insns.
1887 When it is zero, only try to fill JUMP_INSNs.
1888
1889 When slots are filled in this manner, the insns (including the
1890 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1891 it is possible to tell whether a delay slot has really been filled
1892 or not. `final' knows how to deal with this, by communicating
1893 through FINAL_SEQUENCE. */
1894
1895 static void
1896 fill_simple_delay_slots (int non_jumps_p)
1897 {
1898 rtx_insn *insn, *trial, *next_trial;
1899 rtx pat;
1900 int i;
1901 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1902 struct resources needed, set;
1903 int slots_to_fill, slots_filled;
1904 auto_vec<rtx_insn *, 5> delay_list;
1905
1906 for (i = 0; i < num_unfilled_slots; i++)
1907 {
1908 int flags;
1909 /* Get the next insn to fill. If it has already had any slots assigned,
1910 we can't do anything with it. Maybe we'll improve this later. */
1911
1912 insn = unfilled_slots_base[i];
1913 if (insn == 0
1914 || insn->deleted ()
1915 || (NONJUMP_INSN_P (insn)
1916 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1917 || (JUMP_P (insn) && non_jumps_p)
1918 || (!JUMP_P (insn) && ! non_jumps_p))
1919 continue;
1920
1921 /* It may have been that this insn used to need delay slots, but
1922 now doesn't; ignore in that case. This can happen, for example,
1923 on the HP PA RISC, where the number of delay slots depends on
1924 what insns are nearby. */
1925 slots_to_fill = num_delay_slots (insn);
1926
1927 /* Some machine descriptions have defined instructions to have
1928 delay slots only in certain circumstances which may depend on
1929 nearby insns (which change due to reorg's actions).
1930
1931 For example, the PA port normally has delay slots for unconditional
1932 jumps.
1933
1934 However, the PA port claims such jumps do not have a delay slot
1935 if they are immediate successors of certain CALL_INSNs. This
1936 allows the port to favor filling the delay slot of the call with
1937 the unconditional jump. */
1938 if (slots_to_fill == 0)
1939 continue;
1940
1941 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1942 says how many. After initialization, first try optimizing
1943
1944 call _foo call _foo
1945 nop add %o7,.-L1,%o7
1946 b,a L1
1947 nop
1948
1949 If this case applies, the delay slot of the call is filled with
1950 the unconditional jump. This is done first to avoid having the
1951 delay slot of the call filled in the backward scan. Also, since
1952 the unconditional jump is likely to also have a delay slot, that
1953 insn must exist when it is subsequently scanned.
1954
1955 This is tried on each insn with delay slots as some machines
1956 have insns which perform calls, but are not represented as
1957 CALL_INSNs. */
1958
1959 slots_filled = 0;
1960 delay_list.truncate (0);
1961
1962 if (JUMP_P (insn))
1963 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1964 else
1965 flags = get_jump_flags (insn, NULL_RTX);
1966
1967 if ((trial = next_active_insn (insn))
1968 && JUMP_P (trial)
1969 && simplejump_p (trial)
1970 && eligible_for_delay (insn, slots_filled, trial, flags)
1971 && no_labels_between_p (insn, trial)
1972 && ! can_throw_internal (trial))
1973 {
1974 rtx_insn **tmp;
1975 slots_filled++;
1976 add_to_delay_list (trial, &delay_list);
1977
1978 /* TRIAL may have had its delay slot filled, then unfilled. When
1979 the delay slot is unfilled, TRIAL is placed back on the unfilled
1980 slots obstack. Unfortunately, it is placed on the end of the
1981 obstack, not in its original location. Therefore, we must search
1982 from entry i + 1 to the end of the unfilled slots obstack to
1983 try and find TRIAL. */
1984 tmp = &unfilled_slots_base[i + 1];
1985 while (*tmp != trial && tmp != unfilled_slots_next)
1986 tmp++;
1987
1988 /* Remove the unconditional jump from consideration for delay slot
1989 filling and unthread it. */
1990 if (*tmp == trial)
1991 *tmp = 0;
1992 {
1993 rtx_insn *next = NEXT_INSN (trial);
1994 rtx_insn *prev = PREV_INSN (trial);
1995 if (prev)
1996 SET_NEXT_INSN (prev) = next;
1997 if (next)
1998 SET_PREV_INSN (next) = prev;
1999 }
2000 }
2001
2002 /* Now, scan backwards from the insn to search for a potential
2003 delay-slot candidate. Stop searching when a label or jump is hit.
2004
2005 For each candidate, if it is to go into the delay slot (moved
2006 forward in execution sequence), it must not need or set any resources
2007 that were set by later insns and must not set any resources that
2008 are needed for those insns.
2009
2010 The delay slot insn itself sets resources unless it is a call
2011 (in which case the called routine, not the insn itself, is doing
2012 the setting). */
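      /* For illustration only (hypothetical single-delay-slot target), the
	 backward scan turns

		add  r3,r3,1			b    L2
		b    L2			 into	 add  r3,r3,1	; slot
		 nop

	 the add neither uses nor sets anything the branch needs, so it is
	 pulled into the slot and deleted from its old position.  */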
2013
2014 if (slots_filled < slots_to_fill)
2015 {
2016 /* If the flags register is dead after the insn, then we want to be
2017 able to accept a candidate that clobbers it. For this purpose,
2018 we need to filter the flags register during life analysis, so
2019 that it doesn't create RAW and WAW dependencies, while still
2020 creating the necessary WAR dependencies. */
2021 bool filter_flags
2022 = (slots_to_fill == 1
2023 && targetm.flags_regnum != INVALID_REGNUM
2024 && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2025 struct resources fset;
2026 CLEAR_RESOURCE (&needed);
2027 CLEAR_RESOURCE (&set);
2028 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2029 if (filter_flags)
2030 {
2031 CLEAR_RESOURCE (&fset);
2032 mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2033 }
2034 mark_referenced_resources (insn, &needed, false);
2035
2036 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2037 trial = next_trial)
2038 {
2039 next_trial = prev_nonnote_insn (trial);
2040
2041 /* This must be an INSN or CALL_INSN. */
2042 pat = PATTERN (trial);
2043
2044 /* Stand-alone USE and CLOBBER are just for flow. */
2045 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
2046 || GET_CODE (pat) == CLOBBER_HIGH)
2047 continue;
2048
2049 /* And DEBUG_INSNs never go into delay slots. */
2050 if (GET_CODE (trial) == DEBUG_INSN)
2051 continue;
2052
2053 /* Check for resource conflict first, to avoid unnecessary
2054 splitting. */
2055 if (! insn_references_resource_p (trial, &set, true)
2056 && ! insn_sets_resource_p (trial,
2057 filter_flags ? &fset : &set,
2058 true)
2059 && ! insn_sets_resource_p (trial, &needed, true)
2060 /* Can't separate set of cc0 from its use. */
2061 && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2062 && ! can_throw_internal (trial))
2063 {
2064 trial = try_split (pat, trial, 1);
2065 next_trial = prev_nonnote_insn (trial);
2066 if (eligible_for_delay (insn, slots_filled, trial, flags))
2067 {
2068 /* In this case, we are searching backward, so if we
2069 find insns to put on the delay list, we want
2070 to put them at the head, rather than the
2071 tail, of the list. */
2072
2073 update_reg_dead_notes (trial, insn);
2074 delay_list.safe_insert (0, trial);
2075 update_block (trial, trial);
2076 delete_related_insns (trial);
2077 if (slots_to_fill == ++slots_filled)
2078 break;
2079 continue;
2080 }
2081 }
2082
2083 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2084 if (filter_flags)
2085 {
2086 mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2087 /* If the flags register is set, then it doesn't create RAW
2088 dependencies any longer and it also doesn't create WAW
2089 dependencies since it's dead after the original insn. */
2090 if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2091 {
2092 CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2093 CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2094 }
2095 }
2096 mark_referenced_resources (trial, &needed, true);
2097 }
2098 }
2099
2100 /* If all needed slots haven't been filled, we come here. */
2101
2102 /* Try to optimize the case of jumping around a single insn. */
2103 if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2104 && slots_filled != slots_to_fill
2105 && delay_list.is_empty ()
2106 && JUMP_P (insn)
2107 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2108 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2109 {
2110 optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2111 if (!delay_list.is_empty ())
2112 slots_filled += 1;
2113 }
2114
2115 /* Try to get insns from beyond the insn needing the delay slot.
2116 These insns can neither set nor reference resources set in insns being
2117 skipped, cannot set resources in the insn being skipped, and, if this
2118 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2119 call might not return).
2120
2121 There used to be code which continued past the target label if
2122 we saw all uses of the target label. This code did not work,
2123 because it failed to account for some instructions which were
2124 both annulled and marked as from the target. This can happen as a
2125 result of optimize_skip. Since this code was redundant with
2126 fill_eager_delay_slots anyway, it was just deleted. */
2127
2128 if (slots_filled != slots_to_fill
2129 /* If this instruction could throw an exception which is
2130 caught in the same function, then it's not safe to fill
2131 the delay slot with an instruction from beyond this
2132 point. For example, consider:
2133
2134 int i = 2;
2135
2136 try {
2137 f();
2138 i = 3;
2139 } catch (...) {}
2140
2141 return i;
2142
2143 Even though `i' is a local variable, we must be sure not
2144 to put `i = 3' in the delay slot if `f' might throw an
2145 exception.
2146
2147 Presumably, we should also check to see if we could get
2148 back to this function via `setjmp'. */
2149 && ! can_throw_internal (insn)
2150 && !JUMP_P (insn))
2151 {
2152 int maybe_never = 0;
2153 rtx pat, trial_delay;
2154
2155 CLEAR_RESOURCE (&needed);
2156 CLEAR_RESOURCE (&set);
2157 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2158 mark_referenced_resources (insn, &needed, true);
2159
2160 if (CALL_P (insn))
2161 maybe_never = 1;
2162
2163 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2164 trial = next_trial)
2165 {
2166 next_trial = next_nonnote_insn (trial);
2167
2168 /* This must be an INSN or CALL_INSN. */
2169 pat = PATTERN (trial);
2170
2171 /* Stand-alone USE and CLOBBER are just for flow. */
2172 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
2173 || GET_CODE (pat) == CLOBBER_HIGH)
2174 continue;
2175
2176 /* And DEBUG_INSNs do not go in delay slots. */
2177 if (GET_CODE (trial) == DEBUG_INSN)
2178 continue;
2179
2180 /* If this already has filled delay slots, get the insn needing
2181 the delay slots. */
2182 if (GET_CODE (pat) == SEQUENCE)
2183 trial_delay = XVECEXP (pat, 0, 0);
2184 else
2185 trial_delay = trial;
2186
2187 /* Stop our search when seeing a jump. */
2188 if (JUMP_P (trial_delay))
2189 break;
2190
2191 /* See if we have a resource problem before we try to split. */
2192 if (GET_CODE (pat) != SEQUENCE
2193 && ! insn_references_resource_p (trial, &set, true)
2194 && ! insn_sets_resource_p (trial, &set, true)
2195 && ! insn_sets_resource_p (trial, &needed, true)
2196 && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2197 && ! (maybe_never && may_trap_or_fault_p (pat))
2198 && (trial = try_split (pat, trial, 0))
2199 && eligible_for_delay (insn, slots_filled, trial, flags)
2200 && ! can_throw_internal (trial))
2201 {
2202 next_trial = next_nonnote_insn (trial);
2203 add_to_delay_list (trial, &delay_list);
2204 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2205 link_cc0_insns (trial);
2206
2207 delete_related_insns (trial);
2208 if (slots_to_fill == ++slots_filled)
2209 break;
2210 continue;
2211 }
2212
2213 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2214 mark_referenced_resources (trial, &needed, true);
2215
2216 /* Ensure we don't put insns between the setting of cc and the
2217 comparison by moving a setting of cc into an earlier delay
2218 slot since these insns could clobber the condition code. */
2219 set.cc = 1;
2220
2221 /* If this is a call, we might not get here. */
2222 if (CALL_P (trial_delay))
2223 maybe_never = 1;
2224 }
2225
2226 /* If there are slots left to fill and our search was stopped by an
2227 unconditional branch, try the insn at the branch target. We can
2228 redirect the branch if it works.
2229
2230 Don't do this if the insn at the branch target is a branch. */
2231 if (slots_to_fill != slots_filled
2232 && trial
2233 && jump_to_label_p (trial)
2234 && simplejump_p (trial)
2235 && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2236 && ! (NONJUMP_INSN_P (next_trial)
2237 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2238 && !JUMP_P (next_trial)
2239 && ! insn_references_resource_p (next_trial, &set, true)
2240 && ! insn_sets_resource_p (next_trial, &set, true)
2241 && ! insn_sets_resource_p (next_trial, &needed, true)
2242 && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2243 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2244 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2245 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2246 && ! can_throw_internal (trial))
2247 {
2248 /* See comment in relax_delay_slots about necessity of using
2249 next_real_nondebug_insn here. */
2250 rtx_insn *new_label = next_real_nondebug_insn (next_trial);
2251
2252 if (new_label != 0)
2253 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2254 else
2255 new_label = find_end_label (simple_return_rtx);
2256
2257 if (new_label)
2258 {
2259 add_to_delay_list (copy_delay_slot_insn (next_trial),
2260 &delay_list);
2261 slots_filled++;
2262 reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2263 new_label);
2264 }
2265 }
2266 }
2267
2268 /* If this is an unconditional jump, then try to get insns from the
2269 target of the jump. */
2270 rtx_jump_insn *jump_insn;
2271 if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2272 && simplejump_p (jump_insn)
2273 && slots_filled != slots_to_fill)
2274 fill_slots_from_thread (jump_insn, const_true_rtx,
2275 next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2276 NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2277 JUMP_LABEL (insn), 0),
2278 slots_to_fill, &slots_filled, &delay_list);
2279
2280 if (!delay_list.is_empty ())
2281 unfilled_slots_base[i]
2282 = emit_delay_sequence (insn, delay_list, slots_filled);
2283
2284 if (slots_to_fill == slots_filled)
2285 unfilled_slots_base[i] = 0;
2286
2287 note_delay_statistics (slots_filled, 0);
2288 }
2289 }
2290
2291 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2292 return the ultimate label reached by any such chain of jumps.
2293 Return a suitable return rtx if the chain ultimately leads to a
2294 return instruction.
2295 If LABEL is not followed by a jump, return LABEL.
2296 If the chain loops or we can't find the end, return LABEL,
2297 since that tells the caller to avoid changing the insn.
2298 If the returned label is obtained by following a crossing jump,
2299 set *CROSSING to true, otherwise set it to false. */
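/* Schematic example (not from any particular port): if JUMP targets L1 and

   L1:	b  L2
   L2:	b  L3
   L3:	insn-a

   then the chain is followed down to L3, letting the caller redirect JUMP
   straight to the final destination.  The depth limit below keeps a very
   long (or cyclic) chain from being followed forever.  */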
2300
2301 static rtx
2302 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2303 {
2304 rtx_insn *insn;
2305 rtx_insn *next;
2306 int depth;
2307
2308 *crossing = false;
2309 if (ANY_RETURN_P (label))
2310 return label;
2311
2312 rtx_insn *value = as_a <rtx_insn *> (label);
2313
2314 for (depth = 0;
2315 (depth < 10
2316 && (insn = next_active_insn (value)) != 0
2317 && JUMP_P (insn)
2318 && JUMP_LABEL (insn) != NULL_RTX
2319 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2320 || ANY_RETURN_P (PATTERN (insn)))
2321 && (next = NEXT_INSN (insn))
2322 && BARRIER_P (next));
2323 depth++)
2324 {
2325 rtx this_label_or_return = JUMP_LABEL (insn);
2326
2327 /* If we have found a cycle, make the insn jump to itself. */
2328 if (this_label_or_return == label)
2329 return label;
2330
2331 /* Cannot follow returns and cannot look through tablejumps. */
2332 if (ANY_RETURN_P (this_label_or_return))
2333 return this_label_or_return;
2334
2335 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2336 if (NEXT_INSN (this_label)
2337 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2338 break;
2339
2340 if (!targetm.can_follow_jump (jump, insn))
2341 break;
2342 if (!*crossing)
2343 *crossing = CROSSING_JUMP_P (jump);
2344 value = this_label;
2345 }
2346 if (depth == 10)
2347 return label;
2348 return value;
2349 }
2350
2351 /* Try to find insns to place in delay slots.
2352
2353 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2354 or is an unconditional branch if CONDITION is const_true_rtx.
2355 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2356
2357 THREAD is a flow-of-control, either the insns to be executed if the
2358 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2359
2360 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2361 to see if any potential delay slot insns set things needed there.
2362
2363 LIKELY is nonzero if it is extremely likely that the branch will be
2364 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2365 end of a loop back up to the top.
2366
2367 OWN_THREAD is true if we are the only user of the thread, i.e. it is
2368 the target of the jump when we are the only jump going there.
2369
2370 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2371 case, we can only take insns from the head of the thread for our delay
2372 slot. We then adjust the jump to point after the insns we have taken. */
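/* For illustration only (hypothetical annulling target, SPARC-like ",a"
   annul syntax): when the target thread is not ours to delete from, an insn
   can still be hoisted into an annulled slot and the branch redirected past
   it:

	beq    r1,r2,L			beq,a  r1,r2,L2
	 nop				 ld    r3,0(r4)	; copy, annulled on
	...				...		; the fall-through
   L:	ld     r3,0(r4)		   L:	ld     r3,0(r4)
	use    r3		   L2:	use    r3

   When the hoisted insn is also harmless on the fall-through path (it sets
   nothing needed there and cannot trap), the slot need not be annulled; and
   when we own the thread, the original copy at L is deleted rather than
   branched around.  */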
2373
2374 static void
2375 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2376 rtx thread_or_return, rtx opposite_thread, int likely,
2377 int thread_if_true, int own_thread, int slots_to_fill,
2378 int *pslots_filled, vec<rtx_insn *> *delay_list)
2379 {
2380 rtx new_thread;
2381 struct resources opposite_needed, set, needed;
2382 rtx_insn *trial;
2383 int lose = 0;
2384 int must_annul = 0;
2385 int flags;
2386
2387 /* Validate our arguments. */
2388 gcc_assert (condition != const_true_rtx || thread_if_true);
2389 gcc_assert (own_thread || thread_if_true);
2390
2391 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2392
2393 /* If our thread is the end of subroutine, we can't get any delay
2394 insns from that. */
2395 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2396 return;
2397
2398 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2399
2400 /* If this is an unconditional branch, nothing is needed at the
2401 opposite thread. Otherwise, compute what is needed there. */
2402 if (condition == const_true_rtx)
2403 CLEAR_RESOURCE (&opposite_needed);
2404 else
2405 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2406
2407 /* If the insn at THREAD can be split, do it here to avoid having to
2408 update THREAD and NEW_THREAD if it is done in the loop below. Also
2409 initialize NEW_THREAD. */
2410
2411 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2412
2413 /* Scan insns at THREAD. We are looking for an insn that can be removed
2414 from THREAD (it neither sets nor references resources that were set
2415 ahead of it and it doesn't set anything needed by the insns ahead of
2416 it) and that either can be placed in an annulling insn or isn't
2417 needed at OPPOSITE_THREAD. */
2418
2419 CLEAR_RESOURCE (&needed);
2420 CLEAR_RESOURCE (&set);
2421
2422 /* If we do not own this thread, we must stop as soon as we find
2423 something that we can't put in a delay slot, since all we can do
2424 is branch into THREAD at a later point. Therefore, labels stop
2425 the search if this is not the `true' thread. */
2426
2427 for (trial = thread;
2428 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2429 trial = next_nonnote_insn (trial))
2430 {
2431 rtx pat, old_trial;
2432
2433 /* If we have passed a label, we no longer own this thread. */
2434 if (LABEL_P (trial))
2435 {
2436 own_thread = 0;
2437 continue;
2438 }
2439
2440 pat = PATTERN (trial);
2441 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
2442 || GET_CODE (pat) == CLOBBER_HIGH)
2443 continue;
2444
2445 if (GET_CODE (trial) == DEBUG_INSN)
2446 continue;
2447
2448 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2449 don't separate or copy insns that set and use CC0. */
2450 if (! insn_references_resource_p (trial, &set, true)
2451 && ! insn_sets_resource_p (trial, &set, true)
2452 && ! insn_sets_resource_p (trial, &needed, true)
2453 && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2454 && (! own_thread || ! sets_cc0_p (pat)))))
2455 && ! can_throw_internal (trial))
2456 {
2457 rtx_insn *prior_insn;
2458
2459 /* If TRIAL is redundant with some insn before INSN, we don't
2460 actually need to add it to the delay list; we can merely pretend
2461 we did. */
2462 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2463 {
2464 fix_reg_dead_note (prior_insn, insn);
2465 if (own_thread)
2466 {
2467 update_block (trial, thread);
2468 if (trial == thread)
2469 {
2470 thread = next_active_insn (thread);
2471 if (new_thread == trial)
2472 new_thread = thread;
2473 }
2474
2475 delete_related_insns (trial);
2476 }
2477 else
2478 {
2479 update_reg_unused_notes (prior_insn, trial);
2480 new_thread = next_active_insn (trial);
2481 }
2482
2483 continue;
2484 }
2485
2486 /* There are two ways we can win: If TRIAL doesn't set anything
2487 needed at the opposite thread and can't trap, or if it can
2488 go into an annulled delay slot. But we want neither to copy
2489 nor to speculate frame-related insns. */
2490 if (!must_annul
2491 && ((condition == const_true_rtx
2492 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2493 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2494 && ! may_trap_or_fault_p (pat)
2495 && ! RTX_FRAME_RELATED_P (trial))))
2496 {
2497 old_trial = trial;
2498 trial = try_split (pat, trial, 0);
2499 if (new_thread == old_trial)
2500 new_thread = trial;
2501 if (thread == old_trial)
2502 thread = trial;
2503 pat = PATTERN (trial);
2504 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2505 goto winner;
2506 }
2507 else if (!RTX_FRAME_RELATED_P (trial)
2508 && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2509 || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2510 {
2511 old_trial = trial;
2512 trial = try_split (pat, trial, 0);
2513 if (new_thread == old_trial)
2514 new_thread = trial;
2515 if (thread == old_trial)
2516 thread = trial;
2517 pat = PATTERN (trial);
2518 if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2519 ? check_annul_list_true_false (0, *delay_list)
2520 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2521 : check_annul_list_true_false (1, *delay_list)
2522 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2523 {
2524 rtx_insn *temp;
2525
2526 must_annul = 1;
2527 winner:
2528
2529 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2530 link_cc0_insns (trial);
2531
2532 /* If we own this thread, delete the insn. If this is the
2533 destination of a branch, show that a basic block status
2534 may have been updated. In any case, mark the new
2535 starting point of this thread. */
2536 if (own_thread)
2537 {
2538 rtx note;
2539
2540 update_block (trial, thread);
2541 if (trial == thread)
2542 {
2543 thread = next_active_insn (thread);
2544 if (new_thread == trial)
2545 new_thread = thread;
2546 }
2547
2548 /* We are moving this insn, not deleting it. We must
2549 temporarily increment the use count on any referenced
2550 label lest it be deleted by delete_related_insns. */
2551 for (note = REG_NOTES (trial);
2552 note != NULL_RTX;
2553 note = XEXP (note, 1))
2554 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2555 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2556 {
2557 /* REG_LABEL_OPERAND could be
2558 NOTE_INSN_DELETED_LABEL too. */
2559 if (LABEL_P (XEXP (note, 0)))
2560 LABEL_NUSES (XEXP (note, 0))++;
2561 else
2562 gcc_assert (REG_NOTE_KIND (note)
2563 == REG_LABEL_OPERAND);
2564 }
2565 if (jump_to_label_p (trial))
2566 LABEL_NUSES (JUMP_LABEL (trial))++;
2567
2568 delete_related_insns (trial);
2569
2570 for (note = REG_NOTES (trial);
2571 note != NULL_RTX;
2572 note = XEXP (note, 1))
2573 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2574 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2575 {
2576 /* REG_LABEL_OPERAND could be
2577 NOTE_INSN_DELETED_LABEL too. */
2578 if (LABEL_P (XEXP (note, 0)))
2579 LABEL_NUSES (XEXP (note, 0))--;
2580 else
2581 gcc_assert (REG_NOTE_KIND (note)
2582 == REG_LABEL_OPERAND);
2583 }
2584 if (jump_to_label_p (trial))
2585 LABEL_NUSES (JUMP_LABEL (trial))--;
2586 }
2587 else
2588 new_thread = next_active_insn (trial);
2589
2590 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2591 if (thread_if_true)
2592 INSN_FROM_TARGET_P (temp) = 1;
2593
2594 add_to_delay_list (temp, delay_list);
2595
2596 if (slots_to_fill == ++(*pslots_filled))
2597 {
2598 /* Even though we have filled all the slots, we
2599 may be branching to a location that has a
2600 redundant insn. Skip any if so. */
2601 while (new_thread && ! own_thread
2602 && ! insn_sets_resource_p (new_thread, &set, true)
2603 && ! insn_sets_resource_p (new_thread, &needed,
2604 true)
2605 && ! insn_references_resource_p (new_thread,
2606 &set, true)
2607 && (prior_insn
2608 = redundant_insn (new_thread, insn,
2609 *delay_list)))
2610 {
2611 /* We know we do not own the thread, so no need
2612 to call update_block and delete_insn. */
2613 fix_reg_dead_note (prior_insn, insn);
2614 update_reg_unused_notes (prior_insn, new_thread);
2615 new_thread
2616 = next_active_insn (as_a<rtx_insn *> (new_thread));
2617 }
2618 break;
2619 }
2620
2621 continue;
2622 }
2623 }
2624 }
2625
2626 /* This insn can't go into a delay slot. */
2627 lose = 1;
2628 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2629 mark_referenced_resources (trial, &needed, true);
2630
2631 /* Ensure we don't put insns between the setting of cc and the comparison
2632 by moving a setting of cc into an earlier delay slot since these insns
2633 could clobber the condition code. */
2634 set.cc = 1;
2635
2636 /* If this insn is a register-register copy and the next insn has
2637 a use of our destination, change it to use our source. That way,
2638 it will become a candidate for our delay slot the next time
2639 through this loop. This case occurs commonly in loops that
2640 scan a list.
2641
2642 We could check for more complex cases than those tested below,
2643 but it doesn't seem worth it. It might also be a good idea to try
2644 to swap the two insns. That might do better.
2645
2646 We can't do this if the next insn modifies our destination, because
2647 that would make the replacement into the insn invalid. We also can't
2648 do this if it modifies our source, because it might be an earlyclobber
2649 operand. This latter test also prevents updating the contents of
2650 a PRE_INC. We also can't do this if there's overlap of source and
2651 destination. Overlap may happen for larger-than-register-size modes. */
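      /* Sketch of the substitution (hypothetical registers): with

		TRIAL:	r5 = r7
		NEXT:	r6 = r5 + 4

	 we rewrite NEXT to "r6 = r7 + 4"; once NEXT no longer reads r5, the
	 copy in TRIAL stops conflicting and may be picked for the delay
	 slot on a later iteration of this loop.  */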
2652
2653 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2654 && REG_P (SET_SRC (pat))
2655 && REG_P (SET_DEST (pat))
2656 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2657 {
2658 rtx_insn *next = next_nonnote_insn (trial);
2659
2660 if (next && NONJUMP_INSN_P (next)
2661 && GET_CODE (PATTERN (next)) != USE
2662 && ! reg_set_p (SET_DEST (pat), next)
2663 && ! reg_set_p (SET_SRC (pat), next)
2664 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2665 && ! modified_in_p (SET_DEST (pat), next))
2666 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2667 }
2668 }
2669
2670 /* If we stopped on a branch insn that has delay slots, see if we can
2671 steal some of the insns in those slots. */
2672 if (trial && NONJUMP_INSN_P (trial)
2673 && GET_CODE (PATTERN (trial)) == SEQUENCE
2674 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2675 {
2676 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2677 /* If this is the `true' thread, we will want to follow the jump,
2678 so we can only do this if we have taken everything up to here. */
2679 if (thread_if_true && trial == new_thread)
2680 {
2681 steal_delay_list_from_target (insn, condition, sequence,
2682 delay_list, &set, &needed,
2683 &opposite_needed, slots_to_fill,
2684 pslots_filled, &must_annul,
2685 &new_thread);
2686 /* If we owned the thread and are told that it branched
2687 elsewhere, make sure we own the thread at the new location. */
2688 if (own_thread && trial != new_thread)
2689 own_thread = own_thread_p (new_thread, new_thread, 0);
2690 }
2691 else if (! thread_if_true)
2692 steal_delay_list_from_fallthrough (insn, condition, sequence,
2693 delay_list, &set, &needed,
2694 &opposite_needed, slots_to_fill,
2695 pslots_filled, &must_annul);
2696 }
2697
2698 /* If we haven't found anything for this delay slot and it is very
2699 likely that the branch will be taken, see if the insn at our target
2700 increments or decrements a register with an increment that does not
2701 depend on the destination register. If so, try to place the opposite
2702 arithmetic insn after the jump insn and put the arithmetic insn in the
2703 delay slot. If we can't do this, return. */
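  /* For illustration only (hypothetical loop, registers made up): if the
     likely-taken branch targets

	L:  r4 = r4 + 8

     we may put "r4 = r4 + 8" in the delay slot and emit the compensating
     "r4 = r4 - 8" immediately after the branch, so the rarely executed
     fall-through path undoes the update while the loop path pays nothing.  */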
2704 if (delay_list->is_empty () && likely
2705 && new_thread && !ANY_RETURN_P (new_thread)
2706 && NONJUMP_INSN_P (new_thread)
2707 && !RTX_FRAME_RELATED_P (new_thread)
2708 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2709 && asm_noperands (PATTERN (new_thread)) < 0)
2710 {
2711 rtx pat = PATTERN (new_thread);
2712 rtx dest;
2713 rtx src;
2714
2715 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2716 above. */
2717 trial = as_a <rtx_insn *> (new_thread);
2718 pat = PATTERN (trial);
2719
2720 if (!NONJUMP_INSN_P (trial)
2721 || GET_CODE (pat) != SET
2722 || ! eligible_for_delay (insn, 0, trial, flags)
2723 || can_throw_internal (trial))
2724 return;
2725
2726 dest = SET_DEST (pat), src = SET_SRC (pat);
2727 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2728 && rtx_equal_p (XEXP (src, 0), dest)
2729 && (!FLOAT_MODE_P (GET_MODE (src))
2730 || flag_unsafe_math_optimizations)
2731 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2732 && ! side_effects_p (pat))
2733 {
2734 rtx other = XEXP (src, 1);
2735 rtx new_arith;
2736 rtx_insn *ninsn;
2737
2738 /* If this is a constant adjustment, use the same code with
2739 the negated constant. Otherwise, reverse the sense of the
2740 arithmetic. */
2741 if (CONST_INT_P (other))
2742 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2743 negate_rtx (GET_MODE (src), other));
2744 else
2745 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2746 GET_MODE (src), dest, other);
2747
2748 ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2749
2750 if (recog_memoized (ninsn) < 0
2751 || (extract_insn (ninsn),
2752 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2753 {
2754 delete_related_insns (ninsn);
2755 return;
2756 }
2757
2758 if (own_thread)
2759 {
2760 update_block (trial, thread);
2761 if (trial == thread)
2762 {
2763 thread = next_active_insn (thread);
2764 if (new_thread == trial)
2765 new_thread = thread;
2766 }
2767 delete_related_insns (trial);
2768 }
2769 else
2770 new_thread = next_active_insn (trial);
2771
2772 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2773 if (thread_if_true)
2774 INSN_FROM_TARGET_P (ninsn) = 1;
2775
2776 add_to_delay_list (ninsn, delay_list);
2777 (*pslots_filled)++;
2778 }
2779 }
2780
2781 if (!delay_list->is_empty () && must_annul)
2782 INSN_ANNULLED_BRANCH_P (insn) = 1;
2783
2784 /* If we are to branch into the middle of this thread, find an appropriate
2785 label or make a new one if none, and redirect INSN to it. If we hit the
2786 end of the function, use the end-of-function label. */
2787 if (new_thread != thread)
2788 {
2789 rtx label;
2790 bool crossing = false;
2791
2792 gcc_assert (thread_if_true);
2793
2794 if (new_thread && simplejump_or_return_p (new_thread)
2795 && redirect_with_delay_list_safe_p (insn,
2796 JUMP_LABEL (new_thread),
2797 *delay_list))
2798 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2799 &crossing);
2800
2801 if (ANY_RETURN_P (new_thread))
2802 label = find_end_label (new_thread);
2803 else if (LABEL_P (new_thread))
2804 label = new_thread;
2805 else
2806 label = get_label_before (as_a <rtx_insn *> (new_thread),
2807 JUMP_LABEL (insn));
2808
2809 if (label)
2810 {
2811 reorg_redirect_jump (insn, label);
2812 if (crossing)
2813 CROSSING_JUMP_P (insn) = 1;
2814 }
2815 }
2816 }
2817
2818 /* Make another attempt to find insns to place in delay slots.
2819
2820 We previously looked for insns located in front of the delay insn
2821 and, for non-jump delay insns, located behind the delay insn.
2822
2823 Here only try to schedule jump insns and try to move insns from either
2824 the target or the following insns into the delay slot. If annulling is
2825 supported, we are likely to be able to do this. Otherwise, we can do this only
2826 if safe. */
2827
2828 static void
2829 fill_eager_delay_slots (void)
2830 {
2831 rtx_insn *insn;
2832 int i;
2833 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2834
2835 for (i = 0; i < num_unfilled_slots; i++)
2836 {
2837 rtx condition;
2838 rtx target_label, insn_at_target;
2839 rtx_insn *fallthrough_insn;
2840 auto_vec<rtx_insn *, 5> delay_list;
2841 rtx_jump_insn *jump_insn;
2842 int own_target;
2843 int own_fallthrough;
2844 int prediction, slots_to_fill, slots_filled;
2845
2846 insn = unfilled_slots_base[i];
2847 if (insn == 0
2848 || insn->deleted ()
2849 || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2850 || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2851 continue;
2852
2853 slots_to_fill = num_delay_slots (jump_insn);
2854 /* Some machine descriptions have defined instructions to have
2855 delay slots only in certain circumstances which may depend on
2856 nearby insns (which change due to reorg's actions).
2857
2858 For example, the PA port normally has delay slots for unconditional
2859 jumps.
2860
2861 However, the PA port claims such jumps do not have a delay slot
2862 if they are immediate successors of certain CALL_INSNs. This
2863 allows the port to favor filling the delay slot of the call with
2864 the unconditional jump. */
2865 if (slots_to_fill == 0)
2866 continue;
2867
2868 slots_filled = 0;
2869 target_label = JUMP_LABEL (jump_insn);
2870 condition = get_branch_condition (jump_insn, target_label);
2871
2872 if (condition == 0)
2873 continue;
2874
2875 /* Get the next active fallthrough and target insns and see if we own
2876 them. Then see whether the branch is likely true. We don't need
2877 to do a lot of this for unconditional branches. */
2878
2879 insn_at_target = first_active_target_insn (target_label);
2880 own_target = own_thread_p (target_label, target_label, 0);
2881
2882 if (condition == const_true_rtx)
2883 {
2884 own_fallthrough = 0;
2885 fallthrough_insn = 0;
2886 prediction = 2;
2887 }
2888 else
2889 {
2890 fallthrough_insn = next_active_insn (jump_insn);
2891 own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2892 prediction = mostly_true_jump (jump_insn);
2893 }
2894
2895 /* If this insn is expected to branch, first try to get insns from our
2896 target, then our fallthrough insns. If it is not expected to branch,
2897 try the other order. */
2898
2899 if (prediction > 0)
2900 {
2901 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2902 fallthrough_insn, prediction == 2, 1,
2903 own_target,
2904 slots_to_fill, &slots_filled, &delay_list);
2905
2906 if (delay_list.is_empty () && own_fallthrough)
2907 {
2908 /* Even though we didn't find anything for delay slots,
2909 we might have found a redundant insn which we deleted
2910 from the thread that was filled. So we have to recompute
2911 the next insn at the target. */
2912 target_label = JUMP_LABEL (jump_insn);
2913 insn_at_target = first_active_target_insn (target_label);
2914
2915 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2916 insn_at_target, 0, 0, own_fallthrough,
2917 slots_to_fill, &slots_filled,
2918 &delay_list);
2919 }
2920 }
2921 else
2922 {
2923 if (own_fallthrough)
2924 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2925 insn_at_target, 0, 0, own_fallthrough,
2926 slots_to_fill, &slots_filled, &delay_list);
2927
2928 if (delay_list.is_empty ())
2929 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2930 next_active_insn (insn), 0, 1, own_target,
2931 slots_to_fill, &slots_filled, &delay_list);
2932 }
2933
2934 if (!delay_list.is_empty ())
2935 unfilled_slots_base[i]
2936 = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2937
2938 if (slots_to_fill == slots_filled)
2939 unfilled_slots_base[i] = 0;
2940
2941 note_delay_statistics (slots_filled, 1);
2942 }
2943 }
2944
2945 static void delete_computation (rtx_insn *insn);
2946
2947 /* Recursively delete prior insns that compute the value (used only by INSN
2948 which the caller is deleting) stored in the register mentioned by NOTE
2949 which is a REG_DEAD note associated with INSN. */
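/* Sketch (hypothetical registers): if INSN is being deleted and carries
   REG_DEAD (r9), and the only earlier effect on r9 is

	P:  r9 = r10 * 4

   with no intervening use of r9, then P computed a value nobody needs any
   more and can be deleted as well (recursively, via delete_computation).  */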
2950
2951 static void
2952 delete_prior_computation (rtx note, rtx_insn *insn)
2953 {
2954 rtx_insn *our_prev;
2955 rtx reg = XEXP (note, 0);
2956
2957 for (our_prev = prev_nonnote_insn (insn);
2958 our_prev && (NONJUMP_INSN_P (our_prev)
2959 || CALL_P (our_prev));
2960 our_prev = prev_nonnote_insn (our_prev))
2961 {
2962 rtx pat = PATTERN (our_prev);
2963
2964 /* If we reach a CALL which is not calling a const function
2965 or the callee pops the arguments, then give up. */
2966 if (CALL_P (our_prev)
2967 && (! RTL_CONST_CALL_P (our_prev)
2968 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2969 break;
2970
2971 /* If we reach a SEQUENCE, it is too complex to try to
2972 do anything with it, so give up. We can be run during
2973 and after reorg, so SEQUENCE rtl can legitimately show
2974 up here. */
2975 if (GET_CODE (pat) == SEQUENCE)
2976 break;
2977
2978 if (GET_CODE (pat) == USE
2979 && NONJUMP_INSN_P (XEXP (pat, 0)))
2980 /* reorg creates USEs that look like this. We leave them
2981 alone because reorg needs them for its own purposes. */
2982 break;
2983
2984 if (reg_set_p (reg, pat))
2985 {
2986 if (side_effects_p (pat) && !CALL_P (our_prev))
2987 break;
2988
2989 if (GET_CODE (pat) == PARALLEL)
2990 {
2991 /* If we find a SET of something else, we can't
2992 delete the insn. */
2993
2994 int i;
2995
2996 for (i = 0; i < XVECLEN (pat, 0); i++)
2997 {
2998 rtx part = XVECEXP (pat, 0, i);
2999
3000 if (GET_CODE (part) == SET
3001 && SET_DEST (part) != reg)
3002 break;
3003 }
3004
3005 if (i == XVECLEN (pat, 0))
3006 delete_computation (our_prev);
3007 }
3008 else if (GET_CODE (pat) == SET
3009 && REG_P (SET_DEST (pat)))
3010 {
3011 int dest_regno = REGNO (SET_DEST (pat));
3012 int dest_endregno = END_REGNO (SET_DEST (pat));
3013 int regno = REGNO (reg);
3014 int endregno = END_REGNO (reg);
3015
3016 if (dest_regno >= regno
3017 && dest_endregno <= endregno)
3018 delete_computation (our_prev);
3019
3020 /* We may have a multi-word hard register and some, but not
3021 all, of the words of the register are needed in subsequent
3022 insns. Write REG_UNUSED notes for those parts that were not
3023 needed. */
3024 else if (dest_regno <= regno
3025 && dest_endregno >= endregno)
3026 {
3027 int i;
3028
3029 add_reg_note (our_prev, REG_UNUSED, reg);
3030
3031 for (i = dest_regno; i < dest_endregno; i++)
3032 if (! find_regno_note (our_prev, REG_UNUSED, i))
3033 break;
3034
3035 if (i == dest_endregno)
3036 delete_computation (our_prev);
3037 }
3038 }
3039
3040 break;
3041 }
3042
3043 /* If PAT references the register that dies here, it is an
3044 additional use. Hence any prior SET isn't dead. However, this
3045 insn becomes the new place for the REG_DEAD note. */
3046 if (reg_overlap_mentioned_p (reg, pat))
3047 {
3048 XEXP (note, 1) = REG_NOTES (our_prev);
3049 REG_NOTES (our_prev) = note;
3050 break;
3051 }
3052 }
3053 }
3054
3055 /* Delete INSN and recursively delete insns that compute values used only
3056 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3057
3058 Look at all our REG_DEAD notes. If a previous insn does nothing other
3059 than set a register that dies in this insn, we can delete that insn
3060 as well.
3061
3062 On machines with CC0, if CC0 is used in this insn, we may be able to
3063 delete the insn that set it. */
3064
3065 static void
3066 delete_computation (rtx_insn *insn)
3067 {
3068 rtx note, next;
3069
3070 if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3071 {
3072 rtx_insn *prev = prev_nonnote_insn (insn);
3073 /* We assume that at this stage
3074 CC's are always set explicitly
3075 and always immediately before the jump that
3076 will use them. So if the previous insn
3077 exists to set the CC's, delete it
3078 (unless it performs auto-increments, etc.). */
3079 if (prev && NONJUMP_INSN_P (prev)
3080 && sets_cc0_p (PATTERN (prev)))
3081 {
3082 if (sets_cc0_p (PATTERN (prev)) > 0
3083 && ! side_effects_p (PATTERN (prev)))
3084 delete_computation (prev);
3085 else
3086 /* Otherwise, show that cc0 won't be used. */
3087 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3088 }
3089 }
3090
3091 for (note = REG_NOTES (insn); note; note = next)
3092 {
3093 next = XEXP (note, 1);
3094
3095 if (REG_NOTE_KIND (note) != REG_DEAD
3096 /* Verify that the REG_NOTE is legitimate. */
3097 || !REG_P (XEXP (note, 0)))
3098 continue;
3099
3100 delete_prior_computation (note, insn);
3101 }
3102
3103 delete_related_insns (insn);
3104 }
3105
3106 /* If all INSN does is set the pc, delete it,
3107 and delete the insn that set the condition codes for it
3108 if that's what the previous thing was. */
3109
3110 static void
3111 delete_jump (rtx_insn *insn)
3112 {
3113 rtx set = single_set (insn);
3114
3115 if (set && GET_CODE (SET_DEST (set)) == PC)
3116 delete_computation (insn);
3117 }
3118
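/* Return the CODE_LABEL, if any, that immediately precedes the next active
   insn after X; return NULL if SCAN_LIMIT or the start of the insn chain is
   reached first.  */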
3119 static rtx_insn *
3120 label_before_next_insn (rtx_insn *x, rtx scan_limit)
3121 {
3122 rtx_insn *insn = next_active_insn (x);
3123 while (insn)
3124 {
3125 insn = PREV_INSN (insn);
3126 if (insn == scan_limit || insn == NULL_RTX)
3127 return NULL;
3128 if (LABEL_P (insn))
3129 break;
3130 }
3131 return insn;
3132 }
3133
3134 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3135 BEG and END. */
3136
3137 static bool
3138 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3139 {
3140 const rtx_insn *p;
3141 for (p = beg; p != end; p = NEXT_INSN (p))
3142 if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3143 return true;
3144 return false;
3145 }
3146
3147
3148 /* Once we have tried two ways to fill a delay slot, make a pass over the
3149 code to try to improve the results and to do such things as more jump
3150 threading. */
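/* Illustrative example (schematic): a conditional branch around an
   unconditional one,

	beq  r1,r2,L1			bne  r1,r2,L2
	b    L2		 becomes
   L1:	...			   L1:	...

   is simplified below by inverting the condition and retargeting the jump;
   jumps to the immediately following active insn are deleted outright.  */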
3151
3152 static void
3153 relax_delay_slots (rtx_insn *first)
3154 {
3155 rtx_insn *insn, *next;
3156 rtx_sequence *pat;
3157 rtx_insn *delay_insn;
3158 rtx target_label;
3159
3160 /* Look at every JUMP_INSN and see if we can improve it. */
3161 for (insn = first; insn; insn = next)
3162 {
3163 rtx_insn *other, *prior_insn;
3164 bool crossing;
3165
3166 next = next_active_insn (insn);
3167
3168 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3169 the next insn, or jumps to a label that is not the last of a
3170 group of consecutive labels. */
3171 if (is_a <rtx_jump_insn *> (insn)
3172 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3173 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3174 {
3175 rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3176 target_label
3177 = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3178 &crossing));
3179 if (ANY_RETURN_P (target_label))
3180 target_label = find_end_label (target_label);
3181
3182 if (target_label
3183 && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3184 && ! condjump_in_parallel_p (jump_insn)
3185 && ! (next && switch_text_sections_between_p (jump_insn, next)))
3186 {
3187 delete_jump (jump_insn);
3188 continue;
3189 }
3190
3191 if (target_label && target_label != JUMP_LABEL (jump_insn))
3192 {
3193 reorg_redirect_jump (jump_insn, target_label);
3194 if (crossing)
3195 CROSSING_JUMP_P (jump_insn) = 1;
3196 }
3197
3198 /* See if this jump conditionally branches around an unconditional
3199 jump. If so, invert this jump and point it to the target of the
3200 second jump, provided the target machine allows it (see can_follow_jump). */
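/* Schematically (labels purely illustrative):
     if (cc) goto L1;  goto L2;  L1: ...
   becomes
     if (!cc) goto L2;  L1: ...
   after which L1 is deleted if it is no longer referenced.  */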
3201 if (next && simplejump_or_return_p (next)
3202 && any_condjump_p (jump_insn)
3203 && target_label
3204 && (next_active_insn (as_a<rtx_insn *> (target_label))
3205 == next_active_insn (next))
3206 && no_labels_between_p (jump_insn, next)
3207 && targetm.can_follow_jump (jump_insn, next))
3208 {
3209 rtx label = JUMP_LABEL (next);
3210
3211 /* Be careful how we do this to avoid deleting code or
3212 labels that are momentarily dead. See similar optimization
3213 in jump.c.
3214
3215 We also need to ensure we properly handle the case when
3216 invert_jump fails. */
3217
3218 ++LABEL_NUSES (target_label);
3219 if (!ANY_RETURN_P (label))
3220 ++LABEL_NUSES (label);
3221
3222 if (invert_jump (jump_insn, label, 1))
3223 {
3224 delete_related_insns (next);
3225 next = jump_insn;
3226 }
3227
3228 if (!ANY_RETURN_P (label))
3229 --LABEL_NUSES (label);
3230
3231 if (--LABEL_NUSES (target_label) == 0)
3232 delete_related_insns (target_label);
3233
3234 continue;
3235 }
3236 }
3237
3238 /* If this is an unconditional jump and the previous insn is a
3239 conditional jump, try reversing the condition of the previous
3240 insn and swapping our targets. The next pass might be able to
3241 fill the slots.
3242
3243 Don't do this if we expect the conditional branch to be true, because
3244 we would then be making the more common case longer. */
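/* Schematically (labels purely illustrative):
     if (cc) goto A;  goto B;
   becomes
     if (!cc) goto B;  goto A;  */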
3245
3246 if (simplejump_or_return_p (insn)
3247 && (other = prev_active_insn (insn)) != 0
3248 && any_condjump_p (other)
3249 && no_labels_between_p (other, insn)
3250 && mostly_true_jump (other) < 0)
3251 {
3252 rtx other_target = JUMP_LABEL (other);
3253 target_label = JUMP_LABEL (insn);
3254
3255 if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3256 reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3257 }
3258
3259 /* Now look only at cases where we have a filled delay slot. */
3260 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3261 continue;
3262
3263 pat = as_a <rtx_sequence *> (PATTERN (insn));
3264 delay_insn = pat->insn (0);
3265
3266 /* See if the first insn in the delay slot is redundant with some
3267 previous insn. Remove it from the delay slot if so; then set up
3268 to reprocess this insn. */
3269 if ((prior_insn = redundant_insn (pat->insn (1), delay_insn, vNULL)))
3270 {
3271 fix_reg_dead_note (prior_insn, insn);
3272 update_block (pat->insn (1), insn);
3273 delete_from_delay_slot (pat->insn (1));
3274 next = prev_active_insn (next);
3275 continue;
3276 }
3277
3278 /* See if we have a RETURN insn with a filled delay slot followed
3279 by a RETURN insn with an unfilled delay slot. If so, we can delete
3280 the first RETURN (but not its delay insn). This gives the same
3281 effect in fewer instructions.
3282
3283 Only do so if optimizing for size since this results in slower, but
3284 smaller code. */
3285 if (optimize_function_for_size_p (cfun)
3286 && ANY_RETURN_P (PATTERN (delay_insn))
3287 && next
3288 && JUMP_P (next)
3289 && PATTERN (next) == PATTERN (delay_insn))
3290 {
3291 rtx_insn *after;
3292 int i;
3293
3294 /* Delete the RETURN and just execute the delay list insns.
3295
3296 We do this by deleting the INSN containing the SEQUENCE, then
3297 re-emitting the insns separately, and then deleting the RETURN.
3298 This allows the count of the jump target to be properly
3299 decremented.
3300
3301 Note that we need to change the INSN_UID of the re-emitted insns
3302 since it is used to hash the insns for mark_target_live_regs and
3303 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3304
3305 Clear the from target bit, since these insns are no longer
3306 in delay slots. */
3307 for (i = 0; i < XVECLEN (pat, 0); i++)
3308 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3309
3310 rtx_insn *prev = PREV_INSN (insn);
3311 delete_related_insns (insn);
3312 gcc_assert (GET_CODE (pat) == SEQUENCE);
3313 add_insn_after (delay_insn, prev, NULL);
3314 after = delay_insn;
3315 for (i = 1; i < pat->len (); i++)
3316 after = emit_copy_of_insn_after (pat->insn (i), after);
3317 delete_scheduled_jump (delay_insn);
3318 continue;
3319 }
3320
3321 /* Now look only at the cases where we have a filled JUMP_INSN. */
3322 rtx_jump_insn *delay_jump_insn =
3323 dyn_cast <rtx_jump_insn *> (delay_insn);
3324 if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3325 || condjump_in_parallel_p (delay_jump_insn)))
3326 continue;
3327
3328 target_label = JUMP_LABEL (delay_jump_insn);
3329 if (target_label && ANY_RETURN_P (target_label))
3330 continue;
3331
3332 /* If this jump goes to another unconditional jump, thread it, but
3333 don't convert a jump into a RETURN here. */
3334 rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3335 delay_jump_insn,
3336 &crossing));
3337 if (ANY_RETURN_P (trial))
3338 trial = find_end_label (trial);
3339
3340 if (trial && trial != target_label
3341 && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3342 {
3343 reorg_redirect_jump (delay_jump_insn, trial);
3344 target_label = trial;
3345 if (crossing)
3346 CROSSING_JUMP_P (delay_jump_insn) = 1;
3347 }
3348
3349 /* If the first insn at TARGET_LABEL is redundant with a previous
3350 insn, redirect the jump to the following insn and process again.
3351 We use next_real_nondebug_insn instead of next_active_insn so we
3352 don't skip USE-markers, or we'll end up with incorrect
3353 liveness info. */
3354 trial = next_real_nondebug_insn (target_label);
3355 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3356 && redundant_insn (trial, insn, vNULL)
3357 && ! can_throw_internal (trial))
3358 {
3359 /* Figure out where to emit the special USE insn so we don't
3360 later incorrectly compute register live/death info. */
3361 rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3362 if (tmp == 0)
3363 tmp = find_end_label (simple_return_rtx);
3364
3365 if (tmp)
3366 {
3367 /* Insert the special USE insn and update dataflow info.
3368 We know "trial" is an insn here as it is the output of
3369 next_real_nondebug_insn () above. */
3370 update_block (as_a <rtx_insn *> (trial), tmp);
3371
3372 /* Now emit a label before the special USE insn, and
3373 redirect our jump to the new label. */
3374 target_label = get_label_before (PREV_INSN (tmp), target_label);
3375 reorg_redirect_jump (delay_jump_insn, target_label);
3376 next = insn;
3377 continue;
3378 }
3379 }
3380
3381 /* Similarly, if it is an unconditional jump with one insn in its
3382 delay list and that insn is redundant, thread the jump. */
3383 rtx_sequence *trial_seq =
3384 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3385 if (trial_seq
3386 && trial_seq->len () == 2
3387 && JUMP_P (trial_seq->insn (0))
3388 && simplejump_or_return_p (trial_seq->insn (0))
3389 && redundant_insn (trial_seq->insn (1), insn, vNULL))
3390 {
3391 rtx temp_label = JUMP_LABEL (trial_seq->insn (0));
3392 if (ANY_RETURN_P (temp_label))
3393 temp_label = find_end_label (temp_label);
3394
3395 if (temp_label
3396 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3397 temp_label, insn))
3398 {
3399 update_block (trial_seq->insn (1), insn);
3400 reorg_redirect_jump (delay_jump_insn, temp_label);
3401 next = insn;
3402 continue;
3403 }
3404 }
3405
3406 /* See if we have a simple (conditional) jump that is useless. */
3407 if (!CROSSING_JUMP_P (delay_jump_insn)
3408 && !INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3409 && !condjump_in_parallel_p (delay_jump_insn)
3410 && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3411 && !BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label)))
3412 /* If the last insn in the delay slot sets CC0 for some insn,
3413 various code assumes that it is in a delay slot. We could
3414 put it back where it belonged and delete the register notes,
3415 but it doesn't seem worthwhile in this uncommon case. */
3416 && (!HAVE_cc0
3417 || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3418 REG_CC_USER, NULL_RTX)))
3419 {
3420 rtx_insn *after;
3421 int i;
3422
3423 /* All this insn does is execute its delay list and jump to the
3424 following insn. So delete the jump and just execute the delay
3425 list insns.
3426
3427 We do this by deleting the INSN containing the SEQUENCE, then
3428 re-emitting the insns separately, and then deleting the jump.
3429 This allows the count of the jump target to be properly
3430 decremented.
3431
3432 Note that we need to change the INSN_UID of the re-emitted insns
3433 since it is used to hash the insns for mark_target_live_regs and
3434 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3435
3436 Clear the from target bit, since these insns are no longer
3437 in delay slots. */
3438 for (i = 0; i < XVECLEN (pat, 0); i++)
3439 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3440
3441 rtx_insn *prev = PREV_INSN (insn);
3442 delete_related_insns (insn);
3443 gcc_assert (GET_CODE (pat) == SEQUENCE);
3444 add_insn_after (delay_jump_insn, prev, NULL);
3445 after = delay_jump_insn;
3446 for (i = 1; i < pat->len (); i++)
3447 after = emit_copy_of_insn_after (pat->insn (i), after);
3448 delete_scheduled_jump (delay_jump_insn);
3449 continue;
3450 }
3451
3452 /* See if this is an unconditional jump around a single insn which is
3453 identical to the one in its delay slot. In this case, we can just
3454 delete the branch and the insn in its delay slot. */
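/* Schematically:
     insn:  (sequence (jump L1) X)
     next:  X
     L1:    ...
   Deleting the jump together with its delay-slot copy of X leaves the
   equivalent fall-through sequence X; L1: ...  */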
3455 if (next && NONJUMP_INSN_P (next)
3456 && label_before_next_insn (next, insn) == target_label
3457 && simplejump_p (insn)
3458 && XVECLEN (pat, 0) == 2
3459 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3460 {
3461 delete_related_insns (insn);
3462 continue;
3463 }
3464
3465 /* See if this jump (with its delay slots) conditionally branches
3466 around an unconditional jump (without delay slots). If so, invert
3467 this jump and point it to the target of the second jump. We cannot
3468 do this for annulled jumps, though. Again, don't convert a jump to
3469 a RETURN here. */
3470 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3471 && any_condjump_p (delay_jump_insn)
3472 && next && simplejump_or_return_p (next)
3473 && (next_active_insn (as_a<rtx_insn *> (target_label))
3474 == next_active_insn (next))
3475 && no_labels_between_p (insn, next))
3476 {
3477 rtx label = JUMP_LABEL (next);
3478 rtx old_label = JUMP_LABEL (delay_jump_insn);
3479
3480 if (ANY_RETURN_P (label))
3481 label = find_end_label (label);
3482
3483 /* find_end_label can generate a new label. Check this first. */
3484 if (label
3485 && no_labels_between_p (insn, next)
3486 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3487 label, insn))
3488 {
3489 /* Be careful how we do this to avoid deleting code or labels
3490 that are momentarily dead. See similar optimization in
3491 jump.c */
3492 if (old_label)
3493 ++LABEL_NUSES (old_label);
3494
3495 if (invert_jump (delay_jump_insn, label, 1))
3496 {
3497 int i;
3498
3499 /* Must update the INSN_FROM_TARGET_P bits now that
3500 the branch is reversed, so that mark_target_live_regs
3501 will handle the delay slot insn correctly. */
3502 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3503 {
3504 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3505 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3506 }
3507
3508 delete_related_insns (next);
3509 next = insn;
3510 }
3511
3512 if (old_label && --LABEL_NUSES (old_label) == 0)
3513 delete_related_insns (old_label);
3514 continue;
3515 }
3516 }
3517
3518 /* If we own the thread opposite the way this insn branches, see if we
3519 can merge its delay slots with following insns. */
3520 if (INSN_FROM_TARGET_P (pat->insn (1))
3521 && own_thread_p (NEXT_INSN (insn), 0, 1))
3522 try_merge_delay_insns (insn, next);
3523 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3524 && own_thread_p (target_label, target_label, 0))
3525 try_merge_delay_insns (insn,
3526 next_active_insn (as_a<rtx_insn *> (target_label)));
3527
3528 /* If we get here, we haven't deleted INSN. But we may have deleted
3529 NEXT, so recompute it. */
3530 next = next_active_insn (insn);
3531 }
3532 }
3533
3534
3535 /* Look for filled jumps to the end of function label. We can try to convert
3536 them into RETURN insns if the insns in the delay slot are valid for the
3537 RETURN as well. */
3538
3539 static void
3540 make_return_insns (rtx_insn *first)
3541 {
3542 rtx_insn *insn;
3543 rtx_jump_insn *jump_insn;
3544 rtx real_return_label = function_return_label;
3545 rtx real_simple_return_label = function_simple_return_label;
3546 int slots, i;
3547
3548 /* See if there is a RETURN insn in the function other than the one we
3549 made for END_OF_FUNCTION_LABEL. If so, arrange for any jump we can't
3550 convert into a RETURN to jump to it instead. */
3551 for (insn = first; insn; insn = NEXT_INSN (insn))
3552 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3553 {
3554 rtx t = get_label_before (insn, NULL_RTX);
3555 if (PATTERN (insn) == ret_rtx)
3556 real_return_label = t;
3557 else
3558 real_simple_return_label = t;
3559 break;
3560 }
3561
3562 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3563 was equal to END_OF_FUNCTION_LABEL. */
3564 if (real_return_label)
3565 LABEL_NUSES (real_return_label)++;
3566 if (real_simple_return_label)
3567 LABEL_NUSES (real_simple_return_label)++;
3568
3569 /* Clear the list of insns to fill so we can use it. */
3570 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3571
3572 for (insn = first; insn; insn = NEXT_INSN (insn))
3573 {
3574 int flags;
3575 rtx kind, real_label;
3576
3577 /* Only look at filled JUMP_INSNs that go to the end of function
3578 label. */
3579 if (!NONJUMP_INSN_P (insn))
3580 continue;
3581
3582 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3583 continue;
3584
3585 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3586
3587 if (!jump_to_label_p (pat->insn (0)))
3588 continue;
3589
3590 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3591 {
3592 kind = ret_rtx;
3593 real_label = real_return_label;
3594 }
3595 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3596 {
3597 kind = simple_return_rtx;
3598 real_label = real_simple_return_label;
3599 }
3600 else
3601 continue;
3602
3603 jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3604
3605 /* If we can't make the jump into a RETURN, try to redirect it to the best
3606 RETURN and go on to the next insn. */
3607 if (!reorg_redirect_jump (jump_insn, kind))
3608 {
3609 /* Make sure redirecting the jump will not invalidate the delay
3610 slot insns. */
3611 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3612 reorg_redirect_jump (jump_insn, real_label);
3613 continue;
3614 }
3615
3616 /* See if this RETURN can accept the insns currently in its delay slot.
3617 It can if it has at least as many slots and the contents of each
3618 slot are valid. */
3619
3620 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3621 slots = num_delay_slots (jump_insn);
3622 if (slots >= XVECLEN (pat, 0) - 1)
3623 {
3624 for (i = 1; i < XVECLEN (pat, 0); i++)
3625 if (! (
3626 #if ANNUL_IFFALSE_SLOTS
3627 (INSN_ANNULLED_BRANCH_P (jump_insn)
3628 && INSN_FROM_TARGET_P (pat->insn (i)))
3629 ? eligible_for_annul_false (jump_insn, i - 1,
3630 pat->insn (i), flags) :
3631 #endif
3632 #if ANNUL_IFTRUE_SLOTS
3633 (INSN_ANNULLED_BRANCH_P (jump_insn)
3634 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3635 ? eligible_for_annul_true (jump_insn, i - 1,
3636 pat->insn (i), flags) :
3637 #endif
3638 eligible_for_delay (jump_insn, i - 1,
3639 pat->insn (i), flags)))
3640 break;
3641 }
3642 else
3643 i = 0;
3644
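/* I equals the SEQUENCE length only if every delay-slot insn was found
   acceptable for the RETURN, in which case this insn needs no further
   work.  */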
3645 if (i == XVECLEN (pat, 0))
3646 continue;
3647
3648 /* We have to do something with this insn. If it is an unconditional
3649 RETURN, delete the SEQUENCE and output the individual insns,
3650 followed by the RETURN. Then set things up so we try to find
3651 insns for its delay slots, if it needs some. */
3652 if (ANY_RETURN_P (PATTERN (jump_insn)))
3653 {
3654 rtx_insn *after = PREV_INSN (insn);
3655
3656 delete_related_insns (insn);
3657 insn = jump_insn;
3658 for (i = 1; i < pat->len (); i++)
3659 after = emit_copy_of_insn_after (pat->insn (i), after);
3660 add_insn_after (insn, after, NULL);
3661 emit_barrier_after (insn);
3662
3663 if (slots)
3664 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3665 }
3666 else
3667 /* It is probably more efficient to keep this with its current
3668 delay slot as a branch to a RETURN. */
3669 reorg_redirect_jump (jump_insn, real_label);
3670 }
3671
3672 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3673 new delay slots we have created. */
3674 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3675 delete_related_insns (real_return_label);
3676 if (real_simple_return_label != NULL_RTX
3677 && --LABEL_NUSES (real_simple_return_label) == 0)
3678 delete_related_insns (real_simple_return_label);
3679
3680 fill_simple_delay_slots (1);
3681 fill_simple_delay_slots (0);
3682 }
3683
3684 /* Try to find insns to place in delay slots. */
3685
3686 static void
3687 dbr_schedule (rtx_insn *first)
3688 {
3689 rtx_insn *insn, *next, *epilogue_insn = 0;
3690 int i;
3691 bool need_return_insns;
3692
3693 /* If the current function has no insns other than the prologue and
3694 epilogue, then do not try to fill any delay slots. */
3695 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3696 return;
3697
3698 /* Find the highest INSN_UID and allocate and initialize our map from
3699 INSN_UID's to position in code. */
3700 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3701 {
3702 if (INSN_UID (insn) > max_uid)
3703 max_uid = INSN_UID (insn);
3704 if (NOTE_P (insn)
3705 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3706 epilogue_insn = insn;
3707 }
3708
3709 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3710 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3711 uid_to_ruid[INSN_UID (insn)] = i;
3712
3713 /* Initialize the list of insns that need filling. */
3714 if (unfilled_firstobj == 0)
3715 {
3716 gcc_obstack_init (&unfilled_slots_obstack);
3717 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3718 }
3719
3720 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3721 {
3722 rtx target;
3723
3724 /* Skip vector tables. We can't get attributes for them. */
3725 if (JUMP_TABLE_DATA_P (insn))
3726 continue;
3727
3728 if (JUMP_P (insn))
3729 INSN_ANNULLED_BRANCH_P (insn) = 0;
3730 INSN_FROM_TARGET_P (insn) = 0;
3731
3732 if (num_delay_slots (insn) > 0)
3733 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3734
3735 /* Ensure all jumps go to the last of a set of consecutive labels. */
3736 if (JUMP_P (insn)
3737 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3738 && !ANY_RETURN_P (JUMP_LABEL (insn))
3739 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3740 != JUMP_LABEL (insn)))
3741 redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3742 }
3743
3744 init_resource_info (epilogue_insn);
3745
3746 /* Show we haven't computed an end-of-function label yet. */
3747 function_return_label = function_simple_return_label = NULL;
3748
3749 /* Initialize the statistics for this function. */
3750 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3751 memset (num_filled_delays, 0, sizeof num_filled_delays);
3752
3753 /* Now do the delay slot filling. Try everything twice in case earlier
3754 changes make more slots fillable. */
3755
3756 for (reorg_pass_number = 0;
3757 reorg_pass_number < MAX_REORG_PASSES;
3758 reorg_pass_number++)
3759 {
3760 fill_simple_delay_slots (1);
3761 fill_simple_delay_slots (0);
3762 if (!targetm.no_speculation_in_delay_slots_p ())
3763 fill_eager_delay_slots ();
3764 relax_delay_slots (first);
3765 }
3766
3767 /* If we made an end of function label, indicate that it is now
3768 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3769 If it is now unused, delete it. */
3770 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3771 delete_related_insns (function_return_label);
3772 if (function_simple_return_label
3773 && --LABEL_NUSES (function_simple_return_label) == 0)
3774 delete_related_insns (function_simple_return_label);
3775
3776 need_return_insns = false;
3777 need_return_insns |= targetm.have_return () && function_return_label != 0;
3778 need_return_insns |= (targetm.have_simple_return ()
3779 && function_simple_return_label != 0);
3780 if (need_return_insns)
3781 make_return_insns (first);
3782
3783 /* Delete any USE insns made by update_block; subsequent passes don't need
3784 them or know how to deal with them. */
3785 for (insn = first; insn; insn = next)
3786 {
3787 next = NEXT_INSN (insn);
3788
3789 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3790 && INSN_P (XEXP (PATTERN (insn), 0)))
3791 next = delete_related_insns (insn);
3792 }
3793
3794 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3795
3796 /* It is not clear why the line below is needed, but it does seem to be. */
3797 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3798
3799 if (dump_file)
3800 {
3801 int i, j, need_comma;
3802 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3803 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3804
3805 for (reorg_pass_number = 0;
3806 reorg_pass_number < MAX_REORG_PASSES;
3807 reorg_pass_number++)
3808 {
3809 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3810 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3811 {
3812 need_comma = 0;
3813 fprintf (dump_file, ";; Reorg function #%d\n", i);
3814
3815 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3816 num_insns_needing_delays[i][reorg_pass_number]);
3817
3818 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3819 if (num_filled_delays[i][j][reorg_pass_number])
3820 {
3821 if (need_comma)
3822 fprintf (dump_file, ", ");
3823 need_comma = 1;
3824 fprintf (dump_file, "%d got %d delays",
3825 num_filled_delays[i][j][reorg_pass_number], j);
3826 }
3827 fprintf (dump_file, "\n");
3828 }
3829 }
3830 memset (total_delay_slots, 0, sizeof total_delay_slots);
3831 memset (total_annul_slots, 0, sizeof total_annul_slots);
3832 for (insn = first; insn; insn = NEXT_INSN (insn))
3833 {
3834 if (! insn->deleted ()
3835 && NONJUMP_INSN_P (insn)
3836 && GET_CODE (PATTERN (insn)) != USE
3837 && GET_CODE (PATTERN (insn)) != CLOBBER
3838 && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH)
3839 {
3840 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3841 {
3842 rtx control;
3843 j = XVECLEN (PATTERN (insn), 0) - 1;
3844 if (j > MAX_DELAY_HISTOGRAM)
3845 j = MAX_DELAY_HISTOGRAM;
3846 control = XVECEXP (PATTERN (insn), 0, 0);
3847 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3848 total_annul_slots[j]++;
3849 else
3850 total_delay_slots[j]++;
3851 }
3852 else if (num_delay_slots (insn) > 0)
3853 total_delay_slots[0]++;
3854 }
3855 }
3856 fprintf (dump_file, ";; Reorg totals: ");
3857 need_comma = 0;
3858 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3859 {
3860 if (total_delay_slots[j])
3861 {
3862 if (need_comma)
3863 fprintf (dump_file, ", ");
3864 need_comma = 1;
3865 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3866 }
3867 }
3868 fprintf (dump_file, "\n");
3869
3870 if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3871 {
3872 fprintf (dump_file, ";; Reorg annuls: ");
3873 need_comma = 0;
3874 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3875 {
3876 if (total_annul_slots[j])
3877 {
3878 if (need_comma)
3879 fprintf (dump_file, ", ");
3880 need_comma = 1;
3881 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3882 }
3883 }
3884 fprintf (dump_file, "\n");
3885 }
3886
3887 fprintf (dump_file, "\n");
3888 }
3889
3890 if (!sibling_labels.is_empty ())
3891 {
3892 update_alignments (sibling_labels);
3893 sibling_labels.release ();
3894 }
3895
3896 free_resource_info ();
3897 free (uid_to_ruid);
3898 crtl->dbr_scheduled_p = true;
3899 }
3900
3901 /* Run delay slot optimization. */
3902 static unsigned int
3903 rest_of_handle_delay_slots (void)
3904 {
3905 if (DELAY_SLOTS)
3906 dbr_schedule (get_insns ());
3907
3908 return 0;
3909 }
3910
3911 namespace {
3912
3913 const pass_data pass_data_delay_slots =
3914 {
3915 RTL_PASS, /* type */
3916 "dbr", /* name */
3917 OPTGROUP_NONE, /* optinfo_flags */
3918 TV_DBR_SCHED, /* tv_id */
3919 0, /* properties_required */
3920 0, /* properties_provided */
3921 0, /* properties_destroyed */
3922 0, /* todo_flags_start */
3923 0, /* todo_flags_finish */
3924 };
3925
3926 class pass_delay_slots : public rtl_opt_pass
3927 {
3928 public:
3929 pass_delay_slots (gcc::context *ctxt)
3930 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3931 {}
3932
3933 /* opt_pass methods: */
3934 virtual bool gate (function *);
3935 virtual unsigned int execute (function *)
3936 {
3937 return rest_of_handle_delay_slots ();
3938 }
3939
3940 }; // class pass_delay_slots
3941
3942 bool
3943 pass_delay_slots::gate (function *)
3944 {
3945 /* At -O0 dataflow info isn't updated after RA. */
3946 if (DELAY_SLOTS)
3947 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3948
3949 return false;
3950 }
3951
3952 } // anon namespace
3953
3954 rtl_opt_pass *
3955 make_pass_delay_slots (gcc::context *ctxt)
3956 {
3957 return new pass_delay_slots (ctxt);
3958 }
3959
3960 /* Machine dependent reorg pass. */
3961
3962 namespace {
3963
3964 const pass_data pass_data_machine_reorg =
3965 {
3966 RTL_PASS, /* type */
3967 "mach", /* name */
3968 OPTGROUP_NONE, /* optinfo_flags */
3969 TV_MACH_DEP, /* tv_id */
3970 0, /* properties_required */
3971 0, /* properties_provided */
3972 0, /* properties_destroyed */
3973 0, /* todo_flags_start */
3974 0, /* todo_flags_finish */
3975 };
3976
3977 class pass_machine_reorg : public rtl_opt_pass
3978 {
3979 public:
3980 pass_machine_reorg (gcc::context *ctxt)
3981 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3982 {}
3983
3984 /* opt_pass methods: */
3985 virtual bool gate (function *)
3986 {
3987 return targetm.machine_dependent_reorg != 0;
3988 }
3989
3990 virtual unsigned int execute (function *)
3991 {
3992 targetm.machine_dependent_reorg ();
3993 return 0;
3994 }
3995
3996 }; // class pass_machine_reorg
3997
3998 } // anon namespace
3999
4000 rtl_opt_pass *
4001 make_pass_machine_reorg (gcc::context *ctxt)
4002 {
4003 return new pass_machine_reorg (ctxt);
4004 }
4005