1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* Instruction reorganization pass.
23
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
31
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
36
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
41
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that, failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
47
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
54
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
58
59 Three techniques for filling delay slots have been implemented so far:
60
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
69
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
82
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 a conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
92
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
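
/* A note on representation (an illustrative sketch, not the output of any
   particular target): once this pass fills the delay slots of an insn, the
   insn and its delay insns are wrapped into a single insn whose pattern is
   a SEQUENCE rtx; element 0 of the SEQUENCE is the insn needing the slots
   and the remaining elements are the insns chosen for them, e.g.

	(insn (sequence [
	   (jump_insn ...)	;; the branch, call or other delay insn
	   (insn ...)		;; delay slot 1
	   (insn ...)		;; delay slot 2
	 ]))

   See emit_delay_sequence below for how this SEQUENCE is built.  */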
102
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "memmodel.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "emit-rtl.h"
116 #include "recog.h"
117 #include "insn-attr.h"
118 #include "resource.h"
119 #include "params.h"
120 #include "tree-pass.h"
121
122
123 /* First, some functions that were used before GCC got a control flow graph.
124 These functions are now only used here in reorg.c, and have therefore
125 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
126
127 /* Return the last label to mark the same position as LABEL. Return LABEL
128 itself if it is null or any return rtx. */
129
130 static rtx
131 skip_consecutive_labels (rtx label_or_return)
132 {
133 rtx_insn *insn;
134
135 if (label_or_return && ANY_RETURN_P (label_or_return))
136 return label_or_return;
137
138 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
139
140 /* __builtin_unreachable can create a CODE_LABEL followed by a BARRIER.
141
142 Since reaching the CODE_LABEL is undefined behavior, we can return
143 any code label and we're OK at runtime.
144
145 However, if we return a CODE_LABEL which leads to a shrink-wrapped
146 epilogue, but the path does not have a prologue, then we will trip
147 a sanity check in the dwarf2 cfi code which wants to verify that
148 the CFIs are all the same on the traces leading to the epilogue.
149
150 So we explicitly disallow looking through BARRIERS here. */
151 for (insn = label;
152 insn != 0 && !INSN_P (insn) && !BARRIER_P (insn);
153 insn = NEXT_INSN (insn))
154 if (LABEL_P (insn))
155 label = insn;
156
157 return label;
158 }
159
160 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
161 and REG_CC_USER notes so we can find it. */
162
163 static void
164 link_cc0_insns (rtx_insn *insn)
165 {
166 rtx user = next_nonnote_insn (insn);
167
168 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
169 user = XVECEXP (PATTERN (user), 0, 0);
170
171 add_reg_note (user, REG_CC_SETTER, insn);
172 add_reg_note (insn, REG_CC_USER, user);
173 }
174
175 /* Insns which have delay slots that have not yet been filled. */
176
177 static struct obstack unfilled_slots_obstack;
178 static rtx *unfilled_firstobj;
179
180 /* Define macros to refer to the first and last slot containing unfilled
181 insns. These are used because the list may move and its address
182 should be recomputed at each use. */
183
184 #define unfilled_slots_base \
185 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
186
187 #define unfilled_slots_next \
188 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
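
/* A minimal usage sketch (illustrative only; "process" is a placeholder):
   entries are added with obstack_ptr_grow (&unfilled_slots_obstack, insn)
   and later scanned roughly as

	for (rtx_insn **p = unfilled_slots_base; p < unfilled_slots_next; p++)
	  process (*p);

   re-reading both macros at each use, since growing the obstack may move
   its storage.  */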
189
190 /* Points to the label before the end of the function, or before a
191 return insn. */
192 static rtx_code_label *function_return_label;
193 /* Likewise for a simple_return. */
194 static rtx_code_label *function_simple_return_label;
195
196 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
197 not always monotonically increase. */
198 static int *uid_to_ruid;
199
200 /* Highest valid index in `uid_to_ruid'. */
201 static int max_uid;
202
203 static int stop_search_p (rtx_insn *, int);
204 static int resource_conflicts_p (struct resources *, struct resources *);
205 static int insn_references_resource_p (rtx, struct resources *, bool);
206 static int insn_sets_resource_p (rtx, struct resources *, bool);
207 static rtx_code_label *find_end_label (rtx);
208 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
209 int);
210 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
211 static rtx_insn *delete_from_delay_slot (rtx_insn *);
212 static void delete_scheduled_jump (rtx_insn *);
213 static void note_delay_statistics (int, int);
214 static int get_jump_flags (const rtx_insn *, rtx);
215 static int mostly_true_jump (rtx);
216 static rtx get_branch_condition (const rtx_insn *, rtx);
217 static int condition_dominates_p (rtx, const rtx_insn *);
218 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
219 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
220 const vec<rtx_insn *> &);
221 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
222 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
223 vec<rtx_insn *> *,
224 struct resources *,
225 struct resources *,
226 struct resources *,
227 int, int *, int *,
228 rtx *);
229 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
230 vec<rtx_insn *> *,
231 struct resources *,
232 struct resources *,
233 struct resources *,
234 int, int *, int *);
235 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
236 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
237 static int own_thread_p (rtx, rtx, int);
238 static void update_block (rtx_insn *, rtx_insn *);
239 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
240 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
241 static void fix_reg_dead_note (rtx_insn *, rtx);
242 static void update_reg_unused_notes (rtx_insn *, rtx);
243 static void fill_simple_delay_slots (int);
244 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
245 int, int, int, int,
246 int *, vec<rtx_insn *> *);
247 static void fill_eager_delay_slots (void);
248 static void relax_delay_slots (rtx_insn *);
249 static void make_return_insns (rtx_insn *);
250
251 /* A wrapper around next_active_insn which takes care to return ret_rtx
252 unchanged. */
253
254 static rtx
255 first_active_target_insn (rtx insn)
256 {
257 if (ANY_RETURN_P (insn))
258 return insn;
259 return next_active_insn (as_a <rtx_insn *> (insn));
260 }
261
262 /* Return true iff INSN is a simplejump, or any kind of return insn. */
263
264 static bool
265 simplejump_or_return_p (rtx insn)
266 {
267 return (JUMP_P (insn)
268 && (simplejump_p (as_a <rtx_insn *> (insn))
269 || ANY_RETURN_P (PATTERN (insn))));
270 }
271
272 /* Return TRUE if this insn should stop the search for insns to fill delay
273 slots. LABELS_P indicates that labels should terminate the search.
274 In all cases, jumps terminate the search. */
275
276 static int
277 stop_search_p (rtx_insn *insn, int labels_p)
278 {
279 if (insn == 0)
280 return 1;
281
282 /* If the insn can throw an exception that is caught within the function,
283 it may effectively perform a jump from the viewpoint of the function.
284 Therefore act like for a jump. */
285 if (can_throw_internal (insn))
286 return 1;
287
288 switch (GET_CODE (insn))
289 {
290 case NOTE:
291 case CALL_INSN:
292 case DEBUG_INSN:
293 return 0;
294
295 case CODE_LABEL:
296 return labels_p;
297
298 case JUMP_INSN:
299 case BARRIER:
300 return 1;
301
302 case INSN:
303 /* OK unless it contains a delay slot or is an `asm' insn of some type.
304 We don't know anything about these. */
305 return (GET_CODE (PATTERN (insn)) == SEQUENCE
306 || GET_CODE (PATTERN (insn)) == ASM_INPUT
307 || asm_noperands (PATTERN (insn)) >= 0);
308
309 default:
310 gcc_unreachable ();
311 }
312 }
313
314 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
315 resource set contains a volatile memory reference. Otherwise, return FALSE. */
316
317 static int
318 resource_conflicts_p (struct resources *res1, struct resources *res2)
319 {
320 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
321 || res1->volatil || res2->volatil)
322 return 1;
323
324 return hard_reg_set_intersect_p (res1->regs, res2->regs);
325 }
326
327 /* Return TRUE if any resource marked in RES, a `struct resources', is
328 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, also return TRUE
329 if a routine called by INSN uses those resources.
330
331 We compute this by computing all the resources referenced by INSN and
332 seeing if this conflicts with RES. It might be faster to directly check
333 ourselves, and this is the way it used to work, but it means duplicating
334 a large block of complex code. */
335
336 static int
337 insn_references_resource_p (rtx insn, struct resources *res,
338 bool include_delayed_effects)
339 {
340 struct resources insn_res;
341
342 CLEAR_RESOURCE (&insn_res);
343 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
344 return resource_conflicts_p (&insn_res, res);
345 }
346
347 /* Return TRUE if INSN modifies resources that are marked in RES.
348 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
349 included. CC0 is only modified if it is explicitly set; see comments
350 in front of mark_set_resources for details. */
351
352 static int
353 insn_sets_resource_p (rtx insn, struct resources *res,
354 bool include_delayed_effects)
355 {
356 struct resources insn_sets;
357
358 CLEAR_RESOURCE (&insn_sets);
359 mark_set_resources (insn, &insn_sets, 0,
360 (include_delayed_effects
361 ? MARK_SRC_DEST_CALL
362 : MARK_SRC_DEST));
363 return resource_conflicts_p (&insn_sets, res);
364 }
365
366 /* Find a label at the end of the function or before a RETURN. If there
367 is none, try to make one. If that fails, returns 0.
368
369 The property of such a label is that it is placed just before the
370 epilogue or a bare RETURN insn, so that another bare RETURN can be
371 turned into a jump to the label unconditionally. In particular, the
372 label cannot be placed before a RETURN insn with a filled delay slot.
373
374 ??? There may be a problem with the current implementation. Suppose
375 we start with a bare RETURN insn and call find_end_label. It may set
376 function_return_label just before the RETURN. Suppose the machinery
377 is able to fill the delay slot of the RETURN insn afterwards. Then
378 function_return_label is no longer valid according to the property
379 described above and find_end_label will still return it unmodified.
380 Note that this is probably mitigated by the following observation:
381 once function_return_label is made, it is very likely the target of
382 a jump, so filling the delay slot of the RETURN will be much more
383 difficult.
384 KIND is either simple_return_rtx or ret_rtx, indicating which type of
385 return we're looking for. */
386
387 static rtx_code_label *
388 find_end_label (rtx kind)
389 {
390 rtx_insn *insn;
391 rtx_code_label **plabel;
392
393 if (kind == ret_rtx)
394 plabel = &function_return_label;
395 else
396 {
397 gcc_assert (kind == simple_return_rtx);
398 plabel = &function_simple_return_label;
399 }
400
401 /* If we found one previously, return it. */
402 if (*plabel)
403 return *plabel;
404
405 /* Otherwise, see if there is a label at the end of the function. If there
406 is, it must be that RETURN insns aren't needed, so that is our return
407 label and we don't have to do anything else. */
408
409 insn = get_last_insn ();
410 while (NOTE_P (insn)
411 || (NONJUMP_INSN_P (insn)
412 && (GET_CODE (PATTERN (insn)) == USE
413 || GET_CODE (PATTERN (insn)) == CLOBBER
414 || GET_CODE (PATTERN (insn)) == CLOBBER_HIGH)))
415 insn = PREV_INSN (insn);
416
417 /* When a target threads its epilogue we might already have a
418 suitable return insn. If so put a label before it for the
419 function_return_label. */
420 if (BARRIER_P (insn)
421 && JUMP_P (PREV_INSN (insn))
422 && PATTERN (PREV_INSN (insn)) == kind)
423 {
424 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
425 rtx_code_label *label = gen_label_rtx ();
426 LABEL_NUSES (label) = 0;
427
428 /* Put the label before any USE insns that may precede the RETURN
429 insn. */
430 while (GET_CODE (temp) == USE)
431 temp = PREV_INSN (temp);
432
433 emit_label_after (label, temp);
434 *plabel = label;
435 }
436
437 else if (LABEL_P (insn))
438 *plabel = as_a <rtx_code_label *> (insn);
439 else
440 {
441 rtx_code_label *label = gen_label_rtx ();
442 LABEL_NUSES (label) = 0;
443 /* If the basic block reorder pass moves the return insn to
444 some other place try to locate it again and put our
445 function_return_label there. */
446 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
447 insn = PREV_INSN (insn);
448 if (insn)
449 {
450 insn = PREV_INSN (insn);
451
452 /* Put the label before any USE insns that may precede the
453 RETURN insn. */
454 while (GET_CODE (insn) == USE)
455 insn = PREV_INSN (insn);
456
457 emit_label_after (label, insn);
458 }
459 else
460 {
461 if (targetm.have_epilogue () && ! targetm.have_return ())
462 /* The RETURN insn has its delay slot filled so we cannot
463 emit the label just before it. Since we already have
464 an epilogue and cannot emit a new RETURN, we cannot
465 emit the label at all. */
466 return NULL;
467
468 /* Otherwise, make a new label and emit a RETURN and BARRIER,
469 if needed. */
470 emit_label (label);
471 if (targetm.have_return ())
472 {
473 /* The return we make may have delay slots too. */
474 rtx_insn *pat = targetm.gen_return ();
475 rtx_insn *insn = emit_jump_insn (pat);
476 set_return_jump_label (insn);
477 emit_barrier ();
478 if (num_delay_slots (insn) > 0)
479 obstack_ptr_grow (&unfilled_slots_obstack, insn);
480 }
481 }
482 *plabel = label;
483 }
484
485 /* Show one additional use for this label so it won't go away until
486 we are done. */
487 ++LABEL_NUSES (*plabel);
488
489 return *plabel;
490 }
491
492 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
493 the pattern of INSN with the SEQUENCE.
494
495 Returns the insn containing the SEQUENCE that replaces INSN. */
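
/* For instance (schematic only): if INSN is a branch and LIST holds the two
   insns I1 and I2 chosen for its delay slots, the stream

	prev -> INSN -> next

   becomes

	prev -> (sequence [INSN I1 I2]) -> next

   with the new SEQUENCE insn occupying INSN's old place in the chain.  */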
496
497 static rtx_insn *
498 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
499 {
500 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
501 rtvec seqv = rtvec_alloc (length + 1);
502 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
503 rtx_insn *seq_insn = make_insn_raw (seq);
504
505 /* If INSN has a location, use it for SEQ_INSN. If INSN does not have a
506 location, but one of the delayed insns does, we pick up a location from
507 there later. */
508 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
509
510 /* Unlink INSN from the insn chain, so that we can put it into
511 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
512 rtx_insn *after = PREV_INSN (insn);
513 remove_insn (insn);
514 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
515
516 /* Build our SEQUENCE and rebuild the insn chain. */
517 start_sequence ();
518 XVECEXP (seq, 0, 0) = emit_insn (insn);
519
520 unsigned int delay_insns = list.length ();
521 gcc_assert (delay_insns == (unsigned int) length);
522 for (unsigned int i = 0; i < delay_insns; i++)
523 {
524 rtx_insn *tem = list[i];
525 rtx note, next;
526
527 /* Show that this copy of the insn isn't deleted. */
528 tem->set_undeleted ();
529
530 /* Unlink insn from its original place, and re-emit it into
531 the sequence. */
532 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
533 XVECEXP (seq, 0, i + 1) = emit_insn (tem);
534
535 /* The SPARC assembler, for instance, emits a warning when debug info is
536 output into the delay slot. */
537 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
538 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
539 INSN_LOCATION (tem) = 0;
540
541 for (note = REG_NOTES (tem); note; note = next)
542 {
543 next = XEXP (note, 1);
544 switch (REG_NOTE_KIND (note))
545 {
546 case REG_DEAD:
547 /* Remove any REG_DEAD notes because we can't rely on them now
548 that the insn has been moved. */
549 remove_note (tem, note);
550 break;
551
552 case REG_LABEL_OPERAND:
553 case REG_LABEL_TARGET:
554 /* Keep the label reference count up to date. */
555 if (LABEL_P (XEXP (note, 0)))
556 LABEL_NUSES (XEXP (note, 0)) ++;
557 break;
558
559 default:
560 break;
561 }
562 }
563 }
564 end_sequence ();
565
566 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
567 add_insn_after (seq_insn, after, NULL);
568
569 return seq_insn;
570 }
571
572 /* Add INSN to the end of DELAY_LIST. The list must be kept in the order
573 in which the insns are to be executed. */
574
575 static void
576 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
577 {
578 /* If INSN has its block number recorded, clear it since we may
579 be moving the insn to a new block. */
580 clear_hashed_info_for_insn (insn);
581
582 delay_list->safe_push (insn);
583 }
584
585 /* Delete INSN from the delay slot of the insn that it is in, which may
586 produce an insn with no delay slots. Return the new insn. */
587
588 static rtx_insn *
589 delete_from_delay_slot (rtx_insn *insn)
590 {
591 rtx_insn *trial, *seq_insn, *prev;
592 rtx_sequence *seq;
593 int i;
594 int had_barrier = 0;
595
596 /* We first must find the insn containing the SEQUENCE with INSN in its
597 delay slot. Do this by finding an insn, TRIAL, where
598 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
599
600 for (trial = insn;
601 PREV_INSN (NEXT_INSN (trial)) == trial;
602 trial = NEXT_INSN (trial))
603 ;
604
605 seq_insn = PREV_INSN (NEXT_INSN (trial));
606 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
607
608 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
609 had_barrier = 1;
610
611 /* Create a delay list consisting of all the insns other than the one
612 we are deleting (unless we were the only one). */
613 auto_vec<rtx_insn *, 5> delay_list;
614 if (seq->len () > 2)
615 for (i = 1; i < seq->len (); i++)
616 if (seq->insn (i) != insn)
617 add_to_delay_list (seq->insn (i), &delay_list);
618
619 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
620 list, and rebuild the delay list if non-empty. */
621 prev = PREV_INSN (seq_insn);
622 trial = seq->insn (0);
623 delete_related_insns (seq_insn);
624 add_insn_after (trial, prev, NULL);
625
626 /* If there was a barrier after the old SEQUENCE, re-emit it. */
627 if (had_barrier)
628 emit_barrier_after (trial);
629
630 /* If there are any delay insns, re-emit them. Otherwise clear the
631 annul flag. */
632 if (!delay_list.is_empty ())
633 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
634 else if (JUMP_P (trial))
635 INSN_ANNULLED_BRANCH_P (trial) = 0;
636
637 INSN_FROM_TARGET_P (insn) = 0;
638
639 /* Show we need to fill this insn again. */
640 obstack_ptr_grow (&unfilled_slots_obstack, trial);
641
642 return trial;
643 }
644
645 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
646 the insn that sets CC0 for it and delete it too. */
647
648 static void
649 delete_scheduled_jump (rtx_insn *insn)
650 {
651 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
652 delete the insn that sets the condition code, but it is hard to find it.
653 Since this case is rare anyway, don't bother trying; there would likely
654 be other insns that became dead anyway, which we wouldn't know to
655 delete. */
656
657 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
658 {
659 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
660
661 /* If a reg-note was found, it points to an insn to set CC0. This
662 insn is in the delay list of some other insn. So delete it from
663 the delay list it was in. */
664 if (note)
665 {
666 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
667 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
668 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
669 }
670 else
671 {
672 /* The insn setting CC0 is our previous insn, but it may be in
673 a delay slot. It will be the last insn in the delay slot, if
674 it is. */
675 rtx_insn *trial = previous_insn (insn);
676 if (NOTE_P (trial))
677 trial = prev_nonnote_insn (trial);
678 if (sets_cc0_p (PATTERN (trial)) != 1
679 || FIND_REG_INC_NOTE (trial, NULL_RTX))
680 return;
681 if (PREV_INSN (NEXT_INSN (trial)) == trial)
682 delete_related_insns (trial);
683 else
684 delete_from_delay_slot (trial);
685 }
686 }
687
688 delete_related_insns (insn);
689 }
690
691 /* Counters for delay-slot filling. */
692
693 #define NUM_REORG_FUNCTIONS 2
694 #define MAX_DELAY_HISTOGRAM 3
695 #define MAX_REORG_PASSES 2
696
697 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
698
699 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
700
701 static int reorg_pass_number;
702
703 static void
704 note_delay_statistics (int slots_filled, int index)
705 {
706 num_insns_needing_delays[index][reorg_pass_number]++;
707 if (slots_filled > MAX_DELAY_HISTOGRAM)
708 slots_filled = MAX_DELAY_HISTOGRAM;
709 num_filled_delays[index][slots_filled][reorg_pass_number]++;
710 }
711
712 /* Optimize the following cases:
713
714 1. When a conditional branch skips over only one instruction,
715 use an annulling branch and put that insn in the delay slot.
716 Use either a branch that annuls when the condition is true or
717 invert the test with a branch that annuls when the condition is
718 false. This saves insns, since otherwise we must copy an insn
719 from the L1 target.
720
721 (orig) (skip) (otherwise)
722 Bcc.n L1 Bcc',a L1 Bcc,a L1'
723 insn insn insn2
724 L1: L1: L1:
725 insn2 insn2 insn2
726 insn3 insn3 L1':
727 insn3
728
729 2. When a conditional branch skips over only one instruction,
730 and after that, it unconditionally branches somewhere else,
731 perform a similar optimization. This saves executing the
732 second branch in the case where the inverted condition is true.
733
734 Bcc.n L1 Bcc',a L2
735 insn insn
736 L1: L1:
737 Bra L2 Bra L2
738
739 INSN is a JUMP_INSN.
740
741 This should be expanded to skip over N insns, where N is the number
742 of delay slots required. */
743
744 static void
745 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
746 {
747 rtx_insn *trial = next_nonnote_insn (insn);
748 rtx_insn *next_trial = next_active_insn (trial);
749 int flags;
750
751 flags = get_jump_flags (insn, JUMP_LABEL (insn));
752
753 if (trial == 0
754 || !NONJUMP_INSN_P (trial)
755 || GET_CODE (PATTERN (trial)) == SEQUENCE
756 || recog_memoized (trial) < 0
757 || (! eligible_for_annul_false (insn, 0, trial, flags)
758 && ! eligible_for_annul_true (insn, 0, trial, flags))
759 || RTX_FRAME_RELATED_P (trial)
760 || can_throw_internal (trial))
761 return;
762
763 /* There are two cases where we are just executing one insn (we assume
764 here that a branch requires only one insn; this should be generalized
765 at some point): Where the branch goes around a single insn or where
766 we have one insn followed by a branch to the same label we branch to.
767 In both of these cases, inverting the jump and annulling the delay
768 slot give the same effect in fewer insns. */
769 if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
770 || (next_trial != 0
771 && simplejump_or_return_p (next_trial)
772 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
773 {
774 if (eligible_for_annul_false (insn, 0, trial, flags))
775 {
776 if (invert_jump (insn, JUMP_LABEL (insn), 1))
777 INSN_FROM_TARGET_P (trial) = 1;
778 else if (! eligible_for_annul_true (insn, 0, trial, flags))
779 return;
780 }
781
782 add_to_delay_list (trial, delay_list);
783 next_trial = next_active_insn (trial);
784 update_block (trial, trial);
785 delete_related_insns (trial);
786
787 /* Also, if we are targeting an unconditional
788 branch, thread our jump to the target of that branch. Don't
789 change this into a RETURN here, because it may not accept what
790 we have in the delay slot. We'll fix this up later. */
791 if (next_trial && simplejump_or_return_p (next_trial))
792 {
793 rtx target_label = JUMP_LABEL (next_trial);
794 if (ANY_RETURN_P (target_label))
795 target_label = find_end_label (target_label);
796
797 if (target_label)
798 {
799 /* Recompute the flags based on TARGET_LABEL since threading
800 the jump to TARGET_LABEL may change the direction of the
801 jump (which may change the circumstances in which the
802 delay slot is nullified). */
803 flags = get_jump_flags (insn, target_label);
804 if (eligible_for_annul_true (insn, 0, trial, flags))
805 reorg_redirect_jump (insn, target_label);
806 }
807 }
808
809 INSN_ANNULLED_BRANCH_P (insn) = 1;
810 }
811 }
812
813 /* Encode and return branch direction and prediction information for
814 INSN assuming it will jump to LABEL.
815
816 Non-conditional branches return no direction information and
817 are predicted as very likely taken. */
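
/* Concretely, as implemented below: a conditional jump to a label that
   occurs later in the current ordering yields ATTR_FLAG_forward, a jump to
   an earlier label yields ATTR_FLAG_backward, and everything else (calls,
   unconditional jumps, returns) yields 0.  */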
818
819 static int
820 get_jump_flags (const rtx_insn *insn, rtx label)
821 {
822 int flags;
823
824 /* get_jump_flags can be passed any insn with delay slots, these may
825 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
826 direction information, and only if they are conditional jumps.
827
828 If LABEL is a return, then there is no way to determine the branch
829 direction. */
830 if (JUMP_P (insn)
831 && (condjump_p (insn) || condjump_in_parallel_p (insn))
832 && !ANY_RETURN_P (label)
833 && INSN_UID (insn) <= max_uid
834 && INSN_UID (label) <= max_uid)
835 flags
836 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
837 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
838 /* No valid direction information. */
839 else
840 flags = 0;
841
842 return flags;
843 }
844
845 /* Return truth value of the statement that this branch
846 is mostly taken. If we think that the branch is extremely likely
847 to be taken, we return 2. If the branch is slightly more likely to be
848 taken, return 1. If the branch is slightly less likely to be taken,
849 return 0, and if the branch is highly unlikely to be taken, return -1. */
850
851 static int
852 mostly_true_jump (rtx jump_insn)
853 {
854 /* If branch probabilities are available, then use that number since it
855 always gives a correct answer. */
856 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
857 if (note)
858 {
859 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
860 .to_reg_br_prob_base ();
861
862 if (prob >= REG_BR_PROB_BASE * 9 / 10)
863 return 2;
864 else if (prob >= REG_BR_PROB_BASE / 2)
865 return 1;
866 else if (prob >= REG_BR_PROB_BASE / 10)
867 return 0;
868 else
869 return -1;
870 }
871
872 /* If there is no note, assume branches are not taken.
873 This should be rare. */
874 return 0;
875 }
876
877 /* Return the condition under which INSN will branch to TARGET. If TARGET
878 is zero, return the condition under which INSN will return. If INSN is
879 an unconditional branch, return const_true_rtx. If INSN isn't a simple
880 type of jump, or it doesn't go to TARGET, return 0. */
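
/* For reference, the typical RTL shape handled below (shown schematically):
   a conditional branch to TARGET usually looks like

	(set (pc) (if_then_else (ne (reg) (const_int 0))
				(label_ref TARGET)
				(pc)))

   in which case the (ne ...) expression is returned; if TARGET sits in the
   other arm instead, the reversed comparison is built and returned.  */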
881
882 static rtx
883 get_branch_condition (const rtx_insn *insn, rtx target)
884 {
885 rtx pat = PATTERN (insn);
886 rtx src;
887
888 if (condjump_in_parallel_p (insn))
889 pat = XVECEXP (pat, 0, 0);
890
891 if (ANY_RETURN_P (pat) && pat == target)
892 return const_true_rtx;
893
894 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
895 return 0;
896
897 src = SET_SRC (pat);
898 if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
899 return const_true_rtx;
900
901 else if (GET_CODE (src) == IF_THEN_ELSE
902 && XEXP (src, 2) == pc_rtx
903 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
904 && label_ref_label (XEXP (src, 1)) == target)
905 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
906 return XEXP (src, 0);
907
908 else if (GET_CODE (src) == IF_THEN_ELSE
909 && XEXP (src, 1) == pc_rtx
910 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
911 && label_ref_label (XEXP (src, 2)) == target)
912 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
913 {
914 enum rtx_code rev;
915 rev = reversed_comparison_code (XEXP (src, 0), insn);
916 if (rev != UNKNOWN)
917 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
918 XEXP (XEXP (src, 0), 0),
919 XEXP (XEXP (src, 0), 1));
920 }
921
922 return 0;
923 }
924
925 /* Return nonzero if CONDITION is more strict than the condition of
926 INSN, i.e., if INSN will always branch if CONDITION is true. */
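
/* For example, (gt (reg) (const_int 0)) dominates a branch taken on
   (ge (reg) (const_int 0)) with the same operands: whenever the first
   condition holds, the second does too, so the branch is sure to be taken.
   (Illustrative; the actual test is comparison_dominates_p.)  */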
927
928 static int
929 condition_dominates_p (rtx condition, const rtx_insn *insn)
930 {
931 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
932 enum rtx_code code = GET_CODE (condition);
933 enum rtx_code other_code;
934
935 if (rtx_equal_p (condition, other_condition)
936 || other_condition == const_true_rtx)
937 return 1;
938
939 else if (condition == const_true_rtx || other_condition == 0)
940 return 0;
941
942 other_code = GET_CODE (other_condition);
943 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
944 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
945 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
946 return 0;
947
948 return comparison_dominates_p (code, other_code);
949 }
950
951 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
952 any insns already in the delay slot of JUMP. */
953
954 static int
955 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
956 {
957 int flags, i;
958 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
959
960 /* Make sure all the delay slots of this jump would still
961 be valid after threading the jump. If they are still
962 valid, then return nonzero. */
963
964 flags = get_jump_flags (jump, newlabel);
965 for (i = 1; i < pat->len (); i++)
966 if (! (
967 #if ANNUL_IFFALSE_SLOTS
968 (INSN_ANNULLED_BRANCH_P (jump)
969 && INSN_FROM_TARGET_P (pat->insn (i)))
970 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
971 #endif
972 #if ANNUL_IFTRUE_SLOTS
973 (INSN_ANNULLED_BRANCH_P (jump)
974 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
975 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
976 #endif
977 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
978 break;
979
980 return (i == pat->len ());
981 }
982
983 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
984 any insns we wish to place in the delay slot of JUMP. */
985
986 static int
987 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
988 const vec<rtx_insn *> &delay_list)
989 {
990 /* Make sure all the insns in DELAY_LIST would still be
991 valid after threading the jump. If they are still
992 valid, then return nonzero. */
993
994 int flags = get_jump_flags (jump, newlabel);
995 unsigned int delay_insns = delay_list.length ();
996 unsigned int i = 0;
997 for (; i < delay_insns; i++)
998 if (! (
999 #if ANNUL_IFFALSE_SLOTS
1000 (INSN_ANNULLED_BRANCH_P (jump)
1001 && INSN_FROM_TARGET_P (delay_list[i]))
1002 ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
1003 #endif
1004 #if ANNUL_IFTRUE_SLOTS
1005 (INSN_ANNULLED_BRANCH_P (jump)
1006 && ! INSN_FROM_TARGET_P (delay_list[i]))
1007 ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
1008 #endif
1009 eligible_for_delay (jump, i, delay_list[i], flags)))
1010 break;
1011
1012 return i == delay_insns;
1013 }
1014
1015 /* DELAY_LIST is a list of insns that have already been placed into delay
1016 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1017 If not, return 0; otherwise return 1. */
1018
1019 static int
1020 check_annul_list_true_false (int annul_true_p,
1021 const vec<rtx_insn *> &delay_list)
1022 {
1023 rtx_insn *trial;
1024 unsigned int i;
1025 FOR_EACH_VEC_ELT (delay_list, i, trial)
1026 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1027 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1028 return 0;
1029
1030 return 1;
1031 }
1032
1033 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1034 the condition tested by INSN is CONDITION and the resources shown in
1035 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1036 from SEQ's delay list, in addition to whatever insns it may execute
1037 (in DELAY_LIST). SETS and NEEDED denote resources already set and
1038 needed while searching for delay slot insns. If the slots can be
1039 stolen, the new insns are appended to DELAY_LIST; otherwise nothing is changed.
1040
1041 SLOTS_TO_FILL is the total number of slots required by INSN, and
1042 PSLOTS_FILLED points to the number filled so far (also the number of
1043 insns in DELAY_LIST). It is updated with the number that have been
1044 filled from the SEQUENCE, if any.
1045
1046 PANNUL_P points to a nonzero value if we already know that we need
1047 to annul INSN. If this routine determines that annulling is needed,
1048 it may set that value nonzero.
1049
1050 PNEW_THREAD points to a location that is to receive the place at which
1051 execution should continue. */
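
/* Schematically (illustrative only): if INSN branches to a label whose
   first active insn is itself a filled branch,

	INSN ----> L: (sequence [JUMP I1 I2])

   then, when the tests below succeed, copies of I1 and I2 are placed in
   INSN's own delay slots and *PNEW_THREAD is set to JUMP's target, so that
   the caller can redirect INSN past L.  */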
1052
1053 static void
1054 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1055 vec<rtx_insn *> *delay_list,
1056 struct resources *sets,
1057 struct resources *needed,
1058 struct resources *other_needed,
1059 int slots_to_fill, int *pslots_filled,
1060 int *pannul_p, rtx *pnew_thread)
1061 {
1062 int slots_remaining = slots_to_fill - *pslots_filled;
1063 int total_slots_filled = *pslots_filled;
1064 auto_vec<rtx_insn *, 5> new_delay_list;
1065 int must_annul = *pannul_p;
1066 int used_annul = 0;
1067 int i;
1068 struct resources cc_set;
1069 rtx_insn **redundant;
1070
1071 /* We can't do anything if there are more delay slots in SEQ than we
1072 can handle, or if we don't know that it will be a taken branch.
1073 We know that it will be a taken branch if it is either an unconditional
1074 branch or a conditional branch with a stricter branch condition.
1075
1076 Also, exit if the branch has more than one set, since then it is computing
1077 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1078 ??? It may be possible to move other sets into INSN in addition to
1079 moving the instructions in the delay slots.
1080
1081 We cannot steal the delay list if one of the instructions in the
1082 current delay_list modifies the condition codes and the jump in the
1083 sequence is a conditional jump. We cannot do this because we cannot
1084 change the direction of the jump because the condition codes
1085 will affect the direction of the jump in the sequence. */
1086
1087 CLEAR_RESOURCE (&cc_set);
1088
1089 rtx_insn *trial;
1090 FOR_EACH_VEC_ELT (*delay_list, i, trial)
1091 {
1092 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1093 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1094 return;
1095 }
1096
1097 if (XVECLEN (seq, 0) - 1 > slots_remaining
1098 || ! condition_dominates_p (condition, seq->insn (0))
1099 || ! single_set (seq->insn (0)))
1100 return;
1101
1102 /* On some targets, branches with delay slots can have a limited
1103 displacement. Give the back end a chance to tell us we can't do
1104 this. */
1105 if (! targetm.can_follow_jump (insn, seq->insn (0)))
1106 return;
1107
1108 redundant = XALLOCAVEC (rtx_insn *, XVECLEN (seq, 0));
1109 for (i = 1; i < seq->len (); i++)
1110 {
1111 rtx_insn *trial = seq->insn (i);
1112 int flags;
1113
1114 if (insn_references_resource_p (trial, sets, false)
1115 || insn_sets_resource_p (trial, needed, false)
1116 || insn_sets_resource_p (trial, sets, false)
1117 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1118 delay list. */
1119 || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1120 /* If TRIAL is from the fallthrough code of an annulled branch insn
1121 in SEQ, we cannot use it. */
1122 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1123 && ! INSN_FROM_TARGET_P (trial)))
1124 return;
1125
1126 /* If this insn was already done (usually in a previous delay slot),
1127 pretend we put it in our delay slot. */
1128 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1129 if (redundant[i])
1130 continue;
1131
1132 /* We will end up re-vectoring this branch, so compute flags
1133 based on jumping to the new label. */
1134 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1135
1136 if (! must_annul
1137 && ((condition == const_true_rtx
1138 || (! insn_sets_resource_p (trial, other_needed, false)
1139 && ! may_trap_or_fault_p (PATTERN (trial)))))
1140 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1141 : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1142 && (must_annul = 1,
1143 check_annul_list_true_false (0, *delay_list)
1144 && check_annul_list_true_false (0, new_delay_list)
1145 && eligible_for_annul_false (insn, total_slots_filled,
1146 trial, flags)))
1147 {
1148 if (must_annul)
1149 {
1150 /* Frame related instructions cannot go into annulled delay
1151 slots; they mess up the dwarf info. */
1152 if (RTX_FRAME_RELATED_P (trial))
1153 return;
1154 used_annul = 1;
1155 }
1156 rtx_insn *temp = copy_delay_slot_insn (trial);
1157 INSN_FROM_TARGET_P (temp) = 1;
1158 add_to_delay_list (temp, &new_delay_list);
1159 total_slots_filled++;
1160
1161 if (--slots_remaining == 0)
1162 break;
1163 }
1164 else
1165 return;
1166 }
1167
1168 /* Record the effect of the instructions that were redundant and which
1169 we therefore decided not to copy. */
1170 for (i = 1; i < seq->len (); i++)
1171 if (redundant[i])
1172 {
1173 fix_reg_dead_note (redundant[i], insn);
1174 update_block (seq->insn (i), insn);
1175 }
1176
1177 /* Show the place to which we will be branching. */
1178 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1179
1180 /* Add any new insns to the delay list and update the count of the
1181 number of slots filled. */
1182 *pslots_filled = total_slots_filled;
1183 if (used_annul)
1184 *pannul_p = 1;
1185
1186 rtx_insn *temp;
1187 FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1188 add_to_delay_list (temp, delay_list);
1189 }
1190
1191 /* Similar to steal_delay_list_from_target except that SEQ is on the
1192 fallthrough path of INSN. Here we only do something if the delay insn
1193 of SEQ is an unconditional branch. In that case we steal its delay slot
1194 for INSN since unconditional branches are much easier to fill. */
1195
1196 static void
1197 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1198 rtx_sequence *seq,
1199 vec<rtx_insn *> *delay_list,
1200 struct resources *sets,
1201 struct resources *needed,
1202 struct resources *other_needed,
1203 int slots_to_fill, int *pslots_filled,
1204 int *pannul_p)
1205 {
1206 int i;
1207 int flags;
1208 int must_annul = *pannul_p;
1209 int used_annul = 0;
1210
1211 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1212
1213 /* We can't do anything if SEQ's delay insn isn't an
1214 unconditional branch. */
1215
1216 if (! simplejump_or_return_p (seq->insn (0)))
1217 return;
1218
1219 for (i = 1; i < seq->len (); i++)
1220 {
1221 rtx_insn *trial = seq->insn (i);
1222 rtx_insn *prior_insn;
1223
1224 /* If TRIAL sets CC0, stealing it will move it too far from the use
1225 of CC0. */
1226 if (insn_references_resource_p (trial, sets, false)
1227 || insn_sets_resource_p (trial, needed, false)
1228 || insn_sets_resource_p (trial, sets, false)
1229 || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1230
1231 break;
1232
1233 /* If this insn was already done, we don't need it. */
1234 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
1235 {
1236 fix_reg_dead_note (prior_insn, insn);
1237 update_block (trial, insn);
1238 delete_from_delay_slot (trial);
1239 continue;
1240 }
1241
1242 if (! must_annul
1243 && ((condition == const_true_rtx
1244 || (! insn_sets_resource_p (trial, other_needed, false)
1245 && ! may_trap_or_fault_p (PATTERN (trial)))))
1246 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1247 : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1248 check_annul_list_true_false (1, *delay_list)
1249 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1250 {
1251 if (must_annul)
1252 used_annul = 1;
1253 delete_from_delay_slot (trial);
1254 add_to_delay_list (trial, delay_list);
1255
1256 if (++(*pslots_filled) == slots_to_fill)
1257 break;
1258 }
1259 else
1260 break;
1261 }
1262
1263 if (used_annul)
1264 *pannul_p = 1;
1265 }
1266
1267 /* Try merging insns starting at THREAD which match exactly the insns in
1268 INSN's delay list.
1269
1270 If all insns were matched and the insn was previously annulling, the
1271 annul bit will be cleared.
1272
1273 For each insn that is merged, if the branch is or will be non-annulling,
1274 we delete the merged insn. */
1275
1276 static void
1277 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1278 {
1279 rtx_insn *trial, *next_trial;
1280 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1281 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1282 int slot_number = 1;
1283 int num_slots = XVECLEN (PATTERN (insn), 0);
1284 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1285 struct resources set, needed, modified;
1286 auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1287 int flags;
1288
1289 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1290
1291 CLEAR_RESOURCE (&needed);
1292 CLEAR_RESOURCE (&set);
1293
1294 /* If this is not an annulling branch, take into account anything needed in
1295 INSN's delay slot. This prevents two increments from being incorrectly
1296 folded into one. If we are annulling, this would be the correct
1297 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1298 will essentially disable this optimization. This method is somewhat of
1299 a kludge, but I don't see a better way.) */
1300 if (! annul_p)
1301 for (int i = 1; i < num_slots; i++)
1302 if (XVECEXP (PATTERN (insn), 0, i))
1303 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1304 true);
1305
1306 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1307 {
1308 rtx pat = PATTERN (trial);
1309 rtx oldtrial = trial;
1310
1311 next_trial = next_nonnote_insn (trial);
1312
1313 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1314 if (NONJUMP_INSN_P (trial)
1315 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
1316 || GET_CODE (pat) == CLOBBER_HIGH))
1317 continue;
1318
1319 if (GET_CODE (next_to_match) == GET_CODE (trial)
1320 /* We can't share an insn that sets cc0. */
1321 && (!HAVE_cc0 || ! sets_cc0_p (pat))
1322 && ! insn_references_resource_p (trial, &set, true)
1323 && ! insn_sets_resource_p (trial, &set, true)
1324 && ! insn_sets_resource_p (trial, &needed, true)
1325 && (trial = try_split (pat, trial, 0)) != 0
1326 /* Update next_trial, in case try_split succeeded. */
1327 && (next_trial = next_nonnote_insn (trial))
1328 /* Likewise THREAD. */
1329 && (thread = oldtrial == thread ? trial : thread)
1330 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1331 /* Have to test this condition if the annul condition is different
1332 from (and less restrictive than) the non-annulling one. */
1333 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1334 {
1335
1336 if (! annul_p)
1337 {
1338 update_block (trial, thread);
1339 if (trial == thread)
1340 thread = next_active_insn (thread);
1341
1342 delete_related_insns (trial);
1343 INSN_FROM_TARGET_P (next_to_match) = 0;
1344 }
1345 else
1346 merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1347
1348 if (++slot_number == num_slots)
1349 break;
1350
1351 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1352 }
1353
1354 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1355 mark_referenced_resources (trial, &needed, true);
1356 }
1357
1358 /* See if we stopped on a filled insn. If we did, try to see if its
1359 delay slots match. */
1360 if (slot_number != num_slots
1361 && trial && NONJUMP_INSN_P (trial)
1362 && GET_CODE (PATTERN (trial)) == SEQUENCE
1363 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1364 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1365 {
1366 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1367 rtx filled_insn = XVECEXP (pat, 0, 0);
1368
1369 /* Account for resources set/needed by the filled insn. */
1370 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1371 mark_referenced_resources (filled_insn, &needed, true);
1372
1373 for (int i = 1; i < pat->len (); i++)
1374 {
1375 rtx_insn *dtrial = pat->insn (i);
1376
1377 CLEAR_RESOURCE (&modified);
1378 /* Account for resources set by the insn following NEXT_TO_MATCH
1379 inside INSN's delay list. */
1380 for (int j = 1; slot_number + j < num_slots; j++)
1381 mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1382 &modified, 0, MARK_SRC_DEST_CALL);
1383 /* Account for resources set by the insn before DTRIAL and inside
1384 TRIAL's delay list. */
1385 for (int j = 1; j < i; j++)
1386 mark_set_resources (XVECEXP (pat, 0, j),
1387 &modified, 0, MARK_SRC_DEST_CALL);
1388 if (! insn_references_resource_p (dtrial, &set, true)
1389 && ! insn_sets_resource_p (dtrial, &set, true)
1390 && ! insn_sets_resource_p (dtrial, &needed, true)
1391 && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1392 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1393 /* Check that DTRIAL and NEXT_TO_MATCH do not reference a
1394 resource modified between them (only DTRIAL is checked because
1395 NEXT_TO_MATCH and DTRIAL have to be equal in order to reach
1396 this line). */
1397 && ! insn_references_resource_p (dtrial, &modified, true)
1398 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1399 {
1400 if (! annul_p)
1401 {
1402 rtx_insn *new_rtx;
1403
1404 update_block (dtrial, thread);
1405 new_rtx = delete_from_delay_slot (dtrial);
1406 if (thread->deleted ())
1407 thread = new_rtx;
1408 INSN_FROM_TARGET_P (next_to_match) = 0;
1409 }
1410 else
1411 merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1412 true));
1413
1414 if (++slot_number == num_slots)
1415 break;
1416
1417 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1418 }
1419 else
1420 {
1421 /* Keep track of the set/referenced resources for the delay
1422 slots of any trial insns we encounter. */
1423 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1424 mark_referenced_resources (dtrial, &needed, true);
1425 }
1426 }
1427 }
1428
1429 /* If all insns in the delay slot have been matched and we were previously
1430 annulling the branch, we need not do so any more. In that case delete all the
1431 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1432 the delay list so that we know that it isn't only being used at the
1433 target. */
1434 if (slot_number == num_slots && annul_p)
1435 {
1436 unsigned int len = merged_insns.length ();
1437 for (unsigned int i = len - 1; i < len; i--)
1438 if (merged_insns[i].second)
1439 {
1440 update_block (merged_insns[i].first, thread);
1441 rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1442 if (thread->deleted ())
1443 thread = new_rtx;
1444 }
1445 else
1446 {
1447 update_block (merged_insns[i].first, thread);
1448 delete_related_insns (merged_insns[i].first);
1449 }
1450
1451 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1452
1453 for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1454 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1455 }
1456 }
1457
1458 /* See if INSN is redundant with an insn in front of TARGET. Often this
1459 is called when INSN is a candidate for a delay slot of TARGET.
1460 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1461 of INSN. Often INSN will be redundant with an insn in a delay slot of
1462 some previous insn. This happens when we have a series of branches to the
1463 same label; in that case the first insn at the target might want to go
1464 into each of the delay slots.
1465
1466 If we are not careful, this routine can take up a significant fraction
1467 of the total compilation time (4%), but only wins rarely. Hence we
1468 speed this routine up by making two passes. The first pass goes back
1469 until it hits a label and sees if it finds an insn with an identical
1470 pattern. Only in this (relatively rare) event does it check for
1471 data conflicts.
1472
1473 We do not split insns we encounter. This could cause us not to find a
1474 redundant insn, but the cost of splitting seems greater than the possible
1475 gain in rare cases. */
1476
1477 static rtx_insn *
1478 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1479 {
1480 rtx target_main = target;
1481 rtx ipat = PATTERN (insn);
1482 rtx_insn *trial;
1483 rtx pat;
1484 struct resources needed, set;
1485 int i;
1486 unsigned insns_to_search;
1487
1488 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1489 are allowed to not actually assign to such a register. */
1490 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1491 return 0;
1492
1493 /* Scan backwards looking for a match. */
1494 for (trial = PREV_INSN (target),
1495 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1496 trial && insns_to_search > 0;
1497 trial = PREV_INSN (trial))
1498 {
1499 /* (use (insn))s can come immediately after a barrier if the
1500 label that used to precede them has been deleted as dead.
1501 See delete_related_insns. */
1502 if (LABEL_P (trial) || BARRIER_P (trial))
1503 return 0;
1504
1505 if (!INSN_P (trial))
1506 continue;
1507 --insns_to_search;
1508
1509 pat = PATTERN (trial);
1510 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
1511 || GET_CODE (pat) == CLOBBER_HIGH)
1512 continue;
1513
1514 if (GET_CODE (trial) == DEBUG_INSN)
1515 continue;
1516
1517 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1518 {
1519 /* Stop for a CALL and its delay slots because it is difficult to
1520 track its resource needs correctly. */
1521 if (CALL_P (seq->element (0)))
1522 return 0;
1523
1524 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1525 slots because it is difficult to track its resource needs
1526 correctly. */
1527
1528 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1529 return 0;
1530
1531 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1532 return 0;
1533
1534 /* See if any of the insns in the delay slot match, updating
1535 resource requirements as we go. */
1536 for (i = seq->len () - 1; i > 0; i--)
1537 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1538 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1539 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1540 break;
1541
1542 /* If found a match, exit this loop early. */
1543 if (i > 0)
1544 break;
1545 }
1546
1547 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1548 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1549 break;
1550 }
1551
1552 /* If we didn't find an insn that matches, return 0. */
1553 if (trial == 0)
1554 return 0;
1555
1556 /* See what resources this insn sets and needs. If they overlap, or
1557 if this insn references CC0, it can't be redundant. */
1558
1559 CLEAR_RESOURCE (&needed);
1560 CLEAR_RESOURCE (&set);
1561 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1562 mark_referenced_resources (insn, &needed, true);
1563
1564 /* If TARGET is a SEQUENCE, get the main insn. */
1565 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1566 target_main = XVECEXP (PATTERN (target), 0, 0);
1567
1568 if (resource_conflicts_p (&needed, &set)
1569 || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1570 /* The insn requiring the delay may not set anything needed or set by
1571 INSN. */
1572 || insn_sets_resource_p (target_main, &needed, true)
1573 || insn_sets_resource_p (target_main, &set, true))
1574 return 0;
1575
1576 /* Insns we pass may not set either NEEDED or SET, so merge them for
1577 simpler tests. */
1578 needed.memory |= set.memory;
1579 IOR_HARD_REG_SET (needed.regs, set.regs);
1580
1581 /* This insn isn't redundant if it conflicts with an insn that either is
1582 or will be in a delay slot of TARGET. */
1583
1584 unsigned int j;
1585 rtx_insn *temp;
1586 FOR_EACH_VEC_ELT (delay_list, j, temp)
1587 if (insn_sets_resource_p (temp, &needed, true))
1588 return 0;
1589
1590 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1591 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1592 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1593 true))
1594 return 0;
1595
1596 /* Scan backwards until we reach a label or an insn that uses something
1597 INSN sets or sets something INSN uses or sets. */
1598
1599 for (trial = PREV_INSN (target),
1600 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1601 trial && !LABEL_P (trial) && insns_to_search > 0;
1602 trial = PREV_INSN (trial))
1603 {
1604 if (!INSN_P (trial))
1605 continue;
1606 --insns_to_search;
1607
1608 pat = PATTERN (trial);
1609 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
1610 || GET_CODE (pat) == CLOBBER_HIGH)
1611 continue;
1612
1613 if (GET_CODE (trial) == DEBUG_INSN)
1614 continue;
1615
1616 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1617 {
1618 bool annul_p = false;
1619 rtx_insn *control = seq->insn (0);
1620
1621 /* If this is a CALL_INSN and its delay slots, it is hard to track
1622 the resource needs properly, so give up. */
1623 if (CALL_P (control))
1624 return 0;
1625
1626 /* If this is an INSN or JUMP_INSN with delayed effects, it
1627 is hard to track the resource needs properly, so give up. */
1628
1629 if (INSN_SETS_ARE_DELAYED (control))
1630 return 0;
1631
1632 if (INSN_REFERENCES_ARE_DELAYED (control))
1633 return 0;
1634
1635 if (JUMP_P (control))
1636 annul_p = INSN_ANNULLED_BRANCH_P (control);
1637
1638 /* See if any of the insns in the delay slot match, updating
1639 resource requirements as we go. */
1640 for (i = seq->len () - 1; i > 0; i--)
1641 {
1642 rtx_insn *candidate = seq->insn (i);
1643
1644 /* If an insn will be annulled if the branch is false, it isn't
1645 considered as a possible duplicate insn. */
1646 if (rtx_equal_p (PATTERN (candidate), ipat)
1647 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1648 {
1649 /* Show that this insn will be used in the sequel. */
1650 INSN_FROM_TARGET_P (candidate) = 0;
1651 return candidate;
1652 }
1653
1654 /* Unless this is an annulled insn from the target of a branch,
1655 we must stop if it sets anything needed or set by INSN. */
1656 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1657 && insn_sets_resource_p (candidate, &needed, true))
1658 return 0;
1659 }
1660
1661 /* If the insn requiring the delay slot conflicts with INSN, we
1662 must stop. */
1663 if (insn_sets_resource_p (control, &needed, true))
1664 return 0;
1665 }
1666 else
1667 {
1668 /* See if TRIAL is the same as INSN. */
1669 pat = PATTERN (trial);
1670 if (rtx_equal_p (pat, ipat))
1671 return trial;
1672
1673 /* Can't go any further if TRIAL conflicts with INSN. */
1674 if (insn_sets_resource_p (trial, &needed, true))
1675 return 0;
1676 }
1677 }
1678
1679 return 0;
1680 }
1681
1682 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1683 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1684 is nonzero, we are allowed to fall into this thread; otherwise, we are
1685 not.
1686
1687 If LABEL is used more than once or we pass a label other than LABEL before
1688 finding an active insn, we do not own this thread. */
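/* A rough illustration (pseudo-RTL; labels and uses are arbitrary):

	owned:				not owned:

	(jump_insn 8 ... L4)		(jump_insn 8 ... L9)
	(barrier)			...
	(code_label L4)  ; used once	(code_label L9)  ; LABEL_NUSES > 1,
	(insn 9 ...)			(insn 9 ...)	 ; or reachable by
							 ; falling through

   Owning a thread lets us delete the insns we move into delay slots
   rather than copy them.  */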
1689
1690 static int
1691 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1692 {
1693 rtx_insn *active_insn;
1694 rtx_insn *insn;
1695
1696 /* We don't own the function end. */
1697 if (thread == 0 || ANY_RETURN_P (thread))
1698 return 0;
1699
1700 /* We have a non-NULL insn. */
1701 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1702
1703 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1704 active_insn = next_active_insn (PREV_INSN (thread_insn));
1705
1706 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1707 if (LABEL_P (insn)
1708 && (insn != label || LABEL_NUSES (insn) != 1))
1709 return 0;
1710
1711 if (allow_fallthrough)
1712 return 1;
1713
1714 /* Ensure that we reach a BARRIER before any insn or label. */
1715 for (insn = prev_nonnote_insn (thread_insn);
1716 insn == 0 || !BARRIER_P (insn);
1717 insn = prev_nonnote_insn (insn))
1718 if (insn == 0
1719 || LABEL_P (insn)
1720 || (NONJUMP_INSN_P (insn)
1721 && GET_CODE (PATTERN (insn)) != USE
1722 && GET_CODE (PATTERN (insn)) != CLOBBER
1723 && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH))
1724 return 0;
1725
1726 return 1;
1727 }
1728
1729 /* Called when INSN is being moved from a location near the target of a jump.
1730 We leave a marker of the form (use (INSN)) immediately in front of WHERE
1731 for mark_target_live_regs. These markers will be deleted at the end.
1732
1733 We used to try to update the live status of registers if WHERE is at
1734 the start of a basic block, but that can't work since we may remove a
1735 BARRIER in relax_delay_slots. */
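/* A rough sketch of the marker (pseudo-RTL; the insn number is
   arbitrary): moving (insn 42 ...) away from near a jump target emits

	(insn (use (insn 42 ...)))

   just before WHERE, so mark_target_live_regs still sees what the
   moved insn referenced until the markers are deleted at the end.  */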
1736
1737 static void
1738 update_block (rtx_insn *insn, rtx_insn *where)
1739 {
1740 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1741
1742 /* INSN might be making a value live in a block where it didn't use to
1743 be. So recompute liveness information for this block. */
1744 incr_ticks_for_insn (insn);
1745 }
1746
1747 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1748 the basic block containing the jump. */
1749
1750 static int
1751 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1752 {
1753 incr_ticks_for_insn (jump);
1754 return redirect_jump (jump, nlabel, 1);
1755 }
1756
1757 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1758 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1759 that reference values used in INSN. If we find one, then we move the
1760 REG_DEAD note to INSN.
1761
1762 This is needed to handle the case where a later insn (after INSN) has a
1763 REG_DEAD note for a register used by INSN, and this later insn subsequently
1764 gets moved before a CODE_LABEL because it is a redundant insn. In this
1765 case, mark_target_live_regs may be confused into thinking the register
1766 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
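/* A rough illustration (pseudo-RTL; numbers and registers arbitrary):

	(insn 10 (set (reg 3) (plus (reg 4) (reg 5))))	; INSN, uses r4
	(insn 11 ...)	; carries (expr_list:REG_DEAD (reg 4))
	(jump_insn 12 ...)				; DELAYED_INSN

   When insn 10 is moved into the delay slot of insn 12, the REG_DEAD
   note for r4 moves from insn 11 to insn 10, so later passes do not
   see r4 dying before a point where it is still used.  */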
1767
1768 static void
1769 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1770 {
1771 rtx link, next;
1772 rtx_insn *p;
1773
1774 for (p = next_nonnote_insn (insn); p != delayed_insn;
1775 p = next_nonnote_insn (p))
1776 for (link = REG_NOTES (p); link; link = next)
1777 {
1778 next = XEXP (link, 1);
1779
1780 if (REG_NOTE_KIND (link) != REG_DEAD
1781 || !REG_P (XEXP (link, 0)))
1782 continue;
1783
1784 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1785 {
1786 /* Move the REG_DEAD note from P to INSN. */
1787 remove_note (p, link);
1788 XEXP (link, 1) = REG_NOTES (insn);
1789 REG_NOTES (insn) = link;
1790 }
1791 }
1792 }
1793
1794 /* Called when an insn redundant with start_insn is deleted. If there
1795 is a REG_DEAD note for the target of start_insn between start_insn
1796 and stop_insn, then the REG_DEAD note needs to be deleted since the
1797 value no longer dies there.
1798
1799 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1800 confused into thinking the register is dead. */
1801
1802 static void
1803 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1804 {
1805 rtx link, next;
1806 rtx_insn *p;
1807
1808 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1809 p = next_nonnote_insn (p))
1810 for (link = REG_NOTES (p); link; link = next)
1811 {
1812 next = XEXP (link, 1);
1813
1814 if (REG_NOTE_KIND (link) != REG_DEAD
1815 || !REG_P (XEXP (link, 0)))
1816 continue;
1817
1818 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1819 {
1820 remove_note (p, link);
1821 return;
1822 }
1823 }
1824 }
1825
1826 /* Delete any REG_UNUSED notes that exist on INSN but not on OTHER_INSN.
1827
1828 This handles the case of udivmodXi4 instructions which optimize their
1829 output depending on whether any REG_UNUSED notes are present. We must
1830 make sure that INSN calculates as many results as OTHER_INSN does. */
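/* A rough example (hypothetical register numbers): a divmod-style
   insn may carry

	(expr_list:REG_UNUSED (reg 5))	; remainder result not wanted

   which lets the pattern avoid computing the remainder.  If OTHER_INSN
   has no such note for r5, the note is stripped from INSN here so that
   both insns compute the same set of results.  */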
1831
1832 static void
1833 update_reg_unused_notes (rtx_insn *insn, rtx other_insn)
1834 {
1835 rtx link, next;
1836
1837 for (link = REG_NOTES (insn); link; link = next)
1838 {
1839 next = XEXP (link, 1);
1840
1841 if (REG_NOTE_KIND (link) != REG_UNUSED
1842 || !REG_P (XEXP (link, 0)))
1843 continue;
1844
1845 if (!find_regno_note (other_insn, REG_UNUSED, REGNO (XEXP (link, 0))))
1846 remove_note (insn, link);
1847 }
1848 }
1849
1850 static vec <rtx> sibling_labels;
1851
1852 /* Return the label before INSN, or put a new label there. If SIBLING is
1853 non-zero, it is another label associated with the new label (if any),
1854 typically the former target of the jump that will be redirected to
1855 the new label. */
1856
1857 static rtx_insn *
1858 get_label_before (rtx_insn *insn, rtx sibling)
1859 {
1860 rtx_insn *label;
1861
1862 /* Find an existing label at this point
1863 or make a new one if there is none. */
1864 label = prev_nonnote_insn (insn);
1865
1866 if (label == 0 || !LABEL_P (label))
1867 {
1868 rtx_insn *prev = PREV_INSN (insn);
1869
1870 label = gen_label_rtx ();
1871 emit_label_after (label, prev);
1872 LABEL_NUSES (label) = 0;
1873 if (sibling)
1874 {
1875 sibling_labels.safe_push (label);
1876 sibling_labels.safe_push (sibling);
1877 }
1878 }
1879 return label;
1880 }
1881
1882 /* Scan a function looking for insns that need a delay slot and find insns to
1883 put into the delay slot.
1884
1885 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1886 as calls). We do these first since we don't want jump insns (that are
1887 easier to fill) to get the only insns that could be used for non-jump insns.
1888 When it is zero, only try to fill JUMP_INSNs.
1889
1890 When slots are filled in this manner, the insns (including the
1891 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1892 it is possible to tell whether a delay slot has really been filled
1893 or not. `final' knows how to deal with this, by communicating
1894 through FINAL_SEQUENCE. */
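/* Roughly, a filled slot ends up as an insn whose pattern is a
   SEQUENCE (pseudo-RTL; the exact contents depend on the port):

	(insn (sequence [
	    (jump_insn ...)	; the insn that needed the slot
	    (insn ...)		; the insn chosen to fill it
	  ]))

   Element 0 of the SEQUENCE is always the insn needing the slots.  */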
1895
1896 static void
1897 fill_simple_delay_slots (int non_jumps_p)
1898 {
1899 rtx_insn *insn, *trial, *next_trial;
1900 rtx pat;
1901 int i;
1902 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1903 struct resources needed, set;
1904 int slots_to_fill, slots_filled;
1905 auto_vec<rtx_insn *, 5> delay_list;
1906
1907 for (i = 0; i < num_unfilled_slots; i++)
1908 {
1909 int flags;
1910 /* Get the next insn to fill. If it has already had any slots assigned,
1911 we can't do anything with it. Maybe we'll improve this later. */
1912
1913 insn = unfilled_slots_base[i];
1914 if (insn == 0
1915 || insn->deleted ()
1916 || (NONJUMP_INSN_P (insn)
1917 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1918 || (JUMP_P (insn) && non_jumps_p)
1919 || (!JUMP_P (insn) && ! non_jumps_p))
1920 continue;
1921
1922 /* It may have been that this insn used to need delay slots, but
1923 now doesn't; ignore in that case. This can happen, for example,
1924 on the HP PA RISC, where the number of delay slots depends on
1925 what insns are nearby. */
1926 slots_to_fill = num_delay_slots (insn);
1927
1928 /* Some machine descriptions have defined instructions to have
1929 delay slots only in certain circumstances which may depend on
1930 nearby insns (which change due to reorg's actions).
1931
1932 For example, the PA port normally has delay slots for unconditional
1933 jumps.
1934
1935 However, the PA port claims such jumps do not have a delay slot
1936 if they are immediate successors of certain CALL_INSNs. This
1937 allows the port to favor filling the delay slot of the call with
1938 the unconditional jump. */
1939 if (slots_to_fill == 0)
1940 continue;
1941
1942 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1943 says how many. After initialization, first try optimizing
1944
1945     call _foo                  call _foo
1946     nop                        add %o7,.-L1,%o7
1947     b,a L1
1948     nop
1949
1950 If this case applies, the delay slot of the call is filled with
1951 the unconditional jump. This is done first to avoid having the
1952 delay slot of the call filled in the backward scan. Also, since
1953 the unconditional jump is likely to also have a delay slot, that
1954 insn must exist when it is subsequently scanned.
1955
1956 This is tried on each insn with delay slots as some machines
1957 have insns which perform calls, but are not represented as
1958 CALL_INSNs. */
1959
1960 slots_filled = 0;
1961 delay_list.truncate (0);
1962
1963 if (JUMP_P (insn))
1964 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1965 else
1966 flags = get_jump_flags (insn, NULL_RTX);
1967
1968 if ((trial = next_active_insn (insn))
1969 && JUMP_P (trial)
1970 && simplejump_p (trial)
1971 && eligible_for_delay (insn, slots_filled, trial, flags)
1972 && no_labels_between_p (insn, trial)
1973 && ! can_throw_internal (trial))
1974 {
1975 rtx_insn **tmp;
1976 slots_filled++;
1977 add_to_delay_list (trial, &delay_list);
1978
1979 /* TRIAL may have had its delay slot filled, then unfilled. When
1980 the delay slot is unfilled, TRIAL is placed back on the unfilled
1981 slots obstack. Unfortunately, it is placed on the end of the
1982 obstack, not in its original location. Therefore, we must search
1983 from entry i + 1 to the end of the unfilled slots obstack to
1984 try and find TRIAL. */
1985 tmp = &unfilled_slots_base[i + 1];
1986 while (*tmp != trial && tmp != unfilled_slots_next)
1987 tmp++;
1988
1989 /* Remove the unconditional jump from consideration for delay slot
1990 filling and unthread it. */
1991 if (*tmp == trial)
1992 *tmp = 0;
1993 {
1994 rtx_insn *next = NEXT_INSN (trial);
1995 rtx_insn *prev = PREV_INSN (trial);
1996 if (prev)
1997 SET_NEXT_INSN (prev) = next;
1998 if (next)
1999 SET_PREV_INSN (next) = prev;
2000 }
2001 }
2002
2003 /* Now, scan backwards from the insn to search for a potential
2004 delay-slot candidate. Stop searching when a label or jump is hit.
2005
2006 For each candidate, if it is to go into the delay slot (moved
2007 forward in execution sequence), it must not need or set any resources
2008 that were set by later insns and must not set any resources that
2009 are needed for those insns.
2010
2011 The delay slot insn itself sets resources unless it is a call
2012 (in which case the called routine, not the insn itself, is doing
2013 the setting). */
2014
2015 if (slots_filled < slots_to_fill)
2016 {
2017 /* If the flags register is dead after the insn, then we want to be
2018 able to accept a candidate that clobbers it. For this purpose,
2019 we need to filter the flags register during life analysis, so
2020 that it doesn't create RAW and WAW dependencies, while still
2021 creating the necessary WAR dependencies. */
2022 bool filter_flags
2023 = (slots_to_fill == 1
2024 && targetm.flags_regnum != INVALID_REGNUM
2025 && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2026 struct resources fset;
2027 CLEAR_RESOURCE (&needed);
2028 CLEAR_RESOURCE (&set);
2029 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2030 if (filter_flags)
2031 {
2032 CLEAR_RESOURCE (&fset);
2033 mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2034 }
2035 mark_referenced_resources (insn, &needed, false);
2036
2037 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2038 trial = next_trial)
2039 {
2040 next_trial = prev_nonnote_insn (trial);
2041
2042 /* This must be an INSN or CALL_INSN. */
2043 pat = PATTERN (trial);
2044
2045 /* Stand-alone USE and CLOBBER are just for flow. */
2046 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
2047 || GET_CODE (pat) == CLOBBER_HIGH)
2048 continue;
2049
2050 /* And DEBUG_INSNs never go into delay slots. */
2051 if (GET_CODE (trial) == DEBUG_INSN)
2052 continue;
2053
2054 /* Check for resource conflict first, to avoid unnecessary
2055 splitting. */
2056 if (! insn_references_resource_p (trial, &set, true)
2057 && ! insn_sets_resource_p (trial,
2058 filter_flags ? &fset : &set,
2059 true)
2060 && ! insn_sets_resource_p (trial, &needed, true)
2061 /* Can't separate set of cc0 from its use. */
2062 && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2063 && ! can_throw_internal (trial))
2064 {
2065 trial = try_split (pat, trial, 1);
2066 next_trial = prev_nonnote_insn (trial);
2067 if (eligible_for_delay (insn, slots_filled, trial, flags))
2068 {
2069 /* In this case, we are searching backward, so if we
2070 find insns to put on the delay list, we want
2071 to put them at the head, rather than the
2072 tail, of the list. */
2073
2074 update_reg_dead_notes (trial, insn);
2075 delay_list.safe_insert (0, trial);
2076 update_block (trial, trial);
2077 delete_related_insns (trial);
2078 if (slots_to_fill == ++slots_filled)
2079 break;
2080 continue;
2081 }
2082 }
2083
2084 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2085 if (filter_flags)
2086 {
2087 mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2088 /* If the flags register is set, then it doesn't create RAW
2089 dependencies any longer and it also doesn't create WAW
2090 dependencies since it's dead after the original insn. */
2091 if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2092 {
2093 CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2094 CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2095 }
2096 }
2097 mark_referenced_resources (trial, &needed, true);
2098 }
2099 }
2100
2101 /* If all needed slots haven't been filled, we come here. */
2102
2103 /* Try to optimize case of jumping around a single insn. */
2104 if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2105 && slots_filled != slots_to_fill
2106 && delay_list.is_empty ()
2107 && JUMP_P (insn)
2108 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2109 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2110 {
2111 optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2112 if (!delay_list.is_empty ())
2113 slots_filled += 1;
2114 }
2115
2116 /* Try to get insns from beyond the insn needing the delay slot.
2117 These insns can neither set nor reference resources set in insns being
2118 skipped, cannot set resources in the insn being skipped, and, if this
2119 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2120 call might not return).
2121
2122 There used to be code which continued past the target label if
2123 we saw all uses of the target label. This code did not work,
2124 because it failed to account for some instructions which were
2125 both annulled and marked as from the target. This can happen as a
2126 result of optimize_skip. Since this code was redundant with
2127 fill_eager_delay_slots anyways, it was just deleted. */
2128
2129 if (slots_filled != slots_to_fill
2130 /* If this instruction could throw an exception which is
2131 caught in the same function, then it's not safe to fill
2132 the delay slot with an instruction from beyond this
2133 point. For example, consider:
2134
2135 int i = 2;
2136
2137 try {
2138 f();
2139 i = 3;
2140 } catch (...) {}
2141
2142 return i;
2143
2144 Even though `i' is a local variable, we must be sure not
2145 to put `i = 3' in the delay slot if `f' might throw an
2146 exception.
2147
2148 Presumably, we should also check to see if we could get
2149 back to this function via `setjmp'. */
2150 && ! can_throw_internal (insn)
2151 && !JUMP_P (insn))
2152 {
2153 int maybe_never = 0;
2154 rtx pat, trial_delay;
2155
2156 CLEAR_RESOURCE (&needed);
2157 CLEAR_RESOURCE (&set);
2158 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2159 mark_referenced_resources (insn, &needed, true);
2160
2161 if (CALL_P (insn))
2162 maybe_never = 1;
2163
2164 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2165 trial = next_trial)
2166 {
2167 next_trial = next_nonnote_insn (trial);
2168
2169 /* This must be an INSN or CALL_INSN. */
2170 pat = PATTERN (trial);
2171
2172 /* Stand-alone USE and CLOBBER are just for flow. */
2173 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
2174 || GET_CODE (pat) == CLOBBER_HIGH)
2175 continue;
2176
2177 /* And DEBUG_INSNs do not go in delay slots. */
2178 if (GET_CODE (trial) == DEBUG_INSN)
2179 continue;
2180
2181 /* If this already has filled delay slots, get the insn needing
2182 the delay slots. */
2183 if (GET_CODE (pat) == SEQUENCE)
2184 trial_delay = XVECEXP (pat, 0, 0);
2185 else
2186 trial_delay = trial;
2187
2188 /* Stop our search when seeing a jump. */
2189 if (JUMP_P (trial_delay))
2190 break;
2191
2192 /* See if we have a resource problem before we try to split. */
2193 if (GET_CODE (pat) != SEQUENCE
2194 && ! insn_references_resource_p (trial, &set, true)
2195 && ! insn_sets_resource_p (trial, &set, true)
2196 && ! insn_sets_resource_p (trial, &needed, true)
2197 && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2198 && ! (maybe_never && may_trap_or_fault_p (pat))
2199 && (trial = try_split (pat, trial, 0))
2200 && eligible_for_delay (insn, slots_filled, trial, flags)
2201 && ! can_throw_internal (trial))
2202 {
2203 next_trial = next_nonnote_insn (trial);
2204 add_to_delay_list (trial, &delay_list);
2205 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2206 link_cc0_insns (trial);
2207
2208 delete_related_insns (trial);
2209 if (slots_to_fill == ++slots_filled)
2210 break;
2211 continue;
2212 }
2213
2214 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2215 mark_referenced_resources (trial, &needed, true);
2216
2217 /* Ensure we don't put insns between the setting of cc and the
2218 comparison by moving a setting of cc into an earlier delay
2219 slot since these insns could clobber the condition code. */
2220 set.cc = 1;
2221
2222 /* If this is a call, we might not get here. */
2223 if (CALL_P (trial_delay))
2224 maybe_never = 1;
2225 }
2226
2227 /* If there are slots left to fill and our search was stopped by an
2228 unconditional branch, try the insn at the branch target. We can
2229 redirect the branch if it works.
2230
2231 Don't do this if the insn at the branch target is a branch. */
2232 if (slots_to_fill != slots_filled
2233 && trial
2234 && jump_to_label_p (trial)
2235 && simplejump_p (trial)
2236 && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2237 && ! (NONJUMP_INSN_P (next_trial)
2238 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2239 && !JUMP_P (next_trial)
2240 && ! insn_references_resource_p (next_trial, &set, true)
2241 && ! insn_sets_resource_p (next_trial, &set, true)
2242 && ! insn_sets_resource_p (next_trial, &needed, true)
2243 && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2244 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2245 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2246 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2247 && ! can_throw_internal (trial))
2248 {
2249 /* See comment in relax_delay_slots about necessity of using
2250 next_real_nondebug_insn here. */
2251 rtx_insn *new_label = next_real_nondebug_insn (next_trial);
2252
2253 if (new_label != 0)
2254 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2255 else
2256 new_label = find_end_label (simple_return_rtx);
2257
2258 if (new_label)
2259 {
2260 add_to_delay_list (copy_delay_slot_insn (next_trial),
2261 &delay_list);
2262 slots_filled++;
2263 reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2264 new_label);
2265 }
2266 }
2267 }
2268
2269 /* If this is an unconditional jump, then try to get insns from the
2270 target of the jump. */
2271 rtx_jump_insn *jump_insn;
2272 if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2273 && simplejump_p (jump_insn)
2274 && slots_filled != slots_to_fill)
2275 fill_slots_from_thread (jump_insn, const_true_rtx,
2276 next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2277 NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2278 JUMP_LABEL (insn), 0),
2279 slots_to_fill, &slots_filled, &delay_list);
2280
2281 if (!delay_list.is_empty ())
2282 unfilled_slots_base[i]
2283 = emit_delay_sequence (insn, delay_list, slots_filled);
2284
2285 if (slots_to_fill == slots_filled)
2286 unfilled_slots_base[i] = 0;
2287
2288 note_delay_statistics (slots_filled, 0);
2289 }
2290 }
2291
2292 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2293 return the ultimate label reached by any such chain of jumps.
2294 Return a suitable return rtx if the chain ultimately leads to a
2295 return instruction.
2296 If LABEL is not followed by a jump, return LABEL.
2297 If the chain loops or we can't find the end, return LABEL,
2298 since that tells the caller to avoid changing the insn.
2299 If the returned label is obtained by following a crossing jump,
2300 set *CROSSING to true, otherwise set it to false. */
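/* A rough illustration (pseudo-code; labels arbitrary):

	L1: goto L2;	...	L2: goto L3;	...	L3: <real insn>

   follow_jumps (L1, JUMP, &crossing) returns L3.  The chase is capped
   at ten levels and stops at returns, tablejumps, cycles, and jumps
   that targetm.can_follow_jump rejects.  */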
2301
2302 static rtx
2303 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2304 {
2305 rtx_insn *insn;
2306 rtx_insn *next;
2307 int depth;
2308
2309 *crossing = false;
2310 if (ANY_RETURN_P (label))
2311 return label;
2312
2313 rtx_insn *value = as_a <rtx_insn *> (label);
2314
2315 for (depth = 0;
2316 (depth < 10
2317 && (insn = next_active_insn (value)) != 0
2318 && JUMP_P (insn)
2319 && JUMP_LABEL (insn) != NULL_RTX
2320 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2321 || ANY_RETURN_P (PATTERN (insn)))
2322 && (next = NEXT_INSN (insn))
2323 && BARRIER_P (next));
2324 depth++)
2325 {
2326 rtx this_label_or_return = JUMP_LABEL (insn);
2327
2328 /* If we have found a cycle, make the insn jump to itself. */
2329 if (this_label_or_return == label)
2330 return label;
2331
2332 /* Cannot follow returns and cannot look through tablejumps. */
2333 if (ANY_RETURN_P (this_label_or_return))
2334 return this_label_or_return;
2335
2336 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2337 if (NEXT_INSN (this_label)
2338 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2339 break;
2340
2341 if (!targetm.can_follow_jump (jump, insn))
2342 break;
2343 if (!*crossing)
2344 *crossing = CROSSING_JUMP_P (jump);
2345 value = this_label;
2346 }
2347 if (depth == 10)
2348 return label;
2349 return value;
2350 }
2351
2352 /* Try to find insns to place in delay slots.
2353
2354 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2355 or is an unconditional branch if CONDITION is const_true_rtx.
2356 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2357
2358 THREAD is a flow-of-control, either the insns to be executed if the
2359 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2360
2361 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2362 to see if any potential delay slot insns set things needed there.
2363
2364 LIKELY is nonzero if it is extremely likely that the branch will be
2365 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2366 end of a loop back up to the top.
2367
2368 OWN_THREAD is true if we are the only user of the thread, i.e. it is
2369 the target of the jump when we are the only jump going there.
2370
2371 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2372 case, we can only take insns from the head of the thread for our delay
2373 slot. We then adjust the jump to point after the insns we have taken. */
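/* A rough picture for a conditional branch (pseudo-code; labels
   arbitrary):

	if (cond) goto L1
	insn A			; fallthrough thread (THREAD_IF_TRUE == 0)
	...
	L1:
	insn B			; target thread (THREAD_IF_TRUE != 0)

   Whichever thread supplies candidates, OPPOSITE_THREAD is the other
   one: a candidate must either not clobber anything needed there or
   go into an annulled slot.  */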
2374
2375 static void
2376 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2377 rtx thread_or_return, rtx opposite_thread, int likely,
2378 int thread_if_true, int own_thread, int slots_to_fill,
2379 int *pslots_filled, vec<rtx_insn *> *delay_list)
2380 {
2381 rtx new_thread;
2382 struct resources opposite_needed, set, needed;
2383 rtx_insn *trial;
2384 int lose = 0;
2385 int must_annul = 0;
2386 int flags;
2387
2388 /* Validate our arguments. */
2389 gcc_assert (condition != const_true_rtx || thread_if_true);
2390 gcc_assert (own_thread || thread_if_true);
2391
2392 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2393
2394 /* If our thread is the end of the subroutine, we can't get any delay
2395 insns from it. */
2396 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2397 return;
2398
2399 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2400
2401 /* If this is an unconditional branch, nothing is needed at the
2402 opposite thread. Otherwise, compute what is needed there. */
2403 if (condition == const_true_rtx)
2404 CLEAR_RESOURCE (&opposite_needed);
2405 else
2406 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2407
2408 /* If the insn at THREAD can be split, do it here to avoid having to
2409 update THREAD and NEW_THREAD if it is done in the loop below. Also
2410 initialize NEW_THREAD. */
2411
2412 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2413
2414 /* Scan insns at THREAD. We are looking for an insn that can be removed
2415 from THREAD (it neither sets nor references resources that were set
2416 ahead of it and it doesn't set anything needed by the insns ahead of
2417 it) and that either can be placed in an annulling insn or isn't
2418 needed at OPPOSITE_THREAD. */
2419
2420 CLEAR_RESOURCE (&needed);
2421 CLEAR_RESOURCE (&set);
2422
2423 /* If we do not own this thread, we must stop as soon as we find
2424 something that we can't put in a delay slot, since all we can do
2425 is branch into THREAD at a later point. Therefore, labels stop
2426 the search if this is not the `true' thread. */
2427
2428 for (trial = thread;
2429 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2430 trial = next_nonnote_insn (trial))
2431 {
2432 rtx pat, old_trial;
2433
2434 /* If we have passed a label, we no longer own this thread. */
2435 if (LABEL_P (trial))
2436 {
2437 own_thread = 0;
2438 continue;
2439 }
2440
2441 pat = PATTERN (trial);
2442 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
2443 || GET_CODE (pat) == CLOBBER_HIGH)
2444 continue;
2445
2446 if (GET_CODE (trial) == DEBUG_INSN)
2447 continue;
2448
2449 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2450 don't separate or copy insns that set and use CC0. */
2451 if (! insn_references_resource_p (trial, &set, true)
2452 && ! insn_sets_resource_p (trial, &set, true)
2453 && ! insn_sets_resource_p (trial, &needed, true)
2454 && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2455 && (! own_thread || ! sets_cc0_p (pat)))))
2456 && ! can_throw_internal (trial))
2457 {
2458 rtx_insn *prior_insn;
2459
2460 /* If TRIAL is redundant with some insn before INSN, we don't
2461 actually need to add it to the delay list; we can merely pretend
2462 we did. */
2463 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2464 {
2465 fix_reg_dead_note (prior_insn, insn);
2466 if (own_thread)
2467 {
2468 update_block (trial, thread);
2469 if (trial == thread)
2470 {
2471 thread = next_active_insn (thread);
2472 if (new_thread == trial)
2473 new_thread = thread;
2474 }
2475
2476 delete_related_insns (trial);
2477 }
2478 else
2479 {
2480 update_reg_unused_notes (prior_insn, trial);
2481 new_thread = next_active_insn (trial);
2482 }
2483
2484 continue;
2485 }
2486
2487 /* There are two ways we can win: If TRIAL doesn't set anything
2488 needed at the opposite thread and can't trap, or if it can
2489 go into an annulled delay slot. But we want neither to copy
2490 nor to speculate frame-related insns. */
2491 if (!must_annul
2492 && ((condition == const_true_rtx
2493 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2494 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2495 && ! may_trap_or_fault_p (pat)
2496 && ! RTX_FRAME_RELATED_P (trial))))
2497 {
2498 old_trial = trial;
2499 trial = try_split (pat, trial, 0);
2500 if (new_thread == old_trial)
2501 new_thread = trial;
2502 if (thread == old_trial)
2503 thread = trial;
2504 pat = PATTERN (trial);
2505 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2506 goto winner;
2507 }
2508 else if (!RTX_FRAME_RELATED_P (trial)
2509 && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2510 || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2511 {
2512 old_trial = trial;
2513 trial = try_split (pat, trial, 0);
2514 if (new_thread == old_trial)
2515 new_thread = trial;
2516 if (thread == old_trial)
2517 thread = trial;
2518 pat = PATTERN (trial);
2519 if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2520 ? check_annul_list_true_false (0, *delay_list)
2521 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2522 : check_annul_list_true_false (1, *delay_list)
2523 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2524 {
2525 rtx_insn *temp;
2526
2527 must_annul = 1;
2528 winner:
2529
2530 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2531 link_cc0_insns (trial);
2532
2533 /* If we own this thread, delete the insn. If this is the
2534 destination of a branch, show that a basic block status
2535 may have been updated. In any case, mark the new
2536 starting point of this thread. */
2537 if (own_thread)
2538 {
2539 rtx note;
2540
2541 update_block (trial, thread);
2542 if (trial == thread)
2543 {
2544 thread = next_active_insn (thread);
2545 if (new_thread == trial)
2546 new_thread = thread;
2547 }
2548
2549 /* We are moving this insn, not deleting it. We must
2550 temporarily increment the use count on any referenced
2551 label lest it be deleted by delete_related_insns. */
2552 for (note = REG_NOTES (trial);
2553 note != NULL_RTX;
2554 note = XEXP (note, 1))
2555 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2556 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2557 {
2558 /* REG_LABEL_OPERAND could be
2559 NOTE_INSN_DELETED_LABEL too. */
2560 if (LABEL_P (XEXP (note, 0)))
2561 LABEL_NUSES (XEXP (note, 0))++;
2562 else
2563 gcc_assert (REG_NOTE_KIND (note)
2564 == REG_LABEL_OPERAND);
2565 }
2566 if (jump_to_label_p (trial))
2567 LABEL_NUSES (JUMP_LABEL (trial))++;
2568
2569 delete_related_insns (trial);
2570
2571 for (note = REG_NOTES (trial);
2572 note != NULL_RTX;
2573 note = XEXP (note, 1))
2574 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2575 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2576 {
2577 /* REG_LABEL_OPERAND could be
2578 NOTE_INSN_DELETED_LABEL too. */
2579 if (LABEL_P (XEXP (note, 0)))
2580 LABEL_NUSES (XEXP (note, 0))--;
2581 else
2582 gcc_assert (REG_NOTE_KIND (note)
2583 == REG_LABEL_OPERAND);
2584 }
2585 if (jump_to_label_p (trial))
2586 LABEL_NUSES (JUMP_LABEL (trial))--;
2587 }
2588 else
2589 new_thread = next_active_insn (trial);
2590
2591 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2592 if (thread_if_true)
2593 INSN_FROM_TARGET_P (temp) = 1;
2594
2595 add_to_delay_list (temp, delay_list);
2596
2597 if (slots_to_fill == ++(*pslots_filled))
2598 {
2599 /* Even though we have filled all the slots, we
2600 may be branching to a location that has a
2601 redundant insn. Skip any if so. */
2602 while (new_thread && ! own_thread
2603 && ! insn_sets_resource_p (new_thread, &set, true)
2604 && ! insn_sets_resource_p (new_thread, &needed,
2605 true)
2606 && ! insn_references_resource_p (new_thread,
2607 &set, true)
2608 && (prior_insn
2609 = redundant_insn (new_thread, insn,
2610 *delay_list)))
2611 {
2612 /* We know we do not own the thread, so no need
2613 to call update_block and delete_insn. */
2614 fix_reg_dead_note (prior_insn, insn);
2615 update_reg_unused_notes (prior_insn, new_thread);
2616 new_thread
2617 = next_active_insn (as_a<rtx_insn *> (new_thread));
2618 }
2619 break;
2620 }
2621
2622 continue;
2623 }
2624 }
2625 }
2626
2627 /* This insn can't go into a delay slot. */
2628 lose = 1;
2629 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2630 mark_referenced_resources (trial, &needed, true);
2631
2632 /* Ensure we don't put insns between the setting of cc and the comparison
2633 by moving a setting of cc into an earlier delay slot since these insns
2634 could clobber the condition code. */
2635 set.cc = 1;
2636
2637 /* If this insn is a register-register copy and the next insn has
2638 a use of our destination, change it to use our source. That way,
2639 it will become a candidate for our delay slot the next time
2640 through this loop. This case occurs commonly in loops that
2641 scan a list.
2642
2643 We could check for more complex cases than those tested below,
2644 but it doesn't seem worth it. It might also be a good idea to try
2645 to swap the two insns. That might do better.
2646
2647 We can't do this if the next insn modifies our destination, because
2648 that would make the replacement into the insn invalid. We also can't
2649 do this if it modifies our source, because it might be an earlyclobber
2650 operand. This latter test also prevents updating the contents of
2651 a PRE_INC. We also can't do this if there's overlap of source and
2652 destination. Overlap may happen for larger-than-register-size modes. */
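/* A rough illustration (pseudo-RTL; registers arbitrary):

	(set (reg 2) (reg 1))				; TRIAL, the copy
	(set (reg 3) (plus (reg 2) (const_int 4)))	; NEXT uses r2

   Rewriting NEXT to use (reg 1) removes its dependence on TRIAL's
   destination, so a later iteration may be able to move NEXT into the
   delay slot even though TRIAL itself could not go there.  */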
2653
2654 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2655 && REG_P (SET_SRC (pat))
2656 && REG_P (SET_DEST (pat))
2657 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2658 {
2659 rtx_insn *next = next_nonnote_insn (trial);
2660
2661 if (next && NONJUMP_INSN_P (next)
2662 && GET_CODE (PATTERN (next)) != USE
2663 && ! reg_set_p (SET_DEST (pat), next)
2664 && ! reg_set_p (SET_SRC (pat), next)
2665 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2666 && ! modified_in_p (SET_DEST (pat), next))
2667 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2668 }
2669 }
2670
2671 /* If we stopped on a branch insn that has delay slots, see if we can
2672 steal some of the insns in those slots. */
2673 if (trial && NONJUMP_INSN_P (trial)
2674 && GET_CODE (PATTERN (trial)) == SEQUENCE
2675 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2676 {
2677 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2678 /* If this is the `true' thread, we will want to follow the jump,
2679 so we can only do this if we have taken everything up to here. */
2680 if (thread_if_true && trial == new_thread)
2681 {
2682 steal_delay_list_from_target (insn, condition, sequence,
2683 delay_list, &set, &needed,
2684 &opposite_needed, slots_to_fill,
2685 pslots_filled, &must_annul,
2686 &new_thread);
2687 /* If we owned the thread and are told that it branched
2688 elsewhere, make sure we own the thread at the new location. */
2689 if (own_thread && trial != new_thread)
2690 own_thread = own_thread_p (new_thread, new_thread, 0);
2691 }
2692 else if (! thread_if_true)
2693 steal_delay_list_from_fallthrough (insn, condition, sequence,
2694 delay_list, &set, &needed,
2695 &opposite_needed, slots_to_fill,
2696 pslots_filled, &must_annul);
2697 }
2698
2699 /* If we haven't found anything for this delay slot and it is very
2700 likely that the branch will be taken, see if the insn at our target
2701 increments or decrements a register with an increment that does not
2702 depend on the destination register. If so, try to place the opposite
2703 arithmetic insn after the jump insn and put the arithmetic insn in the
2704 delay slot. If we can't do this, return. */
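/* A rough illustration (pseudo-code; the branch is expected to be
   taken and its target starts with an increment):

	if (cond) goto L1		if (cond) goto L1'
	 nop			-->	 r4 = r4 + 4	; in the delay slot
	...				r4 = r4 - 4	; opposite arithmetic on
	...				...		; the fall-through path
	L1:				L1:
	r4 = r4 + 4			r4 = r4 + 4
	...			     L1': ...

   The increment always executes from the delay slot, the compensating
   insn after the jump undoes it when the branch falls through, and the
   branch is redirected past the original increment (or that increment
   is deleted outright when we own the thread).  */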
2705 if (delay_list->is_empty () && likely
2706 && new_thread && !ANY_RETURN_P (new_thread)
2707 && NONJUMP_INSN_P (new_thread)
2708 && !RTX_FRAME_RELATED_P (new_thread)
2709 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2710 && asm_noperands (PATTERN (new_thread)) < 0)
2711 {
2712 rtx pat = PATTERN (new_thread);
2713 rtx dest;
2714 rtx src;
2715
2716 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2717 above. */
2718 trial = as_a <rtx_insn *> (new_thread);
2719 pat = PATTERN (trial);
2720
2721 if (!NONJUMP_INSN_P (trial)
2722 || GET_CODE (pat) != SET
2723 || ! eligible_for_delay (insn, 0, trial, flags)
2724 || can_throw_internal (trial))
2725 return;
2726
2727 dest = SET_DEST (pat), src = SET_SRC (pat);
2728 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2729 && rtx_equal_p (XEXP (src, 0), dest)
2730 && (!FLOAT_MODE_P (GET_MODE (src))
2731 || flag_unsafe_math_optimizations)
2732 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2733 && ! side_effects_p (pat))
2734 {
2735 rtx other = XEXP (src, 1);
2736 rtx new_arith;
2737 rtx_insn *ninsn;
2738
2739 /* If this is a constant adjustment, use the same code with
2740 the negated constant. Otherwise, reverse the sense of the
2741 arithmetic. */
2742 if (CONST_INT_P (other))
2743 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2744 negate_rtx (GET_MODE (src), other));
2745 else
2746 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2747 GET_MODE (src), dest, other);
2748
2749 ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2750
2751 if (recog_memoized (ninsn) < 0
2752 || (extract_insn (ninsn),
2753 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2754 {
2755 delete_related_insns (ninsn);
2756 return;
2757 }
2758
2759 if (own_thread)
2760 {
2761 update_block (trial, thread);
2762 if (trial == thread)
2763 {
2764 thread = next_active_insn (thread);
2765 if (new_thread == trial)
2766 new_thread = thread;
2767 }
2768 delete_related_insns (trial);
2769 }
2770 else
2771 new_thread = next_active_insn (trial);
2772
2773 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2774 if (thread_if_true)
2775 INSN_FROM_TARGET_P (ninsn) = 1;
2776
2777 add_to_delay_list (ninsn, delay_list);
2778 (*pslots_filled)++;
2779 }
2780 }
2781
2782 if (!delay_list->is_empty () && must_annul)
2783 INSN_ANNULLED_BRANCH_P (insn) = 1;
2784
2785 /* If we are to branch into the middle of this thread, find an appropriate
2786 label or make a new one if none, and redirect INSN to it. If we hit the
2787 end of the function, use the end-of-function label. */
2788 if (new_thread != thread)
2789 {
2790 rtx label;
2791 bool crossing = false;
2792
2793 gcc_assert (thread_if_true);
2794
2795 if (new_thread && simplejump_or_return_p (new_thread)
2796 && redirect_with_delay_list_safe_p (insn,
2797 JUMP_LABEL (new_thread),
2798 *delay_list))
2799 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2800 &crossing);
2801
2802 if (ANY_RETURN_P (new_thread))
2803 label = find_end_label (new_thread);
2804 else if (LABEL_P (new_thread))
2805 label = new_thread;
2806 else
2807 label = get_label_before (as_a <rtx_insn *> (new_thread),
2808 JUMP_LABEL (insn));
2809
2810 if (label)
2811 {
2812 reorg_redirect_jump (insn, label);
2813 if (crossing)
2814 CROSSING_JUMP_P (insn) = 1;
2815 }
2816 }
2817 }
2818
2819 /* Make another attempt to find insns to place in delay slots.
2820
2821 We previously looked for insns located in front of the delay insn
2822 and, for non-jump delay insns, located behind the delay insn.
2823
2824 Here only try to schedule jump insns and try to move insns from either
2825 the target or the following insns into the delay slot. If annulling is
2826 supported, we will be likely to do this. Otherwise, we can do this only
2827 if safe. */
2828
2829 static void
2830 fill_eager_delay_slots (void)
2831 {
2832 rtx_insn *insn;
2833 int i;
2834 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2835
2836 for (i = 0; i < num_unfilled_slots; i++)
2837 {
2838 rtx condition;
2839 rtx target_label, insn_at_target;
2840 rtx_insn *fallthrough_insn;
2841 auto_vec<rtx_insn *, 5> delay_list;
2842 rtx_jump_insn *jump_insn;
2843 int own_target;
2844 int own_fallthrough;
2845 int prediction, slots_to_fill, slots_filled;
2846
2847 insn = unfilled_slots_base[i];
2848 if (insn == 0
2849 || insn->deleted ()
2850 || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2851 || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2852 continue;
2853
2854 slots_to_fill = num_delay_slots (jump_insn);
2855 /* Some machine descriptions have defined instructions to have
2856 delay slots only in certain circumstances which may depend on
2857 nearby insns (which change due to reorg's actions).
2858
2859 For example, the PA port normally has delay slots for unconditional
2860 jumps.
2861
2862 However, the PA port claims such jumps do not have a delay slot
2863 if they are immediate successors of certain CALL_INSNs. This
2864 allows the port to favor filling the delay slot of the call with
2865 the unconditional jump. */
2866 if (slots_to_fill == 0)
2867 continue;
2868
2869 slots_filled = 0;
2870 target_label = JUMP_LABEL (jump_insn);
2871 condition = get_branch_condition (jump_insn, target_label);
2872
2873 if (condition == 0)
2874 continue;
2875
2876 /* Get the next active fallthrough and target insns and see if we own
2877 them. Then see whether the branch is likely true. We don't need
2878 to do a lot of this for unconditional branches. */
2879
2880 insn_at_target = first_active_target_insn (target_label);
2881 own_target = own_thread_p (target_label, target_label, 0);
2882
2883 if (condition == const_true_rtx)
2884 {
2885 own_fallthrough = 0;
2886 fallthrough_insn = 0;
2887 prediction = 2;
2888 }
2889 else
2890 {
2891 fallthrough_insn = next_active_insn (jump_insn);
2892 own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2893 prediction = mostly_true_jump (jump_insn);
2894 }
2895
2896 /* If this insn is expected to branch, first try to get insns from our
2897 target, then our fallthrough insns. If it is not expected to branch,
2898 try the other order. */
2899
2900 if (prediction > 0)
2901 {
2902 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2903 fallthrough_insn, prediction == 2, 1,
2904 own_target,
2905 slots_to_fill, &slots_filled, &delay_list);
2906
2907 if (delay_list.is_empty () && own_fallthrough)
2908 {
2909 /* Even though we didn't find anything for delay slots,
2910 we might have found a redundant insn which we deleted
2911 from the thread that was filled. So we have to recompute
2912 the next insn at the target. */
2913 target_label = JUMP_LABEL (jump_insn);
2914 insn_at_target = first_active_target_insn (target_label);
2915
2916 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2917 insn_at_target, 0, 0, own_fallthrough,
2918 slots_to_fill, &slots_filled,
2919 &delay_list);
2920 }
2921 }
2922 else
2923 {
2924 if (own_fallthrough)
2925 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2926 insn_at_target, 0, 0, own_fallthrough,
2927 slots_to_fill, &slots_filled, &delay_list);
2928
2929 if (delay_list.is_empty ())
2930 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2931 next_active_insn (insn), 0, 1, own_target,
2932 slots_to_fill, &slots_filled, &delay_list);
2933 }
2934
2935 if (!delay_list.is_empty ())
2936 unfilled_slots_base[i]
2937 = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2938
2939 if (slots_to_fill == slots_filled)
2940 unfilled_slots_base[i] = 0;
2941
2942 note_delay_statistics (slots_filled, 1);
2943 }
2944 }
2945
2946 static void delete_computation (rtx_insn *insn);
2947
2948 /* Recursively delete prior insns that compute the value (used only by INSN
2949 which the caller is deleting) stored in the register mentioned by NOTE
2950 which is a REG_DEAD note associated with INSN. */
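/* A rough illustration (pseudo-RTL; numbers and registers arbitrary):

	(insn 7 (set (reg 5) (mult (reg 1) (reg 2))))
	(insn 9 (set (reg 6) (reg 5)))	; being deleted; has REG_DEAD (reg 5)

   Once insn 9 is gone nothing uses r5, so insn 7 can be deleted too,
   and the recursion continues through insn 7's own REG_DEAD notes.  */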
2951
2952 static void
2953 delete_prior_computation (rtx note, rtx_insn *insn)
2954 {
2955 rtx_insn *our_prev;
2956 rtx reg = XEXP (note, 0);
2957
2958 for (our_prev = prev_nonnote_insn (insn);
2959 our_prev && (NONJUMP_INSN_P (our_prev)
2960 || CALL_P (our_prev));
2961 our_prev = prev_nonnote_insn (our_prev))
2962 {
2963 rtx pat = PATTERN (our_prev);
2964
2965 /* If we reach a CALL which is not calling a const function
2966 or the callee pops the arguments, then give up. */
2967 if (CALL_P (our_prev)
2968 && (! RTL_CONST_CALL_P (our_prev)
2969 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2970 break;
2971
2972 /* If we reach a SEQUENCE, it is too complex to try to
2973 do anything with it, so give up. We can be run during
2974 and after reorg, so SEQUENCE rtl can legitimately show
2975 up here. */
2976 if (GET_CODE (pat) == SEQUENCE)
2977 break;
2978
2979 if (GET_CODE (pat) == USE
2980 && NONJUMP_INSN_P (XEXP (pat, 0)))
2981 /* reorg creates USEs that look like this. We leave them
2982 alone because reorg needs them for its own purposes. */
2983 break;
2984
2985 if (reg_set_p (reg, pat))
2986 {
2987 if (side_effects_p (pat) && !CALL_P (our_prev))
2988 break;
2989
2990 if (GET_CODE (pat) == PARALLEL)
2991 {
2992 /* If we find a SET of something else, we can't
2993 delete the insn. */
2994
2995 int i;
2996
2997 for (i = 0; i < XVECLEN (pat, 0); i++)
2998 {
2999 rtx part = XVECEXP (pat, 0, i);
3000
3001 if (GET_CODE (part) == SET
3002 && SET_DEST (part) != reg)
3003 break;
3004 }
3005
3006 if (i == XVECLEN (pat, 0))
3007 delete_computation (our_prev);
3008 }
3009 else if (GET_CODE (pat) == SET
3010 && REG_P (SET_DEST (pat)))
3011 {
3012 int dest_regno = REGNO (SET_DEST (pat));
3013 int dest_endregno = END_REGNO (SET_DEST (pat));
3014 int regno = REGNO (reg);
3015 int endregno = END_REGNO (reg);
3016
3017 if (dest_regno >= regno
3018 && dest_endregno <= endregno)
3019 delete_computation (our_prev);
3020
3021 /* We may have a multi-word hard register and some, but not
3022 all, of the words of the register are needed in subsequent
3023 insns. Write REG_UNUSED notes for those parts that were not
3024 needed. */
3025 else if (dest_regno <= regno
3026 && dest_endregno >= endregno)
3027 {
3028 int i;
3029
3030 add_reg_note (our_prev, REG_UNUSED, reg);
3031
3032 for (i = dest_regno; i < dest_endregno; i++)
3033 if (! find_regno_note (our_prev, REG_UNUSED, i))
3034 break;
3035
3036 if (i == dest_endregno)
3037 delete_computation (our_prev);
3038 }
3039 }
3040
3041 break;
3042 }
3043
3044 /* If PAT references the register that dies here, it is an
3045 additional use. Hence any prior SET isn't dead. However, this
3046 insn becomes the new place for the REG_DEAD note. */
3047 if (reg_overlap_mentioned_p (reg, pat))
3048 {
3049 XEXP (note, 1) = REG_NOTES (our_prev);
3050 REG_NOTES (our_prev) = note;
3051 break;
3052 }
3053 }
3054 }
3055
3056 /* Delete INSN and recursively delete insns that compute values used only
3057 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3058
3059 Look at all our REG_DEAD notes. If a previous insn does nothing other
3060 than set a register that dies in this insn, we can delete that insn
3061 as well.
3062
3063 On machines with CC0, if CC0 is used in this insn, we may be able to
3064 delete the insn that set it. */
3065
3066 static void
3067 delete_computation (rtx_insn *insn)
3068 {
3069 rtx note, next;
3070
3071 if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3072 {
3073 rtx_insn *prev = prev_nonnote_insn (insn);
3074 /* We assume that at this stage
3075 CC's are always set explicitly
3076 and always immediately before the jump that
3077 will use them. So if the previous insn
3078 exists to set the CC's, delete it
3079 (unless it performs auto-increments, etc.). */
3080 if (prev && NONJUMP_INSN_P (prev)
3081 && sets_cc0_p (PATTERN (prev)))
3082 {
3083 if (sets_cc0_p (PATTERN (prev)) > 0
3084 && ! side_effects_p (PATTERN (prev)))
3085 delete_computation (prev);
3086 else
3087 /* Otherwise, show that cc0 won't be used. */
3088 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3089 }
3090 }
3091
3092 for (note = REG_NOTES (insn); note; note = next)
3093 {
3094 next = XEXP (note, 1);
3095
3096 if (REG_NOTE_KIND (note) != REG_DEAD
3097 /* Verify that the REG_NOTE is legitimate. */
3098 || !REG_P (XEXP (note, 0)))
3099 continue;
3100
3101 delete_prior_computation (note, insn);
3102 }
3103
3104 delete_related_insns (insn);
3105 }
3106
3107 /* If all INSN does is set the pc, delete it,
3108 and delete the insn that set the condition codes for it
3109 if that's what the previous thing was. */
3110
3111 static void
3112 delete_jump (rtx_insn *insn)
3113 {
3114 rtx set = single_set (insn);
3115
3116 if (set && GET_CODE (SET_DEST (set)) == PC)
3117 delete_computation (insn);
3118 }
3119
3120 static rtx_insn *
3121 label_before_next_insn (rtx_insn *x, rtx scan_limit)
3122 {
3123 rtx_insn *insn = next_active_insn (x);
3124 while (insn)
3125 {
3126 insn = PREV_INSN (insn);
3127 if (insn == scan_limit || insn == NULL_RTX)
3128 return NULL;
3129 if (LABEL_P (insn))
3130 break;
3131 }
3132 return insn;
3133 }
3134
3135 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3136 BEG and END. */
3137
3138 static bool
3139 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3140 {
3141 const rtx_insn *p;
3142 for (p = beg; p != end; p = NEXT_INSN (p))
3143 if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3144 return true;
3145 return false;
3146 }
3147
3148
3149 /* Once we have tried two ways to fill a delay slot, make a pass over the
3150 code to try to improve the results and to do such things as more jump
3151 threading. */
3152
3153 static void
3154 relax_delay_slots (rtx_insn *first)
3155 {
3156 rtx_insn *insn, *next;
3157 rtx_sequence *pat;
3158 rtx_insn *delay_insn;
3159 rtx target_label;
3160
3161 /* Look at every JUMP_INSN and see if we can improve it. */
3162 for (insn = first; insn; insn = next)
3163 {
3164 rtx_insn *other, *prior_insn;
3165 bool crossing;
3166
3167 next = next_active_insn (insn);
3168
3169 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3170 the next insn, or jumps to a label that is not the last of a
3171 group of consecutive labels. */
3172 if (is_a <rtx_jump_insn *> (insn)
3173 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3174 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3175 {
3176 rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3177 target_label
3178 = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3179 &crossing));
3180 if (ANY_RETURN_P (target_label))
3181 target_label = find_end_label (target_label);
3182
3183 if (target_label
3184 && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3185 && ! condjump_in_parallel_p (jump_insn)
3186 && ! (next && switch_text_sections_between_p (jump_insn, next)))
3187 {
3188 delete_jump (jump_insn);
3189 continue;
3190 }
3191
3192 if (target_label && target_label != JUMP_LABEL (jump_insn))
3193 {
3194 reorg_redirect_jump (jump_insn, target_label);
3195 if (crossing)
3196 CROSSING_JUMP_P (jump_insn) = 1;
3197 }
3198
3199 /* See if this jump conditionally branches around an unconditional
3200 jump. If so, invert this jump and point it to the target of the
3201 second jump. Check if it's possible on the target. */
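/* An illustrative sketch in target-neutral pseudo-assembly (not any
   particular port's syntax):

        beq   L1
        jmp   L2          becomes        bne   L2
     L1:                              L1:

   i.e. the conditional branch is inverted and retargeted at L2, and the
   now-redundant unconditional jump is deleted.  */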
3202 if (next && simplejump_or_return_p (next)
3203 && any_condjump_p (jump_insn)
3204 && target_label
3205 && (next_active_insn (as_a<rtx_insn *> (target_label))
3206 == next_active_insn (next))
3207 && no_labels_between_p (jump_insn, next)
3208 && targetm.can_follow_jump (jump_insn, next))
3209 {
3210 rtx label = JUMP_LABEL (next);
3211
3212 /* Be careful how we do this to avoid deleting code or
3213 labels that are momentarily dead. See similar optimization
3214 in jump.c.
3215
3216 We also need to ensure we properly handle the case when
3217 invert_jump fails. */
3218
3219 ++LABEL_NUSES (target_label);
3220 if (!ANY_RETURN_P (label))
3221 ++LABEL_NUSES (label);
3222
3223 if (invert_jump (jump_insn, label, 1))
3224 {
3225 rtx_insn *from = delete_related_insns (next);
3226
3227 /* We have just removed a BARRIER, which means that the block
3228 number of the next insns has effectively been changed (see
3229 find_basic_block in resource.c), so clear it. */
3230 if (from)
3231 clear_hashed_info_until_next_barrier (from);
3232
3233 next = jump_insn;
3234 }
3235
3236 if (!ANY_RETURN_P (label))
3237 --LABEL_NUSES (label);
3238
3239 if (--LABEL_NUSES (target_label) == 0)
3240 delete_related_insns (target_label);
3241
3242 continue;
3243 }
3244 }
3245
3246 /* If this is an unconditional jump and the previous insn is a
3247 conditional jump, try reversing the condition of the previous
3248 insn and swapping our targets. The next pass might be able to
3249 fill the slots.
3250
3251 Don't do this if we expect the conditional branch to be true, because
3252 we would then be making the more common case longer. */
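/* Sketch (illustrative pseudo-assembly), for a conditional branch that is
   expected to fall through:

        beq   L1                         bne   L2
        jmp   L2          becomes        jmp   L1

   so the common path takes one branch instead of two, and a later pass
   may be able to fill the remaining jump's delay slots.  */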
3253
3254 if (simplejump_or_return_p (insn)
3255 && (other = prev_active_insn (insn)) != 0
3256 && any_condjump_p (other)
3257 && no_labels_between_p (other, insn)
3258 && mostly_true_jump (other) < 0)
3259 {
3260 rtx other_target = JUMP_LABEL (other);
3261 target_label = JUMP_LABEL (insn);
3262
3263 if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3264 reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3265 }
3266
3267 /* Now look only at cases where we have a filled delay slot. */
3268 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3269 continue;
3270
3271 pat = as_a <rtx_sequence *> (PATTERN (insn));
3272 delay_insn = pat->insn (0);
3273
3274 /* See if the first insn in the delay slot is redundant with some
3275 previous insn. Remove it from the delay slot if so; then set up
3276 to reprocess this insn. */
3277 if ((prior_insn = redundant_insn (pat->insn (1), delay_insn, vNULL)))
3278 {
3279 fix_reg_dead_note (prior_insn, insn);
3280 update_block (pat->insn (1), insn);
3281 delete_from_delay_slot (pat->insn (1));
3282 next = prev_active_insn (next);
3283 continue;
3284 }
3285
3286 /* See if we have a RETURN insn with a filled delay slot followed
3287 by a RETURN insn with an unfilled delay slot. If so, we can delete
3288 the first RETURN (but not its delay insn). This gives the same
3289 effect in fewer instructions.
3290
3291 Only do so if optimizing for size since this results in slower, but
3292 smaller code. */
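/* Illustrative sketch:

        return        ; delay slot: insn D
        return        ; delay slot unfilled

   becomes

        insn D
        return        ; delay slot unfilled

   which executes the same insns with one fewer RETURN.  */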
3293 if (optimize_function_for_size_p (cfun)
3294 && ANY_RETURN_P (PATTERN (delay_insn))
3295 && next
3296 && JUMP_P (next)
3297 && PATTERN (next) == PATTERN (delay_insn))
3298 {
3299 rtx_insn *after;
3300 int i;
3301
3302 /* Delete the RETURN and just execute the delay list insns.
3303
3304 We do this by deleting the INSN containing the SEQUENCE, then
3305 re-emitting the insns separately, and then deleting the RETURN.
3306 This allows the count of the jump target to be properly
3307 decremented.
3308
3309 Note that we need to change the INSN_UID of the re-emitted insns
3310 since it is used to hash the insns for mark_target_live_regs and
3311 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3312
3313 Clear the from target bit, since these insns are no longer
3314 in delay slots. */
3315 for (i = 0; i < XVECLEN (pat, 0); i++)
3316 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3317
3318 rtx_insn *prev = PREV_INSN (insn);
3319 delete_related_insns (insn);
3320 gcc_assert (GET_CODE (pat) == SEQUENCE);
3321 add_insn_after (delay_insn, prev, NULL);
3322 after = delay_insn;
3323 for (i = 1; i < pat->len (); i++)
3324 after = emit_copy_of_insn_after (pat->insn (i), after);
3325 delete_scheduled_jump (delay_insn);
3326 continue;
3327 }
3328
3329 /* Now look only at the cases where we have a filled JUMP_INSN. */
3330 rtx_jump_insn *delay_jump_insn =
3331 dyn_cast <rtx_jump_insn *> (delay_insn);
3332 if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3333 || condjump_in_parallel_p (delay_jump_insn)))
3334 continue;
3335
3336 target_label = JUMP_LABEL (delay_jump_insn);
3337 if (target_label && ANY_RETURN_P (target_label))
3338 continue;
3339
3340 /* If this jump goes to another unconditional jump, thread it, but
3341 don't convert a jump into a RETURN here. */
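/* E.g. a branch to L1 where the insn at L1 is "jmp L2" is simply
   retargeted at L2 (illustrative; the actual chain walk is done by
   follow_jumps below).  */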
3342 rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3343 delay_jump_insn,
3344 &crossing));
3345 if (ANY_RETURN_P (trial))
3346 trial = find_end_label (trial);
3347
3348 if (trial && trial != target_label
3349 && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3350 {
3351 reorg_redirect_jump (delay_jump_insn, trial);
3352 target_label = trial;
3353 if (crossing)
3354 CROSSING_JUMP_P (delay_jump_insn) = 1;
3355 }
3356
3357 /* If the first insn at TARGET_LABEL is redundant with a previous
3358 insn, redirect the jump to the following insn and process again.
3359 We use next_real_nondebug_insn instead of next_active_insn so we
3360 don't skip USE-markers, or we'll end up with incorrect
3361 liveness info. */
3362 trial = next_real_nondebug_insn (target_label);
3363 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3364 && redundant_insn (trial, insn, vNULL)
3365 && ! can_throw_internal (trial))
3366 {
3367 /* Figure out where to emit the special USE insn so we don't
3368 later incorrectly compute register live/death info. */
3369 rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3370 if (tmp == 0)
3371 tmp = find_end_label (simple_return_rtx);
3372
3373 if (tmp)
3374 {
3375 /* Insert the special USE insn and update dataflow info.
3376 We know "trial" is an insn here as it is the output of
3377 next_real_nondebug_insn () above. */
3378 update_block (as_a <rtx_insn *> (trial), tmp);
3379
3380 /* Now emit a label before the special USE insn, and
3381 redirect our jump to the new label. */
3382 target_label = get_label_before (PREV_INSN (tmp), target_label);
3383 reorg_redirect_jump (delay_jump_insn, target_label);
3384 next = insn;
3385 continue;
3386 }
3387 }
3388
3389 /* Similarly, if it is an unconditional jump with one insn in its
3390 delay list and that insn is redundant, thread the jump. */
3391 rtx_sequence *trial_seq =
3392 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3393 if (trial_seq
3394 && trial_seq->len () == 2
3395 && JUMP_P (trial_seq->insn (0))
3396 && simplejump_or_return_p (trial_seq->insn (0))
3397 && redundant_insn (trial_seq->insn (1), insn, vNULL))
3398 {
3399 rtx temp_label = JUMP_LABEL (trial_seq->insn (0));
3400 if (ANY_RETURN_P (temp_label))
3401 temp_label = find_end_label (temp_label);
3402
3403 if (temp_label
3404 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3405 temp_label, insn))
3406 {
3407 update_block (trial_seq->insn (1), insn);
3408 reorg_redirect_jump (delay_jump_insn, temp_label);
3409 next = insn;
3410 continue;
3411 }
3412 }
3413
3414 /* See if we have a simple (conditional) jump that is useless. */
3415 if (!CROSSING_JUMP_P (delay_jump_insn)
3416 && !INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3417 && !condjump_in_parallel_p (delay_jump_insn)
3418 && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3419 && !BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label)))
3420 /* If the last insn in the delay slot sets CC0 for some insn,
3421 various code assumes that it is in a delay slot. We could
3422 put it back where it belonged and delete the register notes,
3423 but it doesn't seem worthwhile in this uncommon case. */
3424 && (!HAVE_cc0
3425 || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3426 REG_CC_USER, NULL_RTX)))
3427 {
3428 rtx_insn *after;
3429 int i;
3430
3431 /* All this insn does is execute its delay list and jump to the
3432 following insn. So delete the jump and just execute the delay
3433 list insns.
3434
3435 We do this by deleting the INSN containing the SEQUENCE, then
3436 re-emitting the insns separately, and then deleting the jump.
3437 This allows the count of the jump target to be properly
3438 decremented.
3439
3440 Note that we need to change the INSN_UID of the re-emitted insns
3441 since it is used to hash the insns for mark_target_live_regs and
3442 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3443
3444 Clear the from target bit, since these insns are no longer
3445 in delay slots. */
3446 for (i = 0; i < XVECLEN (pat, 0); i++)
3447 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3448
3449 rtx_insn *prev = PREV_INSN (insn);
3450 delete_related_insns (insn);
3451 gcc_assert (GET_CODE (pat) == SEQUENCE);
3452 add_insn_after (delay_jump_insn, prev, NULL);
3453 after = delay_jump_insn;
3454 for (i = 1; i < pat->len (); i++)
3455 after = emit_copy_of_insn_after (pat->insn (i), after);
3456 delete_scheduled_jump (delay_jump_insn);
3457 continue;
3458 }
3459
3460 /* See if this is an unconditional jump around a single insn which is
3461 identical to the one in its delay slot. In this case, we can just
3462 delete the branch and the insn in its delay slot. */
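/* Illustrative sketch:

        jmp   L1      ; delay slot: insn D
        insn  D
     L1:

   The delay slot already executes D on the way to L1, so the jump and the
   copy of D in its slot can be removed, leaving just "insn D" followed
   by L1.  */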
3463 if (next && NONJUMP_INSN_P (next)
3464 && label_before_next_insn (next, insn) == target_label
3465 && simplejump_p (insn)
3466 && XVECLEN (pat, 0) == 2
3467 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3468 {
3469 delete_related_insns (insn);
3470 continue;
3471 }
3472
3473 /* See if this jump (with its delay slots) conditionally branches
3474 around an unconditional jump (without delay slots). If so, invert
3475 this jump and point it to the target of the second jump. We cannot
3476 do this for annulled jumps, though. Again, don't convert a jump to
3477 a RETURN here. */
3478 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3479 && any_condjump_p (delay_jump_insn)
3480 && next && simplejump_or_return_p (next)
3481 && (next_active_insn (as_a<rtx_insn *> (target_label))
3482 == next_active_insn (next))
3483 && no_labels_between_p (insn, next))
3484 {
3485 rtx label = JUMP_LABEL (next);
3486 rtx old_label = JUMP_LABEL (delay_jump_insn);
3487
3488 if (ANY_RETURN_P (label))
3489 label = find_end_label (label);
3490
3491 /* find_end_label can generate a new label. Check this first. */
3492 if (label
3493 && no_labels_between_p (insn, next)
3494 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3495 label, insn))
3496 {
3497 /* Be careful how we do this to avoid deleting code or labels
3498 that are momentarily dead. See similar optimization in
3499 jump.c. */
3500 if (old_label)
3501 ++LABEL_NUSES (old_label);
3502
3503 if (invert_jump (delay_jump_insn, label, 1))
3504 {
3505 /* Must update the INSN_FROM_TARGET_P bits now that
3506 the branch is reversed, so that mark_target_live_regs
3507 will handle the delay slot insn correctly. */
3508 for (int i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3509 {
3510 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3511 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3512 }
3513
3514 /* We have just removed a BARRIER, which means that the block
3515 number of the next insns has effectively been changed (see
3516 find_basic_block in resource.c), so clear it. */
3517 rtx_insn *from = delete_related_insns (next);
3518 if (from)
3519 clear_hashed_info_until_next_barrier (from);
3520
3521 next = insn;
3522 }
3523
3524 if (old_label && --LABEL_NUSES (old_label) == 0)
3525 delete_related_insns (old_label);
3526 continue;
3527 }
3528 }
3529
3530 /* If we own the thread opposite the way this insn branches, see if we
3531 can merge its delay slots with following insns. */
3532 if (INSN_FROM_TARGET_P (pat->insn (1))
3533 && own_thread_p (NEXT_INSN (insn), 0, 1))
3534 try_merge_delay_insns (insn, next);
3535 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3536 && own_thread_p (target_label, target_label, 0))
3537 try_merge_delay_insns (insn,
3538 next_active_insn (as_a<rtx_insn *> (target_label)));
3539
3540 /* If we get here, we haven't deleted INSN. But we may have deleted
3541 NEXT, so recompute it. */
3542 next = next_active_insn (insn);
3543 }
3544 }
3545
3546
3547 /* Look for filled jumps to the end of function label. We can try to convert
3548 them into RETURN insns if the insns in the delay slot are valid for the
3549 RETURN as well. */
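/* Illustrative sketch: a filled jump to the end-of-function label, e.g.

        jmp   <end of function label>   ; delay slot: insn D

   can be rewritten as

        return                          ; delay slot: insn D

   provided D is also acceptable in the RETURN's delay slots, which is
   what the eligible_for_* checks below verify.  */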
3550
3551 static void
3552 make_return_insns (rtx_insn *first)
3553 {
3554 rtx_insn *insn;
3555 rtx_jump_insn *jump_insn;
3556 rtx real_return_label = function_return_label;
3557 rtx real_simple_return_label = function_simple_return_label;
3558 int slots, i;
3559
3560 /* See if there is a RETURN insn in the function other than the one we
3561 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3562 into a RETURN to jump to it. */
3563 for (insn = first; insn; insn = NEXT_INSN (insn))
3564 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3565 {
3566 rtx t = get_label_before (insn, NULL_RTX);
3567 if (PATTERN (insn) == ret_rtx)
3568 real_return_label = t;
3569 else
3570 real_simple_return_label = t;
3571 break;
3572 }
3573
3574 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3575 was equal to END_OF_FUNCTION_LABEL. */
3576 if (real_return_label)
3577 LABEL_NUSES (real_return_label)++;
3578 if (real_simple_return_label)
3579 LABEL_NUSES (real_simple_return_label)++;
3580
3581 /* Clear the list of insns to fill so we can use it. */
3582 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3583
3584 for (insn = first; insn; insn = NEXT_INSN (insn))
3585 {
3586 int flags;
3587 rtx kind, real_label;
3588
3589 /* Only look at filled JUMP_INSNs that go to the end of function
3590 label. */
3591 if (!NONJUMP_INSN_P (insn))
3592 continue;
3593
3594 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3595 continue;
3596
3597 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3598
3599 if (!jump_to_label_p (pat->insn (0)))
3600 continue;
3601
3602 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3603 {
3604 kind = ret_rtx;
3605 real_label = real_return_label;
3606 }
3607 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3608 {
3609 kind = simple_return_rtx;
3610 real_label = real_simple_return_label;
3611 }
3612 else
3613 continue;
3614
3615 jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3616
3617 /* If we can't make the jump into a RETURN, try to redirect it to the best
3618 RETURN and go on to the next insn. */
3619 if (!reorg_redirect_jump (jump_insn, kind))
3620 {
3621 /* Make sure redirecting the jump will not invalidate the delay
3622 slot insns. */
3623 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3624 reorg_redirect_jump (jump_insn, real_label);
3625 continue;
3626 }
3627
3628 /* See if this RETURN can accept the insns currently in its delay slot.
3629 It can if it has at least as many slots and the contents of each
3630 are valid. */
3631
3632 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3633 slots = num_delay_slots (jump_insn);
3634 if (slots >= XVECLEN (pat, 0) - 1)
3635 {
3636 for (i = 1; i < XVECLEN (pat, 0); i++)
3637 if (! (
3638 #if ANNUL_IFFALSE_SLOTS
3639 (INSN_ANNULLED_BRANCH_P (jump_insn)
3640 && INSN_FROM_TARGET_P (pat->insn (i)))
3641 ? eligible_for_annul_false (jump_insn, i - 1,
3642 pat->insn (i), flags) :
3643 #endif
3644 #if ANNUL_IFTRUE_SLOTS
3645 (INSN_ANNULLED_BRANCH_P (jump_insn)
3646 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3647 ? eligible_for_annul_true (jump_insn, i - 1,
3648 pat->insn (i), flags) :
3649 #endif
3650 eligible_for_delay (jump_insn, i - 1,
3651 pat->insn (i), flags)))
3652 break;
3653 }
3654 else
3655 i = 0;
3656
3657 if (i == XVECLEN (pat, 0))
3658 continue;
3659
3660 /* We have to do something with this insn. If it is an unconditional
3661 RETURN, delete the SEQUENCE and output the individual insns,
3662 followed by the RETURN. Then set things up so we try to find
3663 insns for its delay slots, if it needs some. */
3664 if (ANY_RETURN_P (PATTERN (jump_insn)))
3665 {
3666 rtx_insn *after = PREV_INSN (insn);
3667
3668 delete_related_insns (insn);
3669 insn = jump_insn;
3670 for (i = 1; i < pat->len (); i++)
3671 after = emit_copy_of_insn_after (pat->insn (i), after);
3672 add_insn_after (insn, after, NULL);
3673 emit_barrier_after (insn);
3674
3675 if (slots)
3676 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3677 }
3678 else
3679 /* It is probably more efficient to keep this with its current
3680 delay slot as a branch to a RETURN. */
3681 reorg_redirect_jump (jump_insn, real_label);
3682 }
3683
3684 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3685 new delay slots we have created. */
3686 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3687 delete_related_insns (real_return_label);
3688 if (real_simple_return_label != NULL_RTX
3689 && --LABEL_NUSES (real_simple_return_label) == 0)
3690 delete_related_insns (real_simple_return_label);
3691
3692 fill_simple_delay_slots (1);
3693 fill_simple_delay_slots (0);
3694 }
3695
3696 /* Try to find insns to place in delay slots. */
3697
3698 static void
3699 dbr_schedule (rtx_insn *first)
3700 {
3701 rtx_insn *insn, *next, *epilogue_insn = 0;
3702 int i;
3703 bool need_return_insns;
3704
3705 /* If the current function has no insns other than the prologue and
3706 epilogue, then do not try to fill any delay slots. */
3707 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3708 return;
3709
3710 /* Find the highest INSN_UID and allocate and initialize our map from
3711 INSN_UID's to position in code. */
3712 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3713 {
3714 if (INSN_UID (insn) > max_uid)
3715 max_uid = INSN_UID (insn);
3716 if (NOTE_P (insn)
3717 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3718 epilogue_insn = insn;
3719 }
3720
3721 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3722 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3723 uid_to_ruid[INSN_UID (insn)] = i;
3724
3725 /* Initialize the list of insns that need filling. */
3726 if (unfilled_firstobj == 0)
3727 {
3728 gcc_obstack_init (&unfilled_slots_obstack);
3729 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3730 }
3731
3732 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3733 {
3734 rtx target;
3735
3736 /* Skip vector tables. We can't get attributes for them. */
3737 if (JUMP_TABLE_DATA_P (insn))
3738 continue;
3739
3740 if (JUMP_P (insn))
3741 INSN_ANNULLED_BRANCH_P (insn) = 0;
3742 INSN_FROM_TARGET_P (insn) = 0;
3743
3744 if (num_delay_slots (insn) > 0)
3745 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3746
3747 /* Ensure all jumps go to the last of a set of consecutive labels. */
3748 if (JUMP_P (insn)
3749 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3750 && !ANY_RETURN_P (JUMP_LABEL (insn))
3751 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3752 != JUMP_LABEL (insn)))
3753 redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3754 }
3755
3756 init_resource_info (epilogue_insn);
3757
3758 /* Show we haven't computed an end-of-function label yet. */
3759 function_return_label = function_simple_return_label = NULL;
3760
3761 /* Initialize the statistics for this function. */
3762 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3763 memset (num_filled_delays, 0, sizeof num_filled_delays);
3764
3765 /* Now do the delay slot filling. Try everything twice in case earlier
3766 changes make more slots fillable. */
3767
3768 for (reorg_pass_number = 0;
3769 reorg_pass_number < MAX_REORG_PASSES;
3770 reorg_pass_number++)
3771 {
3772 fill_simple_delay_slots (1);
3773 fill_simple_delay_slots (0);
3774 if (!targetm.no_speculation_in_delay_slots_p ())
3775 fill_eager_delay_slots ();
3776 relax_delay_slots (first);
3777 }
3778
3779 /* If we made an end of function label, indicate that it is now
3780 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3781 If it is now unused, delete it. */
3782 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3783 delete_related_insns (function_return_label);
3784 if (function_simple_return_label
3785 && --LABEL_NUSES (function_simple_return_label) == 0)
3786 delete_related_insns (function_simple_return_label);
3787
3788 need_return_insns = false;
3789 need_return_insns |= targetm.have_return () && function_return_label != 0;
3790 need_return_insns |= (targetm.have_simple_return ()
3791 && function_simple_return_label != 0);
3792 if (need_return_insns)
3793 make_return_insns (first);
3794
3795 /* Delete any USE insns made by update_block; subsequent passes don't need
3796 them or know how to deal with them. */
3797 for (insn = first; insn; insn = next)
3798 {
3799 next = NEXT_INSN (insn);
3800
3801 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3802 && INSN_P (XEXP (PATTERN (insn), 0)))
3803 next = delete_related_insns (insn);
3804 }
3805
3806 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3807
3808 /* It is not clear why the line below is needed, but it does seem to be. */
3809 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3810
3811 if (dump_file)
3812 {
3813 int i, j, need_comma;
3814 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3815 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3816
3817 for (reorg_pass_number = 0;
3818 reorg_pass_number < MAX_REORG_PASSES;
3819 reorg_pass_number++)
3820 {
3821 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3822 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3823 {
3824 need_comma = 0;
3825 fprintf (dump_file, ";; Reorg function #%d\n", i);
3826
3827 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3828 num_insns_needing_delays[i][reorg_pass_number]);
3829
3830 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3831 if (num_filled_delays[i][j][reorg_pass_number])
3832 {
3833 if (need_comma)
3834 fprintf (dump_file, ", ");
3835 need_comma = 1;
3836 fprintf (dump_file, "%d got %d delays",
3837 num_filled_delays[i][j][reorg_pass_number], j);
3838 }
3839 fprintf (dump_file, "\n");
3840 }
3841 }
3842 memset (total_delay_slots, 0, sizeof total_delay_slots);
3843 memset (total_annul_slots, 0, sizeof total_annul_slots);
3844 for (insn = first; insn; insn = NEXT_INSN (insn))
3845 {
3846 if (! insn->deleted ()
3847 && NONJUMP_INSN_P (insn)
3848 && GET_CODE (PATTERN (insn)) != USE
3849 && GET_CODE (PATTERN (insn)) != CLOBBER
3850 && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH)
3851 {
3852 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3853 {
3854 rtx control;
3855 j = XVECLEN (PATTERN (insn), 0) - 1;
3856 if (j > MAX_DELAY_HISTOGRAM)
3857 j = MAX_DELAY_HISTOGRAM;
3858 control = XVECEXP (PATTERN (insn), 0, 0);
3859 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3860 total_annul_slots[j]++;
3861 else
3862 total_delay_slots[j]++;
3863 }
3864 else if (num_delay_slots (insn) > 0)
3865 total_delay_slots[0]++;
3866 }
3867 }
3868 fprintf (dump_file, ";; Reorg totals: ");
3869 need_comma = 0;
3870 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3871 {
3872 if (total_delay_slots[j])
3873 {
3874 if (need_comma)
3875 fprintf (dump_file, ", ");
3876 need_comma = 1;
3877 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3878 }
3879 }
3880 fprintf (dump_file, "\n");
3881
3882 if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3883 {
3884 fprintf (dump_file, ";; Reorg annuls: ");
3885 need_comma = 0;
3886 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3887 {
3888 if (total_annul_slots[j])
3889 {
3890 if (need_comma)
3891 fprintf (dump_file, ", ");
3892 need_comma = 1;
3893 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3894 }
3895 }
3896 fprintf (dump_file, "\n");
3897 }
3898
3899 fprintf (dump_file, "\n");
3900 }
3901
3902 if (!sibling_labels.is_empty ())
3903 {
3904 update_alignments (sibling_labels);
3905 sibling_labels.release ();
3906 }
3907
3908 free_resource_info ();
3909 free (uid_to_ruid);
3910 crtl->dbr_scheduled_p = true;
3911 }
3912
3913 /* Run delay slot optimization. */
3914 static unsigned int
3915 rest_of_handle_delay_slots (void)
3916 {
3917 if (DELAY_SLOTS)
3918 dbr_schedule (get_insns ());
3919
3920 return 0;
3921 }
3922
3923 namespace {
3924
3925 const pass_data pass_data_delay_slots =
3926 {
3927 RTL_PASS, /* type */
3928 "dbr", /* name */
3929 OPTGROUP_NONE, /* optinfo_flags */
3930 TV_DBR_SCHED, /* tv_id */
3931 0, /* properties_required */
3932 0, /* properties_provided */
3933 0, /* properties_destroyed */
3934 0, /* todo_flags_start */
3935 0, /* todo_flags_finish */
3936 };
3937
3938 class pass_delay_slots : public rtl_opt_pass
3939 {
3940 public:
3941 pass_delay_slots (gcc::context *ctxt)
3942 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3943 {}
3944
3945 /* opt_pass methods: */
3946 virtual bool gate (function *);
3947 virtual unsigned int execute (function *)
3948 {
3949 return rest_of_handle_delay_slots ();
3950 }
3951
3952 }; // class pass_delay_slots
3953
3954 bool
3955 pass_delay_slots::gate (function *)
3956 {
3957 /* At -O0 dataflow info isn't updated after RA. */
3958 if (DELAY_SLOTS)
3959 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3960
3961 return false;
3962 }
3963
3964 } // anon namespace
3965
3966 rtl_opt_pass *
3967 make_pass_delay_slots (gcc::context *ctxt)
3968 {
3969 return new pass_delay_slots (ctxt);
3970 }
3971
3972 /* Machine dependent reorg pass. */
3973
3974 namespace {
3975
3976 const pass_data pass_data_machine_reorg =
3977 {
3978 RTL_PASS, /* type */
3979 "mach", /* name */
3980 OPTGROUP_NONE, /* optinfo_flags */
3981 TV_MACH_DEP, /* tv_id */
3982 0, /* properties_required */
3983 0, /* properties_provided */
3984 0, /* properties_destroyed */
3985 0, /* todo_flags_start */
3986 0, /* todo_flags_finish */
3987 };
3988
3989 class pass_machine_reorg : public rtl_opt_pass
3990 {
3991 public:
3992 pass_machine_reorg (gcc::context *ctxt)
3993 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3994 {}
3995
3996 /* opt_pass methods: */
3997 virtual bool gate (function *)
3998 {
3999 return targetm.machine_dependent_reorg != 0;
4000 }
4001
4002 virtual unsigned int execute (function *)
4003 {
4004 targetm.machine_dependent_reorg ();
4005 return 0;
4006 }
4007
4008 }; // class pass_machine_reorg
4009
4010 } // anon namespace
4011
4012 rtl_opt_pass *
4013 make_pass_machine_reorg (gcc::context *ctxt)
4014 {
4015 return new pass_machine_reorg (ctxt);
4016 }
4017