1*e4b17023SJohn Marino /* Perform doloop optimizations
2*e4b17023SJohn Marino Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
5*e4b17023SJohn Marino
6*e4b17023SJohn Marino This file is part of GCC.
7*e4b17023SJohn Marino
8*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
9*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
10*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
11*e4b17023SJohn Marino version.
12*e4b17023SJohn Marino
13*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
15*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16*e4b17023SJohn Marino for more details.
17*e4b17023SJohn Marino
18*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
19*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
20*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
21*e4b17023SJohn Marino
22*e4b17023SJohn Marino #include "config.h"
23*e4b17023SJohn Marino #include "system.h"
24*e4b17023SJohn Marino #include "coretypes.h"
25*e4b17023SJohn Marino #include "tm.h"
26*e4b17023SJohn Marino #include "rtl.h"
27*e4b17023SJohn Marino #include "flags.h"
28*e4b17023SJohn Marino #include "expr.h"
29*e4b17023SJohn Marino #include "hard-reg-set.h"
30*e4b17023SJohn Marino #include "basic-block.h"
31*e4b17023SJohn Marino #include "diagnostic-core.h"
32*e4b17023SJohn Marino #include "tm_p.h"
33*e4b17023SJohn Marino #include "cfgloop.h"
34*e4b17023SJohn Marino #include "output.h"
35*e4b17023SJohn Marino #include "params.h"
36*e4b17023SJohn Marino #include "target.h"
37*e4b17023SJohn Marino
38*e4b17023SJohn Marino /* This module is used to modify loops with a determinable number of
39*e4b17023SJohn Marino iterations to use special low-overhead looping instructions.
40*e4b17023SJohn Marino
41*e4b17023SJohn Marino It first validates whether the loop is well behaved and has a
42*e4b17023SJohn Marino determinable number of iterations (either at compile or run-time).
43*e4b17023SJohn Marino It then modifies the loop to use a low-overhead looping pattern as
44*e4b17023SJohn Marino follows:
45*e4b17023SJohn Marino
46*e4b17023SJohn Marino 1. A pseudo register is allocated as the loop iteration counter.
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino 2. The number of loop iterations is calculated and is stored
49*e4b17023SJohn Marino in the loop counter.
50*e4b17023SJohn Marino
51*e4b17023SJohn Marino 3. At the end of the loop, the jump insn is replaced by the
52*e4b17023SJohn Marino doloop_end pattern. The compare must remain because it might be
53*e4b17023SJohn Marino used elsewhere. If the loop-variable or condition register are
54*e4b17023SJohn Marino used elsewhere, they will be eliminated by flow.
55*e4b17023SJohn Marino
56*e4b17023SJohn Marino 4. An optional doloop_begin pattern is inserted at the top of the
57*e4b17023SJohn Marino loop.
58*e4b17023SJohn Marino
59*e4b17023SJohn Marino TODO The optimization should only performed when either the biv used for exit
60*e4b17023SJohn Marino condition is unused at all except for the exit test, or if we do not have to
61*e4b17023SJohn Marino change its value, since otherwise we have to add a new induction variable,
62*e4b17023SJohn Marino which usually will not pay up (unless the cost of the doloop pattern is
63*e4b17023SJohn Marino somehow extremely lower than the cost of compare & jump, or unless the bct
64*e4b17023SJohn Marino register cannot be used for anything else but doloop -- ??? detect these
65*e4b17023SJohn Marino cases). */
66*e4b17023SJohn Marino
67*e4b17023SJohn Marino #ifdef HAVE_doloop_end
68*e4b17023SJohn Marino
69*e4b17023SJohn Marino /* Return the loop termination condition for PATTERN or zero
70*e4b17023SJohn Marino if it is not a decrement and branch jump insn. */
71*e4b17023SJohn Marino
72*e4b17023SJohn Marino rtx
doloop_condition_get(rtx doloop_pat)73*e4b17023SJohn Marino doloop_condition_get (rtx doloop_pat)
74*e4b17023SJohn Marino {
75*e4b17023SJohn Marino rtx cmp;
76*e4b17023SJohn Marino rtx inc;
77*e4b17023SJohn Marino rtx reg;
78*e4b17023SJohn Marino rtx inc_src;
79*e4b17023SJohn Marino rtx condition;
80*e4b17023SJohn Marino rtx pattern;
81*e4b17023SJohn Marino rtx cc_reg = NULL_RTX;
82*e4b17023SJohn Marino rtx reg_orig = NULL_RTX;
83*e4b17023SJohn Marino
84*e4b17023SJohn Marino /* The canonical doloop pattern we expect has one of the following
85*e4b17023SJohn Marino forms:
86*e4b17023SJohn Marino
87*e4b17023SJohn Marino 1) (parallel [(set (pc) (if_then_else (condition)
88*e4b17023SJohn Marino (label_ref (label))
89*e4b17023SJohn Marino (pc)))
90*e4b17023SJohn Marino (set (reg) (plus (reg) (const_int -1)))
91*e4b17023SJohn Marino (additional clobbers and uses)])
92*e4b17023SJohn Marino
93*e4b17023SJohn Marino The branch must be the first entry of the parallel (also required
94*e4b17023SJohn Marino by jump.c), and the second entry of the parallel must be a set of
95*e4b17023SJohn Marino the loop counter register. Some targets (IA-64) wrap the set of
96*e4b17023SJohn Marino the loop counter in an if_then_else too.
97*e4b17023SJohn Marino
98*e4b17023SJohn Marino 2) (set (reg) (plus (reg) (const_int -1))
99*e4b17023SJohn Marino (set (pc) (if_then_else (reg != 0)
100*e4b17023SJohn Marino (label_ref (label))
101*e4b17023SJohn Marino (pc))).
102*e4b17023SJohn Marino
103*e4b17023SJohn Marino Some targets (ARM) do the comparison before the branch, as in the
104*e4b17023SJohn Marino following form:
105*e4b17023SJohn Marino
106*e4b17023SJohn Marino 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
107*e4b17023SJohn Marino (set (reg) (plus (reg) (const_int -1)))])
108*e4b17023SJohn Marino (set (pc) (if_then_else (cc == NE)
109*e4b17023SJohn Marino (label_ref (label))
110*e4b17023SJohn Marino (pc))) */
111*e4b17023SJohn Marino
112*e4b17023SJohn Marino pattern = PATTERN (doloop_pat);
113*e4b17023SJohn Marino
114*e4b17023SJohn Marino if (GET_CODE (pattern) != PARALLEL)
115*e4b17023SJohn Marino {
116*e4b17023SJohn Marino rtx cond;
117*e4b17023SJohn Marino rtx prev_insn = prev_nondebug_insn (doloop_pat);
118*e4b17023SJohn Marino rtx cmp_arg1, cmp_arg2;
119*e4b17023SJohn Marino rtx cmp_orig;
120*e4b17023SJohn Marino
121*e4b17023SJohn Marino /* In case the pattern is not PARALLEL we expect two forms
122*e4b17023SJohn Marino of doloop which are cases 2) and 3) above: in case 2) the
123*e4b17023SJohn Marino decrement immediately precedes the branch, while in case 3)
124*e4b17023SJohn Marino the compare and decrement instructions immediately precede
125*e4b17023SJohn Marino the branch. */
126*e4b17023SJohn Marino
127*e4b17023SJohn Marino if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
128*e4b17023SJohn Marino return 0;
129*e4b17023SJohn Marino
130*e4b17023SJohn Marino cmp = pattern;
131*e4b17023SJohn Marino if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
132*e4b17023SJohn Marino {
133*e4b17023SJohn Marino /* The third case: the compare and decrement instructions
134*e4b17023SJohn Marino immediately precede the branch. */
135*e4b17023SJohn Marino cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
136*e4b17023SJohn Marino if (GET_CODE (cmp_orig) != SET)
137*e4b17023SJohn Marino return 0;
138*e4b17023SJohn Marino if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
139*e4b17023SJohn Marino return 0;
140*e4b17023SJohn Marino cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
141*e4b17023SJohn Marino cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
142*e4b17023SJohn Marino if (cmp_arg2 != const0_rtx
143*e4b17023SJohn Marino || GET_CODE (cmp_arg1) != PLUS)
144*e4b17023SJohn Marino return 0;
145*e4b17023SJohn Marino reg_orig = XEXP (cmp_arg1, 0);
146*e4b17023SJohn Marino if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
147*e4b17023SJohn Marino || !REG_P (reg_orig))
148*e4b17023SJohn Marino return 0;
149*e4b17023SJohn Marino cc_reg = SET_DEST (cmp_orig);
150*e4b17023SJohn Marino
151*e4b17023SJohn Marino inc = XVECEXP (PATTERN (prev_insn), 0, 1);
152*e4b17023SJohn Marino }
153*e4b17023SJohn Marino else
154*e4b17023SJohn Marino inc = PATTERN (prev_insn);
155*e4b17023SJohn Marino /* We expect the condition to be of the form (reg != 0) */
156*e4b17023SJohn Marino cond = XEXP (SET_SRC (cmp), 0);
157*e4b17023SJohn Marino if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
158*e4b17023SJohn Marino return 0;
159*e4b17023SJohn Marino }
160*e4b17023SJohn Marino else
161*e4b17023SJohn Marino {
162*e4b17023SJohn Marino cmp = XVECEXP (pattern, 0, 0);
163*e4b17023SJohn Marino inc = XVECEXP (pattern, 0, 1);
164*e4b17023SJohn Marino }
165*e4b17023SJohn Marino
166*e4b17023SJohn Marino /* Check for (set (reg) (something)). */
167*e4b17023SJohn Marino if (GET_CODE (inc) != SET)
168*e4b17023SJohn Marino return 0;
169*e4b17023SJohn Marino reg = SET_DEST (inc);
170*e4b17023SJohn Marino if (! REG_P (reg))
171*e4b17023SJohn Marino return 0;
172*e4b17023SJohn Marino
173*e4b17023SJohn Marino /* Check if something = (plus (reg) (const_int -1)).
174*e4b17023SJohn Marino On IA-64, this decrement is wrapped in an if_then_else. */
175*e4b17023SJohn Marino inc_src = SET_SRC (inc);
176*e4b17023SJohn Marino if (GET_CODE (inc_src) == IF_THEN_ELSE)
177*e4b17023SJohn Marino inc_src = XEXP (inc_src, 1);
178*e4b17023SJohn Marino if (GET_CODE (inc_src) != PLUS
179*e4b17023SJohn Marino || XEXP (inc_src, 0) != reg
180*e4b17023SJohn Marino || XEXP (inc_src, 1) != constm1_rtx)
181*e4b17023SJohn Marino return 0;
182*e4b17023SJohn Marino
183*e4b17023SJohn Marino /* Check for (set (pc) (if_then_else (condition)
184*e4b17023SJohn Marino (label_ref (label))
185*e4b17023SJohn Marino (pc))). */
186*e4b17023SJohn Marino if (GET_CODE (cmp) != SET
187*e4b17023SJohn Marino || SET_DEST (cmp) != pc_rtx
188*e4b17023SJohn Marino || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
189*e4b17023SJohn Marino || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
190*e4b17023SJohn Marino || XEXP (SET_SRC (cmp), 2) != pc_rtx)
191*e4b17023SJohn Marino return 0;
192*e4b17023SJohn Marino
193*e4b17023SJohn Marino /* Extract loop termination condition. */
194*e4b17023SJohn Marino condition = XEXP (SET_SRC (cmp), 0);
195*e4b17023SJohn Marino
196*e4b17023SJohn Marino /* We expect a GE or NE comparison with 0 or 1. */
197*e4b17023SJohn Marino if ((GET_CODE (condition) != GE
198*e4b17023SJohn Marino && GET_CODE (condition) != NE)
199*e4b17023SJohn Marino || (XEXP (condition, 1) != const0_rtx
200*e4b17023SJohn Marino && XEXP (condition, 1) != const1_rtx))
201*e4b17023SJohn Marino return 0;
202*e4b17023SJohn Marino
203*e4b17023SJohn Marino if ((XEXP (condition, 0) == reg)
204*e4b17023SJohn Marino /* For the third case: */
205*e4b17023SJohn Marino || ((cc_reg != NULL_RTX)
206*e4b17023SJohn Marino && (XEXP (condition, 0) == cc_reg)
207*e4b17023SJohn Marino && (reg_orig == reg))
208*e4b17023SJohn Marino || (GET_CODE (XEXP (condition, 0)) == PLUS
209*e4b17023SJohn Marino && XEXP (XEXP (condition, 0), 0) == reg))
210*e4b17023SJohn Marino {
211*e4b17023SJohn Marino if (GET_CODE (pattern) != PARALLEL)
212*e4b17023SJohn Marino /* For the second form we expect:
213*e4b17023SJohn Marino
214*e4b17023SJohn Marino (set (reg) (plus (reg) (const_int -1))
215*e4b17023SJohn Marino (set (pc) (if_then_else (reg != 0)
216*e4b17023SJohn Marino (label_ref (label))
217*e4b17023SJohn Marino (pc))).
218*e4b17023SJohn Marino
219*e4b17023SJohn Marino is equivalent to the following:
220*e4b17023SJohn Marino
221*e4b17023SJohn Marino (parallel [(set (pc) (if_then_else (reg != 1)
222*e4b17023SJohn Marino (label_ref (label))
223*e4b17023SJohn Marino (pc)))
224*e4b17023SJohn Marino (set (reg) (plus (reg) (const_int -1)))
225*e4b17023SJohn Marino (additional clobbers and uses)])
226*e4b17023SJohn Marino
227*e4b17023SJohn Marino For the third form we expect:
228*e4b17023SJohn Marino
229*e4b17023SJohn Marino (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
230*e4b17023SJohn Marino (set (reg) (plus (reg) (const_int -1)))])
231*e4b17023SJohn Marino (set (pc) (if_then_else (cc == NE)
232*e4b17023SJohn Marino (label_ref (label))
233*e4b17023SJohn Marino (pc)))
234*e4b17023SJohn Marino
235*e4b17023SJohn Marino which is equivalent to the following:
236*e4b17023SJohn Marino
237*e4b17023SJohn Marino (parallel [(set (cc) (compare (reg, 1))
238*e4b17023SJohn Marino (set (reg) (plus (reg) (const_int -1)))
239*e4b17023SJohn Marino (set (pc) (if_then_else (NE == cc)
240*e4b17023SJohn Marino (label_ref (label))
241*e4b17023SJohn Marino (pc))))])
242*e4b17023SJohn Marino
243*e4b17023SJohn Marino So we return the second form instead for the two cases.
244*e4b17023SJohn Marino
245*e4b17023SJohn Marino */
246*e4b17023SJohn Marino condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
247*e4b17023SJohn Marino
248*e4b17023SJohn Marino return condition;
249*e4b17023SJohn Marino }
250*e4b17023SJohn Marino
251*e4b17023SJohn Marino /* ??? If a machine uses a funny comparison, we could return a
252*e4b17023SJohn Marino canonicalized form here. */
253*e4b17023SJohn Marino
254*e4b17023SJohn Marino return 0;
255*e4b17023SJohn Marino }
256*e4b17023SJohn Marino
257*e4b17023SJohn Marino /* Return nonzero if the loop specified by LOOP is suitable for
258*e4b17023SJohn Marino the use of special low-overhead looping instructions. DESC
259*e4b17023SJohn Marino describes the number of iterations of the loop. */
260*e4b17023SJohn Marino
261*e4b17023SJohn Marino static bool
doloop_valid_p(struct loop * loop,struct niter_desc * desc)262*e4b17023SJohn Marino doloop_valid_p (struct loop *loop, struct niter_desc *desc)
263*e4b17023SJohn Marino {
264*e4b17023SJohn Marino basic_block *body = get_loop_body (loop), bb;
265*e4b17023SJohn Marino rtx insn;
266*e4b17023SJohn Marino unsigned i;
267*e4b17023SJohn Marino bool result = true;
268*e4b17023SJohn Marino
269*e4b17023SJohn Marino /* Check for loops that may not terminate under special conditions. */
270*e4b17023SJohn Marino if (!desc->simple_p
271*e4b17023SJohn Marino || desc->assumptions
272*e4b17023SJohn Marino || desc->infinite)
273*e4b17023SJohn Marino {
274*e4b17023SJohn Marino /* There are some cases that would require a special attention.
275*e4b17023SJohn Marino For example if the comparison is LEU and the comparison value
276*e4b17023SJohn Marino is UINT_MAX then the loop will not terminate. Similarly, if the
277*e4b17023SJohn Marino comparison code is GEU and the comparison value is 0, the
278*e4b17023SJohn Marino loop will not terminate.
279*e4b17023SJohn Marino
280*e4b17023SJohn Marino If the absolute increment is not 1, the loop can be infinite
281*e4b17023SJohn Marino even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
282*e4b17023SJohn Marino
283*e4b17023SJohn Marino ??? We could compute these conditions at run-time and have a
284*e4b17023SJohn Marino additional jump around the loop to ensure an infinite loop.
285*e4b17023SJohn Marino However, it is very unlikely that this is the intended
286*e4b17023SJohn Marino behavior of the loop and checking for these rare boundary
287*e4b17023SJohn Marino conditions would pessimize all other code.
288*e4b17023SJohn Marino
289*e4b17023SJohn Marino If the loop is executed only a few times an extra check to
290*e4b17023SJohn Marino restart the loop could use up most of the benefits of using a
291*e4b17023SJohn Marino count register loop. Note however, that normally, this
292*e4b17023SJohn Marino restart branch would never execute, so it could be predicted
293*e4b17023SJohn Marino well by the CPU. We should generate the pessimistic code by
294*e4b17023SJohn Marino default, and have an option, e.g. -funsafe-loops that would
295*e4b17023SJohn Marino enable count-register loops in this case. */
296*e4b17023SJohn Marino if (dump_file)
297*e4b17023SJohn Marino fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
298*e4b17023SJohn Marino result = false;
299*e4b17023SJohn Marino goto cleanup;
300*e4b17023SJohn Marino }
301*e4b17023SJohn Marino
302*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
303*e4b17023SJohn Marino {
304*e4b17023SJohn Marino bb = body[i];
305*e4b17023SJohn Marino
306*e4b17023SJohn Marino for (insn = BB_HEAD (bb);
307*e4b17023SJohn Marino insn != NEXT_INSN (BB_END (bb));
308*e4b17023SJohn Marino insn = NEXT_INSN (insn))
309*e4b17023SJohn Marino {
310*e4b17023SJohn Marino /* Different targets have different necessities for low-overhead
311*e4b17023SJohn Marino looping. Call the back end for each instruction within the loop
312*e4b17023SJohn Marino to let it decide whether the insn prohibits a low-overhead loop.
313*e4b17023SJohn Marino It will then return the cause for it to emit to the dump file. */
314*e4b17023SJohn Marino const char * invalid = targetm.invalid_within_doloop (insn);
315*e4b17023SJohn Marino if (invalid)
316*e4b17023SJohn Marino {
317*e4b17023SJohn Marino if (dump_file)
318*e4b17023SJohn Marino fprintf (dump_file, "Doloop: %s\n", invalid);
319*e4b17023SJohn Marino result = false;
320*e4b17023SJohn Marino goto cleanup;
321*e4b17023SJohn Marino }
322*e4b17023SJohn Marino }
323*e4b17023SJohn Marino }
324*e4b17023SJohn Marino result = true;
325*e4b17023SJohn Marino
326*e4b17023SJohn Marino cleanup:
327*e4b17023SJohn Marino free (body);
328*e4b17023SJohn Marino
329*e4b17023SJohn Marino return result;
330*e4b17023SJohn Marino }
331*e4b17023SJohn Marino
332*e4b17023SJohn Marino /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
333*e4b17023SJohn Marino edge. If the condition is always false, do not do anything. If it is always
334*e4b17023SJohn Marino true, redirect E to DEST and return false. In all other cases, true is
335*e4b17023SJohn Marino returned. */
336*e4b17023SJohn Marino
337*e4b17023SJohn Marino static bool
add_test(rtx cond,edge * e,basic_block dest)338*e4b17023SJohn Marino add_test (rtx cond, edge *e, basic_block dest)
339*e4b17023SJohn Marino {
340*e4b17023SJohn Marino rtx seq, jump, label;
341*e4b17023SJohn Marino enum machine_mode mode;
342*e4b17023SJohn Marino rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
343*e4b17023SJohn Marino enum rtx_code code = GET_CODE (cond);
344*e4b17023SJohn Marino basic_block bb;
345*e4b17023SJohn Marino
346*e4b17023SJohn Marino mode = GET_MODE (XEXP (cond, 0));
347*e4b17023SJohn Marino if (mode == VOIDmode)
348*e4b17023SJohn Marino mode = GET_MODE (XEXP (cond, 1));
349*e4b17023SJohn Marino
350*e4b17023SJohn Marino start_sequence ();
351*e4b17023SJohn Marino op0 = force_operand (op0, NULL_RTX);
352*e4b17023SJohn Marino op1 = force_operand (op1, NULL_RTX);
353*e4b17023SJohn Marino label = block_label (dest);
354*e4b17023SJohn Marino do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
355*e4b17023SJohn Marino NULL_RTX, label, -1);
356*e4b17023SJohn Marino
357*e4b17023SJohn Marino jump = get_last_insn ();
358*e4b17023SJohn Marino if (!jump || !JUMP_P (jump))
359*e4b17023SJohn Marino {
360*e4b17023SJohn Marino /* The condition is always false and the jump was optimized out. */
361*e4b17023SJohn Marino end_sequence ();
362*e4b17023SJohn Marino return true;
363*e4b17023SJohn Marino }
364*e4b17023SJohn Marino
365*e4b17023SJohn Marino seq = get_insns ();
366*e4b17023SJohn Marino end_sequence ();
367*e4b17023SJohn Marino
368*e4b17023SJohn Marino /* There always is at least the jump insn in the sequence. */
369*e4b17023SJohn Marino gcc_assert (seq != NULL_RTX);
370*e4b17023SJohn Marino
371*e4b17023SJohn Marino bb = split_edge_and_insert (*e, seq);
372*e4b17023SJohn Marino *e = single_succ_edge (bb);
373*e4b17023SJohn Marino
374*e4b17023SJohn Marino if (any_uncondjump_p (jump))
375*e4b17023SJohn Marino {
376*e4b17023SJohn Marino /* The condition is always true. */
377*e4b17023SJohn Marino delete_insn (jump);
378*e4b17023SJohn Marino redirect_edge_and_branch_force (*e, dest);
379*e4b17023SJohn Marino return false;
380*e4b17023SJohn Marino }
381*e4b17023SJohn Marino
382*e4b17023SJohn Marino JUMP_LABEL (jump) = label;
383*e4b17023SJohn Marino
384*e4b17023SJohn Marino /* The jump is supposed to handle an unlikely special case. */
385*e4b17023SJohn Marino add_reg_note (jump, REG_BR_PROB, const0_rtx);
386*e4b17023SJohn Marino
387*e4b17023SJohn Marino LABEL_NUSES (label)++;
388*e4b17023SJohn Marino
389*e4b17023SJohn Marino make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
390*e4b17023SJohn Marino return true;
391*e4b17023SJohn Marino }
392*e4b17023SJohn Marino
393*e4b17023SJohn Marino /* Modify the loop to use the low-overhead looping insn where LOOP
394*e4b17023SJohn Marino describes the loop, DESC describes the number of iterations of the
395*e4b17023SJohn Marino loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
396*e4b17023SJohn Marino end of the loop. CONDITION is the condition separated from the
397*e4b17023SJohn Marino DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
398*e4b17023SJohn Marino
399*e4b17023SJohn Marino static void
doloop_modify(struct loop * loop,struct niter_desc * desc,rtx doloop_seq,rtx condition,rtx count)400*e4b17023SJohn Marino doloop_modify (struct loop *loop, struct niter_desc *desc,
401*e4b17023SJohn Marino rtx doloop_seq, rtx condition, rtx count)
402*e4b17023SJohn Marino {
403*e4b17023SJohn Marino rtx counter_reg;
404*e4b17023SJohn Marino rtx tmp, noloop = NULL_RTX;
405*e4b17023SJohn Marino rtx sequence;
406*e4b17023SJohn Marino rtx jump_insn;
407*e4b17023SJohn Marino rtx jump_label;
408*e4b17023SJohn Marino int nonneg = 0;
409*e4b17023SJohn Marino bool increment_count;
410*e4b17023SJohn Marino basic_block loop_end = desc->out_edge->src;
411*e4b17023SJohn Marino enum machine_mode mode;
412*e4b17023SJohn Marino rtx true_prob_val;
413*e4b17023SJohn Marino
414*e4b17023SJohn Marino jump_insn = BB_END (loop_end);
415*e4b17023SJohn Marino
416*e4b17023SJohn Marino if (dump_file)
417*e4b17023SJohn Marino {
418*e4b17023SJohn Marino fprintf (dump_file, "Doloop: Inserting doloop pattern (");
419*e4b17023SJohn Marino if (desc->const_iter)
420*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
421*e4b17023SJohn Marino else
422*e4b17023SJohn Marino fputs ("runtime", dump_file);
423*e4b17023SJohn Marino fputs (" iterations).\n", dump_file);
424*e4b17023SJohn Marino }
425*e4b17023SJohn Marino
426*e4b17023SJohn Marino /* Get the probability of the original branch. If it exists we would
427*e4b17023SJohn Marino need to update REG_BR_PROB of the new jump_insn. */
428*e4b17023SJohn Marino true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
429*e4b17023SJohn Marino
430*e4b17023SJohn Marino /* Discard original jump to continue loop. The original compare
431*e4b17023SJohn Marino result may still be live, so it cannot be discarded explicitly. */
432*e4b17023SJohn Marino delete_insn (jump_insn);
433*e4b17023SJohn Marino
434*e4b17023SJohn Marino counter_reg = XEXP (condition, 0);
435*e4b17023SJohn Marino if (GET_CODE (counter_reg) == PLUS)
436*e4b17023SJohn Marino counter_reg = XEXP (counter_reg, 0);
437*e4b17023SJohn Marino mode = GET_MODE (counter_reg);
438*e4b17023SJohn Marino
439*e4b17023SJohn Marino increment_count = false;
440*e4b17023SJohn Marino switch (GET_CODE (condition))
441*e4b17023SJohn Marino {
442*e4b17023SJohn Marino case NE:
443*e4b17023SJohn Marino /* Currently only NE tests against zero and one are supported. */
444*e4b17023SJohn Marino noloop = XEXP (condition, 1);
445*e4b17023SJohn Marino if (noloop != const0_rtx)
446*e4b17023SJohn Marino {
447*e4b17023SJohn Marino gcc_assert (noloop == const1_rtx);
448*e4b17023SJohn Marino increment_count = true;
449*e4b17023SJohn Marino }
450*e4b17023SJohn Marino break;
451*e4b17023SJohn Marino
452*e4b17023SJohn Marino case GE:
453*e4b17023SJohn Marino /* Currently only GE tests against zero are supported. */
454*e4b17023SJohn Marino gcc_assert (XEXP (condition, 1) == const0_rtx);
455*e4b17023SJohn Marino
456*e4b17023SJohn Marino noloop = constm1_rtx;
457*e4b17023SJohn Marino
458*e4b17023SJohn Marino /* The iteration count does not need incrementing for a GE test. */
459*e4b17023SJohn Marino increment_count = false;
460*e4b17023SJohn Marino
461*e4b17023SJohn Marino /* Determine if the iteration counter will be non-negative.
462*e4b17023SJohn Marino Note that the maximum value loaded is iterations_max - 1. */
463*e4b17023SJohn Marino if (desc->niter_max
464*e4b17023SJohn Marino <= ((unsigned HOST_WIDEST_INT) 1
465*e4b17023SJohn Marino << (GET_MODE_PRECISION (mode) - 1)))
466*e4b17023SJohn Marino nonneg = 1;
467*e4b17023SJohn Marino break;
468*e4b17023SJohn Marino
469*e4b17023SJohn Marino /* Abort if an invalid doloop pattern has been generated. */
470*e4b17023SJohn Marino default:
471*e4b17023SJohn Marino gcc_unreachable ();
472*e4b17023SJohn Marino }
473*e4b17023SJohn Marino
474*e4b17023SJohn Marino if (increment_count)
475*e4b17023SJohn Marino count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
476*e4b17023SJohn Marino
477*e4b17023SJohn Marino /* Insert initialization of the count register into the loop header. */
478*e4b17023SJohn Marino start_sequence ();
479*e4b17023SJohn Marino tmp = force_operand (count, counter_reg);
480*e4b17023SJohn Marino convert_move (counter_reg, tmp, 1);
481*e4b17023SJohn Marino sequence = get_insns ();
482*e4b17023SJohn Marino end_sequence ();
483*e4b17023SJohn Marino emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
484*e4b17023SJohn Marino
485*e4b17023SJohn Marino if (desc->noloop_assumptions)
486*e4b17023SJohn Marino {
487*e4b17023SJohn Marino rtx ass = copy_rtx (desc->noloop_assumptions);
488*e4b17023SJohn Marino basic_block preheader = loop_preheader_edge (loop)->src;
489*e4b17023SJohn Marino basic_block set_zero
490*e4b17023SJohn Marino = split_edge (loop_preheader_edge (loop));
491*e4b17023SJohn Marino basic_block new_preheader
492*e4b17023SJohn Marino = split_edge (loop_preheader_edge (loop));
493*e4b17023SJohn Marino edge te;
494*e4b17023SJohn Marino
495*e4b17023SJohn Marino /* Expand the condition testing the assumptions and if it does not pass,
496*e4b17023SJohn Marino reset the count register to 0. */
497*e4b17023SJohn Marino redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
498*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
499*e4b17023SJohn Marino
500*e4b17023SJohn Marino set_zero->count = 0;
501*e4b17023SJohn Marino set_zero->frequency = 0;
502*e4b17023SJohn Marino
503*e4b17023SJohn Marino te = single_succ_edge (preheader);
504*e4b17023SJohn Marino for (; ass; ass = XEXP (ass, 1))
505*e4b17023SJohn Marino if (!add_test (XEXP (ass, 0), &te, set_zero))
506*e4b17023SJohn Marino break;
507*e4b17023SJohn Marino
508*e4b17023SJohn Marino if (ass)
509*e4b17023SJohn Marino {
510*e4b17023SJohn Marino /* We reached a condition that is always true. This is very hard to
511*e4b17023SJohn Marino reproduce (such a loop does not roll, and thus it would most
512*e4b17023SJohn Marino likely get optimized out by some of the preceding optimizations).
513*e4b17023SJohn Marino In fact, I do not have any testcase for it. However, it would
514*e4b17023SJohn Marino also be very hard to show that it is impossible, so we must
515*e4b17023SJohn Marino handle this case. */
516*e4b17023SJohn Marino set_zero->count = preheader->count;
517*e4b17023SJohn Marino set_zero->frequency = preheader->frequency;
518*e4b17023SJohn Marino }
519*e4b17023SJohn Marino
520*e4b17023SJohn Marino if (EDGE_COUNT (set_zero->preds) == 0)
521*e4b17023SJohn Marino {
522*e4b17023SJohn Marino /* All the conditions were simplified to false, remove the
523*e4b17023SJohn Marino unreachable set_zero block. */
524*e4b17023SJohn Marino delete_basic_block (set_zero);
525*e4b17023SJohn Marino }
526*e4b17023SJohn Marino else
527*e4b17023SJohn Marino {
528*e4b17023SJohn Marino /* Reset the counter to zero in the set_zero block. */
529*e4b17023SJohn Marino start_sequence ();
530*e4b17023SJohn Marino convert_move (counter_reg, noloop, 0);
531*e4b17023SJohn Marino sequence = get_insns ();
532*e4b17023SJohn Marino end_sequence ();
533*e4b17023SJohn Marino emit_insn_after (sequence, BB_END (set_zero));
534*e4b17023SJohn Marino
535*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, set_zero,
536*e4b17023SJohn Marino recompute_dominator (CDI_DOMINATORS,
537*e4b17023SJohn Marino set_zero));
538*e4b17023SJohn Marino }
539*e4b17023SJohn Marino
540*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, new_preheader,
541*e4b17023SJohn Marino recompute_dominator (CDI_DOMINATORS,
542*e4b17023SJohn Marino new_preheader));
543*e4b17023SJohn Marino }
544*e4b17023SJohn Marino
545*e4b17023SJohn Marino /* Some targets (eg, C4x) need to initialize special looping
546*e4b17023SJohn Marino registers. */
547*e4b17023SJohn Marino #ifdef HAVE_doloop_begin
548*e4b17023SJohn Marino {
549*e4b17023SJohn Marino rtx init;
550*e4b17023SJohn Marino unsigned level = get_loop_level (loop) + 1;
551*e4b17023SJohn Marino init = gen_doloop_begin (counter_reg,
552*e4b17023SJohn Marino desc->const_iter ? desc->niter_expr : const0_rtx,
553*e4b17023SJohn Marino GEN_INT (desc->niter_max),
554*e4b17023SJohn Marino GEN_INT (level));
555*e4b17023SJohn Marino if (init)
556*e4b17023SJohn Marino {
557*e4b17023SJohn Marino start_sequence ();
558*e4b17023SJohn Marino emit_insn (init);
559*e4b17023SJohn Marino sequence = get_insns ();
560*e4b17023SJohn Marino end_sequence ();
561*e4b17023SJohn Marino emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
562*e4b17023SJohn Marino }
563*e4b17023SJohn Marino }
564*e4b17023SJohn Marino #endif
565*e4b17023SJohn Marino
566*e4b17023SJohn Marino /* Insert the new low-overhead looping insn. */
567*e4b17023SJohn Marino emit_jump_insn_after (doloop_seq, BB_END (loop_end));
568*e4b17023SJohn Marino jump_insn = BB_END (loop_end);
569*e4b17023SJohn Marino jump_label = block_label (desc->in_edge->dest);
570*e4b17023SJohn Marino JUMP_LABEL (jump_insn) = jump_label;
571*e4b17023SJohn Marino LABEL_NUSES (jump_label)++;
572*e4b17023SJohn Marino
573*e4b17023SJohn Marino /* Ensure the right fallthru edge is marked, for case we have reversed
574*e4b17023SJohn Marino the condition. */
575*e4b17023SJohn Marino desc->in_edge->flags &= ~EDGE_FALLTHRU;
576*e4b17023SJohn Marino desc->out_edge->flags |= EDGE_FALLTHRU;
577*e4b17023SJohn Marino
578*e4b17023SJohn Marino /* Add a REG_NONNEG note if the actual or estimated maximum number
579*e4b17023SJohn Marino of iterations is non-negative. */
580*e4b17023SJohn Marino if (nonneg)
581*e4b17023SJohn Marino add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
582*e4b17023SJohn Marino
583*e4b17023SJohn Marino /* Update the REG_BR_PROB note. */
584*e4b17023SJohn Marino if (true_prob_val)
585*e4b17023SJohn Marino {
586*e4b17023SJohn Marino /* Seems safer to use the branch probability. */
587*e4b17023SJohn Marino add_reg_note (jump_insn, REG_BR_PROB,
588*e4b17023SJohn Marino GEN_INT (desc->in_edge->probability));
589*e4b17023SJohn Marino }
590*e4b17023SJohn Marino }
591*e4b17023SJohn Marino
592*e4b17023SJohn Marino /* Process loop described by LOOP validating that the loop is suitable for
593*e4b17023SJohn Marino conversion to use a low overhead looping instruction, replacing the jump
594*e4b17023SJohn Marino insn where suitable. Returns true if the loop was successfully
595*e4b17023SJohn Marino modified. */
596*e4b17023SJohn Marino
597*e4b17023SJohn Marino static bool
doloop_optimize(struct loop * loop)598*e4b17023SJohn Marino doloop_optimize (struct loop *loop)
599*e4b17023SJohn Marino {
600*e4b17023SJohn Marino enum machine_mode mode;
601*e4b17023SJohn Marino rtx doloop_seq, doloop_pat, doloop_reg;
602*e4b17023SJohn Marino rtx iterations, count;
603*e4b17023SJohn Marino rtx iterations_max;
604*e4b17023SJohn Marino rtx start_label;
605*e4b17023SJohn Marino rtx condition;
606*e4b17023SJohn Marino unsigned level, est_niter;
607*e4b17023SJohn Marino int max_cost;
608*e4b17023SJohn Marino struct niter_desc *desc;
609*e4b17023SJohn Marino unsigned word_mode_size;
610*e4b17023SJohn Marino unsigned HOST_WIDE_INT word_mode_max;
611*e4b17023SJohn Marino
612*e4b17023SJohn Marino if (dump_file)
613*e4b17023SJohn Marino fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
614*e4b17023SJohn Marino
615*e4b17023SJohn Marino iv_analysis_loop_init (loop);
616*e4b17023SJohn Marino
617*e4b17023SJohn Marino /* Find the simple exit of a LOOP. */
618*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
619*e4b17023SJohn Marino
620*e4b17023SJohn Marino /* Check that loop is a candidate for a low-overhead looping insn. */
621*e4b17023SJohn Marino if (!doloop_valid_p (loop, desc))
622*e4b17023SJohn Marino {
623*e4b17023SJohn Marino if (dump_file)
624*e4b17023SJohn Marino fprintf (dump_file,
625*e4b17023SJohn Marino "Doloop: The loop is not suitable.\n");
626*e4b17023SJohn Marino return false;
627*e4b17023SJohn Marino }
628*e4b17023SJohn Marino mode = desc->mode;
629*e4b17023SJohn Marino
630*e4b17023SJohn Marino est_niter = 3;
631*e4b17023SJohn Marino if (desc->const_iter)
632*e4b17023SJohn Marino est_niter = desc->niter;
633*e4b17023SJohn Marino /* If the estimate on number of iterations is reliable (comes from profile
634*e4b17023SJohn Marino feedback), use it. Do not use it normally, since the expected number
635*e4b17023SJohn Marino of iterations of an unrolled loop is 2. */
636*e4b17023SJohn Marino if (loop->header->count)
637*e4b17023SJohn Marino est_niter = expected_loop_iterations (loop);
638*e4b17023SJohn Marino
639*e4b17023SJohn Marino if (est_niter < 3)
640*e4b17023SJohn Marino {
641*e4b17023SJohn Marino if (dump_file)
642*e4b17023SJohn Marino fprintf (dump_file,
643*e4b17023SJohn Marino "Doloop: Too few iterations (%u) to be profitable.\n",
644*e4b17023SJohn Marino est_niter);
645*e4b17023SJohn Marino return false;
646*e4b17023SJohn Marino }
647*e4b17023SJohn Marino
648*e4b17023SJohn Marino max_cost
649*e4b17023SJohn Marino = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
650*e4b17023SJohn Marino if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
651*e4b17023SJohn Marino > max_cost)
652*e4b17023SJohn Marino {
653*e4b17023SJohn Marino if (dump_file)
654*e4b17023SJohn Marino fprintf (dump_file,
655*e4b17023SJohn Marino "Doloop: number of iterations too costly to compute.\n");
656*e4b17023SJohn Marino return false;
657*e4b17023SJohn Marino }
658*e4b17023SJohn Marino
659*e4b17023SJohn Marino count = copy_rtx (desc->niter_expr);
660*e4b17023SJohn Marino iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
661*e4b17023SJohn Marino iterations_max = GEN_INT (desc->niter_max);
662*e4b17023SJohn Marino level = get_loop_level (loop) + 1;
663*e4b17023SJohn Marino
664*e4b17023SJohn Marino /* Generate looping insn. If the pattern FAILs then give up trying
665*e4b17023SJohn Marino to modify the loop since there is some aspect the back-end does
666*e4b17023SJohn Marino not like. */
667*e4b17023SJohn Marino start_label = block_label (desc->in_edge->dest);
668*e4b17023SJohn Marino doloop_reg = gen_reg_rtx (mode);
669*e4b17023SJohn Marino doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
670*e4b17023SJohn Marino GEN_INT (level), start_label);
671*e4b17023SJohn Marino
672*e4b17023SJohn Marino word_mode_size = GET_MODE_PRECISION (word_mode);
673*e4b17023SJohn Marino word_mode_max
674*e4b17023SJohn Marino = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
675*e4b17023SJohn Marino if (! doloop_seq
676*e4b17023SJohn Marino && mode != word_mode
677*e4b17023SJohn Marino /* Before trying mode different from the one in that # of iterations is
678*e4b17023SJohn Marino computed, we must be sure that the number of iterations fits into
679*e4b17023SJohn Marino the new mode. */
680*e4b17023SJohn Marino && (word_mode_size >= GET_MODE_PRECISION (mode)
681*e4b17023SJohn Marino || desc->niter_max <= word_mode_max))
682*e4b17023SJohn Marino {
683*e4b17023SJohn Marino if (word_mode_size > GET_MODE_PRECISION (mode))
684*e4b17023SJohn Marino {
685*e4b17023SJohn Marino count = simplify_gen_unary (ZERO_EXTEND, word_mode,
686*e4b17023SJohn Marino count, mode);
687*e4b17023SJohn Marino iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
688*e4b17023SJohn Marino iterations, mode);
689*e4b17023SJohn Marino iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
690*e4b17023SJohn Marino iterations_max, mode);
691*e4b17023SJohn Marino }
692*e4b17023SJohn Marino else
693*e4b17023SJohn Marino {
694*e4b17023SJohn Marino count = lowpart_subreg (word_mode, count, mode);
695*e4b17023SJohn Marino iterations = lowpart_subreg (word_mode, iterations, mode);
696*e4b17023SJohn Marino iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
697*e4b17023SJohn Marino }
698*e4b17023SJohn Marino PUT_MODE (doloop_reg, word_mode);
699*e4b17023SJohn Marino doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
700*e4b17023SJohn Marino GEN_INT (level), start_label);
701*e4b17023SJohn Marino }
702*e4b17023SJohn Marino if (! doloop_seq)
703*e4b17023SJohn Marino {
704*e4b17023SJohn Marino if (dump_file)
705*e4b17023SJohn Marino fprintf (dump_file,
706*e4b17023SJohn Marino "Doloop: Target unwilling to use doloop pattern!\n");
707*e4b17023SJohn Marino return false;
708*e4b17023SJohn Marino }
709*e4b17023SJohn Marino
710*e4b17023SJohn Marino /* If multiple instructions were created, the last must be the
711*e4b17023SJohn Marino jump instruction. Also, a raw define_insn may yield a plain
712*e4b17023SJohn Marino pattern. */
713*e4b17023SJohn Marino doloop_pat = doloop_seq;
714*e4b17023SJohn Marino if (INSN_P (doloop_pat))
715*e4b17023SJohn Marino {
716*e4b17023SJohn Marino while (NEXT_INSN (doloop_pat) != NULL_RTX)
717*e4b17023SJohn Marino doloop_pat = NEXT_INSN (doloop_pat);
718*e4b17023SJohn Marino if (!JUMP_P (doloop_pat))
719*e4b17023SJohn Marino doloop_pat = NULL_RTX;
720*e4b17023SJohn Marino }
721*e4b17023SJohn Marino
722*e4b17023SJohn Marino if (! doloop_pat
723*e4b17023SJohn Marino || ! (condition = doloop_condition_get (doloop_pat)))
724*e4b17023SJohn Marino {
725*e4b17023SJohn Marino if (dump_file)
726*e4b17023SJohn Marino fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
727*e4b17023SJohn Marino return false;
728*e4b17023SJohn Marino }
729*e4b17023SJohn Marino
730*e4b17023SJohn Marino doloop_modify (loop, desc, doloop_seq, condition, count);
731*e4b17023SJohn Marino return true;
732*e4b17023SJohn Marino }
733*e4b17023SJohn Marino
734*e4b17023SJohn Marino /* This is the main entry point. Process all loops using doloop_optimize. */
735*e4b17023SJohn Marino
736*e4b17023SJohn Marino void
doloop_optimize_loops(void)737*e4b17023SJohn Marino doloop_optimize_loops (void)
738*e4b17023SJohn Marino {
739*e4b17023SJohn Marino loop_iterator li;
740*e4b17023SJohn Marino struct loop *loop;
741*e4b17023SJohn Marino
742*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0)
743*e4b17023SJohn Marino {
744*e4b17023SJohn Marino doloop_optimize (loop);
745*e4b17023SJohn Marino }
746*e4b17023SJohn Marino
747*e4b17023SJohn Marino iv_analysis_done ();
748*e4b17023SJohn Marino
749*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
750*e4b17023SJohn Marino verify_dominators (CDI_DOMINATORS);
751*e4b17023SJohn Marino verify_loop_structure ();
752*e4b17023SJohn Marino #endif
753*e4b17023SJohn Marino }
754*e4b17023SJohn Marino #endif /* HAVE_doloop_end */
755*e4b17023SJohn Marino
756