138fd1498Szrj /* Loop interchange.
238fd1498Szrj Copyright (C) 2017-2018 Free Software Foundation, Inc.
338fd1498Szrj Contributed by ARM Ltd.
438fd1498Szrj
538fd1498Szrj This file is part of GCC.
638fd1498Szrj
738fd1498Szrj GCC is free software; you can redistribute it and/or modify it
838fd1498Szrj under the terms of the GNU General Public License as published by the
938fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
1038fd1498Szrj later version.
1138fd1498Szrj
1238fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1338fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1438fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1538fd1498Szrj for more details.
1638fd1498Szrj
1738fd1498Szrj You should have received a copy of the GNU General Public License
1838fd1498Szrj along with GCC; see the file COPYING3. If not see
1938fd1498Szrj <http://www.gnu.org/licenses/>. */
2038fd1498Szrj
2138fd1498Szrj #include "config.h"
2238fd1498Szrj #include "system.h"
2338fd1498Szrj #include "coretypes.h"
2438fd1498Szrj #include "backend.h"
2538fd1498Szrj #include "is-a.h"
2638fd1498Szrj #include "tree.h"
2738fd1498Szrj #include "gimple.h"
2838fd1498Szrj #include "tree-pass.h"
2938fd1498Szrj #include "ssa.h"
3038fd1498Szrj #include "gimple-pretty-print.h"
3138fd1498Szrj #include "fold-const.h"
3238fd1498Szrj #include "gimplify.h"
3338fd1498Szrj #include "gimple-iterator.h"
3438fd1498Szrj #include "gimplify-me.h"
3538fd1498Szrj #include "cfgloop.h"
3638fd1498Szrj #include "params.h"
3738fd1498Szrj #include "tree-ssa.h"
3838fd1498Szrj #include "tree-scalar-evolution.h"
3938fd1498Szrj #include "tree-ssa-loop-manip.h"
4038fd1498Szrj #include "tree-ssa-loop-niter.h"
4138fd1498Szrj #include "tree-ssa-loop-ivopts.h"
4238fd1498Szrj #include "tree-ssa-dce.h"
4338fd1498Szrj #include "tree-data-ref.h"
4438fd1498Szrj #include "tree-vectorizer.h"
4538fd1498Szrj
4638fd1498Szrj /* This pass performs loop interchange: for example, the loop nest
4738fd1498Szrj
4838fd1498Szrj for (int j = 0; j < N; j++)
4938fd1498Szrj for (int k = 0; k < N; k++)
5038fd1498Szrj for (int i = 0; i < N; i++)
5138fd1498Szrj c[i][j] = c[i][j] + a[i][k]*b[k][j];
5238fd1498Szrj
5338fd1498Szrj is transformed to
5438fd1498Szrj
5538fd1498Szrj for (int i = 0; i < N; i++)
5638fd1498Szrj for (int j = 0; j < N; j++)
5738fd1498Szrj for (int k = 0; k < N; k++)
5838fd1498Szrj c[i][j] = c[i][j] + a[i][k]*b[k][j];
5938fd1498Szrj
6038fd1498Szrj This pass implements loop interchange in the following steps:
6138fd1498Szrj
6238fd1498Szrj 1) Find perfect loop nest for each innermost loop and compute data
6338fd1498Szrj dependence relations for it. For above example, loop nest is
6438fd1498Szrj <loop_j, loop_k, loop_i>.
6538fd1498Szrj 2) From innermost to outermost loop, this pass tries to interchange
6638fd1498Szrj each loop pair. For above case, it firstly tries to interchange
6738fd1498Szrj <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
6838fd1498Szrj Then it tries to interchange <loop_j, loop_i> and loop nest becomes
6938fd1498Szrj <loop_i, loop_j, loop_k>. The overall effect is to move innermost
7038fd1498Szrj loop to the outermost position. For loop pair <loop_i, loop_j>
7138fd1498Szrj to be interchanged, we:
7238fd1498Szrj 3) Check if data dependence relations are valid for loop interchange.
7338fd1498Szrj 4) Check if both loops can be interchanged in terms of transformation.
7438fd1498Szrj 5) Check if interchanging the two loops is profitable.
7538fd1498Szrj 6) Interchange the two loops by mapping induction variables.
7638fd1498Szrj
7738fd1498Szrj This pass also handles reductions in loop nest. So far we only support
7838fd1498Szrj simple reduction of inner loop and double reduction of the loop nest. */
7938fd1498Szrj
8038fd1498Szrj /* Maximum number of stmts in each loop that should be interchanged. */
8138fd1498Szrj #define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
8238fd1498Szrj /* Maximum number of data references in loop nest. */
8338fd1498Szrj #define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
8438fd1498Szrj
8538fd1498Szrj /* Comparison ratio of access stride between inner/outer loops to be
8638fd1498Szrj interchanged. This is the minimum stride ratio for loop interchange
8738fd1498Szrj to be profitable. */
8838fd1498Szrj #define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
8938fd1498Szrj /* The same as above, but we require higher ratio for interchanging the
9038fd1498Szrj innermost two loops. */
9138fd1498Szrj #define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
9238fd1498Szrj
9338fd1498Szrj /* Comparison ratio of stmt cost between inner/outer loops. Loops won't
9438fd1498Szrj be interchanged if outer loop has too many stmts. */
9538fd1498Szrj #define STMT_COST_RATIO (3)
9638fd1498Szrj
9738fd1498Szrj /* Vector of strides that DR accesses in each level loop of a loop nest. */
9838fd1498Szrj #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
9938fd1498Szrj
10038fd1498Szrj /* Structure recording loop induction variable. */
10138fd1498Szrj typedef struct induction
10238fd1498Szrj {
10338fd1498Szrj /* IV itself. */
10438fd1498Szrj tree var;
10538fd1498Szrj /* IV's initializing value, which is the init arg of the IV PHI node. */
10638fd1498Szrj tree init_val;
10738fd1498Szrj /* IV's initializing expr, which is (the expanded result of) init_val. */
10838fd1498Szrj tree init_expr;
10938fd1498Szrj /* IV's step. */
11038fd1498Szrj tree step;
11138fd1498Szrj } *induction_p;
11238fd1498Szrj
11338fd1498Szrj /* Enum type for loop reduction variable. */
11438fd1498Szrj enum reduction_type
11538fd1498Szrj {
11638fd1498Szrj UNKNOWN_RTYPE = 0,
11738fd1498Szrj SIMPLE_RTYPE,
11838fd1498Szrj DOUBLE_RTYPE
11938fd1498Szrj };
12038fd1498Szrj
12138fd1498Szrj /* Structure recording loop reduction variable. */
12238fd1498Szrj typedef struct reduction
12338fd1498Szrj {
12438fd1498Szrj /* Reduction itself. */
12538fd1498Szrj tree var;
12638fd1498Szrj /* PHI node defining reduction variable. */
12738fd1498Szrj gphi *phi;
12838fd1498Szrj /* Init and next variables of the reduction. */
12938fd1498Szrj tree init;
13038fd1498Szrj tree next;
13138fd1498Szrj /* Lcssa PHI node if reduction is used outside of its definition loop. */
13238fd1498Szrj gphi *lcssa_phi;
13338fd1498Szrj /* Stmts defining init and next. */
13438fd1498Szrj gimple *producer;
13538fd1498Szrj gimple *consumer;
13638fd1498Szrj /* If init is loaded from memory, this is the loading memory reference. */
13738fd1498Szrj tree init_ref;
13838fd1498Szrj /* If reduction is finally stored to memory, this is the stored memory
13938fd1498Szrj reference. */
14038fd1498Szrj tree fini_ref;
14138fd1498Szrj enum reduction_type type;
14238fd1498Szrj } *reduction_p;
14338fd1498Szrj
14438fd1498Szrj
14538fd1498Szrj /* Dump reduction RE. */
14638fd1498Szrj
14738fd1498Szrj static void
dump_reduction(reduction_p re)14838fd1498Szrj dump_reduction (reduction_p re)
14938fd1498Szrj {
15038fd1498Szrj if (re->type == SIMPLE_RTYPE)
15138fd1498Szrj fprintf (dump_file, " Simple reduction: ");
15238fd1498Szrj else if (re->type == DOUBLE_RTYPE)
15338fd1498Szrj fprintf (dump_file, " Double reduction: ");
15438fd1498Szrj else
15538fd1498Szrj fprintf (dump_file, " Unknown reduction: ");
15638fd1498Szrj
15738fd1498Szrj print_gimple_stmt (dump_file, re->phi, 0);
15838fd1498Szrj }
15938fd1498Szrj
16038fd1498Szrj /* Dump LOOP's induction IV. */
16138fd1498Szrj static void
dump_induction(struct loop * loop,induction_p iv)16238fd1498Szrj dump_induction (struct loop *loop, induction_p iv)
16338fd1498Szrj {
16438fd1498Szrj fprintf (dump_file, " Induction: ");
16538fd1498Szrj print_generic_expr (dump_file, iv->var, TDF_SLIM);
16638fd1498Szrj fprintf (dump_file, " = {");
16738fd1498Szrj print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
16838fd1498Szrj fprintf (dump_file, ", ");
16938fd1498Szrj print_generic_expr (dump_file, iv->step, TDF_SLIM);
17038fd1498Szrj fprintf (dump_file, "}_%d\n", loop->num);
17138fd1498Szrj }
17238fd1498Szrj
17338fd1498Szrj /* Loop candidate for interchange. */
17438fd1498Szrj
17538fd1498Szrj struct loop_cand
17638fd1498Szrj {
17738fd1498Szrj loop_cand (struct loop *, struct loop *);
17838fd1498Szrj ~loop_cand ();
17938fd1498Szrj
18038fd1498Szrj reduction_p find_reduction_by_stmt (gimple *);
18138fd1498Szrj void classify_simple_reduction (reduction_p);
18238fd1498Szrj bool analyze_iloop_reduction_var (tree);
18338fd1498Szrj bool analyze_oloop_reduction_var (loop_cand *, tree);
18438fd1498Szrj bool analyze_induction_var (tree, tree);
18538fd1498Szrj bool analyze_carried_vars (loop_cand *);
18638fd1498Szrj bool analyze_lcssa_phis (void);
18738fd1498Szrj bool can_interchange_p (loop_cand *);
18838fd1498Szrj void undo_simple_reduction (reduction_p, bitmap);
18938fd1498Szrj
19038fd1498Szrj /* The loop itself. */
19138fd1498Szrj struct loop *m_loop;
19238fd1498Szrj /* The outer loop for interchange. It equals to loop if this loop cand
19338fd1498Szrj itself represents the outer loop. */
19438fd1498Szrj struct loop *m_outer;
19538fd1498Szrj /* Vector of induction variables in loop. */
19638fd1498Szrj vec<induction_p> m_inductions;
19738fd1498Szrj /* Vector of reduction variables in loop. */
19838fd1498Szrj vec<reduction_p> m_reductions;
19938fd1498Szrj /* Lcssa PHI nodes of this loop. */
20038fd1498Szrj vec<gphi *> m_lcssa_nodes;
20138fd1498Szrj /* Single exit edge of this loop. */
20238fd1498Szrj edge m_exit;
20338fd1498Szrj /* Basic blocks of this loop. */
20438fd1498Szrj basic_block *m_bbs;
20538fd1498Szrj /* Number of stmts of this loop. Inner loops' stmts are not included. */
20638fd1498Szrj int m_num_stmts;
20738fd1498Szrj /* Number of constant initialized simple reduction. */
20838fd1498Szrj int m_const_init_reduc;
20938fd1498Szrj };
21038fd1498Szrj
21138fd1498Szrj /* Constructor. */
21238fd1498Szrj
loop_cand(struct loop * loop,struct loop * outer)21338fd1498Szrj loop_cand::loop_cand (struct loop *loop, struct loop *outer)
21438fd1498Szrj : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
21538fd1498Szrj m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
21638fd1498Szrj {
21738fd1498Szrj m_inductions.create (3);
21838fd1498Szrj m_reductions.create (3);
21938fd1498Szrj m_lcssa_nodes.create (3);
22038fd1498Szrj }
22138fd1498Szrj
22238fd1498Szrj /* Destructor. */
22338fd1498Szrj
~loop_cand()22438fd1498Szrj loop_cand::~loop_cand ()
22538fd1498Szrj {
22638fd1498Szrj induction_p iv;
22738fd1498Szrj for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
22838fd1498Szrj free (iv);
22938fd1498Szrj
23038fd1498Szrj reduction_p re;
23138fd1498Szrj for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
23238fd1498Szrj free (re);
23338fd1498Szrj
23438fd1498Szrj m_inductions.release ();
23538fd1498Szrj m_reductions.release ();
23638fd1498Szrj m_lcssa_nodes.release ();
23738fd1498Szrj free (m_bbs);
23838fd1498Szrj }
23938fd1498Szrj
24038fd1498Szrj /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
24138fd1498Szrj
24238fd1498Szrj static gimple *
single_use_in_loop(tree var,struct loop * loop)24338fd1498Szrj single_use_in_loop (tree var, struct loop *loop)
24438fd1498Szrj {
24538fd1498Szrj gimple *stmt, *res = NULL;
24638fd1498Szrj use_operand_p use_p;
24738fd1498Szrj imm_use_iterator iterator;
24838fd1498Szrj
24938fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
25038fd1498Szrj {
25138fd1498Szrj stmt = USE_STMT (use_p);
25238fd1498Szrj if (is_gimple_debug (stmt))
25338fd1498Szrj continue;
25438fd1498Szrj
25538fd1498Szrj if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
25638fd1498Szrj continue;
25738fd1498Szrj
25838fd1498Szrj if (res)
25938fd1498Szrj return NULL;
26038fd1498Szrj
26138fd1498Szrj res = stmt;
26238fd1498Szrj }
26338fd1498Szrj return res;
26438fd1498Szrj }
26538fd1498Szrj
26638fd1498Szrj /* Return true if E is unsupported in loop interchange, i.e, E is a complex
26738fd1498Szrj edge or part of irreducible loop. */
26838fd1498Szrj
26938fd1498Szrj static inline bool
unsupported_edge(edge e)27038fd1498Szrj unsupported_edge (edge e)
27138fd1498Szrj {
27238fd1498Szrj return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
27338fd1498Szrj }
27438fd1498Szrj
27538fd1498Szrj /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
27638fd1498Szrj stmt. */
27738fd1498Szrj
27838fd1498Szrj reduction_p
find_reduction_by_stmt(gimple * stmt)27938fd1498Szrj loop_cand::find_reduction_by_stmt (gimple *stmt)
28038fd1498Szrj {
28138fd1498Szrj gphi *phi = dyn_cast <gphi *> (stmt);
28238fd1498Szrj reduction_p re;
28338fd1498Szrj
28438fd1498Szrj for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
28538fd1498Szrj if ((phi != NULL && phi == re->lcssa_phi)
28638fd1498Szrj || (stmt == re->producer || stmt == re->consumer))
28738fd1498Szrj return re;
28838fd1498Szrj
28938fd1498Szrj return NULL;
29038fd1498Szrj }
29138fd1498Szrj
29238fd1498Szrj /* Return true if current loop_cand be interchanged. ILOOP is not NULL if
29338fd1498Szrj current loop_cand is outer loop in loop nest. */
29438fd1498Szrj
29538fd1498Szrj bool
can_interchange_p(loop_cand * iloop)29638fd1498Szrj loop_cand::can_interchange_p (loop_cand *iloop)
29738fd1498Szrj {
29838fd1498Szrj /* For now we only support at most one reduction. */
29938fd1498Szrj unsigned allowed_reduction_num = 1;
30038fd1498Szrj
30138fd1498Szrj /* Only support reduction if the loop nest to be interchanged is the
30238fd1498Szrj innermostin two loops. */
30338fd1498Szrj if ((iloop == NULL && m_loop->inner != NULL)
30438fd1498Szrj || (iloop != NULL && iloop->m_loop->inner != NULL))
30538fd1498Szrj allowed_reduction_num = 0;
30638fd1498Szrj
30738fd1498Szrj if (m_reductions.length () > allowed_reduction_num
30838fd1498Szrj || (m_reductions.length () == 1
30938fd1498Szrj && m_reductions[0]->type == UNKNOWN_RTYPE))
31038fd1498Szrj return false;
31138fd1498Szrj
31238fd1498Szrj /* Only support lcssa PHI node which is for reduction. */
31338fd1498Szrj if (m_lcssa_nodes.length () > allowed_reduction_num)
31438fd1498Szrj return false;
31538fd1498Szrj
31638fd1498Szrj /* Check if basic block has any unsupported operation. Note basic blocks
31738fd1498Szrj of inner loops are not checked here. */
31838fd1498Szrj for (unsigned i = 0; i < m_loop->num_nodes; i++)
31938fd1498Szrj {
32038fd1498Szrj basic_block bb = m_bbs[i];
32138fd1498Szrj gphi_iterator psi;
32238fd1498Szrj gimple_stmt_iterator gsi;
32338fd1498Szrj
32438fd1498Szrj /* Skip basic blocks of inner loops. */
32538fd1498Szrj if (bb->loop_father != m_loop)
32638fd1498Szrj continue;
32738fd1498Szrj
32838fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
32938fd1498Szrj {
33038fd1498Szrj gimple *stmt = gsi_stmt (gsi);
33138fd1498Szrj if (is_gimple_debug (stmt))
33238fd1498Szrj continue;
33338fd1498Szrj
33438fd1498Szrj if (gimple_has_side_effects (stmt))
33538fd1498Szrj return false;
33638fd1498Szrj
33738fd1498Szrj m_num_stmts++;
33838fd1498Szrj if (gcall *call = dyn_cast <gcall *> (stmt))
33938fd1498Szrj {
34038fd1498Szrj /* In basic block of outer loop, the call should be cheap since
34138fd1498Szrj it will be moved to inner loop. */
34238fd1498Szrj if (iloop != NULL
34338fd1498Szrj && !gimple_inexpensive_call_p (call))
34438fd1498Szrj return false;
34538fd1498Szrj continue;
34638fd1498Szrj }
34738fd1498Szrj
34838fd1498Szrj if (!iloop || !gimple_vuse (stmt))
34938fd1498Szrj continue;
35038fd1498Szrj
35138fd1498Szrj /* Support stmt accessing memory in outer loop only if it is for
35238fd1498Szrj inner loop's reduction. */
35338fd1498Szrj if (iloop->find_reduction_by_stmt (stmt))
35438fd1498Szrj continue;
35538fd1498Szrj
35638fd1498Szrj tree lhs;
35738fd1498Szrj /* Support loop invariant memory reference if it's only used once by
35838fd1498Szrj inner loop. */
35938fd1498Szrj /* ??? How's this checking for invariantness? */
36038fd1498Szrj if (gimple_assign_single_p (stmt)
36138fd1498Szrj && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
36238fd1498Szrj && TREE_CODE (lhs) == SSA_NAME
36338fd1498Szrj && single_use_in_loop (lhs, iloop->m_loop))
36438fd1498Szrj continue;
36538fd1498Szrj
36638fd1498Szrj return false;
36738fd1498Szrj }
36838fd1498Szrj /* Check if loop has too many stmts. */
36938fd1498Szrj if (m_num_stmts > MAX_NUM_STMT)
37038fd1498Szrj return false;
37138fd1498Szrj
37238fd1498Szrj /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
37338fd1498Szrj loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
37438fd1498Szrj if (!iloop || bb == m_loop->header
37538fd1498Szrj || bb == iloop->m_exit->dest)
37638fd1498Szrj continue;
37738fd1498Szrj
37838fd1498Szrj /* Don't allow any other PHI nodes. */
37938fd1498Szrj for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
38038fd1498Szrj if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
38138fd1498Szrj return false;
38238fd1498Szrj }
38338fd1498Szrj
38438fd1498Szrj return true;
38538fd1498Szrj }
38638fd1498Szrj
38738fd1498Szrj /* Programmers and optimizers (like loop store motion) may optimize code:
38838fd1498Szrj
38938fd1498Szrj for (int i = 0; i < N; i++)
39038fd1498Szrj for (int j = 0; j < N; j++)
39138fd1498Szrj a[i] += b[j][i] * c[j][i];
39238fd1498Szrj
39338fd1498Szrj into reduction:
39438fd1498Szrj
39538fd1498Szrj for (int i = 0; i < N; i++)
39638fd1498Szrj {
39738fd1498Szrj // producer. Note sum can be intitialized to a constant.
39838fd1498Szrj int sum = a[i];
39938fd1498Szrj for (int j = 0; j < N; j++)
40038fd1498Szrj {
40138fd1498Szrj sum += b[j][i] * c[j][i];
40238fd1498Szrj }
40338fd1498Szrj // consumer.
40438fd1498Szrj a[i] = sum;
40538fd1498Szrj }
40638fd1498Szrj
40738fd1498Szrj The result code can't be interchanged without undoing the optimization.
40838fd1498Szrj This function classifies this kind reduction and records information so
40938fd1498Szrj that we can undo the store motion during interchange. */
41038fd1498Szrj
41138fd1498Szrj void
classify_simple_reduction(reduction_p re)41238fd1498Szrj loop_cand::classify_simple_reduction (reduction_p re)
41338fd1498Szrj {
41438fd1498Szrj gimple *producer, *consumer;
41538fd1498Szrj
41638fd1498Szrj /* Check init variable of reduction and how it is initialized. */
41738fd1498Szrj if (TREE_CODE (re->init) == SSA_NAME)
41838fd1498Szrj {
41938fd1498Szrj producer = SSA_NAME_DEF_STMT (re->init);
42038fd1498Szrj re->producer = producer;
42138fd1498Szrj basic_block bb = gimple_bb (producer);
42238fd1498Szrj if (!bb || bb->loop_father != m_outer)
42338fd1498Szrj return;
42438fd1498Szrj
42538fd1498Szrj if (!gimple_assign_load_p (producer))
42638fd1498Szrj return;
42738fd1498Szrj
42838fd1498Szrj re->init_ref = gimple_assign_rhs1 (producer);
42938fd1498Szrj }
43038fd1498Szrj else if (CONSTANT_CLASS_P (re->init))
43138fd1498Szrj m_const_init_reduc++;
43238fd1498Szrj else
43338fd1498Szrj return;
43438fd1498Szrj
43538fd1498Szrj /* Check how reduction variable is used. */
43638fd1498Szrj consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
43738fd1498Szrj if (!consumer
43838fd1498Szrj || !gimple_store_p (consumer))
43938fd1498Szrj return;
44038fd1498Szrj
44138fd1498Szrj re->fini_ref = gimple_get_lhs (consumer);
44238fd1498Szrj re->consumer = consumer;
44338fd1498Szrj
44438fd1498Szrj /* Simple reduction with constant initializer. */
44538fd1498Szrj if (!re->init_ref)
44638fd1498Szrj {
44738fd1498Szrj gcc_assert (CONSTANT_CLASS_P (re->init));
44838fd1498Szrj re->init_ref = unshare_expr (re->fini_ref);
44938fd1498Szrj }
45038fd1498Szrj
45138fd1498Szrj /* Require memory references in producer and consumer are the same so
45238fd1498Szrj that we can undo reduction during interchange. */
45338fd1498Szrj if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
45438fd1498Szrj return;
45538fd1498Szrj
45638fd1498Szrj re->type = SIMPLE_RTYPE;
45738fd1498Szrj }
45838fd1498Szrj
45938fd1498Szrj /* Analyze reduction variable VAR for inner loop of the loop nest to be
46038fd1498Szrj interchanged. Return true if analysis succeeds. */
46138fd1498Szrj
46238fd1498Szrj bool
analyze_iloop_reduction_var(tree var)46338fd1498Szrj loop_cand::analyze_iloop_reduction_var (tree var)
46438fd1498Szrj {
46538fd1498Szrj gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
46638fd1498Szrj gphi *lcssa_phi = NULL, *use_phi;
46738fd1498Szrj tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
46838fd1498Szrj tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
46938fd1498Szrj reduction_p re;
47038fd1498Szrj gimple *stmt, *next_def, *single_use = NULL;
47138fd1498Szrj use_operand_p use_p;
47238fd1498Szrj imm_use_iterator iterator;
47338fd1498Szrj
47438fd1498Szrj if (TREE_CODE (next) != SSA_NAME)
47538fd1498Szrj return false;
47638fd1498Szrj
47738fd1498Szrj next_def = SSA_NAME_DEF_STMT (next);
47838fd1498Szrj basic_block bb = gimple_bb (next_def);
47938fd1498Szrj if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
48038fd1498Szrj return false;
48138fd1498Szrj
48238fd1498Szrj /* In restricted reduction, the var is (and must be) used in defining
48338fd1498Szrj the updated var. The process can be depicted as below:
48438fd1498Szrj
48538fd1498Szrj var ;; = PHI<init, next>
48638fd1498Szrj |
48738fd1498Szrj |
48838fd1498Szrj v
48938fd1498Szrj +---------------------+
49038fd1498Szrj | reduction operators | <-- other operands
49138fd1498Szrj +---------------------+
49238fd1498Szrj |
49338fd1498Szrj |
49438fd1498Szrj v
49538fd1498Szrj next
49638fd1498Szrj
49738fd1498Szrj In terms loop interchange, we don't change how NEXT is computed based
49838fd1498Szrj on VAR and OTHER OPERANDS. In case of double reduction in loop nest
49938fd1498Szrj to be interchanged, we don't changed it at all. In the case of simple
50038fd1498Szrj reduction in inner loop, we only make change how VAR/NEXT is loaded or
50138fd1498Szrj stored. With these conditions, we can relax restrictions on reduction
50238fd1498Szrj in a way that reduction operation is seen as black box. In general,
50338fd1498Szrj we can ignore reassociation of reduction operator; we can handle fake
50438fd1498Szrj reductions in which VAR is not even used to compute NEXT. */
50538fd1498Szrj if (! single_imm_use (var, &use_p, &single_use)
50638fd1498Szrj || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
50738fd1498Szrj return false;
50838fd1498Szrj
50938fd1498Szrj /* Check the reduction operation. We require a left-associative operation.
51038fd1498Szrj For FP math we also need to be allowed to associate operations. */
51138fd1498Szrj if (gassign *ass = dyn_cast <gassign *> (single_use))
51238fd1498Szrj {
51338fd1498Szrj enum tree_code code = gimple_assign_rhs_code (ass);
51438fd1498Szrj if (! (associative_tree_code (code)
51538fd1498Szrj || (code == MINUS_EXPR
51638fd1498Szrj && use_p->use == gimple_assign_rhs1_ptr (ass)))
51738fd1498Szrj || (FLOAT_TYPE_P (TREE_TYPE (var))
51838fd1498Szrj && ! flag_associative_math))
51938fd1498Szrj return false;
52038fd1498Szrj }
52138fd1498Szrj else
52238fd1498Szrj return false;
52338fd1498Szrj
52438fd1498Szrj /* Handle and verify a series of stmts feeding the reduction op. */
52538fd1498Szrj if (single_use != next_def
52638fd1498Szrj && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
52738fd1498Szrj gimple_assign_rhs_code (single_use)))
52838fd1498Szrj return false;
52938fd1498Szrj
53038fd1498Szrj /* Only support cases in which INIT is used in inner loop. */
53138fd1498Szrj if (TREE_CODE (init) == SSA_NAME)
53238fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
53338fd1498Szrj {
53438fd1498Szrj stmt = USE_STMT (use_p);
53538fd1498Szrj if (is_gimple_debug (stmt))
53638fd1498Szrj continue;
53738fd1498Szrj
53838fd1498Szrj if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
53938fd1498Szrj return false;
54038fd1498Szrj }
54138fd1498Szrj
54238fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
54338fd1498Szrj {
54438fd1498Szrj stmt = USE_STMT (use_p);
54538fd1498Szrj if (is_gimple_debug (stmt))
54638fd1498Szrj continue;
54738fd1498Szrj
54838fd1498Szrj /* Or else it's used in PHI itself. */
54938fd1498Szrj use_phi = dyn_cast <gphi *> (stmt);
55038fd1498Szrj if (use_phi == phi)
55138fd1498Szrj continue;
55238fd1498Szrj
55338fd1498Szrj if (use_phi != NULL
55438fd1498Szrj && lcssa_phi == NULL
55538fd1498Szrj && gimple_bb (stmt) == m_exit->dest
55638fd1498Szrj && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
55738fd1498Szrj lcssa_phi = use_phi;
55838fd1498Szrj else
55938fd1498Szrj return false;
56038fd1498Szrj }
56138fd1498Szrj if (!lcssa_phi)
56238fd1498Szrj return false;
56338fd1498Szrj
56438fd1498Szrj re = XCNEW (struct reduction);
56538fd1498Szrj re->var = var;
56638fd1498Szrj re->init = init;
56738fd1498Szrj re->next = next;
56838fd1498Szrj re->phi = phi;
56938fd1498Szrj re->lcssa_phi = lcssa_phi;
57038fd1498Szrj
57138fd1498Szrj classify_simple_reduction (re);
57238fd1498Szrj
57338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
57438fd1498Szrj dump_reduction (re);
57538fd1498Szrj
57638fd1498Szrj m_reductions.safe_push (re);
57738fd1498Szrj return true;
57838fd1498Szrj }
57938fd1498Szrj
58038fd1498Szrj /* Analyze reduction variable VAR for outer loop of the loop nest to be
58138fd1498Szrj interchanged. ILOOP is not NULL and points to inner loop. For the
58238fd1498Szrj moment, we only support double reduction for outer loop, like:
58338fd1498Szrj
58438fd1498Szrj for (int i = 0; i < n; i++)
58538fd1498Szrj {
58638fd1498Szrj int sum = 0;
58738fd1498Szrj
58838fd1498Szrj for (int j = 0; j < n; j++) // outer loop
58938fd1498Szrj for (int k = 0; k < n; k++) // inner loop
59038fd1498Szrj sum += a[i][k]*b[k][j];
59138fd1498Szrj
59238fd1498Szrj s[i] = sum;
59338fd1498Szrj }
59438fd1498Szrj
59538fd1498Szrj Note the innermost two loops are the loop nest to be interchanged.
59638fd1498Szrj Return true if analysis succeeds. */
59738fd1498Szrj
59838fd1498Szrj bool
analyze_oloop_reduction_var(loop_cand * iloop,tree var)59938fd1498Szrj loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
60038fd1498Szrj {
60138fd1498Szrj gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
60238fd1498Szrj gphi *lcssa_phi = NULL, *use_phi;
60338fd1498Szrj tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
60438fd1498Szrj tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
60538fd1498Szrj reduction_p re;
60638fd1498Szrj gimple *stmt, *next_def;
60738fd1498Szrj use_operand_p use_p;
60838fd1498Szrj imm_use_iterator iterator;
60938fd1498Szrj
61038fd1498Szrj if (TREE_CODE (next) != SSA_NAME)
61138fd1498Szrj return false;
61238fd1498Szrj
61338fd1498Szrj next_def = SSA_NAME_DEF_STMT (next);
61438fd1498Szrj basic_block bb = gimple_bb (next_def);
61538fd1498Szrj if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
61638fd1498Szrj return false;
61738fd1498Szrj
61838fd1498Szrj /* Find inner loop's simple reduction that uses var as initializer. */
61938fd1498Szrj reduction_p inner_re = NULL;
62038fd1498Szrj for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
62138fd1498Szrj if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
62238fd1498Szrj break;
62338fd1498Szrj
62438fd1498Szrj if (inner_re == NULL
62538fd1498Szrj || inner_re->type != UNKNOWN_RTYPE
62638fd1498Szrj || inner_re->producer != phi)
62738fd1498Szrj return false;
62838fd1498Szrj
62938fd1498Szrj /* In case of double reduction, outer loop's reduction should be updated
63038fd1498Szrj by inner loop's simple reduction. */
63138fd1498Szrj if (next_def != inner_re->lcssa_phi)
63238fd1498Szrj return false;
63338fd1498Szrj
63438fd1498Szrj /* Outer loop's reduction should only be used to initialize inner loop's
63538fd1498Szrj simple reduction. */
63638fd1498Szrj if (! single_imm_use (var, &use_p, &stmt)
63738fd1498Szrj || stmt != inner_re->phi)
63838fd1498Szrj return false;
63938fd1498Szrj
64038fd1498Szrj /* Check this reduction is correctly used outside of loop via lcssa phi. */
64138fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
64238fd1498Szrj {
64338fd1498Szrj stmt = USE_STMT (use_p);
64438fd1498Szrj if (is_gimple_debug (stmt))
64538fd1498Szrj continue;
64638fd1498Szrj
64738fd1498Szrj /* Or else it's used in PHI itself. */
64838fd1498Szrj use_phi = dyn_cast <gphi *> (stmt);
64938fd1498Szrj if (use_phi == phi)
65038fd1498Szrj continue;
65138fd1498Szrj
65238fd1498Szrj if (lcssa_phi == NULL
65338fd1498Szrj && use_phi != NULL
65438fd1498Szrj && gimple_bb (stmt) == m_exit->dest
65538fd1498Szrj && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
65638fd1498Szrj lcssa_phi = use_phi;
65738fd1498Szrj else
65838fd1498Szrj return false;
65938fd1498Szrj }
66038fd1498Szrj if (!lcssa_phi)
66138fd1498Szrj return false;
66238fd1498Szrj
66338fd1498Szrj re = XCNEW (struct reduction);
66438fd1498Szrj re->var = var;
66538fd1498Szrj re->init = init;
66638fd1498Szrj re->next = next;
66738fd1498Szrj re->phi = phi;
66838fd1498Szrj re->lcssa_phi = lcssa_phi;
66938fd1498Szrj re->type = DOUBLE_RTYPE;
67038fd1498Szrj inner_re->type = DOUBLE_RTYPE;
67138fd1498Szrj
67238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
67338fd1498Szrj dump_reduction (re);
67438fd1498Szrj
67538fd1498Szrj m_reductions.safe_push (re);
67638fd1498Szrj return true;
67738fd1498Szrj }
67838fd1498Szrj
67938fd1498Szrj /* Return true if VAR is induction variable of current loop whose scev is
68038fd1498Szrj specified by CHREC. */
68138fd1498Szrj
68238fd1498Szrj bool
analyze_induction_var(tree var,tree chrec)68338fd1498Szrj loop_cand::analyze_induction_var (tree var, tree chrec)
68438fd1498Szrj {
68538fd1498Szrj gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
68638fd1498Szrj tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
68738fd1498Szrj
68838fd1498Szrj /* Var is loop invariant, though it's unlikely to happen. */
68938fd1498Szrj if (tree_does_not_contain_chrecs (chrec))
69038fd1498Szrj {
691*58e805e6Szrj /* Punt on floating point invariants if honoring signed zeros,
692*58e805e6Szrj representing that as + 0.0 would change the result if init
693*58e805e6Szrj is -0.0. Similarly for SNaNs it can raise exception. */
694*58e805e6Szrj if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
695*58e805e6Szrj return false;
69638fd1498Szrj struct induction *iv = XCNEW (struct induction);
69738fd1498Szrj iv->var = var;
69838fd1498Szrj iv->init_val = init;
69938fd1498Szrj iv->init_expr = chrec;
700*58e805e6Szrj iv->step = build_zero_cst (TREE_TYPE (chrec));
70138fd1498Szrj m_inductions.safe_push (iv);
70238fd1498Szrj return true;
70338fd1498Szrj }
70438fd1498Szrj
70538fd1498Szrj if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
70638fd1498Szrj || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
70738fd1498Szrj || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
70838fd1498Szrj || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
70938fd1498Szrj return false;
71038fd1498Szrj
71138fd1498Szrj struct induction *iv = XCNEW (struct induction);
71238fd1498Szrj iv->var = var;
71338fd1498Szrj iv->init_val = init;
71438fd1498Szrj iv->init_expr = CHREC_LEFT (chrec);
71538fd1498Szrj iv->step = CHREC_RIGHT (chrec);
71638fd1498Szrj
71738fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
71838fd1498Szrj dump_induction (m_loop, iv);
71938fd1498Szrj
72038fd1498Szrj m_inductions.safe_push (iv);
72138fd1498Szrj return true;
72238fd1498Szrj }
72338fd1498Szrj
72438fd1498Szrj /* Return true if all loop carried variables defined in loop header can
72538fd1498Szrj be successfully analyzed. */
72638fd1498Szrj
72738fd1498Szrj bool
analyze_carried_vars(loop_cand * iloop)72838fd1498Szrj loop_cand::analyze_carried_vars (loop_cand *iloop)
72938fd1498Szrj {
73038fd1498Szrj edge e = loop_preheader_edge (m_outer);
73138fd1498Szrj gphi_iterator gsi;
73238fd1498Szrj
73338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
73438fd1498Szrj fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
73538fd1498Szrj
73638fd1498Szrj for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
73738fd1498Szrj {
73838fd1498Szrj gphi *phi = gsi.phi ();
73938fd1498Szrj
74038fd1498Szrj tree var = PHI_RESULT (phi);
74138fd1498Szrj if (virtual_operand_p (var))
74238fd1498Szrj continue;
74338fd1498Szrj
74438fd1498Szrj tree chrec = analyze_scalar_evolution (m_loop, var);
74538fd1498Szrj chrec = instantiate_scev (e, m_loop, chrec);
74638fd1498Szrj
74738fd1498Szrj /* Analyze var as reduction variable. */
74838fd1498Szrj if (chrec_contains_undetermined (chrec)
74938fd1498Szrj || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
75038fd1498Szrj {
75138fd1498Szrj if (iloop && !analyze_oloop_reduction_var (iloop, var))
75238fd1498Szrj return false;
75338fd1498Szrj if (!iloop && !analyze_iloop_reduction_var (var))
75438fd1498Szrj return false;
75538fd1498Szrj }
75638fd1498Szrj /* Analyze var as induction variable. */
75738fd1498Szrj else if (!analyze_induction_var (var, chrec))
75838fd1498Szrj return false;
75938fd1498Szrj }
76038fd1498Szrj
76138fd1498Szrj return true;
76238fd1498Szrj }
76338fd1498Szrj
76438fd1498Szrj /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
76538fd1498Szrj
76638fd1498Szrj bool
analyze_lcssa_phis(void)76738fd1498Szrj loop_cand::analyze_lcssa_phis (void)
76838fd1498Szrj {
76938fd1498Szrj gphi_iterator gsi;
77038fd1498Szrj for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
77138fd1498Szrj {
77238fd1498Szrj gphi *phi = gsi.phi ();
77338fd1498Szrj
77438fd1498Szrj if (virtual_operand_p (PHI_RESULT (phi)))
77538fd1498Szrj continue;
77638fd1498Szrj
77738fd1498Szrj /* TODO: We only support lcssa phi for reduction for now. */
77838fd1498Szrj if (!find_reduction_by_stmt (phi))
77938fd1498Szrj return false;
78038fd1498Szrj }
78138fd1498Szrj
78238fd1498Szrj return true;
78338fd1498Szrj }
78438fd1498Szrj
78538fd1498Szrj /* CONSUMER is a stmt in BB storing reduction result into memory object.
78638fd1498Szrj When the reduction is intialized from constant value, we need to add
78738fd1498Szrj a stmt loading from the memory object to target basic block in inner
78838fd1498Szrj loop during undoing the reduction. Problem is that memory reference
78938fd1498Szrj may use ssa variables not dominating the target basic block. This
79038fd1498Szrj function finds all stmts on which CONSUMER depends in basic block BB,
79138fd1498Szrj records and returns them via STMTS. */
79238fd1498Szrj
79338fd1498Szrj static void
find_deps_in_bb_for_stmt(gimple_seq * stmts,basic_block bb,gimple * consumer)79438fd1498Szrj find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
79538fd1498Szrj {
79638fd1498Szrj auto_vec<gimple *, 4> worklist;
79738fd1498Szrj use_operand_p use_p;
79838fd1498Szrj ssa_op_iter iter;
79938fd1498Szrj gimple *stmt, *def_stmt;
80038fd1498Szrj gimple_stmt_iterator gsi;
80138fd1498Szrj
80238fd1498Szrj /* First clear flag for stmts in bb. */
80338fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
80438fd1498Szrj gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
80538fd1498Szrj
80638fd1498Szrj /* DFS search all depended stmts in bb and mark flag for these stmts. */
80738fd1498Szrj worklist.safe_push (consumer);
80838fd1498Szrj while (!worklist.is_empty ())
80938fd1498Szrj {
81038fd1498Szrj stmt = worklist.pop ();
81138fd1498Szrj FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
81238fd1498Szrj {
81338fd1498Szrj def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
81438fd1498Szrj
81538fd1498Szrj if (is_a <gphi *> (def_stmt)
81638fd1498Szrj || gimple_bb (def_stmt) != bb
81738fd1498Szrj || gimple_plf (def_stmt, GF_PLF_1))
81838fd1498Szrj continue;
81938fd1498Szrj
82038fd1498Szrj worklist.safe_push (def_stmt);
82138fd1498Szrj }
82238fd1498Szrj gimple_set_plf (stmt, GF_PLF_1, true);
82338fd1498Szrj }
82438fd1498Szrj for (gsi = gsi_start_nondebug_bb (bb);
82538fd1498Szrj !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
82638fd1498Szrj {
82738fd1498Szrj /* Move dep stmts to sequence STMTS. */
82838fd1498Szrj if (gimple_plf (stmt, GF_PLF_1))
82938fd1498Szrj {
83038fd1498Szrj gsi_remove (&gsi, false);
83138fd1498Szrj gimple_seq_add_stmt_without_update (stmts, stmt);
83238fd1498Szrj }
83338fd1498Szrj else
83438fd1498Szrj gsi_next_nondebug (&gsi);
83538fd1498Szrj }
83638fd1498Szrj }
83738fd1498Szrj
83838fd1498Szrj /* User can write, optimizers can generate simple reduction RE for inner
83938fd1498Szrj loop. In order to make interchange valid, we have to undo reduction by
84038fd1498Szrj moving producer and consumer stmts into the inner loop. For example,
84138fd1498Szrj below code:
84238fd1498Szrj
84338fd1498Szrj init = MEM_REF[idx]; //producer
84438fd1498Szrj loop:
84538fd1498Szrj var = phi<init, next>
84638fd1498Szrj next = var op ...
84738fd1498Szrj reduc_sum = phi<next>
84838fd1498Szrj MEM_REF[idx] = reduc_sum //consumer
84938fd1498Szrj
85038fd1498Szrj is transformed into:
85138fd1498Szrj
85238fd1498Szrj loop:
85338fd1498Szrj new_var = MEM_REF[idx]; //producer after moving
85438fd1498Szrj next = new_var op ...
85538fd1498Szrj MEM_REF[idx] = next; //consumer after moving
85638fd1498Szrj
85738fd1498Szrj Note if the reduction variable is initialized to constant, like:
85838fd1498Szrj
85938fd1498Szrj var = phi<0.0, next>
86038fd1498Szrj
86138fd1498Szrj we compute new_var as below:
86238fd1498Szrj
86338fd1498Szrj loop:
86438fd1498Szrj tmp = MEM_REF[idx];
86538fd1498Szrj new_var = !first_iteration ? tmp : 0.0;
86638fd1498Szrj
86738fd1498Szrj so that the initial const is used in the first iteration of loop. Also
86838fd1498Szrj record ssa variables for dead code elimination in DCE_SEEDS. */
86938fd1498Szrj
87038fd1498Szrj void
undo_simple_reduction(reduction_p re,bitmap dce_seeds)87138fd1498Szrj loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
87238fd1498Szrj {
87338fd1498Szrj gimple *stmt;
87438fd1498Szrj gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
87538fd1498Szrj gimple_seq stmts = NULL;
87638fd1498Szrj tree new_var;
87738fd1498Szrj
87838fd1498Szrj /* Prepare the initialization stmts and insert it to inner loop. */
87938fd1498Szrj if (re->producer != NULL)
88038fd1498Szrj {
88138fd1498Szrj gimple_set_vuse (re->producer, NULL_TREE);
88238fd1498Szrj from = gsi_for_stmt (re->producer);
88338fd1498Szrj gsi_remove (&from, false);
88438fd1498Szrj gimple_seq_add_stmt_without_update (&stmts, re->producer);
88538fd1498Szrj new_var = re->init;
88638fd1498Szrj }
88738fd1498Szrj else
88838fd1498Szrj {
88938fd1498Szrj /* Find all stmts on which expression "MEM_REF[idx]" depends. */
89038fd1498Szrj find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
89138fd1498Szrj /* Because we generate new stmt loading from the MEM_REF to TMP. */
89238fd1498Szrj tree cond, tmp = copy_ssa_name (re->var);
89338fd1498Szrj stmt = gimple_build_assign (tmp, re->init_ref);
89438fd1498Szrj gimple_seq_add_stmt_without_update (&stmts, stmt);
89538fd1498Szrj
89638fd1498Szrj /* Init new_var to MEM_REF or CONST depending on if it is the first
89738fd1498Szrj iteration. */
89838fd1498Szrj induction_p iv = m_inductions[0];
89938fd1498Szrj cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
90038fd1498Szrj new_var = copy_ssa_name (re->var);
90138fd1498Szrj stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
90238fd1498Szrj gimple_seq_add_stmt_without_update (&stmts, stmt);
90338fd1498Szrj }
90438fd1498Szrj gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
90538fd1498Szrj
90638fd1498Szrj /* Replace all uses of reduction var with new variable. */
90738fd1498Szrj use_operand_p use_p;
90838fd1498Szrj imm_use_iterator iterator;
90938fd1498Szrj FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
91038fd1498Szrj {
91138fd1498Szrj FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
91238fd1498Szrj SET_USE (use_p, new_var);
91338fd1498Szrj
91438fd1498Szrj update_stmt (stmt);
91538fd1498Szrj }
91638fd1498Szrj
91738fd1498Szrj /* Move consumer stmt into inner loop, just after reduction next's def. */
91838fd1498Szrj unlink_stmt_vdef (re->consumer);
91938fd1498Szrj release_ssa_name (gimple_vdef (re->consumer));
92038fd1498Szrj gimple_set_vdef (re->consumer, NULL_TREE);
92138fd1498Szrj gimple_set_vuse (re->consumer, NULL_TREE);
92238fd1498Szrj gimple_assign_set_rhs1 (re->consumer, re->next);
92338fd1498Szrj from = gsi_for_stmt (re->consumer);
92438fd1498Szrj to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
92538fd1498Szrj gsi_move_after (&from, &to);
92638fd1498Szrj
92738fd1498Szrj /* Mark the reduction variables for DCE. */
92838fd1498Szrj bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
92938fd1498Szrj bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
93038fd1498Szrj }
93138fd1498Szrj
93238fd1498Szrj /* Free DATAREFS and its auxiliary memory. */
93338fd1498Szrj
93438fd1498Szrj static void
free_data_refs_with_aux(vec<data_reference_p> datarefs)93538fd1498Szrj free_data_refs_with_aux (vec<data_reference_p> datarefs)
93638fd1498Szrj {
93738fd1498Szrj data_reference_p dr;
93838fd1498Szrj for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
93938fd1498Szrj if (dr->aux != NULL)
94038fd1498Szrj {
94138fd1498Szrj DR_ACCESS_STRIDE (dr)->release ();
94238fd1498Szrj delete (vec<tree> *) dr->aux;
94338fd1498Szrj }
94438fd1498Szrj
94538fd1498Szrj free_data_refs (datarefs);
94638fd1498Szrj }
94738fd1498Szrj
94838fd1498Szrj /* Class for loop interchange transformation. */
94938fd1498Szrj
95038fd1498Szrj class tree_loop_interchange
95138fd1498Szrj {
95238fd1498Szrj public:
tree_loop_interchange(vec<struct loop * > loop_nest)95338fd1498Szrj tree_loop_interchange (vec<struct loop *> loop_nest)
95438fd1498Szrj : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
95538fd1498Szrj m_dce_seeds (BITMAP_ALLOC (NULL)) { }
~tree_loop_interchange()95638fd1498Szrj ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
95738fd1498Szrj bool interchange (vec<data_reference_p>, vec<ddr_p>);
95838fd1498Szrj
95938fd1498Szrj private:
96038fd1498Szrj void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
96138fd1498Szrj bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
96238fd1498Szrj void interchange_loops (loop_cand &, loop_cand &);
96338fd1498Szrj void map_inductions_to_loop (loop_cand &, loop_cand &);
96438fd1498Szrj void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);
96538fd1498Szrj
96638fd1498Szrj /* The whole loop nest in which interchange is ongoing. */
96738fd1498Szrj vec<struct loop *> m_loop_nest;
96838fd1498Szrj /* We create new IV which is only used in loop's exit condition check.
96938fd1498Szrj In case of 3-level loop nest interchange, when we interchange the
97038fd1498Szrj innermost two loops, new IV created in the middle level loop does
97138fd1498Szrj not need to be preserved in interchanging the outermost two loops
97238fd1498Szrj later. We record the IV so that it can be skipped. */
97338fd1498Szrj tree m_niters_iv_var;
97438fd1498Szrj /* Bitmap of seed variables for dead code elimination after interchange. */
97538fd1498Szrj bitmap m_dce_seeds;
97638fd1498Szrj };
97738fd1498Szrj
97838fd1498Szrj /* Update data refs' access stride and dependence information after loop
97938fd1498Szrj interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
98038fd1498Szrj nest. DATAREFS are data references. DDRS are data dependences. */
98138fd1498Szrj
98238fd1498Szrj void
update_data_info(unsigned i_idx,unsigned o_idx,vec<data_reference_p> datarefs,vec<ddr_p> ddrs)98338fd1498Szrj tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
98438fd1498Szrj vec<data_reference_p> datarefs,
98538fd1498Szrj vec<ddr_p> ddrs)
98638fd1498Szrj {
98738fd1498Szrj struct data_reference *dr;
98838fd1498Szrj struct data_dependence_relation *ddr;
98938fd1498Szrj
99038fd1498Szrj /* Update strides of data references. */
99138fd1498Szrj for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
99238fd1498Szrj {
99338fd1498Szrj vec<tree> *stride = DR_ACCESS_STRIDE (dr);
99438fd1498Szrj gcc_assert (stride->length () > i_idx);
99538fd1498Szrj std::swap ((*stride)[i_idx], (*stride)[o_idx]);
99638fd1498Szrj }
99738fd1498Szrj /* Update data dependences. */
99838fd1498Szrj for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
99938fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
100038fd1498Szrj {
100138fd1498Szrj for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
100238fd1498Szrj {
100338fd1498Szrj lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
100438fd1498Szrj std::swap (dist_vect[i_idx], dist_vect[o_idx]);
100538fd1498Szrj }
100638fd1498Szrj }
100738fd1498Szrj }
100838fd1498Szrj
100938fd1498Szrj /* Check data dependence relations, return TRUE if it's valid to interchange
101038fd1498Szrj two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
101138fd1498Szrj loops is valid only if dist vector, after interchanging, doesn't have '>'
101238fd1498Szrj as the leftmost non-'=' direction. Practically, this function have been
101338fd1498Szrj conservative here by not checking some valid cases. */
101438fd1498Szrj
101538fd1498Szrj bool
valid_data_dependences(unsigned i_idx,unsigned o_idx,vec<ddr_p> ddrs)101638fd1498Szrj tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
101738fd1498Szrj vec<ddr_p> ddrs)
101838fd1498Szrj {
101938fd1498Szrj struct data_dependence_relation *ddr;
102038fd1498Szrj
102138fd1498Szrj for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
102238fd1498Szrj {
102338fd1498Szrj /* Skip no-dependence case. */
102438fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
102538fd1498Szrj continue;
102638fd1498Szrj
102738fd1498Szrj for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
102838fd1498Szrj {
102938fd1498Szrj lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
103038fd1498Szrj unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
103138fd1498Szrj
103238fd1498Szrj /* If there is no carried dependence. */
103338fd1498Szrj if (level == 0)
103438fd1498Szrj continue;
103538fd1498Szrj
103638fd1498Szrj level --;
103738fd1498Szrj
103838fd1498Szrj /* If dependence is not carried by any loop in between the two
103938fd1498Szrj loops [oloop, iloop] to interchange. */
104038fd1498Szrj if (level < o_idx || level > i_idx)
104138fd1498Szrj continue;
104238fd1498Szrj
104338fd1498Szrj /* Be conservative, skip case if either direction at i_idx/o_idx
104438fd1498Szrj levels is not '=' or '<'. */
104538fd1498Szrj if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
104638fd1498Szrj return false;
104738fd1498Szrj }
104838fd1498Szrj }
104938fd1498Szrj
105038fd1498Szrj return true;
105138fd1498Szrj }
105238fd1498Szrj
105338fd1498Szrj /* Interchange two loops specified by ILOOP and OLOOP. */
105438fd1498Szrj
105538fd1498Szrj void
interchange_loops(loop_cand & iloop,loop_cand & oloop)105638fd1498Szrj tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
105738fd1498Szrj {
105838fd1498Szrj reduction_p re;
105938fd1498Szrj gimple_stmt_iterator gsi;
106038fd1498Szrj tree i_niters, o_niters, var_after;
106138fd1498Szrj
106238fd1498Szrj /* Undo inner loop's simple reduction. */
106338fd1498Szrj for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
106438fd1498Szrj if (re->type != DOUBLE_RTYPE)
106538fd1498Szrj {
106638fd1498Szrj if (re->producer)
106738fd1498Szrj reset_debug_uses (re->producer);
106838fd1498Szrj
106938fd1498Szrj iloop.undo_simple_reduction (re, m_dce_seeds);
107038fd1498Szrj }
107138fd1498Szrj
107238fd1498Szrj /* Only need to reset debug uses for double reduction. */
107338fd1498Szrj for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
107438fd1498Szrj {
107538fd1498Szrj gcc_assert (re->type == DOUBLE_RTYPE);
107638fd1498Szrj reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
107738fd1498Szrj reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
107838fd1498Szrj }
107938fd1498Szrj
108038fd1498Szrj /* Prepare niters for both loops. */
108138fd1498Szrj struct loop *loop_nest = m_loop_nest[0];
108238fd1498Szrj edge instantiate_below = loop_preheader_edge (loop_nest);
108338fd1498Szrj gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
108438fd1498Szrj i_niters = number_of_latch_executions (iloop.m_loop);
108538fd1498Szrj i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
108638fd1498Szrj i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
108738fd1498Szrj i_niters);
108838fd1498Szrj i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
108938fd1498Szrj NULL_TREE, false, GSI_CONTINUE_LINKING);
109038fd1498Szrj o_niters = number_of_latch_executions (oloop.m_loop);
109138fd1498Szrj if (oloop.m_loop != loop_nest)
109238fd1498Szrj {
109338fd1498Szrj o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
109438fd1498Szrj o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
109538fd1498Szrj o_niters);
109638fd1498Szrj }
109738fd1498Szrj o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
109838fd1498Szrj NULL_TREE, false, GSI_CONTINUE_LINKING);
109938fd1498Szrj
110038fd1498Szrj /* Move src's code to tgt loop. This is necessary when src is the outer
110138fd1498Szrj loop and tgt is the inner loop. */
110238fd1498Szrj move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
110338fd1498Szrj
110438fd1498Szrj /* Map outer loop's IV to inner loop, and vice versa. */
110538fd1498Szrj map_inductions_to_loop (oloop, iloop);
110638fd1498Szrj map_inductions_to_loop (iloop, oloop);
110738fd1498Szrj
110838fd1498Szrj /* Create canonical IV for both loops. Note canonical IV for outer/inner
110938fd1498Szrj loop is actually from inner/outer loop. Also we record the new IV
111038fd1498Szrj created for the outer loop so that it can be skipped in later loop
111138fd1498Szrj interchange. */
111238fd1498Szrj create_canonical_iv (oloop.m_loop, oloop.m_exit,
111338fd1498Szrj i_niters, &m_niters_iv_var, &var_after);
111438fd1498Szrj bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
111538fd1498Szrj create_canonical_iv (iloop.m_loop, iloop.m_exit,
111638fd1498Szrj o_niters, NULL, &var_after);
111738fd1498Szrj bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
111838fd1498Szrj
111938fd1498Szrj /* Scrap niters estimation of interchanged loops. */
112038fd1498Szrj iloop.m_loop->any_upper_bound = false;
112138fd1498Szrj iloop.m_loop->any_likely_upper_bound = false;
112238fd1498Szrj free_numbers_of_iterations_estimates (iloop.m_loop);
112338fd1498Szrj oloop.m_loop->any_upper_bound = false;
112438fd1498Szrj oloop.m_loop->any_likely_upper_bound = false;
112538fd1498Szrj free_numbers_of_iterations_estimates (oloop.m_loop);
112638fd1498Szrj
112738fd1498Szrj /* Clear all cached scev information. This is expensive but shouldn't be
112838fd1498Szrj a problem given we interchange in very limited times. */
112938fd1498Szrj scev_reset_htab ();
113038fd1498Szrj
113138fd1498Szrj /* ??? The association between the loop data structure and the
113238fd1498Szrj CFG changed, so what was loop N at the source level is now
113338fd1498Szrj loop M. We should think of retaining the association or breaking
113438fd1498Szrj it fully by creating a new loop instead of re-using the "wrong" one. */
113538fd1498Szrj }
113638fd1498Szrj
113738fd1498Szrj /* Map induction variables of SRC loop to TGT loop. The function firstly
113838fd1498Szrj creates the same IV of SRC loop in TGT loop, then deletes the original
113938fd1498Szrj IV and re-initialize it using the newly created IV. For example, loop
114038fd1498Szrj nest:
114138fd1498Szrj
114238fd1498Szrj for (i = 0; i < N; i++)
114338fd1498Szrj for (j = 0; j < M; j++)
114438fd1498Szrj {
114538fd1498Szrj //use of i;
114638fd1498Szrj //use of j;
114738fd1498Szrj }
114838fd1498Szrj
114938fd1498Szrj will be transformed into:
115038fd1498Szrj
115138fd1498Szrj for (jj = 0; jj < M; jj++)
115238fd1498Szrj for (ii = 0; ii < N; ii++)
115338fd1498Szrj {
115438fd1498Szrj //use of ii;
115538fd1498Szrj //use of jj;
115638fd1498Szrj }
115738fd1498Szrj
115838fd1498Szrj after loop interchange. */
115938fd1498Szrj
116038fd1498Szrj void
map_inductions_to_loop(loop_cand & src,loop_cand & tgt)116138fd1498Szrj tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
116238fd1498Szrj {
116338fd1498Szrj induction_p iv;
116438fd1498Szrj edge e = tgt.m_exit;
116538fd1498Szrj gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
116638fd1498Szrj
116738fd1498Szrj /* Map source loop's IV to target loop. */
116838fd1498Szrj for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
116938fd1498Szrj {
117038fd1498Szrj gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
117138fd1498Szrj gcc_assert (is_a <gphi *> (stmt));
117238fd1498Szrj
117338fd1498Szrj use_operand_p use_p;
117438fd1498Szrj /* Only map original IV to target loop. */
117538fd1498Szrj if (m_niters_iv_var != iv->var)
117638fd1498Szrj {
117738fd1498Szrj /* Map the IV by creating the same one in target loop. */
117838fd1498Szrj tree var_before, var_after;
117938fd1498Szrj tree base = unshare_expr (iv->init_expr);
118038fd1498Szrj tree step = unshare_expr (iv->step);
118138fd1498Szrj create_iv (base, step, SSA_NAME_VAR (iv->var),
118238fd1498Szrj tgt.m_loop, &incr_pos, false, &var_before, &var_after);
118338fd1498Szrj bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
118438fd1498Szrj bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
118538fd1498Szrj
118638fd1498Szrj /* Replace uses of the original IV var with newly created IV var. */
118738fd1498Szrj imm_use_iterator imm_iter;
118838fd1498Szrj FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
118938fd1498Szrj {
119038fd1498Szrj FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
119138fd1498Szrj SET_USE (use_p, var_before);
119238fd1498Szrj
119338fd1498Szrj update_stmt (use_stmt);
119438fd1498Szrj }
119538fd1498Szrj }
119638fd1498Szrj
119738fd1498Szrj /* Mark all uses for DCE. */
119838fd1498Szrj ssa_op_iter op_iter;
119938fd1498Szrj FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
120038fd1498Szrj {
120138fd1498Szrj tree use = USE_FROM_PTR (use_p);
120238fd1498Szrj if (TREE_CODE (use) == SSA_NAME
120338fd1498Szrj && ! SSA_NAME_IS_DEFAULT_DEF (use))
120438fd1498Szrj bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
120538fd1498Szrj }
120638fd1498Szrj
120738fd1498Szrj /* Delete definition of the original IV in the source loop. */
120838fd1498Szrj gsi = gsi_for_stmt (stmt);
120938fd1498Szrj remove_phi_node (&gsi, true);
121038fd1498Szrj }
121138fd1498Szrj }
121238fd1498Szrj
121338fd1498Szrj /* Move stmts of outer loop to inner loop. */
121438fd1498Szrj
121538fd1498Szrj void
move_code_to_inner_loop(struct loop * outer,struct loop * inner,basic_block * outer_bbs)121638fd1498Szrj tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
121738fd1498Szrj struct loop *inner,
121838fd1498Szrj basic_block *outer_bbs)
121938fd1498Szrj {
122038fd1498Szrj basic_block oloop_exit_bb = single_exit (outer)->src;
122138fd1498Szrj gimple_stmt_iterator gsi, to;
122238fd1498Szrj
122338fd1498Szrj for (unsigned i = 0; i < outer->num_nodes; i++)
122438fd1498Szrj {
122538fd1498Szrj basic_block bb = outer_bbs[i];
122638fd1498Szrj
122738fd1498Szrj /* Skip basic blocks of inner loop. */
122838fd1498Szrj if (flow_bb_inside_loop_p (inner, bb))
122938fd1498Szrj continue;
123038fd1498Szrj
123138fd1498Szrj /* Move code from header/latch to header/latch. */
123238fd1498Szrj if (bb == outer->header)
123338fd1498Szrj to = gsi_after_labels (inner->header);
123438fd1498Szrj else if (bb == outer->latch)
123538fd1498Szrj to = gsi_after_labels (inner->latch);
123638fd1498Szrj else
123738fd1498Szrj /* Otherwise, simply move to exit->src. */
123838fd1498Szrj to = gsi_last_bb (single_exit (inner)->src);
123938fd1498Szrj
124038fd1498Szrj for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
124138fd1498Szrj {
124238fd1498Szrj gimple *stmt = gsi_stmt (gsi);
124338fd1498Szrj
124438fd1498Szrj if (oloop_exit_bb == bb
124538fd1498Szrj && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
124638fd1498Szrj {
124738fd1498Szrj gsi_next (&gsi);
124838fd1498Szrj continue;
124938fd1498Szrj }
125038fd1498Szrj
125138fd1498Szrj if (gimple_vuse (stmt))
125238fd1498Szrj gimple_set_vuse (stmt, NULL_TREE);
125338fd1498Szrj if (gimple_vdef (stmt))
125438fd1498Szrj {
125538fd1498Szrj unlink_stmt_vdef (stmt);
125638fd1498Szrj release_ssa_name (gimple_vdef (stmt));
125738fd1498Szrj gimple_set_vdef (stmt, NULL_TREE);
125838fd1498Szrj }
125938fd1498Szrj
126038fd1498Szrj reset_debug_uses (stmt);
126138fd1498Szrj gsi_move_before (&gsi, &to);
126238fd1498Szrj }
126338fd1498Szrj }
126438fd1498Szrj }
126538fd1498Szrj
126638fd1498Szrj /* Given data reference DR in LOOP_NEST, the function computes DR's access
126738fd1498Szrj stride at each level of loop from innermost LOOP to outer. On success,
126838fd1498Szrj it saves access stride at each level loop in a vector which is pointed
126938fd1498Szrj by DR->aux. For example:
127038fd1498Szrj
127138fd1498Szrj int arr[100][100][100];
127238fd1498Szrj for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
127338fd1498Szrj for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
127438fd1498Szrj for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
127538fd1498Szrj arr[i][j - 1][k] = 0; */
127638fd1498Szrj
127738fd1498Szrj static void
compute_access_stride(struct loop * loop_nest,struct loop * loop,data_reference_p dr)127838fd1498Szrj compute_access_stride (struct loop *loop_nest, struct loop *loop,
127938fd1498Szrj data_reference_p dr)
128038fd1498Szrj {
128138fd1498Szrj vec<tree> *strides = new vec<tree> ();
128238fd1498Szrj basic_block bb = gimple_bb (DR_STMT (dr));
128338fd1498Szrj
128438fd1498Szrj while (!flow_bb_inside_loop_p (loop, bb))
128538fd1498Szrj {
128638fd1498Szrj strides->safe_push (build_int_cst (sizetype, 0));
128738fd1498Szrj loop = loop_outer (loop);
128838fd1498Szrj }
128938fd1498Szrj gcc_assert (loop == bb->loop_father);
129038fd1498Szrj
129138fd1498Szrj tree ref = DR_REF (dr);
129238fd1498Szrj if (TREE_CODE (ref) == COMPONENT_REF
129338fd1498Szrj && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
129438fd1498Szrj {
129538fd1498Szrj /* We can't take address of bitfields. If the bitfield is at constant
129638fd1498Szrj offset from the start of the struct, just use address of the
129738fd1498Szrj struct, for analysis of the strides that shouldn't matter. */
129838fd1498Szrj if (!TREE_OPERAND (ref, 2)
129938fd1498Szrj || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
130038fd1498Szrj ref = TREE_OPERAND (ref, 0);
130138fd1498Szrj /* Otherwise, if we have a bit field representative, use that. */
130238fd1498Szrj else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
130338fd1498Szrj != NULL_TREE)
130438fd1498Szrj {
130538fd1498Szrj tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
130638fd1498Szrj ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
130738fd1498Szrj repr, TREE_OPERAND (ref, 2));
130838fd1498Szrj }
130938fd1498Szrj /* Otherwise punt. */
131038fd1498Szrj else
131138fd1498Szrj {
131238fd1498Szrj dr->aux = strides;
131338fd1498Szrj return;
131438fd1498Szrj }
131538fd1498Szrj }
131638fd1498Szrj tree scev_base = build_fold_addr_expr (ref);
131738fd1498Szrj tree scev = analyze_scalar_evolution (loop, scev_base);
131838fd1498Szrj scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
131938fd1498Szrj if (! chrec_contains_undetermined (scev))
132038fd1498Szrj {
132138fd1498Szrj tree sl = scev;
132238fd1498Szrj struct loop *expected = loop;
132338fd1498Szrj while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
132438fd1498Szrj {
132538fd1498Szrj struct loop *sl_loop = get_chrec_loop (sl);
132638fd1498Szrj while (sl_loop != expected)
132738fd1498Szrj {
132838fd1498Szrj strides->safe_push (size_int (0));
132938fd1498Szrj expected = loop_outer (expected);
133038fd1498Szrj }
133138fd1498Szrj strides->safe_push (CHREC_RIGHT (sl));
133238fd1498Szrj sl = CHREC_LEFT (sl);
133338fd1498Szrj expected = loop_outer (expected);
133438fd1498Szrj }
133538fd1498Szrj if (! tree_contains_chrecs (sl, NULL))
133638fd1498Szrj while (expected != loop_outer (loop_nest))
133738fd1498Szrj {
133838fd1498Szrj strides->safe_push (size_int (0));
133938fd1498Szrj expected = loop_outer (expected);
134038fd1498Szrj }
134138fd1498Szrj }
134238fd1498Szrj
134338fd1498Szrj dr->aux = strides;
134438fd1498Szrj }
134538fd1498Szrj
134638fd1498Szrj /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
134738fd1498Szrj access strides with respect to each level loop for all data refs in
134838fd1498Szrj DATAREFS from inner loop to outer loop. On success, it returns the
134938fd1498Szrj outermost loop that access strides can be computed successfully for
135038fd1498Szrj all data references. If access strides cannot be computed at least
135138fd1498Szrj for two levels of loop for any data reference, it returns NULL. */
135238fd1498Szrj
135338fd1498Szrj static struct loop *
compute_access_strides(struct loop * loop_nest,struct loop * loop,vec<data_reference_p> datarefs)135438fd1498Szrj compute_access_strides (struct loop *loop_nest, struct loop *loop,
135538fd1498Szrj vec<data_reference_p> datarefs)
135638fd1498Szrj {
135738fd1498Szrj unsigned i, j, num_loops = (unsigned) -1;
135838fd1498Szrj data_reference_p dr;
135938fd1498Szrj vec<tree> *stride;
136038fd1498Szrj
136138fd1498Szrj for (i = 0; datarefs.iterate (i, &dr); ++i)
136238fd1498Szrj {
136338fd1498Szrj compute_access_stride (loop_nest, loop, dr);
136438fd1498Szrj stride = DR_ACCESS_STRIDE (dr);
136538fd1498Szrj if (stride->length () < num_loops)
136638fd1498Szrj {
136738fd1498Szrj num_loops = stride->length ();
136838fd1498Szrj if (num_loops < 2)
136938fd1498Szrj return NULL;
137038fd1498Szrj }
137138fd1498Szrj }
137238fd1498Szrj
137338fd1498Szrj for (i = 0; datarefs.iterate (i, &dr); ++i)
137438fd1498Szrj {
137538fd1498Szrj stride = DR_ACCESS_STRIDE (dr);
137638fd1498Szrj if (stride->length () > num_loops)
137738fd1498Szrj stride->truncate (num_loops);
137838fd1498Szrj
137938fd1498Szrj for (j = 0; j < (num_loops >> 1); ++j)
138038fd1498Szrj std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
138138fd1498Szrj }
138238fd1498Szrj
138338fd1498Szrj loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
138438fd1498Szrj gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
138538fd1498Szrj return loop;
138638fd1498Szrj }
138738fd1498Szrj
138838fd1498Szrj /* Prune access strides for data references in DATAREFS by removing strides
138938fd1498Szrj of loops that isn't in current LOOP_NEST. */
139038fd1498Szrj
139138fd1498Szrj static void
prune_access_strides_not_in_loop(struct loop * loop_nest,struct loop * innermost,vec<data_reference_p> datarefs)139238fd1498Szrj prune_access_strides_not_in_loop (struct loop *loop_nest,
139338fd1498Szrj struct loop *innermost,
139438fd1498Szrj vec<data_reference_p> datarefs)
139538fd1498Szrj {
139638fd1498Szrj data_reference_p dr;
139738fd1498Szrj unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
139838fd1498Szrj gcc_assert (num_loops > 1);
139938fd1498Szrj
140038fd1498Szrj /* Block remove strides of loops that is not in current loop nest. */
140138fd1498Szrj for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
140238fd1498Szrj {
140338fd1498Szrj vec<tree> *stride = DR_ACCESS_STRIDE (dr);
140438fd1498Szrj if (stride->length () > num_loops)
140538fd1498Szrj stride->block_remove (0, stride->length () - num_loops);
140638fd1498Szrj }
140738fd1498Szrj }
140838fd1498Szrj
140938fd1498Szrj /* Dump access strides for all DATAREFS. */
141038fd1498Szrj
141138fd1498Szrj static void
dump_access_strides(vec<data_reference_p> datarefs)141238fd1498Szrj dump_access_strides (vec<data_reference_p> datarefs)
141338fd1498Szrj {
141438fd1498Szrj data_reference_p dr;
141538fd1498Szrj fprintf (dump_file, "Access Strides for DRs:\n");
141638fd1498Szrj for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
141738fd1498Szrj {
141838fd1498Szrj fprintf (dump_file, " ");
141938fd1498Szrj print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
142038fd1498Szrj fprintf (dump_file, ":\t\t<");
142138fd1498Szrj
142238fd1498Szrj vec<tree> *stride = DR_ACCESS_STRIDE (dr);
142338fd1498Szrj unsigned num_loops = stride->length ();
142438fd1498Szrj for (unsigned j = 0; j < num_loops; ++j)
142538fd1498Szrj {
142638fd1498Szrj print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
142738fd1498Szrj fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
142838fd1498Szrj }
142938fd1498Szrj }
143038fd1498Szrj }
143138fd1498Szrj
143238fd1498Szrj /* Return true if it's profitable to interchange two loops whose index
143338fd1498Szrj in whole loop nest vector are I_IDX/O_IDX respectively. The function
143438fd1498Szrj computes and compares three types information from all DATAREFS:
143538fd1498Szrj 1) Access stride for loop I_IDX and O_IDX.
143638fd1498Szrj 2) Number of invariant memory references with respect to I_IDX before
143738fd1498Szrj and after loop interchange.
143838fd1498Szrj 3) Flags indicating if all memory references access sequential memory
143938fd1498Szrj in ILOOP, before and after loop interchange.
144038fd1498Szrj If INNMOST_LOOP_P is true, the two loops for interchanging are the two
144138fd1498Szrj innermost loops in loop nest. This function also dumps information if
144238fd1498Szrj DUMP_INFO_P is true. */
144338fd1498Szrj
144438fd1498Szrj static bool
should_interchange_loops(unsigned i_idx,unsigned o_idx,vec<data_reference_p> datarefs,unsigned i_stmt_cost,unsigned o_stmt_cost,bool innermost_loops_p,bool dump_info_p=true)144538fd1498Szrj should_interchange_loops (unsigned i_idx, unsigned o_idx,
144638fd1498Szrj vec<data_reference_p> datarefs,
144738fd1498Szrj unsigned i_stmt_cost, unsigned o_stmt_cost,
144838fd1498Szrj bool innermost_loops_p, bool dump_info_p = true)
144938fd1498Szrj {
145038fd1498Szrj unsigned HOST_WIDE_INT ratio;
145138fd1498Szrj unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
145238fd1498Szrj struct data_reference *dr;
145338fd1498Szrj bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
145438fd1498Szrj widest_int iloop_strides = 0, oloop_strides = 0;
145538fd1498Szrj unsigned num_unresolved_drs = 0;
145638fd1498Szrj unsigned num_resolved_ok_drs = 0;
145738fd1498Szrj unsigned num_resolved_not_ok_drs = 0;
145838fd1498Szrj
145938fd1498Szrj if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
146038fd1498Szrj fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
146138fd1498Szrj
146238fd1498Szrj for (i = 0; datarefs.iterate (i, &dr); ++i)
146338fd1498Szrj {
146438fd1498Szrj vec<tree> *stride = DR_ACCESS_STRIDE (dr);
146538fd1498Szrj tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
146638fd1498Szrj
146738fd1498Szrj bool subloop_stride_p = false;
146838fd1498Szrj /* Data ref can't be invariant or sequential access at current loop if
146938fd1498Szrj its address changes with respect to any subloops. */
147038fd1498Szrj for (j = i_idx + 1; j < stride->length (); ++j)
147138fd1498Szrj if (!integer_zerop ((*stride)[j]))
147238fd1498Szrj {
147338fd1498Szrj subloop_stride_p = true;
147438fd1498Szrj break;
147538fd1498Szrj }
147638fd1498Szrj
147738fd1498Szrj if (integer_zerop (iloop_stride))
147838fd1498Szrj {
147938fd1498Szrj if (!subloop_stride_p)
148038fd1498Szrj num_old_inv_drs++;
148138fd1498Szrj }
148238fd1498Szrj if (integer_zerop (oloop_stride))
148338fd1498Szrj {
148438fd1498Szrj if (!subloop_stride_p)
148538fd1498Szrj num_new_inv_drs++;
148638fd1498Szrj }
148738fd1498Szrj
148838fd1498Szrj if (TREE_CODE (iloop_stride) == INTEGER_CST
148938fd1498Szrj && TREE_CODE (oloop_stride) == INTEGER_CST)
149038fd1498Szrj {
149138fd1498Szrj iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
149238fd1498Szrj oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
149338fd1498Szrj }
149438fd1498Szrj else if (multiple_of_p (TREE_TYPE (iloop_stride),
149538fd1498Szrj iloop_stride, oloop_stride))
149638fd1498Szrj num_resolved_ok_drs++;
149738fd1498Szrj else if (multiple_of_p (TREE_TYPE (iloop_stride),
149838fd1498Szrj oloop_stride, iloop_stride))
149938fd1498Szrj num_resolved_not_ok_drs++;
150038fd1498Szrj else
150138fd1498Szrj num_unresolved_drs++;
150238fd1498Szrj
150338fd1498Szrj /* Data ref can't be sequential access if its address changes in sub
150438fd1498Szrj loop. */
150538fd1498Szrj if (subloop_stride_p)
150638fd1498Szrj {
150738fd1498Szrj all_seq_dr_before_p = false;
150838fd1498Szrj all_seq_dr_after_p = false;
150938fd1498Szrj continue;
151038fd1498Szrj }
151138fd1498Szrj /* Track if all data references are sequential accesses before/after loop
151238fd1498Szrj interchange. Note invariant is considered sequential here. */
151338fd1498Szrj tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
151438fd1498Szrj if (all_seq_dr_before_p
151538fd1498Szrj && ! (integer_zerop (iloop_stride)
151638fd1498Szrj || operand_equal_p (access_size, iloop_stride, 0)))
151738fd1498Szrj all_seq_dr_before_p = false;
151838fd1498Szrj if (all_seq_dr_after_p
151938fd1498Szrj && ! (integer_zerop (oloop_stride)
152038fd1498Szrj || operand_equal_p (access_size, oloop_stride, 0)))
152138fd1498Szrj all_seq_dr_after_p = false;
152238fd1498Szrj }
152338fd1498Szrj
152438fd1498Szrj if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
152538fd1498Szrj {
152638fd1498Szrj fprintf (dump_file, "\toverall:\t\t");
152738fd1498Szrj print_decu (iloop_strides, dump_file);
152838fd1498Szrj fprintf (dump_file, "\t");
152938fd1498Szrj print_decu (oloop_strides, dump_file);
153038fd1498Szrj fprintf (dump_file, "\n");
153138fd1498Szrj
153238fd1498Szrj fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
153338fd1498Szrj num_old_inv_drs, num_new_inv_drs);
153438fd1498Szrj fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
153538fd1498Szrj all_seq_dr_before_p ? "true" : "false",
153638fd1498Szrj all_seq_dr_after_p ? "true" : "false");
153738fd1498Szrj fprintf (dump_file, "OK to interchage with variable strides: %d\n",
153838fd1498Szrj num_resolved_ok_drs);
153938fd1498Szrj fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
154038fd1498Szrj num_resolved_not_ok_drs);
154138fd1498Szrj fprintf (dump_file, "Variable strides we cannot decide: %d\n",
154238fd1498Szrj num_unresolved_drs);
154338fd1498Szrj fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
154438fd1498Szrj fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
154538fd1498Szrj }
154638fd1498Szrj
154738fd1498Szrj if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
154838fd1498Szrj return false;
154938fd1498Szrj
155038fd1498Szrj /* Stmts of outer loop will be moved to inner loop. If there are two many
155138fd1498Szrj such stmts, it could make inner loop costly. Here we compare stmt cost
155238fd1498Szrj between outer and inner loops. */
155338fd1498Szrj if (i_stmt_cost && o_stmt_cost
155438fd1498Szrj && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
155538fd1498Szrj && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
155638fd1498Szrj return false;
155738fd1498Szrj
155838fd1498Szrj /* We use different stride comparison ratio for interchanging innermost
155938fd1498Szrj two loops or not. The idea is to be conservative in interchange for
156038fd1498Szrj the innermost loops. */
156138fd1498Szrj ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
156238fd1498Szrj /* Do interchange if it gives better data locality behavior. */
156338fd1498Szrj if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
156438fd1498Szrj return true;
156538fd1498Szrj if (wi::gtu_p (iloop_strides, oloop_strides))
156638fd1498Szrj {
156738fd1498Szrj /* Or it creates more invariant memory references. */
156838fd1498Szrj if ((!all_seq_dr_before_p || all_seq_dr_after_p)
156938fd1498Szrj && num_new_inv_drs > num_old_inv_drs)
157038fd1498Szrj return true;
157138fd1498Szrj /* Or it makes all memory references sequential. */
157238fd1498Szrj if (num_new_inv_drs >= num_old_inv_drs
157338fd1498Szrj && !all_seq_dr_before_p && all_seq_dr_after_p)
157438fd1498Szrj return true;
157538fd1498Szrj }
157638fd1498Szrj
157738fd1498Szrj return false;
157838fd1498Szrj }
157938fd1498Szrj
158038fd1498Szrj /* Try to interchange inner loop of a loop nest to outer level. */
158138fd1498Szrj
158238fd1498Szrj bool
interchange(vec<data_reference_p> datarefs,vec<ddr_p> ddrs)158338fd1498Szrj tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
158438fd1498Szrj vec<ddr_p> ddrs)
158538fd1498Szrj {
158638fd1498Szrj location_t loc = find_loop_location (m_loop_nest[0]);
158738fd1498Szrj bool changed_p = false;
158838fd1498Szrj /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
158938fd1498Szrj The overall effect is to push inner loop to outermost level in whole
159038fd1498Szrj loop nest. */
159138fd1498Szrj for (unsigned i = m_loop_nest.length (); i > 1; --i)
159238fd1498Szrj {
159338fd1498Szrj unsigned i_idx = i - 1, o_idx = i - 2;
159438fd1498Szrj
159538fd1498Szrj /* Check validity for loop interchange. */
159638fd1498Szrj if (!valid_data_dependences (i_idx, o_idx, ddrs))
159738fd1498Szrj break;
159838fd1498Szrj
159938fd1498Szrj loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
160038fd1498Szrj loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
160138fd1498Szrj
160238fd1498Szrj /* Check if we can do transformation for loop interchange. */
160338fd1498Szrj if (!iloop.analyze_carried_vars (NULL)
160438fd1498Szrj || !iloop.analyze_lcssa_phis ()
160538fd1498Szrj || !oloop.analyze_carried_vars (&iloop)
160638fd1498Szrj || !oloop.analyze_lcssa_phis ()
160738fd1498Szrj || !iloop.can_interchange_p (NULL)
160838fd1498Szrj || !oloop.can_interchange_p (&iloop))
160938fd1498Szrj break;
161038fd1498Szrj
161138fd1498Szrj /* Outer loop's stmts will be moved to inner loop during interchange.
161238fd1498Szrj If there are many of them, it may make inner loop very costly. We
161338fd1498Szrj need to check number of outer loop's stmts in profit cost model of
161438fd1498Szrj interchange. */
161538fd1498Szrj int stmt_cost = oloop.m_num_stmts;
161638fd1498Szrj /* Count out the exit checking stmt of outer loop. */
161738fd1498Szrj stmt_cost --;
161838fd1498Szrj /* Count out IV's increasing stmt, IVOPTs takes care if it. */
161938fd1498Szrj stmt_cost -= oloop.m_inductions.length ();
162038fd1498Szrj /* Count in the additional load and cond_expr stmts caused by inner
162138fd1498Szrj loop's constant initialized reduction. */
162238fd1498Szrj stmt_cost += iloop.m_const_init_reduc * 2;
162338fd1498Szrj if (stmt_cost < 0)
162438fd1498Szrj stmt_cost = 0;
162538fd1498Szrj
162638fd1498Szrj /* Check profitability for loop interchange. */
162738fd1498Szrj if (should_interchange_loops (i_idx, o_idx, datarefs,
162838fd1498Szrj (unsigned) iloop.m_num_stmts,
162938fd1498Szrj (unsigned) stmt_cost,
163038fd1498Szrj iloop.m_loop->inner == NULL))
163138fd1498Szrj {
163238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
163338fd1498Szrj fprintf (dump_file,
163438fd1498Szrj "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
163538fd1498Szrj oloop.m_loop->num, iloop.m_loop->num);
163638fd1498Szrj
163738fd1498Szrj changed_p = true;
163838fd1498Szrj interchange_loops (iloop, oloop);
163938fd1498Szrj /* No need to update if there is no further loop interchange. */
164038fd1498Szrj if (o_idx > 0)
164138fd1498Szrj update_data_info (i_idx, o_idx, datarefs, ddrs);
164238fd1498Szrj }
164338fd1498Szrj else
164438fd1498Szrj {
164538fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
164638fd1498Szrj fprintf (dump_file,
164738fd1498Szrj "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
164838fd1498Szrj oloop.m_loop->num, iloop.m_loop->num);
164938fd1498Szrj }
165038fd1498Szrj }
165138fd1498Szrj simple_dce_from_worklist (m_dce_seeds);
165238fd1498Szrj
165338fd1498Szrj if (changed_p)
165438fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
165538fd1498Szrj "loops interchanged in loop nest\n");
165638fd1498Szrj
165738fd1498Szrj return changed_p;
165838fd1498Szrj }
165938fd1498Szrj
166038fd1498Szrj
166138fd1498Szrj /* Loop interchange pass. */
166238fd1498Szrj
166338fd1498Szrj namespace {
166438fd1498Szrj
166538fd1498Szrj const pass_data pass_data_linterchange =
166638fd1498Szrj {
166738fd1498Szrj GIMPLE_PASS, /* type */
166838fd1498Szrj "linterchange", /* name */
166938fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */
167038fd1498Szrj TV_LINTERCHANGE, /* tv_id */
167138fd1498Szrj PROP_cfg, /* properties_required */
167238fd1498Szrj 0, /* properties_provided */
167338fd1498Szrj 0, /* properties_destroyed */
167438fd1498Szrj 0, /* todo_flags_start */
167538fd1498Szrj 0, /* todo_flags_finish */
167638fd1498Szrj };
167738fd1498Szrj
167838fd1498Szrj class pass_linterchange : public gimple_opt_pass
167938fd1498Szrj {
168038fd1498Szrj public:
pass_linterchange(gcc::context * ctxt)168138fd1498Szrj pass_linterchange (gcc::context *ctxt)
168238fd1498Szrj : gimple_opt_pass (pass_data_linterchange, ctxt)
168338fd1498Szrj {}
168438fd1498Szrj
168538fd1498Szrj /* opt_pass methods: */
clone()168638fd1498Szrj opt_pass * clone () { return new pass_linterchange (m_ctxt); }
gate(function *)168738fd1498Szrj virtual bool gate (function *) { return flag_loop_interchange; }
168838fd1498Szrj virtual unsigned int execute (function *);
168938fd1498Szrj
169038fd1498Szrj }; // class pass_linterchange
169138fd1498Szrj
169238fd1498Szrj
169338fd1498Szrj /* Return true if LOOP has proper form for interchange. We check three
169438fd1498Szrj conditions in the function:
169538fd1498Szrj 1) In general, a loop can be interchanged only if it doesn't have
169638fd1498Szrj basic blocks other than header, exit and latch besides possible
169738fd1498Szrj inner loop nest. This basically restricts loop interchange to
169838fd1498Szrj below form loop nests:
169938fd1498Szrj
170038fd1498Szrj header<---+
170138fd1498Szrj | |
170238fd1498Szrj v |
170338fd1498Szrj INNER_LOOP |
170438fd1498Szrj | |
170538fd1498Szrj v |
170638fd1498Szrj exit--->latch
170738fd1498Szrj
170838fd1498Szrj 2) Data reference in basic block that executes in different times
170938fd1498Szrj than loop head/exit is not allowed.
171038fd1498Szrj 3) Record the innermost outer loop that doesn't form rectangle loop
171138fd1498Szrj nest with LOOP. */
171238fd1498Szrj
171338fd1498Szrj static bool
proper_loop_form_for_interchange(struct loop * loop,struct loop ** min_outer)171438fd1498Szrj proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
171538fd1498Szrj {
171638fd1498Szrj edge e0, e1, exit;
171738fd1498Szrj
171838fd1498Szrj /* Don't interchange if loop has unsupported information for the moment. */
171938fd1498Szrj if (loop->safelen > 0
172038fd1498Szrj || loop->constraints != 0
172138fd1498Szrj || loop->can_be_parallel
172238fd1498Szrj || loop->dont_vectorize
172338fd1498Szrj || loop->force_vectorize
172438fd1498Szrj || loop->in_oacc_kernels_region
172538fd1498Szrj || loop->orig_loop_num != 0
172638fd1498Szrj || loop->simduid != NULL_TREE)
172738fd1498Szrj return false;
172838fd1498Szrj
172938fd1498Szrj /* Don't interchange if outer loop has basic block other than header, exit
173038fd1498Szrj and latch. */
173138fd1498Szrj if (loop->inner != NULL
173238fd1498Szrj && loop->num_nodes != loop->inner->num_nodes + 3)
173338fd1498Szrj return false;
173438fd1498Szrj
173538fd1498Szrj if ((exit = single_dom_exit (loop)) == NULL)
173638fd1498Szrj return false;
173738fd1498Szrj
173838fd1498Szrj /* Check control flow on loop header/exit blocks. */
173938fd1498Szrj if (loop->header == exit->src
174038fd1498Szrj && (EDGE_COUNT (loop->header->preds) != 2
174138fd1498Szrj || EDGE_COUNT (loop->header->succs) != 2))
174238fd1498Szrj return false;
174338fd1498Szrj else if (loop->header != exit->src
174438fd1498Szrj && (EDGE_COUNT (loop->header->preds) != 2
174538fd1498Szrj || !single_succ_p (loop->header)
174638fd1498Szrj || unsupported_edge (single_succ_edge (loop->header))
174738fd1498Szrj || EDGE_COUNT (exit->src->succs) != 2
174838fd1498Szrj || !single_pred_p (exit->src)
174938fd1498Szrj || unsupported_edge (single_pred_edge (exit->src))))
175038fd1498Szrj return false;
175138fd1498Szrj
175238fd1498Szrj e0 = EDGE_PRED (loop->header, 0);
175338fd1498Szrj e1 = EDGE_PRED (loop->header, 1);
175438fd1498Szrj if (unsupported_edge (e0) || unsupported_edge (e1)
175538fd1498Szrj || (e0->src != loop->latch && e1->src != loop->latch)
175638fd1498Szrj || (e0->src->loop_father == loop && e1->src->loop_father == loop))
175738fd1498Szrj return false;
175838fd1498Szrj
175938fd1498Szrj e0 = EDGE_SUCC (exit->src, 0);
176038fd1498Szrj e1 = EDGE_SUCC (exit->src, 1);
176138fd1498Szrj if (unsupported_edge (e0) || unsupported_edge (e1)
176238fd1498Szrj || (e0->dest != loop->latch && e1->dest != loop->latch)
176338fd1498Szrj || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
176438fd1498Szrj return false;
176538fd1498Szrj
176638fd1498Szrj /* Don't interchange if any reference is in basic block that doesn't
176738fd1498Szrj dominate exit block. */
176838fd1498Szrj basic_block *bbs = get_loop_body (loop);
176938fd1498Szrj for (unsigned i = 0; i < loop->num_nodes; i++)
177038fd1498Szrj {
177138fd1498Szrj basic_block bb = bbs[i];
177238fd1498Szrj
177338fd1498Szrj if (bb->loop_father != loop
177438fd1498Szrj || bb == loop->header || bb == exit->src
177538fd1498Szrj || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
177638fd1498Szrj continue;
177738fd1498Szrj
177838fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
177938fd1498Szrj !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
178038fd1498Szrj if (gimple_vuse (gsi_stmt (gsi)))
178138fd1498Szrj {
178238fd1498Szrj free (bbs);
178338fd1498Szrj return false;
178438fd1498Szrj }
178538fd1498Szrj }
178638fd1498Szrj free (bbs);
178738fd1498Szrj
178838fd1498Szrj tree niters = number_of_latch_executions (loop);
178938fd1498Szrj niters = analyze_scalar_evolution (loop_outer (loop), niters);
179038fd1498Szrj if (!niters || chrec_contains_undetermined (niters))
179138fd1498Szrj return false;
179238fd1498Szrj
179338fd1498Szrj /* Record the innermost outer loop that doesn't form rectangle loop nest. */
179438fd1498Szrj for (loop_p loop2 = loop_outer (loop);
179538fd1498Szrj loop2 && flow_loop_nested_p (*min_outer, loop2);
179638fd1498Szrj loop2 = loop_outer (loop2))
179738fd1498Szrj {
179838fd1498Szrj niters = instantiate_scev (loop_preheader_edge (loop2),
179938fd1498Szrj loop_outer (loop), niters);
180038fd1498Szrj if (!evolution_function_is_invariant_p (niters, loop2->num))
180138fd1498Szrj {
180238fd1498Szrj *min_outer = loop2;
180338fd1498Szrj break;
180438fd1498Szrj }
180538fd1498Szrj }
180638fd1498Szrj return true;
180738fd1498Szrj }
180838fd1498Szrj
180938fd1498Szrj /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
181038fd1498Szrj should be interchanged by looking into all DATAREFS. */
181138fd1498Szrj
181238fd1498Szrj static bool
should_interchange_loop_nest(struct loop * loop_nest,struct loop * innermost,vec<data_reference_p> datarefs)181338fd1498Szrj should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
181438fd1498Szrj vec<data_reference_p> datarefs)
181538fd1498Szrj {
181638fd1498Szrj unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
181738fd1498Szrj gcc_assert (idx > 0);
181838fd1498Szrj
181938fd1498Szrj /* Check if any two adjacent loops should be interchanged. */
182038fd1498Szrj for (struct loop *loop = innermost;
182138fd1498Szrj loop != loop_nest; loop = loop_outer (loop), idx--)
182238fd1498Szrj if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
182338fd1498Szrj loop == innermost, false))
182438fd1498Szrj return true;
182538fd1498Szrj
182638fd1498Szrj return false;
182738fd1498Szrj }
182838fd1498Szrj
182938fd1498Szrj /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
183038fd1498Szrj dependences for loop interchange and store it in DDRS. Note we compute
183138fd1498Szrj dependences directly rather than call generic interface so that we can
183238fd1498Szrj return on unknown dependence instantly. */
183338fd1498Szrj
183438fd1498Szrj static bool
tree_loop_interchange_compute_ddrs(vec<loop_p> loop_nest,vec<data_reference_p> datarefs,vec<ddr_p> * ddrs)183538fd1498Szrj tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
183638fd1498Szrj vec<data_reference_p> datarefs,
183738fd1498Szrj vec<ddr_p> *ddrs)
183838fd1498Szrj {
183938fd1498Szrj struct data_reference *a, *b;
184038fd1498Szrj struct loop *innermost = loop_nest.last ();
184138fd1498Szrj
184238fd1498Szrj for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
184338fd1498Szrj {
184438fd1498Szrj bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
184538fd1498Szrj for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
184638fd1498Szrj if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
184738fd1498Szrj {
184838fd1498Szrj bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
184938fd1498Szrj /* Don't support multiple write references in outer loop. */
185038fd1498Szrj if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
185138fd1498Szrj return false;
185238fd1498Szrj
185338fd1498Szrj ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
185438fd1498Szrj ddrs->safe_push (ddr);
185538fd1498Szrj compute_affine_dependence (ddr, loop_nest[0]);
185638fd1498Szrj
185738fd1498Szrj /* Give up if ddr is unknown dependence or classic direct vector
185838fd1498Szrj is not available. */
185938fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
186038fd1498Szrj || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
186138fd1498Szrj && DDR_NUM_DIR_VECTS (ddr) == 0))
186238fd1498Szrj return false;
186338fd1498Szrj
186438fd1498Szrj /* If either data references is in outer loop of nest, we require
186538fd1498Szrj no dependence here because the data reference need to be moved
186638fd1498Szrj into inner loop during interchange. */
186738fd1498Szrj if (a_outer_p && b_outer_p
186838fd1498Szrj && operand_equal_p (DR_REF (a), DR_REF (b), 0))
186938fd1498Szrj continue;
187038fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) != chrec_known
187138fd1498Szrj && (a_outer_p || b_outer_p))
187238fd1498Szrj return false;
187338fd1498Szrj }
187438fd1498Szrj }
187538fd1498Szrj
187638fd1498Szrj return true;
187738fd1498Szrj }
187838fd1498Szrj
187938fd1498Szrj /* Prune DATAREFS by removing any data reference not inside of LOOP. */
188038fd1498Szrj
188138fd1498Szrj static inline void
prune_datarefs_not_in_loop(struct loop * loop,vec<data_reference_p> datarefs)188238fd1498Szrj prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
188338fd1498Szrj {
188438fd1498Szrj unsigned i, j;
188538fd1498Szrj struct data_reference *dr;
188638fd1498Szrj
188738fd1498Szrj for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
188838fd1498Szrj {
188938fd1498Szrj if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
189038fd1498Szrj datarefs[j++] = dr;
189138fd1498Szrj else
189238fd1498Szrj {
189338fd1498Szrj if (dr->aux)
189438fd1498Szrj {
189538fd1498Szrj DR_ACCESS_STRIDE (dr)->release ();
189638fd1498Szrj delete (vec<tree> *) dr->aux;
189738fd1498Szrj }
189838fd1498Szrj free_data_ref (dr);
189938fd1498Szrj }
190038fd1498Szrj }
190138fd1498Szrj datarefs.truncate (j);
190238fd1498Szrj }
190338fd1498Szrj
190438fd1498Szrj /* Find and store data references in DATAREFS for LOOP nest. If there's
190538fd1498Szrj difficult data reference in a basic block, we shrink the loop nest to
190638fd1498Szrj inner loop of that basic block's father loop. On success, return the
190738fd1498Szrj outer loop of the result loop nest. */
190838fd1498Szrj
190938fd1498Szrj static struct loop *
prepare_data_references(struct loop * loop,vec<data_reference_p> * datarefs)191038fd1498Szrj prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
191138fd1498Szrj {
191238fd1498Szrj struct loop *loop_nest = loop;
191338fd1498Szrj vec<data_reference_p> *bb_refs;
191438fd1498Szrj basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
191538fd1498Szrj
191638fd1498Szrj for (unsigned i = 0; i < loop->num_nodes; i++)
191738fd1498Szrj bbs[i]->aux = NULL;
191838fd1498Szrj
191938fd1498Szrj /* Find data references for all basic blocks. Shrink loop nest on difficult
192038fd1498Szrj data reference. */
192138fd1498Szrj for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
192238fd1498Szrj {
192338fd1498Szrj bb = bbs[i];
192438fd1498Szrj if (!flow_bb_inside_loop_p (loop_nest, bb))
192538fd1498Szrj continue;
192638fd1498Szrj
192738fd1498Szrj bb_refs = new vec<data_reference_p> ();
192838fd1498Szrj if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
192938fd1498Szrj {
193038fd1498Szrj loop_nest = bb->loop_father->inner;
193138fd1498Szrj if (loop_nest && !loop_nest->inner)
193238fd1498Szrj loop_nest = NULL;
193338fd1498Szrj
193438fd1498Szrj free_data_refs (*bb_refs);
193538fd1498Szrj delete bb_refs;
193638fd1498Szrj }
193738fd1498Szrj else if (bb_refs->is_empty ())
193838fd1498Szrj delete bb_refs;
193938fd1498Szrj else
194038fd1498Szrj bb->aux = bb_refs;
194138fd1498Szrj }
194238fd1498Szrj
194338fd1498Szrj /* Collect all data references in loop nest. */
194438fd1498Szrj for (unsigned i = 0; i < loop->num_nodes; i++)
194538fd1498Szrj {
194638fd1498Szrj bb = bbs[i];
194738fd1498Szrj if (!bb->aux)
194838fd1498Szrj continue;
194938fd1498Szrj
195038fd1498Szrj bb_refs = (vec<data_reference_p> *) bb->aux;
195138fd1498Szrj if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
195238fd1498Szrj datarefs->safe_splice (*bb_refs);
195338fd1498Szrj else
195438fd1498Szrj free_data_refs (*bb_refs);
195538fd1498Szrj
195638fd1498Szrj delete bb_refs;
195738fd1498Szrj bb->aux = NULL;
195838fd1498Szrj }
195938fd1498Szrj free (bbs);
196038fd1498Szrj
196138fd1498Szrj return loop_nest;
196238fd1498Szrj }
196338fd1498Szrj
196438fd1498Szrj /* Given innermost LOOP, return true if perfect loop nest can be found and
196538fd1498Szrj data dependences can be computed. If succeed, record the perfect loop
196638fd1498Szrj nest in LOOP_NEST; record all data references in DATAREFS and record all
196738fd1498Szrj data dependence relations in DDRS.
196838fd1498Szrj
196938fd1498Szrj We do support a restricted form of imperfect loop nest, i.e, loop nest
197038fd1498Szrj with load/store in outer loop initializing/finalizing simple reduction
197138fd1498Szrj of the innermost loop. For such outer loop reference, we require that
197238fd1498Szrj it has no dependence with others sinve it will be moved to inner loop
197338fd1498Szrj in interchange. */
197438fd1498Szrj
197538fd1498Szrj static bool
prepare_perfect_loop_nest(struct loop * loop,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * ddrs)197638fd1498Szrj prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
197738fd1498Szrj vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
197838fd1498Szrj {
197938fd1498Szrj struct loop *start_loop = NULL, *innermost = loop;
198038fd1498Szrj struct loop *outermost = loops_for_fn (cfun)->tree_root;
198138fd1498Szrj
198238fd1498Szrj /* Find loop nest from the innermost loop. The outermost is the innermost
198338fd1498Szrj outer*/
198438fd1498Szrj while (loop->num != 0 && loop->inner == start_loop
198538fd1498Szrj && flow_loop_nested_p (outermost, loop))
198638fd1498Szrj {
198738fd1498Szrj if (!proper_loop_form_for_interchange (loop, &outermost))
198838fd1498Szrj break;
198938fd1498Szrj
199038fd1498Szrj start_loop = loop;
199138fd1498Szrj /* If this loop has sibling loop, the father loop won't be in perfect
199238fd1498Szrj loop nest. */
199338fd1498Szrj if (loop->next != NULL)
199438fd1498Szrj break;
199538fd1498Szrj
199638fd1498Szrj loop = loop_outer (loop);
199738fd1498Szrj }
199838fd1498Szrj if (!start_loop || !start_loop->inner)
199938fd1498Szrj return false;
200038fd1498Szrj
200138fd1498Szrj /* Prepare the data reference vector for the loop nest, pruning outer
200238fd1498Szrj loops we cannot handle. */
200338fd1498Szrj start_loop = prepare_data_references (start_loop, datarefs);
200438fd1498Szrj if (!start_loop
200538fd1498Szrj /* Check if there is no data reference. */
200638fd1498Szrj || datarefs->is_empty ()
200738fd1498Szrj /* Check if there are too many of data references. */
200838fd1498Szrj || (int) datarefs->length () > MAX_DATAREFS)
200938fd1498Szrj return false;
201038fd1498Szrj
201138fd1498Szrj /* Compute access strides for all data references, pruning outer
201238fd1498Szrj loops we cannot analyze refs in. */
201338fd1498Szrj start_loop = compute_access_strides (start_loop, innermost, *datarefs);
201438fd1498Szrj if (!start_loop)
201538fd1498Szrj return false;
201638fd1498Szrj
201738fd1498Szrj /* Check if any interchange is profitable in the loop nest. */
201838fd1498Szrj if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
201938fd1498Szrj return false;
202038fd1498Szrj
202138fd1498Szrj /* Check if data dependences can be computed for loop nest starting from
202238fd1498Szrj start_loop. */
202338fd1498Szrj loop = start_loop;
202438fd1498Szrj do {
202538fd1498Szrj loop_nest->truncate (0);
202638fd1498Szrj
202738fd1498Szrj if (loop != start_loop)
202838fd1498Szrj prune_datarefs_not_in_loop (start_loop, *datarefs);
202938fd1498Szrj
203038fd1498Szrj if (find_loop_nest (start_loop, loop_nest)
203138fd1498Szrj && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
203238fd1498Szrj {
203338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
203438fd1498Szrj fprintf (dump_file,
203538fd1498Szrj "\nConsider loop interchange for loop_nest<%d - %d>\n",
203638fd1498Szrj start_loop->num, innermost->num);
203738fd1498Szrj
203838fd1498Szrj if (loop != start_loop)
203938fd1498Szrj prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
204038fd1498Szrj
204138fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
204238fd1498Szrj dump_access_strides (*datarefs);
204338fd1498Szrj
204438fd1498Szrj return true;
204538fd1498Szrj }
204638fd1498Szrj
204738fd1498Szrj free_dependence_relations (*ddrs);
204838fd1498Szrj *ddrs = vNULL;
204938fd1498Szrj /* Try to compute data dependences with the outermost loop stripped. */
205038fd1498Szrj loop = start_loop;
205138fd1498Szrj start_loop = start_loop->inner;
205238fd1498Szrj } while (start_loop && start_loop->inner);
205338fd1498Szrj
205438fd1498Szrj return false;
205538fd1498Szrj }
205638fd1498Szrj
205738fd1498Szrj /* Main entry for loop interchange pass. */
205838fd1498Szrj
205938fd1498Szrj unsigned int
execute(function * fun)206038fd1498Szrj pass_linterchange::execute (function *fun)
206138fd1498Szrj {
206238fd1498Szrj if (number_of_loops (fun) <= 2)
206338fd1498Szrj return 0;
206438fd1498Szrj
206538fd1498Szrj bool changed_p = false;
206638fd1498Szrj struct loop *loop;
206738fd1498Szrj FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
206838fd1498Szrj {
206938fd1498Szrj vec<loop_p> loop_nest = vNULL;
207038fd1498Szrj vec<data_reference_p> datarefs = vNULL;
207138fd1498Szrj vec<ddr_p> ddrs = vNULL;
207238fd1498Szrj if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
207338fd1498Szrj {
207438fd1498Szrj tree_loop_interchange loop_interchange (loop_nest);
207538fd1498Szrj changed_p |= loop_interchange.interchange (datarefs, ddrs);
207638fd1498Szrj }
207738fd1498Szrj free_dependence_relations (ddrs);
207838fd1498Szrj free_data_refs_with_aux (datarefs);
207938fd1498Szrj loop_nest.release ();
208038fd1498Szrj }
208138fd1498Szrj
208238fd1498Szrj return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
208338fd1498Szrj }
208438fd1498Szrj
208538fd1498Szrj } // anon namespace
208638fd1498Szrj
208738fd1498Szrj gimple_opt_pass *
make_pass_linterchange(gcc::context * ctxt)208838fd1498Szrj make_pass_linterchange (gcc::context *ctxt)
208938fd1498Szrj {
209038fd1498Szrj return new pass_linterchange (ctxt);
209138fd1498Szrj }
2092